xref: /netbsd-src/sys/kern/vfs_subr.c (revision da9817918ec7e88db2912a2882967c7570a83f47)
1 /*	$NetBSD: vfs_subr.c,v 1.379 2009/05/16 08:29:53 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  * (c) UNIX System Laboratories, Inc.
37  * All or some portions of this file are derived from material licensed
38  * to the University of California by American Telephone and Telegraph
39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40  * the permission of UNIX System Laboratories, Inc.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
67  */
68 
69 /*
70  * Note on v_usecount and locking:
71  *
72  * At nearly all points where it is known that v_usecount could be zero,
73  * the vnode interlock will be held.
74  *
75  * To change v_usecount away from zero, the interlock must be held.  To
76  * change from a non-zero value to zero, again the interlock must be
77  * held.
78  *
79  * There's a flag bit, VC_XLOCK, embedded in v_usecount.
80  * To raise v_usecount, if the VC_XLOCK bit is set in it, the interlock
81  * must be held.
82  * To modify the VC_XLOCK bit, the interlock must be held.
83  * We always keep the usecount (v_usecount & VC_MASK) non-zero while the
84  * VC_XLOCK bit is set.
85  *
86  * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero
87  * value to a non-zero value can safely be done using atomic operations,
88  * without the interlock held.
89  * Even if the VC_XLOCK bit is set, decreasing the usecount to a non-zero
90  * value can be done using atomic operations, without the interlock held.
91  */
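/*
 * Illustrative sketch only (vtryget() below is the real code): raising
 * the usecount without the interlock, refusing when VC_XLOCK is set or
 * when the count is zero, in which case the interlock must be taken
 * instead:
 *
 *	u_int use, next;
 *
 *	for (use = vp->v_usecount;; use = next) {
 *		if (use == 0 || (use & VC_XLOCK) != 0)
 *			return false;
 *		next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
 *		if (next == use)
 *			return true;
 *	}
 */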
92 
93 #include <sys/cdefs.h>
94 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.379 2009/05/16 08:29:53 yamt Exp $");
95 
96 #include "opt_ddb.h"
97 #include "opt_compat_netbsd.h"
98 #include "opt_compat_43.h"
99 
100 #include <sys/param.h>
101 #include <sys/systm.h>
102 #include <sys/conf.h>
103 #include <sys/proc.h>
104 #include <sys/kernel.h>
105 #include <sys/mount.h>
106 #include <sys/fcntl.h>
107 #include <sys/vnode.h>
108 #include <sys/stat.h>
109 #include <sys/namei.h>
110 #include <sys/ucred.h>
111 #include <sys/buf.h>
112 #include <sys/errno.h>
113 #include <sys/kmem.h>
114 #include <sys/syscallargs.h>
115 #include <sys/device.h>
116 #include <sys/filedesc.h>
117 #include <sys/kauth.h>
118 #include <sys/atomic.h>
119 #include <sys/kthread.h>
120 #include <sys/wapbl.h>
121 
122 #include <miscfs/specfs/specdev.h>
123 #include <miscfs/syncfs/syncfs.h>
124 
125 #include <uvm/uvm.h>
126 #include <uvm/uvm_readahead.h>
127 #include <uvm/uvm_ddb.h>
128 
129 #include <sys/sysctl.h>
130 
131 const enum vtype iftovt_tab[16] = {
132 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
133 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
134 };
135 const int	vttoif_tab[9] = {
136 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
137 	S_IFSOCK, S_IFIFO, S_IFMT,
138 };
139 
140 /*
141  * Insq/Remq for the vnode usage lists.
142  */
143 #define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
144 #define	bufremvn(bp) {							\
145 	LIST_REMOVE(bp, b_vnbufs);					\
146 	(bp)->b_vnbufs.le_next = NOLIST;				\
147 }
148 
149 int doforce = 1;		/* 1 => permit forcible unmounting */
150 int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
151 
152 static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
153 static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
154 static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
155 
156 struct mntlist mountlist =			/* mounted filesystem list */
157     CIRCLEQ_HEAD_INITIALIZER(mountlist);
158 
159 u_int numvnodes;
160 static specificdata_domain_t mount_specificdata_domain;
161 
162 static int vrele_pending;
163 static int vrele_gen;
164 static kmutex_t	vrele_lock;
165 static kcondvar_t vrele_cv;
166 static lwp_t *vrele_lwp;
167 
168 kmutex_t mountlist_lock;
169 kmutex_t mntid_lock;
170 kmutex_t mntvnode_lock;
171 kmutex_t vnode_free_list_lock;
172 kmutex_t vfs_list_lock;
173 
174 static pool_cache_t vnode_cache;
175 
176 /*
177  * These define the root filesystem and device.
178  */
179 struct vnode *rootvnode;
180 struct device *root_device;			/* root device */
181 
182 /*
183  * Local declarations.
184  */
185 
186 static void vrele_thread(void *);
187 static void insmntque(vnode_t *, struct mount *);
188 static int getdevvp(dev_t, vnode_t **, enum vtype);
189 static vnode_t *getcleanvnode(void);
190 void vpanic(vnode_t *, const char *);
191 
192 #ifdef DEBUG
193 void printlockedvnodes(void);
194 #endif
195 
196 #ifdef DIAGNOSTIC
197 void
198 vpanic(vnode_t *vp, const char *msg)
199 {
200 
201 	vprint(NULL, vp);
202 	panic("%s\n", msg);
203 }
204 #else
205 #define	vpanic(vp, msg)	/* nothing */
206 #endif
207 
208 void
209 vn_init1(void)
210 {
211 
212 	vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
213 	    NULL, IPL_NONE, NULL, NULL, NULL);
214 	KASSERT(vnode_cache != NULL);
215 
216 	/* Create deferred release thread. */
217 	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
218 	cv_init(&vrele_cv, "vrele");
219 	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
220 	    NULL, &vrele_lwp, "vrele"))
221 		panic("fork vrele");
222 }
223 
224 /*
225  * Initialize the vnode management data structures.
226  */
227 void
228 vntblinit(void)
229 {
230 
231 	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
232 	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
233 	mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
234 	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
235 	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
236 
237 	mount_specificdata_domain = specificdata_domain_create();
238 
239 	/* Initialize the filesystem syncer. */
240 	vn_initialize_syncerd();
241 	vn_init1();
242 }
243 
244 int
245 vfs_drainvnodes(long target, struct lwp *l)
246 {
247 
248 	while (numvnodes > target) {
249 		vnode_t *vp;
250 
251 		mutex_enter(&vnode_free_list_lock);
252 		vp = getcleanvnode();
253 		if (vp == NULL)
254 			return EBUSY; /* give up */
255 		ungetnewvnode(vp);
256 	}
257 
258 	return 0;
259 }
260 
261 /*
262  * Lookup a mount point by filesystem identifier.
263  *
264  * XXX Needs to add a reference to the mount point.
265  */
266 struct mount *
267 vfs_getvfs(fsid_t *fsid)
268 {
269 	struct mount *mp;
270 
271 	mutex_enter(&mountlist_lock);
272 	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
273 		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
274 		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
275 			mutex_exit(&mountlist_lock);
276 			return (mp);
277 		}
278 	}
279 	mutex_exit(&mountlist_lock);
280 	return ((struct mount *)0);
281 }
282 
283 /*
284  * Drop a reference to a mount structure, freeing if the last reference.
285  */
286 void
287 vfs_destroy(struct mount *mp)
288 {
289 
290 	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
291 		return;
292 	}
293 
294 	/*
295 	 * Nothing else has visibility of the mount: we can now
296 	 * free the data structures.
297 	 */
298 	KASSERT(mp->mnt_refcnt == 0);
299 	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
300 	rw_destroy(&mp->mnt_unmounting);
301 	mutex_destroy(&mp->mnt_updating);
302 	mutex_destroy(&mp->mnt_renamelock);
303 	if (mp->mnt_op != NULL) {
304 		vfs_delref(mp->mnt_op);
305 	}
306 	kmem_free(mp, sizeof(*mp));
307 }
308 
309 /*
310  * Grab a vnode from the freelist and clean it.
311  */
312 vnode_t *
313 getcleanvnode(void)
314 {
315 	vnode_t *vp;
316 	vnodelst_t *listhd;
317 
318 	KASSERT(mutex_owned(&vnode_free_list_lock));
319 
320 retry:
321 	listhd = &vnode_free_list;
322 try_nextlist:
323 	TAILQ_FOREACH(vp, listhd, v_freelist) {
324 		/*
325 		 * It's safe to test v_usecount and v_iflag
326 		 * without holding the interlock here: vnodes
327 		 * with a non-zero usecount or with VI_CLEAN set
328 		 * should never appear on these lists.
329 		 */
330 		if (vp->v_usecount != 0) {
331 			vpanic(vp, "free vnode isn't");
332 		}
333 		if ((vp->v_iflag & VI_CLEAN) != 0) {
334 			vpanic(vp, "clean vnode on freelist");
335 		}
336 		if (vp->v_freelisthd != listhd) {
337 			printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
338 			vpanic(vp, "list head mismatch");
339 		}
340 		if (!mutex_tryenter(&vp->v_interlock))
341 			continue;
342 		/*
343 		 * Our lwp might hold the underlying vnode
344 		 * locked, so don't try to reclaim a VI_LAYER
345 		 * node if it's locked.
346 		 */
347 		if ((vp->v_iflag & VI_XLOCK) == 0 &&
348 		    ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
349 			break;
350 		}
351 		mutex_exit(&vp->v_interlock);
352 	}
353 
354 	if (vp == NULL) {
355 		if (listhd == &vnode_free_list) {
356 			listhd = &vnode_hold_list;
357 			goto try_nextlist;
358 		}
359 		mutex_exit(&vnode_free_list_lock);
360 		return NULL;
361 	}
362 
363 	/* Remove it from the freelist. */
364 	TAILQ_REMOVE(listhd, vp, v_freelist);
365 	vp->v_freelisthd = NULL;
366 	mutex_exit(&vnode_free_list_lock);
367 
368 	/*
369 	 * The vnode is still associated with a file system, so we must
370 	 * clean it out before reusing it.  We need to add a reference
371 	 * before doing this.  If the vnode gains another reference while
372 	 * being cleaned out, then we lose; retry.
373 	 */
374 	atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK);
375 	vclean(vp, DOCLOSE);
376 	KASSERT(vp->v_usecount >= 1 + VC_XLOCK);
377 	atomic_add_int(&vp->v_usecount, -VC_XLOCK);
378 	if (vp->v_usecount == 1) {
379 		/* We're about to dirty it. */
380 		vp->v_iflag &= ~VI_CLEAN;
381 		mutex_exit(&vp->v_interlock);
382 		if (vp->v_type == VBLK || vp->v_type == VCHR) {
383 			spec_node_destroy(vp);
384 		}
385 		vp->v_type = VNON;
386 	} else {
387 		/*
388 		 * Don't return to freelist - the holder of the last
389 		 * reference will destroy it.
390 		 */
391 		vrelel(vp, 0); /* releases vp->v_interlock */
392 		mutex_enter(&vnode_free_list_lock);
393 		goto retry;
394 	}
395 
396 	if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
397 	    !TAILQ_EMPTY(&vp->v_uobj.memq)) {
398 		vpanic(vp, "cleaned vnode isn't");
399 	}
400 	if (vp->v_numoutput != 0) {
401 		vpanic(vp, "clean vnode has pending I/O's");
402 	}
403 	if ((vp->v_iflag & VI_ONWORKLST) != 0) {
404 		vpanic(vp, "clean vnode on syncer list");
405 	}
406 
407 	return vp;
408 }
409 
410 /*
411  * Mark a mount point as busy, and gain a new reference to it.  Used to
412  * prevent the file system from being unmounted during critical sections.
413  *
414  * => The caller must hold a pre-existing reference to the mount.
415  * => Will fail if the file system is being unmounted, or is unmounted.
416  */
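/*
 * Usage sketch (illustration only): callers pair vfs_busy() with
 * vfs_unbusy() around an operation that must not race with unmount:
 *
 *	if ((error = vfs_busy(mp, NULL)) != 0)
 *		return error;
 *	... operate on the file system ...
 *	vfs_unbusy(mp, false, NULL);
 */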
417 int
418 vfs_busy(struct mount *mp, struct mount **nextp)
419 {
420 
421 	KASSERT(mp->mnt_refcnt > 0);
422 
423 	if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
424 		if (nextp != NULL) {
425 			KASSERT(mutex_owned(&mountlist_lock));
426 			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
427 		}
428 		return EBUSY;
429 	}
430 	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
431 		rw_exit(&mp->mnt_unmounting);
432 		if (nextp != NULL) {
433 			KASSERT(mutex_owned(&mountlist_lock));
434 			*nextp = CIRCLEQ_NEXT(mp, mnt_list);
435 		}
436 		return ENOENT;
437 	}
438 	if (nextp != NULL) {
439 		mutex_exit(&mountlist_lock);
440 	}
441 	atomic_inc_uint(&mp->mnt_refcnt);
442 	return 0;
443 }
444 
445 /*
446  * Unbusy a busy filesystem.
447  *
448  * => If keepref is true, preserve reference added by vfs_busy().
449  * => If nextp != NULL, acquire mountlist_lock.
450  */
451 void
452 vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
453 {
454 
455 	KASSERT(mp->mnt_refcnt > 0);
456 
457 	if (nextp != NULL) {
458 		mutex_enter(&mountlist_lock);
459 	}
460 	rw_exit(&mp->mnt_unmounting);
461 	if (!keepref) {
462 		vfs_destroy(mp);
463 	}
464 	if (nextp != NULL) {
465 		KASSERT(mutex_owned(&mountlist_lock));
466 		*nextp = CIRCLEQ_NEXT(mp, mnt_list);
467 	}
468 }
469 
470 struct mount *
471 vfs_mountalloc(struct vfsops *vfsops, struct vnode *vp)
472 {
473 	int error;
474 	struct mount *mp;
475 
476 	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
477 	if (mp == NULL)
478 		return NULL;
479 
480 	mp->mnt_op = vfsops;
481 	mp->mnt_refcnt = 1;
482 	TAILQ_INIT(&mp->mnt_vnodelist);
483 	rw_init(&mp->mnt_unmounting);
484 	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
485 	mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
486 	error = vfs_busy(mp, NULL);
487 	KASSERT(error == 0);
488 	mp->mnt_vnodecovered = vp;
489 	mount_initspecific(mp);
490 
491 	return mp;
492 }
493 
494 /*
495  * Lookup a filesystem type, and if found allocate and initialize
496  * a mount structure for it.
497  *
498  * Devname is usually updated by mount(8) after booting.
499  */
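/*
 * Usage sketch (illustration only; "ffs" and the device name are just
 * examples): root file system setup might do:
 *
 *	struct mount *mp;
 *
 *	error = vfs_rootmountalloc("ffs", device_xname(root_device), &mp);
 *	if (error)
 *		return error;
 *	... VFS_MOUNTROOT(), statvfs fixup ...
 *	vfs_unbusy(mp, false, NULL);
 */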
500 int
501 vfs_rootmountalloc(const char *fstypename, const char *devname,
502     struct mount **mpp)
503 {
504 	struct vfsops *vfsp = NULL;
505 	struct mount *mp;
506 
507 	mutex_enter(&vfs_list_lock);
508 	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
509 		if (!strncmp(vfsp->vfs_name, fstypename,
510 		    sizeof(mp->mnt_stat.f_fstypename)))
511 			break;
512 	if (vfsp == NULL) {
513 		mutex_exit(&vfs_list_lock);
514 		return (ENODEV);
515 	}
516 	vfsp->vfs_refcount++;
517 	mutex_exit(&vfs_list_lock);
518 
519 	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
520 		return ENOMEM;
521 	mp->mnt_flag = MNT_RDONLY;
522 	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
523 	    sizeof(mp->mnt_stat.f_fstypename));
524 	mp->mnt_stat.f_mntonname[0] = '/';
525 	mp->mnt_stat.f_mntonname[1] = '\0';
526 	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
527 	    '\0';
528 	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
529 	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
530 	*mpp = mp;
531 	return (0);
532 }
533 
534 /*
535  * Routines having to do with the management of the vnode table.
536  */
537 extern int (**dead_vnodeop_p)(void *);
538 
539 /*
540  * Return the next vnode from the free list.
541  */
542 int
543 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
544 	    vnode_t **vpp)
545 {
546 	struct uvm_object *uobj;
547 	static int toggle;
548 	vnode_t *vp;
549 	int error = 0, tryalloc;
550 
551  try_again:
552 	if (mp != NULL) {
553 		/*
554 		 * Mark filesystem busy while we're creating a
555 		 * vnode.  If unmount is in progress, this will
556 		 * fail.
557 		 */
558 		error = vfs_busy(mp, NULL);
559 		if (error)
560 			return error;
561 	}
562 
563 	/*
564 	 * We must choose whether to allocate a new vnode or recycle an
565 	 * existing one. The criterion for allocating a new one is that
566 	 * the total number of vnodes is less than the number desired or
567 	 * there are no vnodes on either free list. Generally we only
568 	 * want to recycle vnodes that have no buffers associated with
569 	 * them, so we look first on the vnode_free_list. If it is empty,
570 	 * we next consider vnodes with referencing buffers on the
571 	 * vnode_hold_list. The toggle ensures that half the time we
572 	 * will use a vnode from the vnode_hold_list, and half the time
573 	 * we will allocate a new one unless the list has grown to twice
574 	 * the desired size. We are reluctant to recycle vnodes from the
575 	 * vnode_hold_list because we will lose the identity of all its
576 	 * referencing buffers.
577 	 */
578 
579 	vp = NULL;
580 
581 	mutex_enter(&vnode_free_list_lock);
582 
583 	toggle ^= 1;
584 	if (numvnodes > 2 * desiredvnodes)
585 		toggle = 0;
586 
587 	tryalloc = numvnodes < desiredvnodes ||
588 	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
589 	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
590 
591 	if (tryalloc) {
592 		numvnodes++;
593 		mutex_exit(&vnode_free_list_lock);
594 		if ((vp = vnalloc(NULL)) == NULL) {
595 			mutex_enter(&vnode_free_list_lock);
596 			numvnodes--;
597 		} else
598 			vp->v_usecount = 1;
599 	}
600 
601 	if (vp == NULL) {
602 		vp = getcleanvnode();
603 		if (vp == NULL) {
604 			if (mp != NULL) {
605 				vfs_unbusy(mp, false, NULL);
606 			}
607 			if (tryalloc) {
608 				printf("WARNING: unable to allocate new "
609 				    "vnode, retrying...\n");
610 				kpause("newvn", false, hz, NULL);
611 				goto try_again;
612 			}
613 			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
614 			*vpp = NULL;
615 			return (ENFILE);
616 		}
617 		vp->v_iflag = 0;
618 		vp->v_vflag = 0;
619 		vp->v_uflag = 0;
620 		vp->v_socket = NULL;
621 	}
622 
623 	KASSERT(vp->v_usecount == 1);
624 	KASSERT(vp->v_freelisthd == NULL);
625 	KASSERT(LIST_EMPTY(&vp->v_nclist));
626 	KASSERT(LIST_EMPTY(&vp->v_dnclist));
627 
628 	vp->v_type = VNON;
629 	vp->v_vnlock = &vp->v_lock;
630 	vp->v_tag = tag;
631 	vp->v_op = vops;
632 	insmntque(vp, mp);
633 	*vpp = vp;
634 	vp->v_data = NULL;
635 
636 	/*
637 	 * initialize uvm_object within vnode.
638 	 */
639 
640 	uobj = &vp->v_uobj;
641 	KASSERT(uobj->pgops == &uvm_vnodeops);
642 	KASSERT(uobj->uo_npages == 0);
643 	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
644 	vp->v_size = vp->v_writesize = VSIZENOTSET;
645 
646 	if (mp != NULL) {
647 		if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
648 			vp->v_vflag |= VV_MPSAFE;
649 		vfs_unbusy(mp, true, NULL);
650 	}
651 
652 	return (0);
653 }
654 
655 /*
656  * This is really just the reverse of getnewvnode(). Needed for
657  * VFS_VGET functions that may need to push back a vnode in case
658  * of a locking race.
659  */
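/*
 * Usage sketch (illustration only; the VT_UFS tag and ffs_vnodeop_p
 * vector are placeholders for whatever the file system uses): a
 * VFS_VGET-style routine that loses a creation race pushes the fresh
 * vnode back:
 *
 *	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
 *	if (error)
 *		return error;
 *	if (lost the race to another thread setting up the same node) {
 *		ungetnewvnode(vp);
 *		goto retry;
 *	}
 */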
660 void
661 ungetnewvnode(vnode_t *vp)
662 {
663 
664 	KASSERT(vp->v_usecount == 1);
665 	KASSERT(vp->v_data == NULL);
666 	KASSERT(vp->v_freelisthd == NULL);
667 
668 	mutex_enter(&vp->v_interlock);
669 	vp->v_iflag |= VI_CLEAN;
670 	vrelel(vp, 0);
671 }
672 
673 /*
674  * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
675  * marker vnode and we are prepared to wait for the allocation.
676  */
677 vnode_t *
678 vnalloc(struct mount *mp)
679 {
680 	vnode_t *vp;
681 
682 	vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
683 	if (vp == NULL) {
684 		return NULL;
685 	}
686 
687 	memset(vp, 0, sizeof(*vp));
688 	UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
689 	cv_init(&vp->v_cv, "vnode");
690 	/*
691 	 * done by memset() above.
692 	 *	LIST_INIT(&vp->v_nclist);
693 	 *	LIST_INIT(&vp->v_dnclist);
694 	 */
695 
696 	if (mp != NULL) {
697 		vp->v_mount = mp;
698 		vp->v_type = VBAD;
699 		vp->v_iflag = VI_MARKER;
700 	} else {
701 		rw_init(&vp->v_lock.vl_lock);
702 	}
703 
704 	return vp;
705 }
706 
707 /*
708  * Free an unused, unreferenced vnode.
709  */
710 void
711 vnfree(vnode_t *vp)
712 {
713 
714 	KASSERT(vp->v_usecount == 0);
715 
716 	if ((vp->v_iflag & VI_MARKER) == 0) {
717 		rw_destroy(&vp->v_lock.vl_lock);
718 		mutex_enter(&vnode_free_list_lock);
719 		numvnodes--;
720 		mutex_exit(&vnode_free_list_lock);
721 	}
722 
723 	UVM_OBJ_DESTROY(&vp->v_uobj);
724 	cv_destroy(&vp->v_cv);
725 	pool_cache_put(vnode_cache, vp);
726 }
727 
728 /*
729  * Remove a vnode from its freelist.
730  */
731 static inline void
732 vremfree(vnode_t *vp)
733 {
734 
735 	KASSERT(mutex_owned(&vp->v_interlock));
736 	KASSERT(vp->v_usecount == 0);
737 
738 	/*
739 	 * Note that the reference count must not change until
740 	 * the vnode is removed.
741 	 */
742 	mutex_enter(&vnode_free_list_lock);
743 	if (vp->v_holdcnt > 0) {
744 		KASSERT(vp->v_freelisthd == &vnode_hold_list);
745 	} else {
746 		KASSERT(vp->v_freelisthd == &vnode_free_list);
747 	}
748 	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
749 	vp->v_freelisthd = NULL;
750 	mutex_exit(&vnode_free_list_lock);
751 }
752 
753 /*
754  * Move a vnode from one mount queue to another.
755  */
756 static void
757 insmntque(vnode_t *vp, struct mount *mp)
758 {
759 	struct mount *omp;
760 
761 #ifdef DIAGNOSTIC
762 	if ((mp != NULL) &&
763 	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
764 	    vp->v_tag != VT_VFS) {
765 		panic("insmntque into dying filesystem");
766 	}
767 #endif
768 
769 	mutex_enter(&mntvnode_lock);
770 	/*
771 	 * Delete from old mount point vnode list, if on one.
772 	 */
773 	if ((omp = vp->v_mount) != NULL)
774 		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
775 	/*
776 	 * Insert into list of vnodes for the new mount point, if
777 	 * available.  The caller must take a reference on the mount
778 	 * structure and donate to the vnode.
779 	 */
780 	if ((vp->v_mount = mp) != NULL)
781 		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
782 	mutex_exit(&mntvnode_lock);
783 
784 	if (omp != NULL) {
785 		/* Release reference to old mount. */
786 		vfs_destroy(omp);
787 	}
788 }
789 
790 /*
791  * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
792  * recycled.
793  */
794 void
795 vwait(vnode_t *vp, int flags)
796 {
797 
798 	KASSERT(mutex_owned(&vp->v_interlock));
799 	KASSERT(vp->v_usecount != 0);
800 
801 	while ((vp->v_iflag & flags) != 0)
802 		cv_wait(&vp->v_cv, &vp->v_interlock);
803 }
804 
805 /*
806  * Insert a marker vnode into a mount's vnode list, after the
807  * specified vnode.  mntvnode_lock must be held.
808  */
809 void
810 vmark(vnode_t *mvp, vnode_t *vp)
811 {
812 	struct mount *mp;
813 
814 	mp = mvp->v_mount;
815 
816 	KASSERT(mutex_owned(&mntvnode_lock));
817 	KASSERT((mvp->v_iflag & VI_MARKER) != 0);
818 	KASSERT(vp->v_mount == mp);
819 
820 	TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
821 }
822 
823 /*
824  * Remove a marker vnode from a mount's vnode list, and return
825  * a pointer to the next vnode in the list.  mntvnode_lock must
826  * be held.
827  */
828 vnode_t *
829 vunmark(vnode_t *mvp)
830 {
831 	vnode_t *vp;
832 	struct mount *mp;
833 
834 	mp = mvp->v_mount;
835 
836 	KASSERT(mutex_owned(&mntvnode_lock));
837 	KASSERT((mvp->v_iflag & VI_MARKER) != 0);
838 
839 	vp = TAILQ_NEXT(mvp, v_mntvnodes);
840 	TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes);
841 
842 	KASSERT(vp == NULL || vp->v_mount == mp);
843 
844 	return vp;
845 }
846 
847 /*
848  * Update outstanding I/O count and do wakeup if requested.
849  */
850 void
851 vwakeup(struct buf *bp)
852 {
853 	struct vnode *vp;
854 
855 	if ((vp = bp->b_vp) == NULL)
856 		return;
857 
858 	KASSERT(bp->b_objlock == &vp->v_interlock);
859 	KASSERT(mutex_owned(bp->b_objlock));
860 
861 	if (--vp->v_numoutput < 0)
862 		panic("vwakeup: neg numoutput, vp %p", vp);
863 	if (vp->v_numoutput == 0)
864 		cv_broadcast(&vp->v_cv);
865 }
866 
867 /*
868  * Flush out and invalidate all buffers associated with a vnode.
869  * Called with the underlying vnode locked, which should prevent new dirty
870  * buffers from being queued.
871  */
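/*
 * Usage sketch (illustration only; cred stands for the caller's
 * credential): flush a vnode's buffers, writing dirty data first, and
 * fall back to discarding on failure, much as vclean() below does:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, curlwp, false, 0);
 *	if (error != 0)
 *		error = vinvalbuf(vp, 0, cred, curlwp, false, 0);
 */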
872 int
873 vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
874 	  bool catch, int slptimeo)
875 {
876 	struct buf *bp, *nbp;
877 	int error;
878 	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
879 	    (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);
880 
881 	/* XXXUBC this doesn't look at flags or slp* */
882 	mutex_enter(&vp->v_interlock);
883 	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
884 	if (error) {
885 		return error;
886 	}
887 
888 	if (flags & V_SAVE) {
889 		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
890 		if (error)
891 		        return (error);
892 		KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
893 	}
894 
895 	mutex_enter(&bufcache_lock);
896 restart:
897 	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
898 		nbp = LIST_NEXT(bp, b_vnbufs);
899 		error = bbusy(bp, catch, slptimeo, NULL);
900 		if (error != 0) {
901 			if (error == EPASSTHROUGH)
902 				goto restart;
903 			mutex_exit(&bufcache_lock);
904 			return (error);
905 		}
906 		brelsel(bp, BC_INVAL | BC_VFLUSH);
907 	}
908 
909 	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
910 		nbp = LIST_NEXT(bp, b_vnbufs);
911 		error = bbusy(bp, catch, slptimeo, NULL);
912 		if (error != 0) {
913 			if (error == EPASSTHROUGH)
914 				goto restart;
915 			mutex_exit(&bufcache_lock);
916 			return (error);
917 		}
918 		/*
919 		 * XXX Since there are no node locks for NFS, I believe
920 		 * there is a slight chance that a delayed write will
921 		 * occur while sleeping just above, so check for it.
922 		 */
923 		if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
924 #ifdef DEBUG
925 			printf("buffer still DELWRI\n");
926 #endif
927 			bp->b_cflags |= BC_BUSY | BC_VFLUSH;
928 			mutex_exit(&bufcache_lock);
929 			VOP_BWRITE(bp);
930 			mutex_enter(&bufcache_lock);
931 			goto restart;
932 		}
933 		brelsel(bp, BC_INVAL | BC_VFLUSH);
934 	}
935 
936 #ifdef DIAGNOSTIC
937 	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
938 		panic("vinvalbuf: flush failed, vp %p", vp);
939 #endif
940 
941 	mutex_exit(&bufcache_lock);
942 
943 	return (0);
944 }
945 
946 /*
947  * Destroy any in core blocks past the truncation length.
948  * Called with the underlying vnode locked, which should prevent new dirty
949  * buffers from being queued.
950  */
951 int
952 vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
953 {
954 	struct buf *bp, *nbp;
955 	int error;
956 	voff_t off;
957 
958 	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
959 	mutex_enter(&vp->v_interlock);
960 	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
961 	if (error) {
962 		return error;
963 	}
964 
965 	mutex_enter(&bufcache_lock);
966 restart:
967 	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
968 		nbp = LIST_NEXT(bp, b_vnbufs);
969 		if (bp->b_lblkno < lbn)
970 			continue;
971 		error = bbusy(bp, catch, slptimeo, NULL);
972 		if (error != 0) {
973 			if (error == EPASSTHROUGH)
974 				goto restart;
975 			mutex_exit(&bufcache_lock);
976 			return (error);
977 		}
978 		brelsel(bp, BC_INVAL | BC_VFLUSH);
979 	}
980 
981 	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
982 		nbp = LIST_NEXT(bp, b_vnbufs);
983 		if (bp->b_lblkno < lbn)
984 			continue;
985 		error = bbusy(bp, catch, slptimeo, NULL);
986 		if (error != 0) {
987 			if (error == EPASSTHROUGH)
988 				goto restart;
989 			mutex_exit(&bufcache_lock);
990 			return (error);
991 		}
992 		brelsel(bp, BC_INVAL | BC_VFLUSH);
993 	}
994 	mutex_exit(&bufcache_lock);
995 
996 	return (0);
997 }
998 
999 /*
1000  * Flush all dirty buffers from a vnode.
1001  * Called with the underlying vnode locked, which should prevent new dirty
1002  * buffers from being queued.
1003  */
1004 void
1005 vflushbuf(struct vnode *vp, int sync)
1006 {
1007 	struct buf *bp, *nbp;
1008 	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
1009 	bool dirty;
1010 
1011 	mutex_enter(&vp->v_interlock);
1012 	(void) VOP_PUTPAGES(vp, 0, 0, flags);
1013 
1014 loop:
1015 	mutex_enter(&bufcache_lock);
1016 	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
1017 		nbp = LIST_NEXT(bp, b_vnbufs);
1018 		if ((bp->b_cflags & BC_BUSY))
1019 			continue;
1020 		if ((bp->b_oflags & BO_DELWRI) == 0)
1021 			panic("vflushbuf: not dirty, bp %p", bp);
1022 		bp->b_cflags |= BC_BUSY | BC_VFLUSH;
1023 		mutex_exit(&bufcache_lock);
1024 		/*
1025 		 * Wait for I/O associated with indirect blocks to complete,
1026 		 * since there is no way to quickly wait for them below.
1027 		 */
1028 		if (bp->b_vp == vp || sync == 0)
1029 			(void) bawrite(bp);
1030 		else
1031 			(void) bwrite(bp);
1032 		goto loop;
1033 	}
1034 	mutex_exit(&bufcache_lock);
1035 
1036 	if (sync == 0)
1037 		return;
1038 
1039 	mutex_enter(&vp->v_interlock);
1040 	while (vp->v_numoutput != 0)
1041 		cv_wait(&vp->v_cv, &vp->v_interlock);
1042 	dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
1043 	mutex_exit(&vp->v_interlock);
1044 
1045 	if (dirty) {
1046 		vprint("vflushbuf: dirty", vp);
1047 		goto loop;
1048 	}
1049 }
1050 
1051 /*
1052  * Create a vnode for a block device.
1053  * Used for root filesystem and swap areas.
1054  * Also used for memory file system special devices.
1055  */
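/*
 * Usage sketch (illustration only): bootstrap code setting up a vnode
 * for the root device might do:
 *
 *	vnode_t *rootvp;
 *
 *	if (bdevvp(rootdev, &rootvp) != 0)
 *		panic("cannot set up root device vnode");
 */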
1056 int
1057 bdevvp(dev_t dev, vnode_t **vpp)
1058 {
1059 
1060 	return (getdevvp(dev, vpp, VBLK));
1061 }
1062 
1063 /*
1064  * Create a vnode for a character device.
1065  * Used for kernfs and some console handling.
1066  */
1067 int
1068 cdevvp(dev_t dev, vnode_t **vpp)
1069 {
1070 
1071 	return (getdevvp(dev, vpp, VCHR));
1072 }
1073 
1074 /*
1075  * Associate a buffer with a vnode.  There must already be a hold on
1076  * the vnode.
1077  */
1078 void
1079 bgetvp(struct vnode *vp, struct buf *bp)
1080 {
1081 
1082 	KASSERT(bp->b_vp == NULL);
1083 	KASSERT(bp->b_objlock == &buffer_lock);
1084 	KASSERT(mutex_owned(&vp->v_interlock));
1085 	KASSERT(mutex_owned(&bufcache_lock));
1086 	KASSERT((bp->b_cflags & BC_BUSY) != 0);
1087 	KASSERT(!cv_has_waiters(&bp->b_done));
1088 
1089 	vholdl(vp);
1090 	bp->b_vp = vp;
1091 	if (vp->v_type == VBLK || vp->v_type == VCHR)
1092 		bp->b_dev = vp->v_rdev;
1093 	else
1094 		bp->b_dev = NODEV;
1095 
1096 	/*
1097 	 * Insert onto list for new vnode.
1098 	 */
1099 	bufinsvn(bp, &vp->v_cleanblkhd);
1100 	bp->b_objlock = &vp->v_interlock;
1101 }
1102 
1103 /*
1104  * Disassociate a buffer from a vnode.
1105  */
1106 void
1107 brelvp(struct buf *bp)
1108 {
1109 	struct vnode *vp = bp->b_vp;
1110 
1111 	KASSERT(vp != NULL);
1112 	KASSERT(bp->b_objlock == &vp->v_interlock);
1113 	KASSERT(mutex_owned(&vp->v_interlock));
1114 	KASSERT(mutex_owned(&bufcache_lock));
1115 	KASSERT((bp->b_cflags & BC_BUSY) != 0);
1116 	KASSERT(!cv_has_waiters(&bp->b_done));
1117 
1118 	/*
1119 	 * Delete from old vnode list, if on one.
1120 	 */
1121 	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1122 		bufremvn(bp);
1123 
1124 	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) &&
1125 	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1126 		vp->v_iflag &= ~VI_WRMAPDIRTY;
1127 		vn_syncer_remove_from_worklist(vp);
1128 	}
1129 
1130 	bp->b_objlock = &buffer_lock;
1131 	bp->b_vp = NULL;
1132 	holdrelel(vp);
1133 }
1134 
1135 /*
1136  * Reassign a buffer from one vnode list to another.
1137  * The list reassignment must be within the same vnode.
1138  * Used to assign file specific control information
1139  * (indirect blocks) to the list to which they belong.
1140  */
1141 void
1142 reassignbuf(struct buf *bp, struct vnode *vp)
1143 {
1144 	struct buflists *listheadp;
1145 	int delayx;
1146 
1147 	KASSERT(mutex_owned(&bufcache_lock));
1148 	KASSERT(bp->b_objlock == &vp->v_interlock);
1149 	KASSERT(mutex_owned(&vp->v_interlock));
1150 	KASSERT((bp->b_cflags & BC_BUSY) != 0);
1151 
1152 	/*
1153 	 * Delete from old vnode list, if on one.
1154 	 */
1155 	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
1156 		bufremvn(bp);
1157 
1158 	/*
1159 	 * If dirty, put on list of dirty buffers;
1160 	 * otherwise insert onto list of clean buffers.
1161 	 */
1162 	if ((bp->b_oflags & BO_DELWRI) == 0) {
1163 		listheadp = &vp->v_cleanblkhd;
1164 		if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
1165 		    (vp->v_iflag & VI_ONWORKLST) &&
1166 		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1167 			vp->v_iflag &= ~VI_WRMAPDIRTY;
1168 			vn_syncer_remove_from_worklist(vp);
1169 		}
1170 	} else {
1171 		listheadp = &vp->v_dirtyblkhd;
1172 		if ((vp->v_iflag & VI_ONWORKLST) == 0) {
1173 			switch (vp->v_type) {
1174 			case VDIR:
1175 				delayx = dirdelay;
1176 				break;
1177 			case VBLK:
1178 				if (vp->v_specmountpoint != NULL) {
1179 					delayx = metadelay;
1180 					break;
1181 				}
1182 				/* fall through */
1183 			default:
1184 				delayx = filedelay;
1185 				break;
1186 			}
1187 			if (!vp->v_mount ||
1188 			    (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
1189 				vn_syncer_add_to_worklist(vp, delayx);
1190 		}
1191 	}
1192 	bufinsvn(bp, listheadp);
1193 }
1194 
1195 /*
1196  * Create a vnode for a device.
1197  * Used by bdevvp (block device) for root file system etc.,
1198  * and by cdevvp (character device) for console and kernfs.
1199  */
1200 static int
1201 getdevvp(dev_t dev, vnode_t **vpp, enum vtype type)
1202 {
1203 	vnode_t *vp;
1204 	vnode_t *nvp;
1205 	int error;
1206 
1207 	if (dev == NODEV) {
1208 		*vpp = NULL;
1209 		return (0);
1210 	}
1211 	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
1212 	if (error) {
1213 		*vpp = NULL;
1214 		return (error);
1215 	}
1216 	vp = nvp;
1217 	vp->v_type = type;
1218 	vp->v_vflag |= VV_MPSAFE;
1219 	uvm_vnp_setsize(vp, 0);
1220 	spec_node_init(vp, dev);
1221 	*vpp = vp;
1222 	return (0);
1223 }
1224 
1225 /*
1226  * Try to gain a reference to a vnode, without acquiring its interlock.
1227  * The caller must hold a lock that will prevent the vnode from being
1228  * recycled or freed.
1229  */
1230 bool
1231 vtryget(vnode_t *vp)
1232 {
1233 	u_int use, next;
1234 
1235 	/*
1236 	 * If the vnode is being freed, don't make life any harder
1237 	 * for vclean() by adding another reference without waiting.
1238 	 * This is not strictly necessary, but we'll do it anyway.
1239 	 */
1240 	if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) {
1241 		return false;
1242 	}
1243 	for (use = vp->v_usecount;; use = next) {
1244 		if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) {
1245 			/* Need interlock held if first reference. */
1246 			return false;
1247 		}
1248 		next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
1249 		if (__predict_true(next == use)) {
1250 			return true;
1251 		}
1252 	}
1253 }
1254 
1255 /*
1256  * Grab a particular vnode from the free list, increment its
1257  * reference count and lock it. If the vnode lock bit is set the
1258  * vnode is being eliminated in vgone. In that case, we cannot
1259  * grab the vnode, so the process is awakened when the transition is
1260  * completed, and an error is returned to indicate that the vnode is no
1261  * longer usable (possibly having been changed to a new file system type).
1262  */
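/*
 * Usage sketch (illustration only): a hash-lookup routine that already
 * holds the vnode's interlock typically does:
 *
 *	mutex_enter(&vp->v_interlock);
 *	error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK);
 *	if (error != 0)
 *		... the vnode was reclaimed or could not be locked; retry ...
 */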
1263 int
1264 vget(vnode_t *vp, int flags)
1265 {
1266 	int error;
1267 
1268 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1269 
1270 	if ((flags & LK_INTERLOCK) == 0)
1271 		mutex_enter(&vp->v_interlock);
1272 
1273 	/*
1274 	 * Before adding a reference, we must remove the vnode
1275 	 * from its freelist.
1276 	 */
1277 	if (vp->v_usecount == 0) {
1278 		vremfree(vp);
1279 		vp->v_usecount = 1;
1280 	} else {
1281 		atomic_inc_uint(&vp->v_usecount);
1282 	}
1283 
1284 	/*
1285 	 * If the vnode is in the process of being cleaned out for
1286 	 * another use, we wait for the cleaning to finish and then
1287 	 * return failure.  Cleaning is determined by checking if
1288 	 * the VI_XLOCK or VI_FREEING flags are set.
1289 	 */
1290 	if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
1291 		if ((flags & LK_NOWAIT) != 0) {
1292 			vrelel(vp, 0);
1293 			return EBUSY;
1294 		}
1295 		vwait(vp, VI_XLOCK | VI_FREEING);
1296 		vrelel(vp, 0);
1297 		return ENOENT;
1298 	}
1299 	if (flags & LK_TYPE_MASK) {
1300 		error = vn_lock(vp, flags | LK_INTERLOCK);
1301 		if (error != 0) {
1302 			vrele(vp);
1303 		}
1304 		return error;
1305 	}
1306 	mutex_exit(&vp->v_interlock);
1307 	return 0;
1308 }
1309 
1310 /*
1311  * vput(), just unlock and vrele()
1312  */
1313 void
1314 vput(vnode_t *vp)
1315 {
1316 
1317 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1318 
1319 	VOP_UNLOCK(vp, 0);
1320 	vrele(vp);
1321 }
1322 
1323 /*
1324  * Try to drop a reference on a vnode.  Abort if we are releasing the
1325  * last reference.  Note: this _must_ succeed if not the last reference.
1326  */
1327 static inline bool
1328 vtryrele(vnode_t *vp)
1329 {
1330 	u_int use, next;
1331 
1332 	for (use = vp->v_usecount;; use = next) {
1333 		if (use == 1) {
1334 			return false;
1335 		}
1336 		KASSERT((use & VC_MASK) > 1);
1337 		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
1338 		if (__predict_true(next == use)) {
1339 			return true;
1340 		}
1341 	}
1342 }
1343 
1344 /*
1345  * Vnode release.  If reference count drops to zero, call inactive
1346  * routine and either return to freelist or free to the pool.
1347  */
1348 void
1349 vrelel(vnode_t *vp, int flags)
1350 {
1351 	bool recycle, defer;
1352 	int error;
1353 
1354 	KASSERT(mutex_owned(&vp->v_interlock));
1355 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1356 	KASSERT(vp->v_freelisthd == NULL);
1357 
1358 	if (__predict_false(vp->v_op == dead_vnodeop_p &&
1359 	    (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
1360 		vpanic(vp, "dead but not clean");
1361 	}
1362 
1363 	/*
1364 	 * If not the last reference, just drop the reference count
1365 	 * and unlock.
1366 	 */
1367 	if (vtryrele(vp)) {
1368 		vp->v_iflag |= VI_INACTREDO;
1369 		mutex_exit(&vp->v_interlock);
1370 		return;
1371 	}
1372 	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
1373 		vpanic(vp, "vrelel: bad ref count");
1374 	}
1375 
1376 	KASSERT((vp->v_iflag & VI_XLOCK) == 0);
1377 
1378 	/*
1379 	 * If not clean, deactivate the vnode, but preserve
1380 	 * our reference across the call to VOP_INACTIVE().
1381 	 */
1382  retry:
1383 	if ((vp->v_iflag & VI_CLEAN) == 0) {
1384 		recycle = false;
1385 		vp->v_iflag |= VI_INACTNOW;
1386 
1387 		/*
1388 		 * XXX This ugly block can be largely eliminated if
1389 		 * locking is pushed down into the file systems.
1390 		 */
1391 		if (curlwp == uvm.pagedaemon_lwp) {
1392 			/* The pagedaemon can't wait around; defer. */
1393 			defer = true;
1394 		} else if (curlwp == vrele_lwp) {
1395 			/* We have to try harder. */
1396 			vp->v_iflag &= ~VI_INACTREDO;
1397 			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
1398 			    LK_RETRY);
1399 			if (error != 0) {
1400 				/* XXX */
1401 				vpanic(vp, "vrele: unable to lock vnode");
1402 			}
1403 			defer = false;
1404 		} else if ((vp->v_iflag & VI_LAYER) != 0) {
1405 			/*
1406 			 * Acquiring the stack's lock in vclean() even
1407 			 * for an honest vput/vrele is dangerous because
1408 			 * our caller may hold other vnode locks; defer.
1409 			 */
1410 			defer = true;
1411 		} else {
1412 			/* If we can't acquire the lock, then defer. */
1413 			vp->v_iflag &= ~VI_INACTREDO;
1414 			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
1415 			    LK_NOWAIT);
1416 			if (error != 0) {
1417 				defer = true;
1418 				mutex_enter(&vp->v_interlock);
1419 			} else {
1420 				defer = false;
1421 			}
1422 		}
1423 
1424 		if (defer) {
1425 			/*
1426 			 * Defer reclaim to the kthread; it's not safe to
1427 			 * clean it here.  We donate it our last reference.
1428 			 */
1429 			KASSERT(mutex_owned(&vp->v_interlock));
1430 			KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
1431 			vp->v_iflag &= ~VI_INACTNOW;
1432 			vp->v_iflag |= VI_INACTPEND;
1433 			mutex_enter(&vrele_lock);
1434 			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
1435 			if (++vrele_pending > (desiredvnodes >> 8))
1436 				cv_signal(&vrele_cv);
1437 			mutex_exit(&vrele_lock);
1438 			mutex_exit(&vp->v_interlock);
1439 			return;
1440 		}
1441 
1442 #ifdef DIAGNOSTIC
1443 		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
1444 		    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
1445 			vprint("vrelel: missing VOP_CLOSE()", vp);
1446 		}
1447 #endif
1448 
1449 		/*
1450 		 * The vnode can gain another reference while being
1451 		 * deactivated.  If VOP_INACTIVE() indicates that
1452 		 * the described file has been deleted, then recycle
1453 		 * the vnode irrespective of additional references.
1454 		 * Another thread may be waiting to re-use the on-disk
1455 		 * inode.
1456 		 *
1457 		 * Note that VOP_INACTIVE() will drop the vnode lock.
1458 		 */
1459 		VOP_INACTIVE(vp, &recycle);
1460 		mutex_enter(&vp->v_interlock);
1461 		vp->v_iflag &= ~VI_INACTNOW;
1462 		if (!recycle) {
1463 			if (vtryrele(vp)) {
1464 				mutex_exit(&vp->v_interlock);
1465 				return;
1466 			}
1467 
1468 			/*
1469 			 * If we grew another reference while
1470 			 * VOP_INACTIVE() was underway, retry.
1471 			 */
1472 			if ((vp->v_iflag & VI_INACTREDO) != 0) {
1473 				goto retry;
1474 			}
1475 		}
1476 
1477 		/* Take care of space accounting. */
1478 		if (vp->v_iflag & VI_EXECMAP) {
1479 			atomic_add_int(&uvmexp.execpages,
1480 			    -vp->v_uobj.uo_npages);
1481 			atomic_add_int(&uvmexp.filepages,
1482 			    vp->v_uobj.uo_npages);
1483 		}
1484 		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
1485 		vp->v_vflag &= ~VV_MAPPED;
1486 
1487 		/*
1488 		 * Recycle the vnode if the file is now unused (unlinked),
1489 		 * otherwise just free it.
1490 		 */
1491 		if (recycle) {
1492 			vclean(vp, DOCLOSE);
1493 		}
1494 		KASSERT(vp->v_usecount > 0);
1495 	}
1496 
1497 	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
1498 		/* Gained another reference while being reclaimed. */
1499 		mutex_exit(&vp->v_interlock);
1500 		return;
1501 	}
1502 
1503 	if ((vp->v_iflag & VI_CLEAN) != 0) {
1504 		/*
1505 		 * It's clean so destroy it.  It isn't referenced
1506 		 * anywhere since it has been reclaimed.
1507 		 */
1508 		KASSERT(vp->v_holdcnt == 0);
1509 		KASSERT(vp->v_writecount == 0);
1510 		mutex_exit(&vp->v_interlock);
1511 		insmntque(vp, NULL);
1512 		if (vp->v_type == VBLK || vp->v_type == VCHR) {
1513 			spec_node_destroy(vp);
1514 		}
1515 		vnfree(vp);
1516 	} else {
1517 		/*
1518 		 * Otherwise, put it back onto the freelist.  It
1519 		 * can't be destroyed while still associated with
1520 		 * a file system.
1521 		 */
1522 		mutex_enter(&vnode_free_list_lock);
1523 		if (vp->v_holdcnt > 0) {
1524 			vp->v_freelisthd = &vnode_hold_list;
1525 		} else {
1526 			vp->v_freelisthd = &vnode_free_list;
1527 		}
1528 		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1529 		mutex_exit(&vnode_free_list_lock);
1530 		mutex_exit(&vp->v_interlock);
1531 	}
1532 }
1533 
1534 void
1535 vrele(vnode_t *vp)
1536 {
1537 
1538 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1539 
1540 	if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
1541 		return;
1542 	}
1543 	mutex_enter(&vp->v_interlock);
1544 	vrelel(vp, 0);
1545 }
1546 
1547 static void
1548 vrele_thread(void *cookie)
1549 {
1550 	vnode_t *vp;
1551 
1552 	for (;;) {
1553 		mutex_enter(&vrele_lock);
1554 		while (TAILQ_EMPTY(&vrele_list)) {
1555 			vrele_gen++;
1556 			cv_broadcast(&vrele_cv);
1557 			cv_timedwait(&vrele_cv, &vrele_lock, hz);
1558 		}
1559 		vp = TAILQ_FIRST(&vrele_list);
1560 		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
1561 		vrele_pending--;
1562 		mutex_exit(&vrele_lock);
1563 
1564 		/*
1565 		 * If not the last reference, then ignore the vnode
1566 		 * and look for more work.
1567 		 */
1568 		mutex_enter(&vp->v_interlock);
1569 		KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
1570 		vp->v_iflag &= ~VI_INACTPEND;
1571 		vrelel(vp, 0);
1572 	}
1573 }
1574 
1575 /*
1576  * Page or buffer structure gets a reference.
1577  * Called with v_interlock held.
1578  */
1579 void
1580 vholdl(vnode_t *vp)
1581 {
1582 
1583 	KASSERT(mutex_owned(&vp->v_interlock));
1584 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1585 
1586 	if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
1587 		mutex_enter(&vnode_free_list_lock);
1588 		KASSERT(vp->v_freelisthd == &vnode_free_list);
1589 		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1590 		vp->v_freelisthd = &vnode_hold_list;
1591 		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1592 		mutex_exit(&vnode_free_list_lock);
1593 	}
1594 }
1595 
1596 /*
1597  * Page or buffer structure frees a reference.
1598  * Called with v_interlock held.
1599  */
1600 void
1601 holdrelel(vnode_t *vp)
1602 {
1603 
1604 	KASSERT(mutex_owned(&vp->v_interlock));
1605 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1606 
1607 	if (vp->v_holdcnt <= 0) {
1608 		vpanic(vp, "holdrelel: bad holdcnt");
1609 	}
1610 
1611 	vp->v_holdcnt--;
1612 	if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1613 		mutex_enter(&vnode_free_list_lock);
1614 		KASSERT(vp->v_freelisthd == &vnode_hold_list);
1615 		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
1616 		vp->v_freelisthd = &vnode_free_list;
1617 		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
1618 		mutex_exit(&vnode_free_list_lock);
1619 	}
1620 }
1621 
1622 /*
1623  * Vnode reference, where a reference is already held by some other
1624  * object (for example, a file structure).
1625  */
1626 void
1627 vref(vnode_t *vp)
1628 {
1629 
1630 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1631 	KASSERT(vp->v_usecount != 0);
1632 
1633 	atomic_inc_uint(&vp->v_usecount);
1634 }
1635 
1636 /*
1637  * Remove any vnodes in the vnode table belonging to mount point mp.
1638  *
1639  * If FORCECLOSE is not specified, there should not be any active ones,
1640  * return error if any are found (nb: this is a user error, not a
1641  * system error). If FORCECLOSE is specified, detach any active vnodes
1642  * that are found.
1643  *
1644  * If WRITECLOSE is set, only flush out regular file vnodes open for
1645  * writing.
1646  *
1647  * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
1648  */
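/*
 * Usage sketch (illustration only): a file system's unmount routine
 * typically flushes every vnode it still owns:
 *
 *	error = vflush(mp, NULLVP, (mntflags & MNT_FORCE) ? FORCECLOSE : 0);
 *	if (error != 0)
 *		return error;
 */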
1649 #ifdef DEBUG
1650 int busyprt = 0;	/* print out busy vnodes */
1651 struct ctldebug debug1 = { "busyprt", &busyprt };
1652 #endif
1653 
1654 static vnode_t *
1655 vflushnext(vnode_t *mvp, int *when)
1656 {
1657 
1658 	if (hardclock_ticks > *when) {
1659 		mutex_exit(&mntvnode_lock);
1660 		yield();
1661 		mutex_enter(&mntvnode_lock);
1662 		*when = hardclock_ticks + hz / 10;
1663 	}
1664 
1665 	return vunmark(mvp);
1666 }
1667 
1668 int
1669 vflush(struct mount *mp, vnode_t *skipvp, int flags)
1670 {
1671 	vnode_t *vp, *mvp;
1672 	int busy = 0, when = 0, gen;
1673 
1674 	/*
1675 	 * First, flush out any vnode references from vrele_list.
1676 	 */
1677 	mutex_enter(&vrele_lock);
1678 	gen = vrele_gen;
1679 	while (vrele_pending && gen == vrele_gen) {
1680 		cv_broadcast(&vrele_cv);
1681 		cv_wait(&vrele_cv, &vrele_lock);
1682 	}
1683 	mutex_exit(&vrele_lock);
1684 
1685 	/* Allocate a marker vnode. */
1686 	if ((mvp = vnalloc(mp)) == NULL)
1687 		return (ENOMEM);
1688 
1689 	/*
1690 	 * NOTE: we do not use TAILQ_FOREACH here, since vgone() and
1691 	 * vclean() are called within the loop.
1692 	 */
1693 	mutex_enter(&mntvnode_lock);
1694 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
1695 	    vp = vflushnext(mvp, &when)) {
1696 		vmark(mvp, vp);
1697 		if (vp->v_mount != mp || vismarker(vp))
1698 			continue;
1699 		/*
1700 		 * Skip over a selected vnode.
1701 		 */
1702 		if (vp == skipvp)
1703 			continue;
1704 		mutex_enter(&vp->v_interlock);
1705 		/*
1706 		 * Ignore clean but still referenced vnodes.
1707 		 */
1708 		if ((vp->v_iflag & VI_CLEAN) != 0) {
1709 			mutex_exit(&vp->v_interlock);
1710 			continue;
1711 		}
1712 		/*
1713 		 * Skip over vnodes marked VV_SYSTEM.
1714 		 */
1715 		if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
1716 			mutex_exit(&vp->v_interlock);
1717 			continue;
1718 		}
1719 		/*
1720 		 * If WRITECLOSE is set, only flush out regular file
1721 		 * vnodes open for writing.
1722 		 */
1723 		if ((flags & WRITECLOSE) &&
1724 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1725 			mutex_exit(&vp->v_interlock);
1726 			continue;
1727 		}
1728 		/*
1729 		 * With v_usecount == 0, all we need to do is clear
1730 		 * out the vnode data structures and we are done.
1731 		 */
1732 		if (vp->v_usecount == 0) {
1733 			mutex_exit(&mntvnode_lock);
1734 			vremfree(vp);
1735 			vp->v_usecount = 1;
1736 			vclean(vp, DOCLOSE);
1737 			vrelel(vp, 0);
1738 			mutex_enter(&mntvnode_lock);
1739 			continue;
1740 		}
1741 		/*
1742 		 * If FORCECLOSE is set, forcibly close the vnode.
1743 		 * For block or character devices, revert to an
1744 		 * anonymous device.  For all other files, just
1745 		 * kill them.
1746 		 */
1747 		if (flags & FORCECLOSE) {
1748 			mutex_exit(&mntvnode_lock);
1749 			atomic_inc_uint(&vp->v_usecount);
1750 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1751 				vclean(vp, DOCLOSE);
1752 				vrelel(vp, 0);
1753 			} else {
1754 				vclean(vp, 0);
1755 				vp->v_op = spec_vnodeop_p; /* XXXSMP */
1756 				mutex_exit(&vp->v_interlock);
1757 				/*
1758 				 * The vnode isn't clean, but still resides
1759 				 * on the mount list.  Remove it. XXX This
1760 				 * is a bit dodgy.
1761 				 */
1762 				insmntque(vp, NULL);
1763 				vrele(vp);
1764 			}
1765 			mutex_enter(&mntvnode_lock);
1766 			continue;
1767 		}
1768 #ifdef DEBUG
1769 		if (busyprt)
1770 			vprint("vflush: busy vnode", vp);
1771 #endif
1772 		mutex_exit(&vp->v_interlock);
1773 		busy++;
1774 	}
1775 	mutex_exit(&mntvnode_lock);
1776 	vnfree(mvp);
1777 	if (busy)
1778 		return (EBUSY);
1779 	return (0);
1780 }
1781 
1782 /*
1783  * Disassociate the underlying file system from a vnode.
1784  *
1785  * Must be called with the interlock held, and will return with it held.
1786  */
1787 void
1788 vclean(vnode_t *vp, int flags)
1789 {
1790 	lwp_t *l = curlwp;
1791 	bool recycle, active;
1792 	int error;
1793 
1794 	KASSERT(mutex_owned(&vp->v_interlock));
1795 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1796 	KASSERT(vp->v_usecount != 0);
1797 
1798 	/* If cleaning is already in progress wait until done and return. */
1799 	if (vp->v_iflag & VI_XLOCK) {
1800 		vwait(vp, VI_XLOCK);
1801 		return;
1802 	}
1803 
1804 	/* If already clean, nothing to do. */
1805 	if ((vp->v_iflag & VI_CLEAN) != 0) {
1806 		return;
1807 	}
1808 
1809 	/*
1810 	 * Prevent the vnode from being recycled or brought into use
1811 	 * while we clean it out.
1812 	 */
1813 	vp->v_iflag |= VI_XLOCK;
1814 	if (vp->v_iflag & VI_EXECMAP) {
1815 		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
1816 		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
1817 	}
1818 	vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
1819 	active = (vp->v_usecount > 1);
1820 
1821 	/* XXXAD should not lock vnode under layer */
1822 	VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
1823 
1824 	/*
1825 	 * Clean out any cached data associated with the vnode.
1826 	 * If purging an active vnode, it must be closed and
1827 	 * deactivated before being reclaimed. Note that the
1828 	 * VOP_INACTIVE will unlock the vnode.
1829 	 */
1830 	if (flags & DOCLOSE) {
1831 		error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
1832 		if (error != 0) {
1833 			/* XXX, fix vn_start_write's grab of mp and use that. */
1834 
1835 			if (wapbl_vphaswapbl(vp))
1836 				WAPBL_DISCARD(wapbl_vptomp(vp));
1837 			error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
1838 		}
1839 		KASSERT(error == 0);
1840 		KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1841 		if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
1842 			 spec_node_revoke(vp);
1843 		}
1844 	}
1845 	if (active) {
1846 		VOP_INACTIVE(vp, &recycle);
1847 	} else {
1848 		/*
1849 		 * Any other processes trying to obtain this lock must first
1850 		 * wait for VI_XLOCK to clear, then call the new lock operation.
1851 		 */
1852 		VOP_UNLOCK(vp, 0);
1853 	}
1854 
1855 	/* Disassociate the underlying file system from the vnode. */
1856 	if (VOP_RECLAIM(vp)) {
1857 		vpanic(vp, "vclean: cannot reclaim");
1858 	}
1859 
1860 	KASSERT(vp->v_uobj.uo_npages == 0);
1861 	if (vp->v_type == VREG && vp->v_ractx != NULL) {
1862 		uvm_ra_freectx(vp->v_ractx);
1863 		vp->v_ractx = NULL;
1864 	}
1865 	cache_purge(vp);
1866 
1867 	/* Done with purge, notify sleepers of the grim news. */
1868 	mutex_enter(&vp->v_interlock);
1869 	vp->v_op = dead_vnodeop_p;
1870 	vp->v_tag = VT_NON;
1871 	vp->v_vnlock = &vp->v_lock;
1872 	KNOTE(&vp->v_klist, NOTE_REVOKE);
1873 	vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
1874 	vp->v_vflag &= ~VV_LOCKSWORK;
1875 	if ((flags & DOCLOSE) != 0) {
1876 		vp->v_iflag |= VI_CLEAN;
1877 	}
1878 	cv_broadcast(&vp->v_cv);
1879 
1880 	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
1881 }
1882 
1883 /*
1884  * Recycle an unused vnode to the front of the free list.
1885  * Release the passed interlock if the vnode will be recycled.
1886  */
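/*
 * Usage sketch (illustration only; "ip" stands for the file system's
 * private inode and the test is just an example): an inactive routine
 * may recycle a vnode whose file has been removed:
 *
 *	if (ip->i_mode == 0)
 *		vrecycle(vp, NULL, curlwp);
 */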
1887 int
1888 vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
1889 {
1890 
1891 	KASSERT((vp->v_iflag & VI_MARKER) == 0);
1892 
1893 	mutex_enter(&vp->v_interlock);
1894 	if (vp->v_usecount != 0) {
1895 		mutex_exit(&vp->v_interlock);
1896 		return (0);
1897 	}
1898 	if (inter_lkp)
1899 		mutex_exit(inter_lkp);
1900 	vremfree(vp);
1901 	vp->v_usecount = 1;
1902 	vclean(vp, DOCLOSE);
1903 	vrelel(vp, 0);
1904 	return (1);
1905 }
1906 
1907 /*
1908  * Eliminate all activity associated with a vnode in preparation for
1909  * reuse.  Drops a reference from the vnode.
1910  */
1911 void
1912 vgone(vnode_t *vp)
1913 {
1914 
1915 	mutex_enter(&vp->v_interlock);
1916 	vclean(vp, DOCLOSE);
1917 	vrelel(vp, 0);
1918 }
1919 
1920 /*
1921  * Lookup a vnode by device number.
1922  */
1923 int
1924 vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
1925 {
1926 	vnode_t *vp;
1927 	int rc = 0;
1928 
1929 	mutex_enter(&device_lock);
1930 	for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1931 		if (dev != vp->v_rdev || type != vp->v_type)
1932 			continue;
1933 		*vpp = vp;
1934 		rc = 1;
1935 		break;
1936 	}
1937 	mutex_exit(&device_lock);
1938 	return (rc);
1939 }
1940 
1941 /*
1942  * Revoke all the vnodes corresponding to the specified minor number
1943  * range (endpoints inclusive) of the specified major.
1944  */
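/*
 * Usage sketch (illustration only; foo_cdevsw/foo_bdevsw and nunits are
 * placeholders): a driver's detach routine might revoke the vnodes for
 * its minor numbers:
 *
 *	vdevgone(cdevsw_lookup_major(&foo_cdevsw), 0, nunits - 1, VCHR);
 *	vdevgone(bdevsw_lookup_major(&foo_bdevsw), 0, nunits - 1, VBLK);
 */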
1945 void
1946 vdevgone(int maj, int minl, int minh, enum vtype type)
1947 {
1948 	vnode_t *vp, **vpp;
1949 	dev_t dev;
1950 	int mn;
1951 
1952 	vp = NULL;	/* XXX gcc */
1953 
1954 	mutex_enter(&device_lock);
1955 	for (mn = minl; mn <= minh; mn++) {
1956 		dev = makedev(maj, mn);
1957 		vpp = &specfs_hash[SPECHASH(dev)];
1958 		for (vp = *vpp; vp != NULL;) {
1959 			mutex_enter(&vp->v_interlock);
1960 			if ((vp->v_iflag & VI_CLEAN) != 0 ||
1961 			    dev != vp->v_rdev || type != vp->v_type) {
1962 				mutex_exit(&vp->v_interlock);
1963 				vp = vp->v_specnext;
1964 				continue;
1965 			}
1966 			mutex_exit(&device_lock);
1967 			if (vget(vp, LK_INTERLOCK) == 0) {
1968 				VOP_REVOKE(vp, REVOKEALL);
1969 				vrele(vp);
1970 			}
1971 			mutex_enter(&device_lock);
1972 			vp = *vpp;
1973 		}
1974 	}
1975 	mutex_exit(&device_lock);
1976 }
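
/*
 * Usage sketch for vdevgone(): a disk driver's detach routine would
 * typically revoke every partition of the departing unit.  The driver
 * name, and hence the devsw structures, are hypothetical here.
 *
 *	int bmaj = bdevsw_lookup_major(&xyz_bdevsw);
 *	int cmaj = cdevsw_lookup_major(&xyz_cdevsw);
 *	int part, mn;
 *
 *	for (part = 0; part < MAXPARTITIONS; part++) {
 *		mn = unit * MAXPARTITIONS + part;
 *		vdevgone(bmaj, mn, mn, VBLK);
 *		vdevgone(cmaj, mn, mn, VCHR);
 *	}
 */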
1977 
1978 /*
1979  * Calculate the total number of references to a special device.
1980  */
1981 int
1982 vcount(vnode_t *vp)
1983 {
1984 	int count;
1985 
1986 	mutex_enter(&device_lock);
1987 	mutex_enter(&vp->v_interlock);
1988 	if (vp->v_specnode == NULL) {
1989 		count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0);
1990 		mutex_exit(&vp->v_interlock);
1991 		mutex_exit(&device_lock);
1992 		return (count);
1993 	}
1994 	mutex_exit(&vp->v_interlock);
1995 	count = vp->v_specnode->sn_dev->sd_opencnt;
1996 	mutex_exit(&device_lock);
1997 	return (count);
1998 }
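
/*
 * Usage sketch for vcount(): code implementing last-close semantics for
 * a device vnode can ask whether other opens of the underlying device
 * remain.  The surrounding close path is hypothetical:
 *
 *	if (vcount(vp) > 1)
 *		return 0;		(not the last close; nothing to do)
 *	(last close: release driver state here)
 */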
1999 
2000 /*
2001  * Eliminate all activity associated with the requested vnode
2002  * and with all vnodes aliased to the requested vnode.
2003  */
2004 void
2005 vrevoke(vnode_t *vp)
2006 {
2007 	vnode_t *vq, **vpp;
2008 	enum vtype type;
2009 	dev_t dev;
2010 
2011 	KASSERT(vp->v_usecount > 0);
2012 
2013 	mutex_enter(&vp->v_interlock);
2014 	if ((vp->v_iflag & VI_CLEAN) != 0) {
2015 		mutex_exit(&vp->v_interlock);
2016 		return;
2017 	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
2018 		atomic_inc_uint(&vp->v_usecount);
2019 		vclean(vp, DOCLOSE);
2020 		vrelel(vp, 0);
2021 		return;
2022 	} else {
2023 		dev = vp->v_rdev;
2024 		type = vp->v_type;
2025 		mutex_exit(&vp->v_interlock);
2026 	}
2027 
2028 	vpp = &specfs_hash[SPECHASH(dev)];
2029 	mutex_enter(&device_lock);
2030 	for (vq = *vpp; vq != NULL;) {
2031 		/* If clean or being cleaned, then ignore it. */
2032 		mutex_enter(&vq->v_interlock);
2033 		if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
2034 		    vq->v_rdev != dev || vq->v_type != type) {
2035 			mutex_exit(&vq->v_interlock);
2036 			vq = vq->v_specnext;
2037 			continue;
2038 		}
2039 		mutex_exit(&device_lock);
2040 		if (vq->v_usecount == 0) {
2041 			vremfree(vq);
2042 			vq->v_usecount = 1;
2043 		} else {
2044 			atomic_inc_uint(&vq->v_usecount);
2045 		}
2046 		vclean(vq, DOCLOSE);
2047 		vrelel(vq, 0);
2048 		mutex_enter(&device_lock);
2049 		vq = *vpp;
2050 	}
2051 	mutex_exit(&device_lock);
2052 }
2053 
2054 /*
2055  * sysctl helper routine to return list of supported fstypes
2056  */
2057 int
2058 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
2059 {
2060 	char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
2061 	char *where = oldp;
2062 	struct vfsops *v;
2063 	size_t needed, left, slen;
2064 	int error, first;
2065 
2066 	if (newp != NULL)
2067 		return (EPERM);
2068 	if (namelen != 0)
2069 		return (EINVAL);
2070 
2071 	first = 1;
2072 	error = 0;
2073 	needed = 0;
2074 	left = *oldlenp;
2075 
2076 	sysctl_unlock();
2077 	mutex_enter(&vfs_list_lock);
2078 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2079 		if (where == NULL)
2080 			needed += strlen(v->vfs_name) + 1;
2081 		else {
2082 			memset(bf, 0, sizeof(bf));
2083 			if (first) {
2084 				strncpy(bf, v->vfs_name, sizeof(bf));
2085 				first = 0;
2086 			} else {
2087 				bf[0] = ' ';
2088 				strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
2089 			}
2090 			bf[sizeof(bf)-1] = '\0';
2091 			slen = strlen(bf);
2092 			if (left < slen + 1)
2093 				break;
2094 			v->vfs_refcount++;
2095 			mutex_exit(&vfs_list_lock);
2096 			/* +1 to copy out the trailing NUL byte */
2097 			error = copyout(bf, where, slen + 1);
2098 			mutex_enter(&vfs_list_lock);
2099 			v->vfs_refcount--;
2100 			if (error)
2101 				break;
2102 			where += slen;
2103 			needed += slen;
2104 			left -= slen;
2105 		}
2106 	}
2107 	mutex_exit(&vfs_list_lock);
2108 	sysctl_relock();
2109 	*oldlenp = needed;
2110 	return (error);
2111 }
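
/*
 * From userland the list built above is read with sysctl(3).  A minimal
 * sketch, assuming the helper is attached at "vfs.generic.fstypes" as it
 * is on NetBSD:
 *
 *	char buf[1024];
 *	size_t len = sizeof(buf);
 *
 *	if (sysctlbyname("vfs.generic.fstypes", buf, &len, NULL, 0) == 0)
 *		printf("%s\n", buf);		(space-separated names)
 */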
2112 
2113 
2114 int kinfo_vdebug = 1;
2115 int kinfo_vgetfailed;
2116 #define KINFO_VNODESLOP	10
2117 /*
2118  * Dump vnode list (via sysctl).
2119  * Copyout address of vnode followed by vnode.
2120  */
2121 /* ARGSUSED */
2122 int
2123 sysctl_kern_vnode(SYSCTLFN_ARGS)
2124 {
2125 	char *where = oldp;
2126 	size_t *sizep = oldlenp;
2127 	struct mount *mp, *nmp;
2128 	vnode_t *vp, *mvp, vbuf;
2129 	char *bp = where, *savebp;
2130 	char *ewhere;
2131 	int error;
2132 
2133 	if (namelen != 0)
2134 		return (EOPNOTSUPP);
2135 	if (newp != NULL)
2136 		return (EPERM);
2137 
2138 #define VPTRSZ	sizeof(vnode_t *)
2139 #define VNODESZ	sizeof(vnode_t)
2140 	if (where == NULL) {
2141 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2142 		return (0);
2143 	}
2144 	ewhere = where + *sizep;
2145 
2146 	sysctl_unlock();
2147 	mutex_enter(&mountlist_lock);
2148 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2149 	     mp = nmp) {
2150 		if (vfs_busy(mp, &nmp)) {
2151 			continue;
2152 		}
2153 		savebp = bp;
2154 		/* Allocate a marker vnode. */
2155 		mvp = vnalloc(mp);
2156 		/* Should never fail for mp != NULL */
2157 		KASSERT(mvp != NULL);
2158 		mutex_enter(&mntvnode_lock);
2159 		for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
2160 			vmark(mvp, vp);
2161 			/*
2162 			 * Check that the vp is still associated with
2163 			 * this filesystem.  RACE: could have been
2164 			 * recycled onto the same filesystem.
2165 			 */
2166 			if (vp->v_mount != mp || vismarker(vp))
2167 				continue;
2168 			if (bp + VPTRSZ + VNODESZ > ewhere) {
2169 				(void)vunmark(mvp);
2170 				mutex_exit(&mntvnode_lock);
2171 				vnfree(mvp);
2172 				sysctl_relock();
2173 				*sizep = bp - where;
2174 				return (ENOMEM);
2175 			}
2176 			memcpy(&vbuf, vp, VNODESZ);
2177 			mutex_exit(&mntvnode_lock);
2178 			if ((error = copyout(&vp, bp, VPTRSZ)) ||
2179 			   (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
2180 			   	mutex_enter(&mntvnode_lock);
2181 				(void)vunmark(mvp);
2182 				mutex_exit(&mntvnode_lock);
2183 				vnfree(mvp);
2184 				sysctl_relock();
2185 				return (error);
2186 			}
2187 			bp += VPTRSZ + VNODESZ;
2188 			mutex_enter(&mntvnode_lock);
2189 		}
2190 		mutex_exit(&mntvnode_lock);
2191 		vnfree(mvp);
2192 		vfs_unbusy(mp, false, &nmp);
2193 	}
2194 	mutex_exit(&mountlist_lock);
2195 	sysctl_relock();
2196 
2197 	*sizep = bp - where;
2198 	return (0);
2199 }
2200 
2201 /*
2202  * Remove clean vnodes from a mountpoint's vnode list.
2203  */
2204 void
2205 vfs_scrubvnlist(struct mount *mp)
2206 {
2207 	vnode_t *vp, *nvp;
2208 
2209  retry:
2210 	mutex_enter(&mntvnode_lock);
2211 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
2212 		nvp = TAILQ_NEXT(vp, v_mntvnodes);
2213 		mutex_enter(&vp->v_interlock);
2214 		if ((vp->v_iflag & VI_CLEAN) != 0) {
2215 			TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
2216 			vp->v_mount = NULL;
2217 			mutex_exit(&mntvnode_lock);
2218 			mutex_exit(&vp->v_interlock);
2219 			vfs_destroy(mp);
2220 			goto retry;
2221 		}
2222 		mutex_exit(&vp->v_interlock);
2223 	}
2224 	mutex_exit(&mntvnode_lock);
2225 }
2226 
2227 /*
2228  * Check to see if a filesystem is mounted on a block device.
2229  */
2230 int
2231 vfs_mountedon(vnode_t *vp)
2232 {
2233 	vnode_t *vq;
2234 	int error = 0;
2235 
2236 	if (vp->v_type != VBLK)
2237 		return ENOTBLK;
2238 	if (vp->v_specmountpoint != NULL)
2239 		return (EBUSY);
2240 	mutex_enter(&device_lock);
2241 	for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
2242 	    vq = vq->v_specnext) {
2243 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
2244 			continue;
2245 		if (vq->v_specmountpoint != NULL) {
2246 			error = EBUSY;
2247 			break;
2248 		}
2249 	}
2250 	mutex_exit(&device_lock);
2251 	return (error);
2252 }
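
/*
 * Usage sketch for vfs_mountedon(): mount code typically rejects a
 * backing block device that is already in use before going any further.
 * devvp stands in for the device vnode being mounted:
 *
 *	error = vfs_mountedon(devvp);
 *	if (error)		(EBUSY if mounted, ENOTBLK if not a VBLK)
 *		return error;
 */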
2253 
2254 /*
2255  * Unmount all file systems.
2256  * We traverse the list in reverse order under the assumption that doing so
2257  * will avoid needing to worry about dependencies.
2258  */
2259 bool
2260 vfs_unmountall(struct lwp *l)
2261 {
2262 	printf("unmounting file systems...");
2263 	return vfs_unmountall1(l, true, true);
2264 }
2265 
2266 bool
2267 vfs_unmountall1(struct lwp *l, bool force, bool verbose)
2268 {
2269 	struct mount *mp, *nmp;
2270 	bool any_error, progress;
2271 	bool any_error, progress = false;
2272 
2273 	for (any_error = false, mp = CIRCLEQ_LAST(&mountlist);
2274 	     !CIRCLEQ_EMPTY(&mountlist);
2275 	     mp = nmp) {
2276 		nmp = CIRCLEQ_PREV(mp, mnt_list);
2277 #ifdef DEBUG
2278 		printf("\nunmounting %s (%s)...",
2279 		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2280 #endif
2281 		atomic_inc_uint(&mp->mnt_refcnt);
2282 		if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0)
2283 			progress = true;
2284 		else {
2285 			if (verbose) {
2286 				printf("unmount of %s failed with error %d\n",
2287 				    mp->mnt_stat.f_mntonname, error);
2288 			}
2289 			any_error = true;
2290 		}
2291 	}
2292 	if (verbose)
2293 		printf(" done\n");
2294 	if (any_error && verbose)
2295 		printf("WARNING: some file systems would not unmount\n");
2296 	return progress;
2297 }
2298 
2299 /*
2300  * Sync and unmount file systems before shutting down.
2301  */
2302 void
2303 vfs_shutdown(void)
2304 {
2305 	struct lwp *l;
2306 
2307 	/* XXX we're certainly not running in lwp0's context! */
2308 	l = (curlwp == NULL) ? &lwp0 : curlwp;
2309 
2310 	printf("syncing disks... ");
2311 
2312 	/* remove user processes from run queue */
2313 	suspendsched();
2314 	(void) spl0();
2315 
2316 	/* avoid coming back this way again if we panic. */
2317 	doing_shutdown = 1;
2318 
2319 	sys_sync(l, NULL, NULL);
2320 
2321 	/* Wait for sync to finish. */
2322 	if (buf_syncwait() != 0) {
2323 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
2324 		Debugger();
2325 #endif
2326 		printf("giving up\n");
2327 		return;
2328 	} else
2329 		printf("done\n");
2330 
2331 	/*
2332 	 * If we've panic'd, don't make the situation potentially
2333 	 * worse by unmounting the file systems.
2334 	 */
2335 	if (panicstr != NULL)
2336 		return;
2337 
2338 	/* Release inodes held by texts before update. */
2339 #ifdef notdef
2340 	vnshutdown();
2341 #endif
2342 	/* Unmount file systems. */
2343 	vfs_unmountall(l);
2344 }
2345 
2346 /*
2347  * Mount the root file system.  If the operator didn't specify a
2348  * file system to use, try all possible file systems until one
2349  * succeeds.
2350  */
2351 int
2352 vfs_mountroot(void)
2353 {
2354 	struct vfsops *v;
2355 	int error = ENODEV;
2356 
2357 	if (root_device == NULL)
2358 		panic("vfs_mountroot: root device unknown");
2359 
2360 	switch (device_class(root_device)) {
2361 	case DV_IFNET:
2362 		if (rootdev != NODEV)
2363 			panic("vfs_mountroot: rootdev set for DV_IFNET "
2364 			    "(0x%llx -> %llu,%llu)",
2365 			    (unsigned long long)rootdev,
2366 			    (unsigned long long)major(rootdev),
2367 			    (unsigned long long)minor(rootdev));
2368 		break;
2369 
2370 	case DV_DISK:
2371 		if (rootdev == NODEV)
2372 			panic("vfs_mountroot: rootdev not set for DV_DISK");
2373 		if (bdevvp(rootdev, &rootvp))
2374 			panic("vfs_mountroot: can't get vnode for rootdev");
2375 		error = VOP_OPEN(rootvp, FREAD, FSCRED);
2376 		if (error) {
2377 			printf("vfs_mountroot: can't open root device\n");
2378 			return (error);
2379 		}
2380 		break;
2381 
2382 	default:
2383 		printf("%s: inappropriate for root file system\n",
2384 		    device_xname(root_device));
2385 		return (ENODEV);
2386 	}
2387 
2388 	/*
2389 	 * If the user specified a root file system type, use it.  Make sure
2390 	 * the specified type exists and provides a vfs_mountroot() routine.
2391 	 */
2392 	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
2393 		v = vfs_getopsbyname(rootfstype);
2394 		error = EFTYPE;
2395 		if (v != NULL) {
2396 			if (v->vfs_mountroot != NULL) {
2397 				error = (v->vfs_mountroot)();
2398 			}
2399 			v->vfs_refcount--;
2400 		}
2401 		goto done;
2402 	}
2403 
2404 	/*
2405 	 * Try each file system currently configured into the kernel.
2406 	 */
2407 	mutex_enter(&vfs_list_lock);
2408 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2409 		if (v->vfs_mountroot == NULL)
2410 			continue;
2411 #ifdef DEBUG
2412 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
2413 #endif
2414 		v->vfs_refcount++;
2415 		mutex_exit(&vfs_list_lock);
2416 		error = (*v->vfs_mountroot)();
2417 		mutex_enter(&vfs_list_lock);
2418 		v->vfs_refcount--;
2419 		if (!error) {
2420 			aprint_normal("root file system type: %s\n",
2421 			    v->vfs_name);
2422 			break;
2423 		}
2424 	}
2425 	mutex_exit(&vfs_list_lock);
2426 
2427 	if (v == NULL) {
2428 		printf("no file system for %s", device_xname(root_device));
2429 		if (device_class(root_device) == DV_DISK)
2430 			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
2431 		printf("\n");
2432 		error = EFTYPE;
2433 	}
2434 
2435 done:
2436 	if (error && device_class(root_device) == DV_DISK) {
2437 		VOP_CLOSE(rootvp, FREAD, FSCRED);
2438 		vrele(rootvp);
2439 	}
2440 	return (error);
2441 }
2442 
2443 /*
2444  * Get a new unique fsid
2445  */
2446 void
2447 vfs_getnewfsid(struct mount *mp)
2448 {
2449 	static u_short xxxfs_mntid;
2450 	fsid_t tfsid;
2451 	int mtype;
2452 
2453 	mutex_enter(&mntid_lock);
2454 	mtype = makefstype(mp->mnt_op->vfs_name);
2455 	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
2456 	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
2457 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2458 	if (xxxfs_mntid == 0)
2459 		++xxxfs_mntid;
2460 	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
2461 	tfsid.__fsid_val[1] = mtype;
2462 	if (!CIRCLEQ_EMPTY(&mountlist)) {
2463 		while (vfs_getvfs(&tfsid)) {
2464 			tfsid.__fsid_val[0]++;
2465 			xxxfs_mntid++;
2466 		}
2467 	}
2468 	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
2469 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
2470 	mutex_exit(&mntid_lock);
2471 }
2472 
2473 /*
2474  * Make a 'unique' number from a mount type name.
2475  */
2476 long
2477 makefstype(const char *type)
2478 {
2479 	long rv;
2480 
2481 	for (rv = 0; *type; type++) {
2482 		rv <<= 2;
2483 		rv ^= *type;
2484 	}
2485 	return rv;
2486 }
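
/*
 * Worked example of the hash above for the name "ffs"
 * ('f' == 0x66, 's' == 0x73):
 *
 *	rv = (0x000 << 2) ^ 0x66 = 0x066
 *	rv = (0x066 << 2) ^ 0x66 = 0x1fe
 *	rv = (0x1fe << 2) ^ 0x73 = 0x78b
 *
 * so makefstype("ffs") yields 0x78b.
 */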
2487 
2488 /*
2489  * Set vnode attributes to VNOVAL
2490  */
2491 void
2492 vattr_null(struct vattr *vap)
2493 {
2494 
2495 	vap->va_type = VNON;
2496 
2497 	/*
2498 	 * Assign each member individually so that the initialization stays
2499 	 * correct even if the size and signedness of the members vary.
2500 	 */
2501 	vap->va_mode = VNOVAL;
2502 	vap->va_nlink = VNOVAL;
2503 	vap->va_uid = VNOVAL;
2504 	vap->va_gid = VNOVAL;
2505 	vap->va_fsid = VNOVAL;
2506 	vap->va_fileid = VNOVAL;
2507 	vap->va_size = VNOVAL;
2508 	vap->va_blocksize = VNOVAL;
2509 	vap->va_atime.tv_sec =
2510 	    vap->va_mtime.tv_sec =
2511 	    vap->va_ctime.tv_sec =
2512 	    vap->va_birthtime.tv_sec = VNOVAL;
2513 	vap->va_atime.tv_nsec =
2514 	    vap->va_mtime.tv_nsec =
2515 	    vap->va_ctime.tv_nsec =
2516 	    vap->va_birthtime.tv_nsec = VNOVAL;
2517 	vap->va_gen = VNOVAL;
2518 	vap->va_flags = VNOVAL;
2519 	vap->va_rdev = VNOVAL;
2520 	vap->va_bytes = VNOVAL;
2521 	vap->va_vaflags = 0;
2522 }
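
/*
 * Usage sketch for vattr_null(): callers clear the whole structure and
 * then set only the attributes they want changed before passing it to
 * VOP_SETATTR().  Here only the file size is updated; newsize and cred
 * are the caller's values:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = newsize;
 *	error = VOP_SETATTR(vp, &va, cred);
 */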
2523 
2524 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
2525 #define ARRAY_PRINT(idx, arr) \
2526     ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
2527 
2528 const char * const vnode_tags[] = { VNODE_TAGS };
2529 const char * const vnode_types[] = { VNODE_TYPES };
2530 const char vnode_flagbits[] = VNODE_FLAGBITS;
2531 
2532 /*
2533  * Print out a description of a vnode.
2534  */
2535 void
2536 vprint(const char *label, struct vnode *vp)
2537 {
2538 	struct vnlock *vl;
2539 	char bf[96];
2540 	int flag;
2541 
2542 	vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock);
2543 	flag = vp->v_iflag | vp->v_vflag | vp->v_uflag;
2544 	snprintb(bf, sizeof(bf), vnode_flagbits, flag);
2545 
2546 	if (label != NULL)
2547 		printf("%s: ", label);
2548 	printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), "
2549 	    "usecount %d, writecount %d, holdcount %d\n"
2550 	    "\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n",
2551 	    vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
2552 	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
2553 	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt,
2554 	    vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt);
2555 	if (vp->v_data != NULL) {
2556 		printf("\t");
2557 		VOP_PRINT(vp);
2558 	}
2559 }
2560 
2561 #ifdef DEBUG
2562 /*
2563  * List all of the locked vnodes in the system.
2564  * Called when debugging the kernel.
2565  */
2566 void
2567 printlockedvnodes(void)
2568 {
2569 	struct mount *mp, *nmp;
2570 	struct vnode *vp;
2571 
2572 	printf("Locked vnodes\n");
2573 	mutex_enter(&mountlist_lock);
2574 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
2575 	     mp = nmp) {
2576 		if (vfs_busy(mp, &nmp)) {
2577 			continue;
2578 		}
2579 		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
2580 			if (VOP_ISLOCKED(vp))
2581 				vprint(NULL, vp);
2582 		}
2583 		mutex_enter(&mountlist_lock);
2584 		vfs_unbusy(mp, false, &nmp);
2585 	}
2586 	mutex_exit(&mountlist_lock);
2587 }
2588 #endif
2589 
2590 /*
2591  * Do the usual access checking.
2592  * file_mode, uid and gid are from the vnode in question,
2593  * while acc_mode and cred are from the VOP_ACCESS() parameter list.
2594  */
2595 int
2596 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
2597     mode_t acc_mode, kauth_cred_t cred)
2598 {
2599 	mode_t mask;
2600 	int error, ismember;
2601 
2602 	/*
2603 	 * Super-user always gets read/write access, but execute access depends
2604 	 * on at least one execute bit being set.
2605 	 */
2606 	if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) {
2607 		if ((acc_mode & VEXEC) && type != VDIR &&
2608 		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2609 			return (EACCES);
2610 		return (0);
2611 	}
2612 
2613 	mask = 0;
2614 
2615 	/* Otherwise, check the owner. */
2616 	if (kauth_cred_geteuid(cred) == uid) {
2617 		if (acc_mode & VEXEC)
2618 			mask |= S_IXUSR;
2619 		if (acc_mode & VREAD)
2620 			mask |= S_IRUSR;
2621 		if (acc_mode & VWRITE)
2622 			mask |= S_IWUSR;
2623 		return ((file_mode & mask) == mask ? 0 : EACCES);
2624 	}
2625 
2626 	/* Otherwise, check the groups. */
2627 	error = kauth_cred_ismember_gid(cred, gid, &ismember);
2628 	if (error)
2629 		return (error);
2630 	if (kauth_cred_getegid(cred) == gid || ismember) {
2631 		if (acc_mode & VEXEC)
2632 			mask |= S_IXGRP;
2633 		if (acc_mode & VREAD)
2634 			mask |= S_IRGRP;
2635 		if (acc_mode & VWRITE)
2636 			mask |= S_IWGRP;
2637 		return ((file_mode & mask) == mask ? 0 : EACCES);
2638 	}
2639 
2640 	/* Otherwise, check everyone else. */
2641 	if (acc_mode & VEXEC)
2642 		mask |= S_IXOTH;
2643 	if (acc_mode & VREAD)
2644 		mask |= S_IROTH;
2645 	if (acc_mode & VWRITE)
2646 		mask |= S_IWOTH;
2647 	return ((file_mode & mask) == mask ? 0 : EACCES);
2648 }
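
/*
 * Usage sketch for vaccess(): a file system's VOP_ACCESS() implementation
 * normally finishes by delegating to this routine with the ownership and
 * mode taken from its own inode.  The xyzfs node fields are hypothetical:
 *
 *	return vaccess(vp->v_type, np->xn_mode & ALLPERMS,
 *	    np->xn_uid, np->xn_gid, ap->a_mode, ap->a_cred);
 */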
2649 
2650 /*
2651  * Given a file system name, look up the vfsops for that
2652  * file system, or return NULL if file system isn't present
2653  * in the kernel.
2654  */
2655 struct vfsops *
2656 vfs_getopsbyname(const char *name)
2657 {
2658 	struct vfsops *v;
2659 
2660 	mutex_enter(&vfs_list_lock);
2661 	LIST_FOREACH(v, &vfs_list, vfs_list) {
2662 		if (strcmp(v->vfs_name, name) == 0)
2663 			break;
2664 	}
2665 	if (v != NULL)
2666 		v->vfs_refcount++;
2667 	mutex_exit(&vfs_list_lock);
2668 
2669 	return (v);
2670 }
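
/*
 * Usage sketch for vfs_getopsbyname(): the routine returns the vfsops
 * with vfs_refcount already incremented, so the caller drops that
 * reference when done, as the other callers in this file do:
 *
 *	struct vfsops *v;
 *
 *	if ((v = vfs_getopsbyname("ffs")) == NULL)
 *		return ENODEV;
 *	(... use v ...)
 *	mutex_enter(&vfs_list_lock);
 *	v->vfs_refcount--;
 *	mutex_exit(&vfs_list_lock);
 */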
2671 
2672 void
2673 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
2674 {
2675 	const struct statvfs *mbp;
2676 
2677 	if (sbp == (mbp = &mp->mnt_stat))
2678 		return;
2679 
2680 	(void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
2681 	sbp->f_fsid = mbp->f_fsid;
2682 	sbp->f_owner = mbp->f_owner;
2683 	sbp->f_flag = mbp->f_flag;
2684 	sbp->f_syncwrites = mbp->f_syncwrites;
2685 	sbp->f_asyncwrites = mbp->f_asyncwrites;
2686 	sbp->f_syncreads = mbp->f_syncreads;
2687 	sbp->f_asyncreads = mbp->f_asyncreads;
2688 	(void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
2689 	(void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
2690 	    sizeof(sbp->f_fstypename));
2691 	(void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
2692 	    sizeof(sbp->f_mntonname));
2693 	(void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
2694 	    sizeof(sbp->f_mntfromname));
2695 	sbp->f_namemax = mbp->f_namemax;
2696 }
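
/*
 * Usage sketch for copy_statvfs_info(): a file system's statvfs routine
 * fills in the block and file counts itself and then lets this helper
 * copy the generic fields cached in mnt_stat.  xyzfs and its superblock
 * fields are hypothetical:
 *
 *	int
 *	xyzfs_statvfs(struct mount *mp, struct statvfs *sbp)
 *	{
 *		struct xyzfs_fs *fs = mp->mnt_data;
 *
 *		sbp->f_bsize = fs->fs_bsize;
 *		sbp->f_blocks = fs->fs_nblocks;
 *		sbp->f_bfree = fs->fs_nfree;
 *		copy_statvfs_info(sbp, mp);
 *		return 0;
 *	}
 */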
2697 
2698 int
2699 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
2700     const char *vfsname, struct mount *mp, struct lwp *l)
2701 {
2702 	int error;
2703 	size_t size;
2704 	struct statvfs *sfs = &mp->mnt_stat;
2705 	int (*fun)(const void *, void *, size_t, size_t *);
2706 
2707 	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
2708 	    sizeof(mp->mnt_stat.f_fstypename));
2709 
2710 	if (onp) {
2711 		struct cwdinfo *cwdi = l->l_proc->p_cwdi;
2712 		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
2713 		if (cwdi->cwdi_rdir != NULL) {
2714 			size_t len;
2715 			char *bp;
2716 			char *path = PNBUF_GET();
2717 
2718 			bp = path + MAXPATHLEN;
2719 			*--bp = '\0';
2720 			rw_enter(&cwdi->cwdi_lock, RW_READER);
2721 			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
2722 			    path, MAXPATHLEN / 2, 0, l);
2723 			rw_exit(&cwdi->cwdi_lock);
2724 			if (error) {
2725 				PNBUF_PUT(path);
2726 				return error;
2727 			}
2728 
2729 			len = strlen(bp);
2730 			if (len > sizeof(sfs->f_mntonname) - 1)
2731 				len = sizeof(sfs->f_mntonname) - 1;
2732 			(void)strncpy(sfs->f_mntonname, bp, len);
2733 			PNBUF_PUT(path);
2734 
2735 			if (len < sizeof(sfs->f_mntonname) - 1) {
2736 				error = (*fun)(onp, &sfs->f_mntonname[len],
2737 				    sizeof(sfs->f_mntonname) - len - 1, &size);
2738 				if (error)
2739 					return error;
2740 				size += len;
2741 			} else {
2742 				size = len;
2743 			}
2744 		} else {
2745 			error = (*fun)(onp, &sfs->f_mntonname,
2746 			    sizeof(sfs->f_mntonname) - 1, &size);
2747 			if (error)
2748 				return error;
2749 		}
2750 		(void)memset(sfs->f_mntonname + size, 0,
2751 		    sizeof(sfs->f_mntonname) - size);
2752 	}
2753 
2754 	if (fromp) {
2755 		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
2756 		error = (*fun)(fromp, sfs->f_mntfromname,
2757 		    sizeof(sfs->f_mntfromname) - 1, &size);
2758 		if (error)
2759 			return error;
2760 		(void)memset(sfs->f_mntfromname + size, 0,
2761 		    sizeof(sfs->f_mntfromname) - size);
2762 	}
2763 	return 0;
2764 }
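
/*
 * Usage sketch for set_statvfs_info(): file systems call this from their
 * mount routine once the mount has succeeded, handing over the
 * user-supplied mount point and device strings.  "path" and "args" stand
 * in for the particular file system's mount arguments:
 *
 *	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
 *	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
 */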
2765 
2766 void
2767 vfs_timestamp(struct timespec *ts)
2768 {
2769 
2770 	nanotime(ts);
2771 }
2772 
2773 time_t	rootfstime;			/* recorded root fs time, if known */
2774 void
2775 setrootfstime(time_t t)
2776 {
2777 	rootfstime = t;
2778 }
2779 
2780 /*
2781  * Sham lock manager for vnodes.  This is a temporary measure.
2782  */
2783 int
2784 vlockmgr(struct vnlock *vl, int flags)
2785 {
2786 
2787 	KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);
2788 
2789 	switch (flags & LK_TYPE_MASK) {
2790 	case LK_SHARED:
2791 		if (rw_tryenter(&vl->vl_lock, RW_READER)) {
2792 			return 0;
2793 		}
2794 		if ((flags & LK_NOWAIT) != 0) {
2795 			return EBUSY;
2796 		}
2797 		rw_enter(&vl->vl_lock, RW_READER);
2798 		return 0;
2799 
2800 	case LK_EXCLUSIVE:
2801 		if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
2802 			return 0;
2803 		}
2804 		if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
2805 		    rw_write_held(&vl->vl_lock)) {
2806 			vl->vl_recursecnt++;
2807 			return 0;
2808 		}
2809 		if ((flags & LK_NOWAIT) != 0) {
2810 			return EBUSY;
2811 		}
2812 		rw_enter(&vl->vl_lock, RW_WRITER);
2813 		return 0;
2814 
2815 	case LK_RELEASE:
2816 		if (vl->vl_recursecnt != 0) {
2817 			KASSERT(rw_write_held(&vl->vl_lock));
2818 			vl->vl_recursecnt--;
2819 			return 0;
2820 		}
2821 		rw_exit(&vl->vl_lock);
2822 		return 0;
2823 
2824 	default:
2825 		panic("vlockmgr: flags %x", flags);
2826 	}
2827 }
2828 
2829 int
2830 vlockstatus(struct vnlock *vl)
2831 {
2832 
2833 	if (rw_write_held(&vl->vl_lock)) {
2834 		return LK_EXCLUSIVE;
2835 	}
2836 	if (rw_read_held(&vl->vl_lock)) {
2837 		return LK_SHARED;
2838 	}
2839 	return 0;
2840 }
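
/*
 * Usage sketch for the sham lock manager: the vnode's lock is driven
 * with the lockmgr-style flags handled above, e.g.
 *
 *	struct vnlock *vl = vp->v_vnlock;
 *
 *	(void)vlockmgr(vl, LK_EXCLUSIVE);
 *	KASSERT(vlockstatus(vl) == LK_EXCLUSIVE);
 *	(... modify the vnode ...)
 *	(void)vlockmgr(vl, LK_RELEASE);
 */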
2841 
2842 /*
2843  * mount_specific_key_create --
2844  *	Create a key for subsystem mount-specific data.
2845  */
2846 int
2847 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
2848 {
2849 
2850 	return (specificdata_key_create(mount_specificdata_domain, keyp, dtor));
2851 }
2852 
2853 /*
2854  * mount_specific_key_delete --
2855  *	Delete a key for subsystem mount-specific data.
2856  */
2857 void
2858 mount_specific_key_delete(specificdata_key_t key)
2859 {
2860 
2861 	specificdata_key_delete(mount_specificdata_domain, key);
2862 }
2863 
2864 /*
2865  * mount_initspecific --
2866  *	Initialize a mount's specificdata container.
2867  */
2868 void
2869 mount_initspecific(struct mount *mp)
2870 {
2871 	int error;
2872 
2873 	error = specificdata_init(mount_specificdata_domain,
2874 				  &mp->mnt_specdataref);
2875 	KASSERT(error == 0);
2876 }
2877 
2878 /*
2879  * mount_finispecific --
2880  *	Finalize a mount's specificdata container.
2881  */
2882 void
2883 mount_finispecific(struct mount *mp)
2884 {
2885 
2886 	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
2887 }
2888 
2889 /*
2890  * mount_getspecific --
2891  *	Return mount-specific data corresponding to the specified key.
2892  */
2893 void *
2894 mount_getspecific(struct mount *mp, specificdata_key_t key)
2895 {
2896 
2897 	return (specificdata_getspecific(mount_specificdata_domain,
2898 					 &mp->mnt_specdataref, key));
2899 }
2900 
2901 /*
2902  * mount_setspecific --
2903  *	Set mount-specific data corresponding to the specified key.
2904  */
2905 void
2906 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
2907 {
2908 
2909 	specificdata_setspecific(mount_specificdata_domain,
2910 				 &mp->mnt_specdataref, key, data);
2911 }
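
/*
 * Usage sketch for the mount_*specific() interface: a subsystem creates
 * a key once at initialization and then hangs private data off each
 * mount.  All xyz_* names are hypothetical:
 *
 *	static specificdata_key_t xyz_mount_key;
 *
 *	(once, at subsystem initialization)
 *	error = mount_specific_key_create(&xyz_mount_key, xyz_dtor);
 *
 *	(per mount)
 *	mount_setspecific(mp, xyz_mount_key, xyz_data);
 *	...
 *	data = mount_getspecific(mp, xyz_mount_key);
 */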
2912 
2913 int
2914 VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
2915 {
2916 	int error;
2917 
2918 	KERNEL_LOCK(1, NULL);
2919 	error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
2920 	KERNEL_UNLOCK_ONE(NULL);
2921 
2922 	return error;
2923 }
2924 
2925 int
2926 VFS_START(struct mount *mp, int a)
2927 {
2928 	int error;
2929 
2930 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2931 		KERNEL_LOCK(1, NULL);
2932 	}
2933 	error = (*(mp->mnt_op->vfs_start))(mp, a);
2934 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2935 		KERNEL_UNLOCK_ONE(NULL);
2936 	}
2937 
2938 	return error;
2939 }
2940 
2941 int
2942 VFS_UNMOUNT(struct mount *mp, int a)
2943 {
2944 	int error;
2945 
2946 	KERNEL_LOCK(1, NULL);
2947 	error = (*(mp->mnt_op->vfs_unmount))(mp, a);
2948 	KERNEL_UNLOCK_ONE(NULL);
2949 
2950 	return error;
2951 }
2952 
2953 int
2954 VFS_ROOT(struct mount *mp, struct vnode **a)
2955 {
2956 	int error;
2957 
2958 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2959 		KERNEL_LOCK(1, NULL);
2960 	}
2961 	error = (*(mp->mnt_op->vfs_root))(mp, a);
2962 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2963 		KERNEL_UNLOCK_ONE(NULL);
2964 	}
2965 
2966 	return error;
2967 }
2968 
2969 int
2970 VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c)
2971 {
2972 	int error;
2973 
2974 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2975 		KERNEL_LOCK(1, NULL);
2976 	}
2977 	error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c);
2978 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2979 		KERNEL_UNLOCK_ONE(NULL);
2980 	}
2981 
2982 	return error;
2983 }
2984 
2985 int
2986 VFS_STATVFS(struct mount *mp, struct statvfs *a)
2987 {
2988 	int error;
2989 
2990 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2991 		KERNEL_LOCK(1, NULL);
2992 	}
2993 	error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
2994 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
2995 		KERNEL_UNLOCK_ONE(NULL);
2996 	}
2997 
2998 	return error;
2999 }
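
/*
 * Callers use these VFS_*() wrappers rather than jumping through mnt_op
 * directly, so the kernel_lock gating for non-MPSAFE file systems is
 * applied in one place.  For example, refreshing a mount's statvfs
 * cache:
 *
 *	error = VFS_STATVFS(mp, &mp->mnt_stat);
 */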
3000 
3001 int
3002 VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
3003 {
3004 	int error;
3005 
3006 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3007 		KERNEL_LOCK(1, NULL);
3008 	}
3009 	error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
3010 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3011 		KERNEL_UNLOCK_ONE(NULL);
3012 	}
3013 
3014 	return error;
3015 }
3016 
3017 int
3018 VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b)
3019 {
3020 	int error;
3021 
3022 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3023 		KERNEL_LOCK(1, NULL);
3024 	}
3025 	error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b);
3026 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3027 		KERNEL_UNLOCK_ONE(NULL);
3028 	}
3029 
3030 	return error;
3031 }
3032 
3033 int
3034 VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
3035 {
3036 	int error;
3037 
3038 	if ((vp->v_vflag & VV_MPSAFE) == 0) {
3039 		KERNEL_LOCK(1, NULL);
3040 	}
3041 	error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
3042 	if ((vp->v_vflag & VV_MPSAFE) == 0) {
3043 		KERNEL_UNLOCK_ONE(NULL);
3044 	}
3045 
3046 	return error;
3047 }
3048 
3049 int
3050 VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
3051 {
3052 	int error;
3053 
3054 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3055 		KERNEL_LOCK(1, NULL);
3056 	}
3057 	error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
3058 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3059 		KERNEL_UNLOCK_ONE(NULL);
3060 	}
3061 
3062 	return error;
3063 }
3064 
3065 int
3066 VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
3067 {
3068 	int error;
3069 
3070 	KERNEL_LOCK(1, NULL);		/* XXXSMP check ffs */
3071 	error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
3072 	KERNEL_UNLOCK_ONE(NULL);	/* XXX */
3073 
3074 	return error;
3075 }
3076 
3077 int
3078 VFS_SUSPENDCTL(struct mount *mp, int a)
3079 {
3080 	int error;
3081 
3082 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3083 		KERNEL_LOCK(1, NULL);
3084 	}
3085 	error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
3086 	if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
3087 		KERNEL_UNLOCK_ONE(NULL);
3088 	}
3089 
3090 	return error;
3091 }
3092 
3093 #if defined(DDB) || defined(DEBUGPRINT)
3094 static const char buf_flagbits[] = BUF_FLAGBITS;
3095 
3096 void
3097 vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
3098 {
3099 	char bf[1024];
3100 
3101 	(*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
3102 	    PRIx64 " dev 0x%x\n",
3103 	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
3104 
3105 	snprintb(bf, sizeof(bf),
3106 	    buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags);
3107 	(*pr)("  error %d flags 0x%s\n", bp->b_error, bf);
3108 
3109 	(*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
3110 		  bp->b_bufsize, bp->b_bcount, bp->b_resid);
3111 	(*pr)("  data %p saveaddr %p\n",
3112 		  bp->b_data, bp->b_saveaddr);
3113 	(*pr)("  iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
3114 }
3115 
3116 
3117 void
3118 vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
3119 {
3120 	char bf[256];
3121 
3122 	uvm_object_printit(&vp->v_uobj, full, pr);
3123 	snprintb(bf, sizeof(bf),
3124 	    vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag);
3125 	(*pr)("\nVNODE flags %s\n", bf);
3126 	(*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n",
3127 	      vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize);
3128 
3129 	(*pr)("data %p writecount %ld holdcnt %ld\n",
3130 	      vp->v_data, vp->v_writecount, vp->v_holdcnt);
3131 
3132 	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
3133 	      ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
3134 	      ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
3135 	      vp->v_mount, vp->v_mountedhere);
3136 
3137 	(*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock);
3138 
3139 	if (full) {
3140 		struct buf *bp;
3141 
3142 		(*pr)("clean bufs:\n");
3143 		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
3144 			(*pr)(" bp %p\n", bp);
3145 			vfs_buf_print(bp, full, pr);
3146 		}
3147 
3148 		(*pr)("dirty bufs:\n");
3149 		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
3150 			(*pr)(" bp %p\n", bp);
3151 			vfs_buf_print(bp, full, pr);
3152 		}
3153 	}
3154 }
3155 
3156 void
3157 vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
3158 {
3159 	char sbuf[256];
3160 
3161 	(*pr)("vnodecovered = %p syncer = %p data = %p\n",
3162 			mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
3163 
3164 	(*pr)("fs_bshift %d dev_bshift = %d\n",
3165 			mp->mnt_fs_bshift,mp->mnt_dev_bshift);
3166 
3167 	snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag);
3168 	(*pr)("flag = %s\n", sbuf);
3169 
3170 	snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag);
3171 	(*pr)("iflag = %s\n", sbuf);
3172 
3173 	(*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt,
3174 	    &mp->mnt_unmounting, &mp->mnt_updating);
3175 
3176 	(*pr)("statvfs cache:\n");
3177 	(*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
3178 	(*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
3179 	(*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
3180 
3181 	(*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
3182 	(*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
3183 	(*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
3184 	(*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
3185 
3186 	(*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
3187 	(*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
3188 	(*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
3189 	(*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
3190 
3191 	(*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
3192 			mp->mnt_stat.f_fsidx.__fsid_val[0],
3193 			mp->mnt_stat.f_fsidx.__fsid_val[1]);
3194 
3195 	(*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
3196 	(*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
3197 
3198 	snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag);
3199 
3200 	(*pr)("\tflag = %s\n",sbuf);
3201 	(*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
3202 	(*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
3203 	(*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
3204 	(*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
3205 	(*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
3206 	(*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
3207 	(*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
3208 
3209 	{
3210 		int cnt = 0;
3211 		struct vnode *vp;
3212 		(*pr)("locked vnodes =");
3213 		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3214 			if (VOP_ISLOCKED(vp)) {
3215 				if ((++cnt % 6) == 0) {
3216 					(*pr)(" %p,\n\t", vp);
3217 				} else {
3218 					(*pr)(" %p,", vp);
3219 				}
3220 			}
3221 		}
3222 		(*pr)("\n");
3223 	}
3224 
3225 	if (full) {
3226 		int cnt = 0;
3227 		struct vnode *vp;
3228 		(*pr)("all vnodes =");
3229 		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
3230 			if (!TAILQ_NEXT(vp, v_mntvnodes)) {
3231 				(*pr)(" %p", vp);
3232 			} else if ((++cnt % 6) == 0) {
3233 				(*pr)(" %p,\n\t", vp);
3234 			} else {
3235 				(*pr)(" %p,", vp);
3236 			}
3237 		}
3238 		(*pr)("\n");
3239 	}
3240 }
3241 #endif /* DDB || DEBUGPRINT */
3242 
3243