xref: /openbsd-src/sys/kern/vfs_subr.c (revision 3a3fbb3f2e2521ab7c4a56b7ff7462ebd9095ec5)
1 /*	$OpenBSD: vfs_subr.c,v 1.80 2001/12/19 08:58:06 art Exp $	*/
2 /*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
42  */
43 
44 /*
45  * External virtual filesystem routines
46  */
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/proc.h>
51 #include <sys/mount.h>
52 #include <sys/time.h>
53 #include <sys/fcntl.h>
54 #include <sys/kernel.h>
55 #include <sys/vnode.h>
56 #include <sys/stat.h>
57 #include <sys/namei.h>
58 #include <sys/ucred.h>
59 #include <sys/buf.h>
60 #include <sys/errno.h>
61 #include <sys/malloc.h>
62 #include <sys/domain.h>
63 #include <sys/mbuf.h>
64 #include <sys/syscallargs.h>
65 #include <sys/pool.h>
66 
67 #include <uvm/uvm_extern.h>
68 #include <sys/sysctl.h>
69 
70 #include <miscfs/specfs/specdev.h>
71 
72 enum vtype iftovt_tab[16] = {
73 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
74 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
75 };
76 int	vttoif_tab[9] = {
77 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
78 	S_IFSOCK, S_IFIFO, S_IFMT,
79 };
80 
81 int doforce = 1;		/* 1 => permit forcible unmounting */
82 int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
83 int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */
84 
85 /*
86  * Insq/Remq for the vnode usage lists.
87  */
88 #define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
89 #define	bufremvn(bp) {							\
90 	LIST_REMOVE(bp, b_vnbufs);					\
91 	(bp)->b_vnbufs.le_next = NOLIST;				\
92 }
93 
94 struct freelst vnode_hold_list;   /* list of vnodes referencing buffers */
95 struct freelst vnode_free_list;   /* vnode free list */
96 
97 struct mntlist mountlist;			/* mounted filesystem list */
98 struct simplelock mountlist_slock;
99 static struct simplelock mntid_slock;
100 struct simplelock mntvnode_slock;
101 struct simplelock vnode_free_list_slock;
102 struct simplelock spechash_slock;
103 
104 void	vclean __P((struct vnode *, int, struct proc *));
105 
106 void insmntque __P((struct vnode *, struct mount *));
107 int getdevvp __P((dev_t, struct vnode **, enum vtype));
108 
109 int vfs_hang_addrlist __P((struct mount *, struct netexport *,
110 				  struct export_args *));
111 int vfs_free_netcred __P((struct radix_node *, void *));
112 void vfs_free_addrlist __P((struct netexport *));
113 static __inline__ void vputonfreelist __P((struct vnode *));
114 
115 int vflush_vnode(struct vnode *, void *);
116 
117 #ifdef DEBUG
118 void printlockedvnodes __P((void));
119 #endif
120 
121 #define VN_KNOTE(vp, b) \
122 	KNOTE((struct klist *)&vp->v_selectinfo.vsi_selinfo.si_note, (b))
123 
124 struct pool vnode_pool;
125 
126 /*
127  * Initialize the vnode management data structures.
128  */
129 void
130 vntblinit()
131 {
132 
133 	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
134 		0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
135 	simple_lock_init(&mntvnode_slock);
136 	simple_lock_init(&mntid_slock);
137 	simple_lock_init(&spechash_slock);
138 	TAILQ_INIT(&vnode_hold_list);
139 	TAILQ_INIT(&vnode_free_list);
140 	simple_lock_init(&vnode_free_list_slock);
141 	CIRCLEQ_INIT(&mountlist);
142 	simple_lock_init(&mountlist_slock);
143 	/*
144 	 * Initialize the filesystem syncer.
145 	 */
146 	vn_initialize_syncerd();
147 }
148 
149 
150 /*
151  * Mark a mount point as busy. Used to synchronize access and to delay
152  * unmounting. Interlock is not released on failure.
153  */
154 
155 int
156 vfs_busy(mp, flags, interlkp, p)
157 	struct mount *mp;
158 	int flags;
159 	struct simplelock *interlkp;
160 	struct proc *p;
161 {
162 	int lkflags;
163 
164 	if (mp->mnt_flag & MNT_UNMOUNT) {
165 		if (flags & LK_NOWAIT)
166 			return (ENOENT);
167 		mp->mnt_flag |= MNT_MWAIT;
168 		if (interlkp)
169 			simple_unlock(interlkp);
170 		/*
171 		 * Since all busy locks are shared except the exclusive
172 		 * lock granted when unmounting, the only place that a
173 		 * wakeup needs to be done is at the release of the
174 		 * exclusive lock at the end of dounmount.
175 		 */
176  		sleep((caddr_t)mp, PVFS);
177 		if (interlkp)
178 			simple_lock(interlkp);
179 		return (ENOENT);
180 	}
181 	lkflags = LK_SHARED;
182 	if (interlkp)
183 		lkflags |= LK_INTERLOCK;
184 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
185 		panic("vfs_busy: unexpected lock failure");
186 	return (0);
187 }
188 
189 
190 /*
191  * Free a busy file system
192  */
193 void
194 vfs_unbusy(mp, p)
195 	struct mount *mp;
196 	struct proc *p;
197 {
198 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
199 }
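
/*
 * A sketch of the usual vfs_busy()/vfs_unbusy() pairing, following the
 * pattern used by printlockedvnodes() and sysctl_vnode() below: hold
 * mountlist_slock while walking the mount list, let vfs_busy() release
 * it through the interlock argument, and skip any file system that is
 * being unmounted.
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
 *	    mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *			continue;
 *		}
 *		... operate on mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 */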
200 
201 int
202 vfs_isbusy(struct mount *mp)
203 {
204 	return (lockstatus(&mp->mnt_lock));
205 }
206 
207 /*
208  * Lookup a filesystem type, and if found allocate and initialize
209  * a mount structure for it.
210  *
211  * Devname is usually updated by mount(8) after booting.
212  */
213 
214 int
215 vfs_rootmountalloc(fstypename, devname, mpp)
216 	char *fstypename;
217 	char *devname;
218 	struct mount **mpp;
219 {
220 	struct proc *p = curproc;	/* XXX */
221 	struct vfsconf *vfsp;
222 	struct mount *mp;
223 
224 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
225 		if (!strcmp(vfsp->vfc_name, fstypename))
226 			break;
227 	if (vfsp == NULL)
228 		return (ENODEV);
229 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
230 	bzero((char *)mp, (u_long)sizeof(struct mount));
231 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
232 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
233 	LIST_INIT(&mp->mnt_vnodelist);
234 	mp->mnt_vfc = vfsp;
235 	mp->mnt_op = vfsp->vfc_vfsops;
236 	mp->mnt_flag = MNT_RDONLY;
237 	mp->mnt_vnodecovered = NULLVP;
238 	vfsp->vfc_refcount++;
239 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
240 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
241 	mp->mnt_stat.f_mntonname[0] = '/';
242 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
243 	*mpp = mp;
244  	return (0);
245  }
246 
247 /*
248  * Find an appropriate filesystem to use for the root. If a filesystem
249  * has not been preselected, walk through the list of known filesystems
250  * trying those that have mountroot routines, and try them until one
251  * works or we have tried them all.
252   */
253 int
254 vfs_mountroot()
255 {
256 	struct vfsconf *vfsp;
257 	extern int (*mountroot)(void);
258 	int error;
259 
260 	if (mountroot != NULL)
261 		return ((*mountroot)());
262 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
263 		if (vfsp->vfc_mountroot == NULL)
264 			continue;
265 		if ((error = (*vfsp->vfc_mountroot)()) == 0)
266 			return (0);
267 		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
268  	}
269 	return (ENODEV);
270 }
271 
272 /*
273  * Lookup a mount point by filesystem identifier.
274  */
275 struct mount *
276 vfs_getvfs(fsid)
277 	fsid_t *fsid;
278 {
279 	register struct mount *mp;
280 
281 	simple_lock(&mountlist_slock);
282 	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
283 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
284 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
285 			simple_unlock(&mountlist_slock);
286 			return (mp);
287 		}
288 	}
289 	simple_unlock(&mountlist_slock);
290 	return ((struct mount *)0);
291 }
292 
293 
294 /*
295  * Get a new unique fsid
296  */
297 void
298 vfs_getnewfsid(mp)
299 	struct mount *mp;
300 {
301 	static u_short xxxfs_mntid;
302 
303 	fsid_t tfsid;
304 	int mtype;
305 
306 	simple_lock(&mntid_slock);
307 	mtype = mp->mnt_vfc->vfc_typenum;
308 	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
309 	mp->mnt_stat.f_fsid.val[1] = mtype;
310 	if (xxxfs_mntid == 0)
311 		++xxxfs_mntid;
312 	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
313 	tfsid.val[1] = mtype;
314 	if (!CIRCLEQ_EMPTY(&mountlist)) {
315 		while (vfs_getvfs(&tfsid)) {
316 			tfsid.val[0]++;
317 			xxxfs_mntid++;
318 		}
319 	}
320 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
321 	simple_unlock(&mntid_slock);
322 }
323 
324 /*
325  * Make a 'unique' number from a mount type name.
326  * Note that this is no longer used for ffs which
327  * now has an on-disk filesystem id.
328  */
329 long
330 makefstype(type)
331 	char *type;
332 {
333 	long rv;
334 
335 	for (rv = 0; *type; type++) {
336 		rv <<= 2;
337 		rv ^= *type;
338 	}
339 	return rv;
340 }
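
/*
 * A worked example: makefstype("ffs") folds the bytes 'f', 'f', 's'
 * into ((('f' << 2) ^ 'f') << 2) ^ 's', i.e. 0x78b.
 */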
341 
342 /*
343  * Set vnode attributes to VNOVAL
344  */
345 void
346 vattr_null(vap)
347 	register struct vattr *vap;
348 {
349 
350 	vap->va_type = VNON;
351 	/* XXX These next two used to be one line, but for a GCC bug. */
352 	vap->va_size = VNOVAL;
353 	vap->va_bytes = VNOVAL;
354 	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
355 		vap->va_fsid = vap->va_fileid =
356 		vap->va_blocksize = vap->va_rdev =
357 		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
358 		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
359 		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
360 		vap->va_flags = vap->va_gen = VNOVAL;
361 	vap->va_vaflags = 0;
362 }
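
/*
 * The usual pattern is to clear a vattr before a VOP_SETATTR() call so
 * that only the fields explicitly filled in are changed; a sketch,
 * truncating a file to zero length:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */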
363 
364 /*
365  * Routines having to do with the management of the vnode table.
366  */
367 extern int (**dead_vnodeop_p) __P((void *));
368 long numvnodes;
369 
370 /*
371  * Return the next vnode from the free list.
372  */
373 int
374 getnewvnode(tag, mp, vops, vpp)
375 	enum vtagtype tag;
376 	struct mount *mp;
377 	int (**vops) __P((void *));
378 	struct vnode **vpp;
379 {
380 	struct proc *p = curproc;			/* XXX */
381 	struct freelst *listhd;
382 	static int toggle;
383 	struct vnode *vp;
384 	int s;
385 
386 	/*
387 	 * We must choose whether to allocate a new vnode or recycle an
388 	 * existing one. The criterion for allocating a new one is that
389 	 * the total number of vnodes is less than the number desired or
390 	 * there are no vnodes on either free list. Generally we only
391 	 * want to recycle vnodes that have no buffers associated with
392 	 * them, so we look first on the vnode_free_list. If it is empty,
393 	 * we next consider vnodes with referencing buffers on the
394 	 * vnode_hold_list. The toggle ensures that half the time we
395 	 * will use a buffer from the vnode_hold_list, and half the time
396 	 * we will allocate a new one unless the list has grown to twice
397 	 * the desired size. We are reluctant to recycle vnodes from the
398 	 * vnode_hold_list because we will lose the identity of all its
399 	 * referencing buffers.
400 	 */
401 	toggle ^= 1;
402 	if (numvnodes > 2 * desiredvnodes)
403 		toggle = 0;
404 
405 	simple_lock(&vnode_free_list_slock);
406 	s = splbio();
407 	if ((numvnodes < desiredvnodes) ||
408 	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
409 	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
410 		splx(s);
411 		simple_unlock(&vnode_free_list_slock);
412 		vp = pool_get(&vnode_pool, PR_WAITOK);
413 		bzero((char *)vp, sizeof *vp);
414 		numvnodes++;
415 	} else {
416 		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
417 		    vp = TAILQ_NEXT(vp, v_freelist)) {
418 			if (simple_lock_try(&vp->v_interlock))
419 				break;
420 		}
421 		/*
422 		 * Unless this is a bad time of the month, at most
423 		 * the first NCPUS items on the free list are
424 		 * locked, so this is close enough to being empty.
425 		 */
426 		if (vp == NULL) {
427 			splx(s);
428 			simple_unlock(&vnode_free_list_slock);
429 			tablefull("vnode");
430 			*vpp = 0;
431 			return (ENFILE);
432 		}
433 		if (vp->v_usecount) {
434 			vprint("free vnode", vp);
435 			panic("free vnode isn't");
436 		}
437 
438 		TAILQ_REMOVE(listhd, vp, v_freelist);
439 		vp->v_bioflag &= ~VBIOONFREELIST;
440 		splx(s);
441 
442 		simple_unlock(&vnode_free_list_slock);
443 		if (vp->v_type != VBAD)
444 			vgonel(vp, p);
445 		else
446 			simple_unlock(&vp->v_interlock);
447 #ifdef DIAGNOSTIC
448 		if (vp->v_data) {
449 			vprint("cleaned vnode", vp);
450 			panic("cleaned vnode isn't");
451 		}
452 		s = splbio();
453 		if (vp->v_numoutput)
454 			panic("Clean vnode has pending I/O's");
455 		splx(s);
456 #endif
457 		vp->v_flag = 0;
458 		vp->v_socket = 0;
459 	}
460 	vp->v_type = VNON;
461 	cache_purge(vp);
462 	vp->v_tag = tag;
463 	vp->v_op = vops;
464 	insmntque(vp, mp);
465 	*vpp = vp;
466 	vp->v_usecount = 1;
467 	vp->v_data = 0;
468 	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
469 	return (0);
470 }
471 
472 /*
473  * Move a vnode from one mount queue to another.
474  */
475 void
476 insmntque(vp, mp)
477 	register struct vnode *vp;
478 	register struct mount *mp;
479 {
480 	simple_lock(&mntvnode_slock);
481 	/*
482 	 * Delete from old mount point vnode list, if on one.
483 	 */
484 
485 	if (vp->v_mount != NULL)
486 		LIST_REMOVE(vp, v_mntvnodes);
487 	/*
488 	 * Insert into list of vnodes for the new mount point, if available.
489 	 */
490 	if ((vp->v_mount = mp) != NULL)
491 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
492 	simple_unlock(&mntvnode_slock);
493 }
494 
495 
496 /*
497  * Create a vnode for a block device.
498  * Used for root filesystem, argdev, and swap areas.
499  * Also used for memory file system special devices.
500  */
501 int
502 bdevvp(dev, vpp)
503 	dev_t dev;
504 	struct vnode **vpp;
505 {
506 
507 	return (getdevvp(dev, vpp, VBLK));
508 }
509 
510 /*
511  * Create a vnode for a character device.
512  * Used for kernfs and some console handling.
513  */
514 int
515 cdevvp(dev, vpp)
516 	dev_t dev;
517 	struct vnode **vpp;
518 {
519 
520 	return (getdevvp(dev, vpp, VCHR));
521 }
522 
523 /*
524  * Create a vnode for a device.
525  * Used by bdevvp (block device) for root file system etc.,
526  * and by cdevvp (character device) for console and kernfs.
527  */
528 int
529 getdevvp(dev, vpp, type)
530 	dev_t dev;
531 	struct vnode **vpp;
532 	enum vtype type;
533 {
534 	register struct vnode *vp;
535 	struct vnode *nvp;
536 	int error;
537 
538 	if (dev == NODEV) {
539 		*vpp = NULLVP;
540 		return (0);
541 	}
542 	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
543 	if (error) {
544 		*vpp = NULLVP;
545 		return (error);
546 	}
547 	vp = nvp;
548 	vp->v_type = type;
549 	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
550 		vput(vp);
551 		vp = nvp;
552 	}
553 	*vpp = vp;
554 	return (0);
555 }
556 
557 /*
558  * Check to see if the new vnode represents a special device
559  * for which we already have a vnode (either because of
560  * bdevvp() or because of a different vnode representing
561  * the same block device). If such an alias exists, deallocate
562  * the existing contents and return the aliased vnode. The
563  * caller is responsible for filling it with its new contents.
564  */
565 struct vnode *
566 checkalias(nvp, nvp_rdev, mp)
567 	register struct vnode *nvp;
568 	dev_t nvp_rdev;
569 	struct mount *mp;
570 {
571 	struct proc *p = curproc;
572 	register struct vnode *vp;
573 	struct vnode **vpp;
574 
575 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
576 		return (NULLVP);
577 
578 	vpp = &speclisth[SPECHASH(nvp_rdev)];
579 loop:
580 	simple_lock(&spechash_slock);
581 	for (vp = *vpp; vp; vp = vp->v_specnext) {
582 		simple_lock(&vp->v_interlock);
583 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
584 			simple_unlock(&vp->v_interlock);
585 			continue;
586 		}
587 		/*
588 		 * Alias, but not in use, so flush it out.
589 		 */
590 		if (vp->v_usecount == 0) {
591 			simple_unlock(&spechash_slock);
592 			vgonel(vp, p);
593 			goto loop;
594 		}
595 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
596 			simple_unlock(&spechash_slock);
597 			goto loop;
598 		}
599 		break;
600 	}
601 
602 	/*
603 	 * The common case is actually handled by the if statement below.
604 	 */
605 	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
606 		MALLOC(nvp->v_specinfo, struct specinfo *,
607 			sizeof(struct specinfo), M_VNODE, M_WAITOK);
608 		nvp->v_rdev = nvp_rdev;
609 		nvp->v_hashchain = vpp;
610 		nvp->v_specnext = *vpp;
611 		nvp->v_specmountpoint = NULL;
612 		nvp->v_speclockf = NULL;
613 		simple_unlock(&spechash_slock);
614 		*vpp = nvp;
615 		if (vp != NULLVP) {
616 			nvp->v_flag |= VALIASED;
617 			vp->v_flag |= VALIASED;
618 			vput(vp);
619 		}
620 		return (NULLVP);
621 	}
622 
623 	/*
624 	 * This code is the uncommon case. It is reached when we
625 	 * found an alias that was VT_NON with a vtype of VBLK.
626 	 * This means we found a block device that was created
627 	 * using bdevvp.
628 	 * An example of such a vnode is the root partition device vnode
629 	 * created in ffs_mountroot.
630 	 *
631 	 * The vnodes created by bdevvp should not be aliased (why?).
632 	 */
633 
634 	simple_unlock(&spechash_slock);
635 	VOP_UNLOCK(vp, 0, p);
636 	simple_lock(&vp->v_interlock);
637 	vclean(vp, 0, p);
638 	vp->v_op = nvp->v_op;
639 	vp->v_tag = nvp->v_tag;
640 	nvp->v_type = VNON;
641 	insmntque(vp, mp);
642 	return (vp);
643 }
644 
645 /*
646  * Grab a particular vnode from the free list, increment its
647  * reference count and lock it. If the vnode lock bit is set, the
648  * vnode is being eliminated in vgone. The process is awakened
649  * when the transition is completed, and an error returned to
650  * indicate that the vnode is no longer usable (possibly having
651  * been changed to a new file system type).
652  */
653 int
654 vget(vp, flags, p)
655 	struct vnode *vp;
656 	int flags;
657 	struct proc *p;
658 {
659 	int error;
660 	int s;
661 	/*
662 	 * If the vnode is in the process of being cleaned out for
663 	 * another use, we wait for the cleaning to finish and then
664 	 * return failure. Cleaning is determined by checking that
665 	 * the VXLOCK flag is set.
666 	 */
667 	if ((flags & LK_INTERLOCK) == 0) {
668 		simple_lock(&vp->v_interlock);
669 		flags |= LK_INTERLOCK;
670 	}
671 	if (vp->v_flag & VXLOCK) {
672  		vp->v_flag |= VXWANT;
673 		simple_unlock(&vp->v_interlock);
674 		tsleep((caddr_t)vp, PINOD, "vget", 0);
675 		return (ENOENT);
676  	}
677 	if (vp->v_usecount == 0 &&
678 	    (vp->v_bioflag & VBIOONFREELIST)) {
679 		s = splbio();
680 		simple_lock(&vnode_free_list_slock);
681 		if (vp->v_holdcnt > 0)
682 			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
683 		else
684 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
685 		simple_unlock(&vnode_free_list_slock);
686 		vp->v_bioflag &= ~VBIOONFREELIST;
687 		splx(s);
688 	}
689  	vp->v_usecount++;
690 	if (flags & LK_TYPE_MASK) {
691 		if ((error = vn_lock(vp, flags, p)) != 0) {
692 			vp->v_usecount--;
693 			if (vp->v_usecount == 0)
694 				vputonfreelist(vp);
695 
696 			simple_unlock(&vp->v_interlock);
697 		}
698 		return (error);
699 	}
700 	simple_unlock(&vp->v_interlock);
701 	return (0);
702 }
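
/*
 * A successful vget() must eventually be balanced by vput() if the
 * vnode was locked (LK_EXCLUSIVE or LK_SHARED in flags), or by vrele()
 * if it was not; a sketch:
 *
 *	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
 *		... use the locked vnode ...
 *		vput(vp);
 *	}
 */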
703 
704 
705 #ifdef DIAGNOSTIC
706 /*
707  * Vnode reference.
708  */
709 void
710 vref(vp)
711 	struct vnode *vp;
712 {
713 	simple_lock(&vp->v_interlock);
714 	if (vp->v_usecount == 0)
715 		panic("vref used where vget required");
716 	vp->v_usecount++;
717 	simple_unlock(&vp->v_interlock);
718 }
719 #endif /* DIAGNOSTIC */
720 
721 static __inline__ void
722 vputonfreelist(vp)
723 	struct vnode *vp;
724 {
725 	int s;
726 	struct freelst *lst;
727 
728 	s = splbio();
729 #ifdef DIAGNOSTIC
730 	if (vp->v_usecount != 0)
731 		panic("Use count is not zero!");
732 
733 	if (vp->v_bioflag & VBIOONFREELIST) {
734 		vprint("vnode already on free list: ", vp);
735 		panic("vnode already on free list");
736 	}
737 #endif
738 
739 	vp->v_bioflag |= VBIOONFREELIST;
740 
741 	if (vp->v_holdcnt > 0)
742 		lst = &vnode_hold_list;
743 	else
744 		lst = &vnode_free_list;
745 
746 	if (vp->v_type == VBAD)
747 		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
748 	else
749 		TAILQ_INSERT_TAIL(lst, vp, v_freelist);
750 
751 	splx(s);
752 }
753 
754 /*
755  * vput(), just unlock and vrele()
756  */
757 void
758 vput(vp)
759 	register struct vnode *vp;
760 {
761 	struct proc *p = curproc;	/* XXX */
762 
763 #ifdef DIAGNOSTIC
764 	if (vp == NULL)
765 		panic("vput: null vp");
766 #endif
767 	simple_lock(&vp->v_interlock);
768 
769 #ifdef DIAGNOSTIC
770 	if (vp->v_usecount == 0) {
771 		vprint("vput: bad ref count", vp);
772 		panic("vput: ref cnt");
773 	}
774 #endif
775 	vp->v_usecount--;
776 	if (vp->v_usecount > 0) {
777 		simple_unlock(&vp->v_interlock);
778 		VOP_UNLOCK(vp, 0, p);
779 		return;
780 	}
781 
782 #ifdef DIAGNOSTIC
783 	if (vp->v_writecount != 0) {
784 		vprint("vput: bad writecount", vp);
785 		panic("vput: v_writecount != 0");
786 	}
787 #endif
788 	vputonfreelist(vp);
789 
790 	simple_unlock(&vp->v_interlock);
791 
792 	VOP_INACTIVE(vp, p);
793 }
794 
795 /*
796  * Vnode release - use for active VNODES.
797  * If count drops to zero, call inactive routine and return to freelist.
798  */
799 void
800 vrele(vp)
801 	register struct vnode *vp;
802 {
803 	struct proc *p = curproc;	/* XXX */
804 
805 #ifdef DIAGNOSTIC
806 	if (vp == NULL)
807 		panic("vrele: null vp");
808 #endif
809 	simple_lock(&vp->v_interlock);
810 #ifdef DIAGNOSTIC
811 	if (vp->v_usecount == 0) {
812 		vprint("vrele: bad ref count", vp);
813 		panic("vrele: ref cnt");
814 	}
815 #endif
816 	vp->v_usecount--;
817 	if (vp->v_usecount > 0) {
818 		simple_unlock(&vp->v_interlock);
819 		return;
820 	}
821 
822 #ifdef DIAGNOSTIC
823 	if (vp->v_writecount != 0) {
824 		vprint("vrele: bad writecount", vp);
825 		panic("vrele: v_writecount != 0");
826 	}
827 #endif
828 	vputonfreelist(vp);
829 
830 	if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0)
831 		VOP_INACTIVE(vp, p);
832 }
833 
834 void vhold __P((struct vnode *vp));
835 
836 /*
837  * Page or buffer structure gets a reference.
838  */
839 void
840 vhold(vp)
841 	register struct vnode *vp;
842 {
843 
844 	/*
845 	 * If it is on the freelist and the hold count is currently
846 	 * zero, move it to the hold list.
847 	 */
848   	simple_lock(&vp->v_interlock);
849 	if ((vp->v_bioflag & VBIOONFREELIST) &&
850 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
851 		simple_lock(&vnode_free_list_slock);
852 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
853 		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
854 		simple_unlock(&vnode_free_list_slock);
855 	}
856 	vp->v_holdcnt++;
857 	simple_unlock(&vp->v_interlock);
858 }
859 
860 /*
861  * Remove any vnodes in the vnode table belonging to mount point mp.
862  *
863  * If FORCECLOSE is not specified, there should not be any active ones:
864  * return an error if any are found (nb: this is a user error, not a
865  * system error). If FORCECLOSE is specified, detach any active vnodes
866  * that are found.
867  */
868 #ifdef DEBUG
869 int busyprt = 0;	/* print out busy vnodes */
870 struct ctldebug debug1 = { "busyprt", &busyprt };
871 #endif
872 
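/*
 * Apply func to every vnode on the mount point's vnode list, restarting
 * the scan whenever a vnode is recycled onto another mount while the
 * list lock is dropped.  func is called with the vnode interlock held
 * and must release it on every path (see vflush_vnode() below); a
 * nonzero return value stops the walk.
 */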
873 int
874 vfs_mount_foreach_vnode(struct mount *mp,
875     int (*func)(struct vnode *, void *), void *arg) {
876 	struct vnode *vp, *nvp;
877 	int error = 0;
878 
879 	simple_lock(&mntvnode_slock);
880 loop:
881 	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
882 		if (vp->v_mount != mp)
883 			goto loop;
884 		nvp = vp->v_mntvnodes.le_next;
885 		simple_lock(&vp->v_interlock);
886 		simple_unlock(&mntvnode_slock);
887 
888 		error = func(vp, arg);
889 
890 		simple_lock(&mntvnode_slock);
891 
892 		if (error != 0)
893 			break;
894 	}
895 	simple_unlock(&mntvnode_slock);
896 
897 	return (error);
898 }
899 
900 
901 struct vflush_args {
902 	struct vnode *skipvp;
903 	int busy;
904 	int flags;
905 };
906 
907 int
908 vflush_vnode(struct vnode *vp, void *arg) {
909 	struct vflush_args *va = arg;
910 	struct proc *p = curproc;
911 
912 	if (vp == va->skipvp) {
913 		simple_unlock(&vp->v_interlock);
914 		return (0);
915 	}
916 
917 	if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
918 		simple_unlock(&vp->v_interlock);
919 		return (0);
920 	}
921 
922 	/*
923 	 * If WRITECLOSE is set, only flush out regular file
924 	 * vnodes open for writing.
925 	 */
926 	if ((va->flags & WRITECLOSE) &&
927 	    (vp->v_writecount == 0 || vp->v_type != VREG)) {
928 		simple_unlock(&vp->v_interlock);
929 		return (0);
930 	}
931 
932 	/*
933 	 * With v_usecount == 0, all we need to do is clear
934 	 * out the vnode data structures and we are done.
935 	 */
936 	if (vp->v_usecount == 0) {
937 		vgonel(vp, p);
938 		return (0);
939 	}
940 
941 	/*
942 	 * If FORCECLOSE is set, forcibly close the vnode.
943 	 * For block or character devices, revert to an
944 	 * anonymous device. For all other files, just kill them.
945 	 */
946 	if (va->flags & FORCECLOSE) {
947 		if (vp->v_type != VBLK && vp->v_type != VCHR) {
948 			vgonel(vp, p);
949 		} else {
950 			vclean(vp, 0, p);
951 			vp->v_op = spec_vnodeop_p;
952 			insmntque(vp, (struct mount *)0);
953 		}
954 		return (0);
955 	}
956 
957 #ifdef DEBUG
958 	if (busyprt)
959 		vprint("vflush: busy vnode", vp);
960 #endif
961 	simple_unlock(&vp->v_interlock);
962 	va->busy++;
963 	return (0);
964 }
965 
966 int
967 vflush(mp, skipvp, flags)
968 	struct mount *mp;
969 	struct vnode *skipvp;
970 	int flags;
971 {
972 	struct vflush_args va;
973 	va.skipvp = skipvp;
974 	va.busy = 0;
975 	va.flags = flags;
976 
977 	vfs_mount_foreach_vnode(mp, vflush_vnode, &va);
978 
979 	if (va.busy)
980 		return (EBUSY);
981 	return (0);
982 }
983 
984 /*
985  * Disassociate the underlying file system from a vnode.
986  * The vnode interlock is held on entry.
987  */
988 void
989 vclean(vp, flags, p)
990 	register struct vnode *vp;
991 	int flags;
992 	struct proc *p;
993 {
994 	int active;
995 
996 	/*
997 	 * Check to see if the vnode is in use.
998 	 * If so we have to reference it before we clean it out
999 	 * so that its count cannot fall to zero and generate a
1000 	 * race against ourselves to recycle it.
1001 	 */
1002 	if ((active = vp->v_usecount) != 0)
1003 		vp->v_usecount++;
1004 
1005 	/*
1006 	 * Prevent the vnode from being recycled or
1007 	 * brought into use while we clean it out.
1008 	 */
1009 	if (vp->v_flag & VXLOCK)
1010 		panic("vclean: deadlock");
1011 	vp->v_flag |= VXLOCK;
1012 	/*
1013 	 * Even if the count is zero, the VOP_INACTIVE routine may still
1014 	 * have the object locked while it cleans it out. The VOP_LOCK
1015 	 * ensures that the VOP_INACTIVE routine is done with its work.
1016 	 * For active vnodes, it ensures that no other activity can
1017 	 * occur while the underlying object is being cleaned out.
1018 	 */
1019 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
1020 
1021 	/*
1022 	 * clean out any VM data associated with the vnode.
1023 	 */
1024 	uvm_vnp_terminate(vp);
1025 	/*
1026 	 * Clean out any buffers associated with the vnode.
1027 	 */
1028 	if (flags & DOCLOSE)
1029 		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1030 	/*
1031 	 * If purging an active vnode, it must be closed and
1032 	 * deactivated before being reclaimed. Note that the
1033 	 * VOP_INACTIVE will unlock the vnode.
1034 	 */
1035 	if (active) {
1036 		if (flags & DOCLOSE)
1037 			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
1038 		VOP_INACTIVE(vp, p);
1039 	} else {
1040 		/*
1041 		 * Any other processes trying to obtain this lock must first
1042 		 * wait for VXLOCK to clear, then call the new lock operation.
1043 		 */
1044 		VOP_UNLOCK(vp, 0, p);
1045 	}
1046 
1047 	/*
1048 	 * Reclaim the vnode.
1049 	 */
1050 	if (VOP_RECLAIM(vp, p))
1051 		panic("vclean: cannot reclaim");
1052 	if (active) {
1053 		simple_lock(&vp->v_interlock);
1054 
1055 		vp->v_usecount--;
1056 		if (vp->v_usecount == 0) {
1057 			if (vp->v_holdcnt > 0)
1058 				panic("vclean: not clean");
1059 			vputonfreelist(vp);
1060 		}
1061 
1062 		simple_unlock(&vp->v_interlock);
1063 	}
1064 	cache_purge(vp);
1065 	if (vp->v_vnlock) {
1066 		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1067 			vprint("vclean: lock not drained", vp);
1068 		FREE(vp->v_vnlock, M_VNODE);
1069 		vp->v_vnlock = NULL;
1070 	}
1071 
1072 	/*
1073 	 * Done with purge, notify sleepers of the grim news.
1074 	 */
1075 	vp->v_op = dead_vnodeop_p;
1076 	simple_lock(&vp->v_selectinfo.vsi_lock);
1077 	VN_KNOTE(vp, NOTE_REVOKE);
1078 	simple_unlock(&vp->v_selectinfo.vsi_lock);
1079 	vp->v_tag = VT_NON;
1080 	vp->v_flag &= ~VXLOCK;
1081 #ifdef DIAGNOSTIC
1082 	vp->v_flag &= ~VLOCKSWORK;
1083 #endif
1084 	if (vp->v_flag & VXWANT) {
1085 		vp->v_flag &= ~VXWANT;
1086 		wakeup((caddr_t)vp);
1087 	}
1088 }
1089 
1090 
1091 
1092 /*
1093  * Recycle an unused vnode to the front of the free list.
1094  * Release the passed interlock if the vnode will be recycled.
1095  */
1096 int
1097 vrecycle(vp, inter_lkp, p)
1098 	struct vnode *vp;
1099 	struct simplelock *inter_lkp;
1100 	struct proc *p;
1101 {
1102 
1103 	simple_lock(&vp->v_interlock);
1104 	if (vp->v_usecount == 0) {
1105 		if (inter_lkp)
1106 			simple_unlock(inter_lkp);
1107 		vgonel(vp, p);
1108 		return (1);
1109 	}
1110 	simple_unlock(&vp->v_interlock);
1111 	return (0);
1112 }
1113 
1114 
1115 /*
1116  * Eliminate all activity associated with a vnode
1117  * in preparation for reuse.
1118  */
1119 void
1120 vgone(vp)
1121 	register struct vnode *vp;
1122 {
1123 	struct proc *p = curproc;
1124 
1125 	simple_lock(&vp->v_interlock);
1126 	vgonel(vp, p);
1127 }
1128 
1129 /*
1130  * vgone, with the vp interlock held.
1131  */
1132 void
1133 vgonel(vp, p)
1134 	struct vnode *vp;
1135 	struct proc *p;
1136 {
1137 	register struct vnode *vq;
1138 	struct vnode *vx;
1139 
1140 	/*
1141 	 * If a vgone (or vclean) is already in progress,
1142 	 * wait until it is done and return.
1143 	 */
1144 	if (vp->v_flag & VXLOCK) {
1145 		vp->v_flag |= VXWANT;
1146 		simple_unlock(&vp->v_interlock);
1147 		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1148 		return;
1149 	}
1150 	/*
1151 	 * Clean out the filesystem specific data.
1152 	 */
1153 	vclean(vp, DOCLOSE, p);
1154 	/*
1155 	 * Delete from old mount point vnode list, if on one.
1156 	 */
1157 	if (vp->v_mount != NULL)
1158 		insmntque(vp, (struct mount *)0);
1159 	/*
1160 	 * If special device, remove it from special device alias list
1161 	 * if it is on one.
1162 	 */
1163 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1164 		simple_lock(&spechash_slock);
1165 		if (*vp->v_hashchain == vp) {
1166 			*vp->v_hashchain = vp->v_specnext;
1167 		} else {
1168 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1169 				if (vq->v_specnext != vp)
1170 					continue;
1171 				vq->v_specnext = vp->v_specnext;
1172 				break;
1173 			}
1174 			if (vq == NULL)
1175 				panic("missing bdev");
1176 		}
1177 		if (vp->v_flag & VALIASED) {
1178 			vx = NULL;
1179 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1180 				if (vq->v_rdev != vp->v_rdev ||
1181 				    vq->v_type != vp->v_type)
1182 					continue;
1183 				if (vx)
1184 					break;
1185 				vx = vq;
1186 			}
1187 			if (vx == NULL)
1188 				panic("missing alias");
1189 			if (vq == NULL)
1190 				vx->v_flag &= ~VALIASED;
1191 			vp->v_flag &= ~VALIASED;
1192 		}
1193 		simple_unlock(&spechash_slock);
1194 		FREE(vp->v_specinfo, M_VNODE);
1195 		vp->v_specinfo = NULL;
1196 	}
1197 	/*
1198 	 * Mark the vnode bad so that, if it is on the free list,
1199 	 * it will be recycled first (it is moved to the head below).
1200 	 */
1201 	vp->v_type = VBAD;
1202 
1203 	/*
1204 	 * Move onto the free list, unless we were called from
1205 	 * getnewvnode and we're not on any free list.
1206 	 */
1207 	if (vp->v_usecount == 0 &&
1208 	    (vp->v_bioflag & VBIOONFREELIST)) {
1209 		int s;
1210 
1211 		simple_lock(&vnode_free_list_slock);
1212 		s = splbio();
1213 
1214 		if (vp->v_holdcnt > 0)
1215 			panic("vgonel: not clean");
1216 
1217 		if (TAILQ_FIRST(&vnode_free_list) != vp) {
1218 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1219 			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1220 		}
1221 		splx(s);
1222 		simple_unlock(&vnode_free_list_slock);
1223 	}
1224 }
1225 
1226 /*
1227  * Lookup a vnode by device number.
1228  */
1229 int
1230 vfinddev(dev, type, vpp)
1231 	dev_t dev;
1232 	enum vtype type;
1233 	struct vnode **vpp;
1234 {
1235 	register struct vnode *vp;
1236 	int rc = 0;
1237 
1238 	simple_lock(&spechash_slock);
1239 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1240 		if (dev != vp->v_rdev || type != vp->v_type)
1241 			continue;
1242 		*vpp = vp;
1243 		rc = 1;
1244 		break;
1245 	}
1246 	simple_unlock(&spechash_slock);
1247 	return (rc);
1248 }
1249 
1250 /*
1251  * Revoke all the vnodes corresponding to the specified minor number
1252  * range (endpoints inclusive) of the specified major.
1253  */
1254 void
1255 vdevgone(maj, minl, minh, type)
1256 	int maj, minl, minh;
1257 	enum vtype type;
1258 {
1259 	struct vnode *vp;
1260 	int mn;
1261 
1262 	for (mn = minl; mn <= minh; mn++)
1263 		if (vfinddev(makedev(maj, mn), type, &vp))
1264 			VOP_REVOKE(vp, REVOKEALL);
1265 }
1266 
1267 /*
1268  * Calculate the total number of references to a special device.
1269  */
1270 int
1271 vcount(vp)
1272 	struct vnode *vp;
1273 {
1274 	struct vnode *vq, *vnext;
1275 	int count;
1276 
1277 loop:
1278 	if ((vp->v_flag & VALIASED) == 0)
1279 		return (vp->v_usecount);
1280 	simple_lock(&spechash_slock);
1281 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1282 		vnext = vq->v_specnext;
1283 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1284 			continue;
1285 		/*
1286 		 * Alias, but not in use, so flush it out.
1287 		 */
1288 		if (vq->v_usecount == 0 && vq != vp) {
1289 			simple_unlock(&spechash_slock);
1290 			vgone(vq);
1291 			goto loop;
1292 		}
1293 		count += vq->v_usecount;
1294 	}
1295 	simple_unlock(&spechash_slock);
1296 	return (count);
1297 }
1298 
1299 /*
1300  * Print out a description of a vnode.
1301  */
1302 static char *typename[] =
1303    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1304 
1305 void
1306 vprint(label, vp)
1307 	char *label;
1308 	register struct vnode *vp;
1309 {
1310 	char buf[64];
1311 
1312 	if (label != NULL)
1313 		printf("%s: ", label);
1314 	printf("type %s, usecount %u, writecount %u, holdcount %u,",
1315 		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1316 		vp->v_holdcnt);
1317 	buf[0] = '\0';
1318 	if (vp->v_flag & VROOT)
1319 		strcat(buf, "|VROOT");
1320 	if (vp->v_flag & VTEXT)
1321 		strcat(buf, "|VTEXT");
1322 	if (vp->v_flag & VSYSTEM)
1323 		strcat(buf, "|VSYSTEM");
1324 	if (vp->v_flag & VXLOCK)
1325 		strcat(buf, "|VXLOCK");
1326 	if (vp->v_flag & VXWANT)
1327 		strcat(buf, "|VXWANT");
1328 	if (vp->v_bioflag & VBIOWAIT)
1329 		strcat(buf, "|VBIOWAIT");
1330 	if (vp->v_flag & VALIASED)
1331 		strcat(buf, "|VALIASED");
1332 	if (buf[0] != '\0')
1333 		printf(" flags (%s)", &buf[1]);
1334 	if (vp->v_data == NULL) {
1335 		printf("\n");
1336 	} else {
1337 		printf("\n\t");
1338 		VOP_PRINT(vp);
1339 	}
1340 }
1341 
1342 #ifdef DEBUG
1343 /*
1344  * List all of the locked vnodes in the system.
1345  * Called when debugging the kernel.
1346  */
1347 void
1348 printlockedvnodes()
1349 {
1350 	struct proc *p = curproc;
1351 	register struct mount *mp, *nmp;
1352 	register struct vnode *vp;
1353 
1354 	printf("Locked vnodes\n");
1355 	simple_lock(&mountlist_slock);
1356 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1357 	    mp = nmp) {
1358 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1359 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
1360 			continue;
1361 		}
1362 		for (vp = mp->mnt_vnodelist.lh_first; vp;
1363 		    vp = vp->v_mntvnodes.le_next) {
1364 			if (VOP_ISLOCKED(vp))
1365 				vprint((char *)0, vp);
1366 		}
1367 		simple_lock(&mountlist_slock);
1368 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
1369 		vfs_unbusy(mp, p);
1370  	}
1371 	simple_unlock(&mountlist_slock);
1372 
1373 }
1374 #endif
1375 
1376 /*
1377  * Top level filesystem related information gathering.
1378  */
1379 int
1380 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1381 	int *name;
1382 	u_int namelen;
1383 	void *oldp;
1384 	size_t *oldlenp;
1385 	void *newp;
1386 	size_t newlen;
1387 	struct proc *p;
1388 {
1389 	struct vfsconf *vfsp;
1390 
1391 	/* all sysctl names at this level are at least name and field */
1392 	if (namelen < 2)
1393 		return (ENOTDIR);		/* overloaded */
1394 	if (name[0] != VFS_GENERIC) {
1395 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1396 			if (vfsp->vfc_typenum == name[0])
1397 				break;
1398 		if (vfsp == NULL)
1399 			return (EOPNOTSUPP);
1400 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1401 		    oldp, oldlenp, newp, newlen, p));
1402 	}
1403 	switch (name[1]) {
1404 	case VFS_MAXTYPENUM:
1405 		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1406 	case VFS_CONF:
1407 		if (namelen < 3)
1408 			return (ENOTDIR);	/* overloaded */
1409 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1410 			if (vfsp->vfc_typenum == name[2])
1411 				break;
1412 		if (vfsp == NULL)
1413 			return (EOPNOTSUPP);
1414 		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
1415 		    sizeof(struct vfsconf)));
1416 	}
1417 	return (EOPNOTSUPP);
1418 }
1419 
1420 
1421 int kinfo_vdebug = 1;
1422 int kinfo_vgetfailed;
1423 #define KINFO_VNODESLOP	10
1424 /*
1425  * Dump vnode list (via sysctl).
1426  * Copyout address of vnode followed by vnode.
1427  */
1428 /* ARGSUSED */
1429 int
1430 sysctl_vnode(where, sizep, p)
1431 	char *where;
1432 	size_t *sizep;
1433 	struct proc *p;
1434 {
1435 	register struct mount *mp, *nmp;
1436 	struct vnode *vp, *nvp;
1437 	register char *bp = where, *savebp;
1438 	char *ewhere;
1439 	int error;
1440 
1441 #define VPTRSZ	sizeof (struct vnode *)
1442 #define VNODESZ	sizeof (struct vnode)
1443 	if (where == NULL) {
1444 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1445 		return (0);
1446 	}
1447 	ewhere = where + *sizep;
1448 
1449 	simple_lock(&mountlist_slock);
1450 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1451 	    mp = nmp) {
1452 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1453 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
1454 			continue;
1455 		}
1456 		savebp = bp;
1457 again:
		simple_lock(&mntvnode_slock);
1458 		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
1459 		    vp = nvp) {
1460 			/*
1461 			 * Check that the vp is still associated with
1462 			 * this filesystem.  RACE: could have been
1463 			 * recycled onto the same filesystem.
1464 			 */
1465 			if (vp->v_mount != mp) {
1466 				simple_unlock(&mntvnode_slock);
1467 				if (kinfo_vdebug)
1468 					printf("kinfo: vp changed\n");
1469 				bp = savebp;
1470 				goto again;
1471 			}
1472 			nvp = vp->v_mntvnodes.le_next;
			simple_unlock(&mntvnode_slock);
1473 			if (bp + VPTRSZ + VNODESZ > ewhere) {
1475 				*sizep = bp - where;
1476 				return (ENOMEM);
1477 			}
1478 			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
1479 			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
1480 				return (error);
1481 			bp += VPTRSZ + VNODESZ;
1482 			simple_lock(&mntvnode_slock);
1483 		}
1484 
1485 		simple_unlock(&mntvnode_slock);
1486 		simple_lock(&mountlist_slock);
1487 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
1488 		vfs_unbusy(mp, p);
1489 	}
1490 
1491 	simple_unlock(&mountlist_slock);
1492 
1493 	*sizep = bp - where;
1494 	return (0);
1495 }
1496 
1497 /*
1498  * Check to see if a filesystem is mounted on a block device.
1499  */
1500 int
1501 vfs_mountedon(vp)
1502 	register struct vnode *vp;
1503 {
1504 	register struct vnode *vq;
1505 	int error = 0;
1506 
1507  	if (vp->v_specmountpoint != NULL)
1508 		return (EBUSY);
1509 	if (vp->v_flag & VALIASED) {
1510 		simple_lock(&spechash_slock);
1511 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1512 			if (vq->v_rdev != vp->v_rdev ||
1513 			    vq->v_type != vp->v_type)
1514 				continue;
1515 			if (vq->v_specmountpoint != NULL) {
1516 				error = EBUSY;
1517 				break;
1518 			}
1519  		}
1520 		simple_unlock(&spechash_slock);
1521 	}
1522 	return (error);
1523 }
1524 
1525 /*
1526  * Build hash lists of net addresses and hang them off the mount point.
1527  * Called by ufs_mount() to set up the lists of export addresses.
1528  */
1529 int
1530 vfs_hang_addrlist(mp, nep, argp)
1531 	struct mount *mp;
1532 	struct netexport *nep;
1533 	struct export_args *argp;
1534 {
1535 	register struct netcred *np;
1536 	register struct radix_node_head *rnh;
1537 	register int i;
1538 	struct radix_node *rn;
1539 	struct sockaddr *saddr, *smask = 0;
1540 	struct domain *dom;
1541 	int error;
1542 
1543 	if (argp->ex_addrlen == 0) {
1544 		if (mp->mnt_flag & MNT_DEFEXPORTED)
1545 			return (EPERM);
1546 		np = &nep->ne_defexported;
1547 		np->netc_exflags = argp->ex_flags;
1548 		np->netc_anon = argp->ex_anon;
1549 		np->netc_anon.cr_ref = 1;
1550 		mp->mnt_flag |= MNT_DEFEXPORTED;
1551 		return (0);
1552 	}
1553 	if (argp->ex_addrlen > MLEN)
1554 		return (EINVAL);
1555 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1556 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1557 	bzero((caddr_t)np, i);
1558 	saddr = (struct sockaddr *)(np + 1);
1559 	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
1560 	if (error)
1561 		goto out;
1562 	if (saddr->sa_len > argp->ex_addrlen)
1563 		saddr->sa_len = argp->ex_addrlen;
1564 	if (argp->ex_masklen) {
1565 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1566 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1567 		if (error)
1568 			goto out;
1569 		if (smask->sa_len > argp->ex_masklen)
1570 			smask->sa_len = argp->ex_masklen;
1571 	}
1572 	i = saddr->sa_family;
1573 	if (i < 0 || i > AF_MAX) {
1574 		error = EINVAL;
1575 		goto out;
1576 	}
1577 	if ((rnh = nep->ne_rtable[i]) == 0) {
1578 		/*
1579 		 * It seems silly to initialize every AF when most are not
1580 		 * used; do so on demand here.
1581 		 */
1582 		for (dom = domains; dom; dom = dom->dom_next)
1583 			if (dom->dom_family == i && dom->dom_rtattach) {
1584 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
1585 					dom->dom_rtoffset);
1586 				break;
1587 			}
1588 		if ((rnh = nep->ne_rtable[i]) == 0) {
1589 			error = ENOBUFS;
1590 			goto out;
1591 		}
1592 	}
1593 	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1594 		np->netc_rnodes);
1595 	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1596 		error = EPERM;
1597 		goto out;
1598 	}
1599 	np->netc_exflags = argp->ex_flags;
1600 	np->netc_anon = argp->ex_anon;
1601 	np->netc_anon.cr_ref = 1;
1602 	return (0);
1603 out:
1604 	free(np, M_NETADDR);
1605 	return (error);
1606 }
1607 
1608 /* ARGSUSED */
1609 int
1610 vfs_free_netcred(rn, w)
1611 	struct radix_node *rn;
1612 	void *w;
1613 {
1614 	register struct radix_node_head *rnh = (struct radix_node_head *)w;
1615 
1616 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1617 	free((caddr_t)rn, M_NETADDR);
1618 	return (0);
1619 }
1620 
1621 /*
1622  * Free the net address hash lists that are hanging off the mount points.
1623  */
1624 void
1625 vfs_free_addrlist(nep)
1626 	struct netexport *nep;
1627 {
1628 	register int i;
1629 	register struct radix_node_head *rnh;
1630 
1631 	for (i = 0; i <= AF_MAX; i++)
1632 		if ((rnh = nep->ne_rtable[i]) != NULL) {
1633 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1634 			free((caddr_t)rnh, M_RTABLE);
1635 			nep->ne_rtable[i] = 0;
1636 		}
1637 }
1638 
1639 int
1640 vfs_export(mp, nep, argp)
1641 	struct mount *mp;
1642 	struct netexport *nep;
1643 	struct export_args *argp;
1644 {
1645 	int error;
1646 
1647 	if (argp->ex_flags & MNT_DELEXPORT) {
1648 		vfs_free_addrlist(nep);
1649 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1650 	}
1651 	if (argp->ex_flags & MNT_EXPORTED) {
1652 		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1653 			return (error);
1654 		mp->mnt_flag |= MNT_EXPORTED;
1655 	}
1656 	return (0);
1657 }
1658 
1659 struct netcred *
1660 vfs_export_lookup(mp, nep, nam)
1661 	register struct mount *mp;
1662 	struct netexport *nep;
1663 	struct mbuf *nam;
1664 {
1665 	register struct netcred *np;
1666 	register struct radix_node_head *rnh;
1667 	struct sockaddr *saddr;
1668 
1669 	np = NULL;
1670 	if (mp->mnt_flag & MNT_EXPORTED) {
1671 		/*
1672 		 * Lookup in the export list first.
1673 		 */
1674 		if (nam != NULL) {
1675 			saddr = mtod(nam, struct sockaddr *);
1676 			rnh = nep->ne_rtable[saddr->sa_family];
1677 			if (rnh != NULL) {
1678 				np = (struct netcred *)
1679 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
1680 					    rnh);
1681 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1682 					np = NULL;
1683 			}
1684 		}
1685 		/*
1686 		 * If no address match, use the default if it exists.
1687 		 */
1688 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1689 			np = &nep->ne_defexported;
1690 	}
1691 	return (np);
1692 }
1693 
1694 /*
1695  * Do the usual access checking.
1696  * file_mode, uid and gid are from the vnode in question,
1697  * while acc_mode and cred are from the VOP_ACCESS parameter list
1698  */
1699 int
1700 vaccess(file_mode, uid, gid, acc_mode, cred)
1701 	mode_t file_mode;
1702 	uid_t uid;
1703 	gid_t gid;
1704 	mode_t acc_mode;
1705 	struct ucred *cred;
1706 {
1707 	mode_t mask;
1708 
1709 	/* User id 0 always gets access. */
1710 	if (cred->cr_uid == 0)
1711 		return 0;
1712 
1713 	mask = 0;
1714 
1715 	/* Otherwise, check the owner. */
1716 	if (cred->cr_uid == uid) {
1717 		if (acc_mode & VEXEC)
1718 			mask |= S_IXUSR;
1719 		if (acc_mode & VREAD)
1720 			mask |= S_IRUSR;
1721 		if (acc_mode & VWRITE)
1722 			mask |= S_IWUSR;
1723 		return (file_mode & mask) == mask ? 0 : EACCES;
1724 	}
1725 
1726 	/* Otherwise, check the groups. */
1727 	if (cred->cr_gid == gid || groupmember(gid, cred)) {
1728 		if (acc_mode & VEXEC)
1729 			mask |= S_IXGRP;
1730 		if (acc_mode & VREAD)
1731 			mask |= S_IRGRP;
1732 		if (acc_mode & VWRITE)
1733 			mask |= S_IWGRP;
1734 		return (file_mode & mask) == mask ? 0 : EACCES;
1735 	}
1736 
1737 	/* Otherwise, check everyone else. */
1738 	if (acc_mode & VEXEC)
1739 		mask |= S_IXOTH;
1740 	if (acc_mode & VREAD)
1741 		mask |= S_IROTH;
1742 	if (acc_mode & VWRITE)
1743 		mask |= S_IWOTH;
1744 	return (file_mode & mask) == mask ? 0 : EACCES;
1745 }
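
/*
 * A typical VOP_ACCESS implementation simply forwards the file's
 * attributes; a sketch, with hypothetical inode fields:
 *
 *	return (vaccess(ip->i_mode & ALLPERMS, ip->i_uid, ip->i_gid,
 *	    ap->a_mode, ap->a_cred));
 */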
1746 
1747 /*
1748  * Unmount all file systems.
1749  * We traverse the list in reverse order under the assumption that doing so
1750  * will avoid needing to worry about dependencies.
1751  */
1752 void
1753 vfs_unmountall()
1754 {
1755 	register struct mount *mp, *nmp;
1756 	int allerror, error, again = 1;
1757 
1758  retry:
1759 	allerror = 0;
1760 	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1761 	    mp = nmp) {
1762 		nmp = CIRCLEQ_PREV(mp, mnt_list);
1763 		if ((error = dounmount(mp, MNT_FORCE, curproc)) != 0) {
1764 			printf("unmount of %s failed with error %d\n",
1765 			    mp->mnt_stat.f_mntonname, error);
1766 			allerror = 1;
1767 		}
1768 	}
1769 
1770 	if (allerror) {
1771 		printf("WARNING: some file systems would not unmount\n");
1772 		if (again) {
1773 			printf("retrying\n");
1774 			again = 0;
1775 			goto retry;
1776 		}
1777 	}
1778 }
1779 
1780 /*
1781  * Sync and unmount file systems before shutting down.
1782  */
1783 void
1784 vfs_shutdown()
1785 {
1786 	/* XXX Should suspend scheduling. */
1787 	(void) spl0();
1788 
1789 	printf("syncing disks... ");
1790 
1791 	if (panicstr == 0) {
1792 		/* Sync before unmount, in case we hang on something. */
1793 		sys_sync(&proc0, (void *)0, (register_t *)0);
1794 
1795 		/* Unmount file systems. */
1796 		vfs_unmountall();
1797 	}
1798 
1799 	if (vfs_syncwait(1))
1800 		printf("giving up\n");
1801 	else
1802 		printf("done\n");
1803 }
1804 
1805 /*
1806  * Perform the sync() operation and wait for buffers to flush.
1807  * Assumptions: called with the scheduler disabled and physical I/O enabled;
1808  * for now called at spl0() XXX
1809  */
1810 int
1811 vfs_syncwait(verbose)
1812 	int verbose;
1813 {
1814 	register struct buf *bp;
1815 	int iter, nbusy, dcount, s;
1816 	struct proc *p;
1817 
1818 	p = curproc ? curproc : &proc0;
1819 	sys_sync(p, (void *)0, (register_t *)0);
1820 
1821 	/* Wait for sync to finish. */
1822 	dcount = 10000;
1823 	for (iter = 0; iter < 20; iter++) {
1824 		nbusy = 0;
1825 		for (bp = &buf[nbuf]; --bp >= buf; ) {
1826 			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
1827 				nbusy++;
1828 			/*
1829 			 * With soft updates, some buffers that are
1830 			 * written will be remarked as dirty until other
1831 			 * buffers are written.
1832 			 */
1833 			if (bp->b_flags & B_DELWRI) {
1834 				s = splbio();
1835 				bremfree(bp);
1836 				bp->b_flags |= B_BUSY;
1837 				splx(s);
1838 				nbusy++;
1839 				bawrite(bp);
1840 				if (dcount-- <= 0) {
1841 					if (verbose)
1842 						printf("softdep ");
1843 					return 1;
1844 				}
1845 			}
1846 		}
1847 		if (nbusy == 0)
1848 			break;
1849 		if (verbose)
1850 			printf("%d ", nbusy);
1851 		DELAY(40000 * iter);
1852 	}
1853 
1854 	return nbusy;
1855 }
1856 
1857 /*
1858  * posix file system related system variables.
1859  */
1860 int
1861 fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1862 	int *name;
1863 	u_int namelen;
1864 	void *oldp;
1865 	size_t *oldlenp;
1866 	void *newp;
1867 	size_t newlen;
1868 	struct proc *p;
1869 {
1870 	/* all sysctl names at this level are terminal */
1871 	if (namelen != 1)
1872 		return (ENOTDIR);
1873 
1874 	switch (name[0]) {
1875 	case FS_POSIX_SETUID:
1876 		if (newp && securelevel > 0)
1877 			return (EPERM);
1878 		return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
1879 	default:
1880 		return (EOPNOTSUPP);
1881 	}
1882 	/* NOTREACHED */
1883 }
1884 
1885 /*
1886  * file system related system variables.
1887  */
1888 int
1889 fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1890 	int *name;
1891 	u_int namelen;
1892 	void *oldp;
1893 	size_t *oldlenp;
1894 	void *newp;
1895 	size_t newlen;
1896 	struct proc *p;
1897 {
1898 	sysctlfn *fn;
1899 
1900 	switch (name[0]) {
1901 	case FS_POSIX:
1902 		fn = fs_posix_sysctl;
1903 		break;
1904 	default:
1905 		return (EOPNOTSUPP);
1906 	}
1907 	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
1908 }
1909 
1910 
1911 /*
1912  * Routines dealing with vnodes and buffers
1913  */
1914 
1915 /*
1916  * Wait for all outstanding I/Os to complete
1917  *
1918  * Manipulates v_numoutput. Must be called at splbio()
1919  */
1920 int
1921 vwaitforio(vp, slpflag, wmesg, timeo)
1922 	struct vnode *vp;
1923 	int slpflag, timeo;
1924 	char *wmesg;
1925 {
1926 	int error = 0;
1927 
1928 	while (vp->v_numoutput) {
1929 		vp->v_bioflag |= VBIOWAIT;
1930 		error = tsleep((caddr_t)&vp->v_numoutput,
1931 		    slpflag | (PRIBIO + 1), wmesg, timeo);
1932 		if (error)
1933 			break;
1934 	}
1935 
1936 	return (error);
1937 }
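
/*
 * Callers bracket vwaitforio() with splbio(), as vinvalbuf() below
 * does:
 *
 *	s = splbio();
 *	vwaitforio(vp, 0, "vinvalbuf", 0);
 *	...
 *	splx(s);
 */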
1938 
1939 
1940 /*
1941  * Update outstanding I/O count and do wakeup if requested.
1942  *
1943  * Manipulates v_numoutput. Must be called at splbio()
1944  */
1945 void
1946 vwakeup(vp)
1947 	struct vnode *vp;
1948 {
1949 	if (vp != NULL) {
1950 		if (vp->v_numoutput-- == 0)
1951 			panic("vwakeup: neg numoutput");
1952 		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
1953 			vp->v_bioflag &= ~VBIOWAIT;
1954 			wakeup((caddr_t)&vp->v_numoutput);
1955 		}
1956 	}
1957 }
1958 
1959 /*
1960  * Flush out and invalidate all buffers associated with a vnode.
1961  * Called with the underlying object locked.
1962  */
1963 int
1964 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
1965 	register struct vnode *vp;
1966 	int flags;
1967 	struct ucred *cred;
1968 	struct proc *p;
1969 	int slpflag, slptimeo;
1970 {
1971 	register struct buf *bp;
1972 	struct buf *nbp, *blist;
1973 	int s, error;
1974 
1975 	if (flags & V_SAVE) {
1976 		s = splbio();
1977 		vwaitforio(vp, 0, "vinvalbuf", 0);
1978 		if (vp->v_dirtyblkhd.lh_first != NULL) {
1979 			splx(s);
1980 			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
1981 				return (error);
1982 			s = splbio();
1983 			if (vp->v_numoutput > 0 ||
1984 			    vp->v_dirtyblkhd.lh_first != NULL)
1985 				panic("vinvalbuf: dirty bufs");
1986 		}
1987 		splx(s);
1988 	}
1989 loop:
1990 	s = splbio();
1991 	for (;;) {
1992 		if ((blist = vp->v_cleanblkhd.lh_first) &&
1993 		    (flags & V_SAVEMETA))
1994 			while (blist && blist->b_lblkno < 0)
1995 				blist = blist->b_vnbufs.le_next;
1996 		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
1997 		    (flags & V_SAVEMETA))
1998 			while (blist && blist->b_lblkno < 0)
1999 				blist = blist->b_vnbufs.le_next;
2000 		if (!blist)
2001 			break;
2002 
2003 		for (bp = blist; bp; bp = nbp) {
2004 			nbp = bp->b_vnbufs.le_next;
2005 			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
2006 				continue;
2007 			if (bp->b_flags & B_BUSY) {
2008 				bp->b_flags |= B_WANTED;
2009 				error = tsleep((caddr_t)bp,
2010 					slpflag | (PRIBIO + 1), "vinvalbuf",
2011 					slptimeo);
2012 				if (error) {
2013 					splx(s);
2014 					return (error);
2015 				}
2016 				break;
2017 			}
2018 			bremfree(bp);
2019 			bp->b_flags |= B_BUSY;
2020 			/*
2021 			 * XXX Since there are no node locks for NFS, I believe
2022 			 * there is a slight chance that a delayed write will
2023 			 * occur while sleeping just above, so check for it.
2024 			 */
2025 			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
2026 				splx(s);
2027 				(void) VOP_BWRITE(bp);
2028 				goto loop;
2029 			}
2030 			bp->b_flags |= B_INVAL;
2031 			brelse(bp);
2032 		}
2033 	}
2034 	if (!(flags & V_SAVEMETA) &&
2035 	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
2036 		panic("vinvalbuf: flush failed");
2037 	splx(s);
2038 	return (0);
2039 }
2040 
2041 void
2042 vflushbuf(vp, sync)
2043 	register struct vnode *vp;
2044 	int sync;
2045 {
2046 	register struct buf *bp, *nbp;
2047 	int s;
2048 
2049 loop:
2050 	s = splbio();
2051 	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
2052 		nbp = bp->b_vnbufs.le_next;
2053 		if ((bp->b_flags & B_BUSY))
2054 			continue;
2055 		if ((bp->b_flags & B_DELWRI) == 0)
2056 			panic("vflushbuf: not dirty");
2057 		bremfree(bp);
2058 		bp->b_flags |= B_BUSY;
2059 		splx(s);
2060 		/*
2061 		 * Wait for I/O associated with indirect blocks to complete,
2062 		 * since there is no way to quickly wait for them below.
2063 		 */
2064 		if (bp->b_vp == vp || sync == 0)
2065 			(void) bawrite(bp);
2066 		else
2067 			(void) bwrite(bp);
2068 		goto loop;
2069 	}
2070 	if (sync == 0) {
2071 		splx(s);
2072 		return;
2073 	}
2074 	vwaitforio(vp, 0, "vflushbuf", 0);
2075 	if (vp->v_dirtyblkhd.lh_first != NULL) {
2076 		splx(s);
2077 		vprint("vflushbuf: dirty", vp);
2078 		goto loop;
2079 	}
2080 	splx(s);
2081 }
2082 
2083 /*
2084  * Associate a buffer with a vnode.
2085  *
2086  * Manipulates buffer vnode queues. Must be called at splbio().
2087  */
2088 void
2089 bgetvp(vp, bp)
2090 	register struct vnode *vp;
2091 	register struct buf *bp;
2092 {
2093 
2094 	if (bp->b_vp)
2095 		panic("bgetvp: not free");
2096 	vhold(vp);
2097 	bp->b_vp = vp;
2098 	if (vp->v_type == VBLK || vp->v_type == VCHR)
2099 		bp->b_dev = vp->v_rdev;
2100 	else
2101 		bp->b_dev = NODEV;
2102 	/*
2103 	 * Insert onto list for new vnode.
2104 	 */
2105 	bufinsvn(bp, &vp->v_cleanblkhd);
2106 }
2107 
2108 /*
2109  * Disassociate a buffer from a vnode.
2110  *
2111  * Manipulates vnode buffer queues. Must be called at splbio().
2112  */
2113 void
2114 brelvp(bp)
2115 	register struct buf *bp;
2116 {
2117 	struct vnode *vp;
2118 
2119 	if ((vp = bp->b_vp) == (struct vnode *) 0)
2120 		panic("brelvp: NULL");
2121 	/*
2122 	 * Delete from old vnode list, if on one.
2123 	 */
2124 	if (bp->b_vnbufs.le_next != NOLIST)
2125 		bufremvn(bp);
2126 	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
2127 	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
2128 		vp->v_bioflag &= ~VBIOONSYNCLIST;
2129 		LIST_REMOVE(vp, v_synclist);
2130 	}
2131 	bp->b_vp = (struct vnode *) 0;
2132 
2133 	simple_lock(&vp->v_interlock);
2134 #ifdef DIAGNOSTIC
2135 	if (vp->v_holdcnt == 0)
2136 		panic("brelvp: holdcnt");
2137 #endif
2138 	vp->v_holdcnt--;
2139 
2140 	/*
2141 	 * If it is on the holdlist and the hold count drops to
2142 	 * zero, move it to the free list.
2143 	 */
2144 	if ((vp->v_bioflag & VBIOONFREELIST) &&
2145 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
2146 		simple_lock(&vnode_free_list_slock);
2147 		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
2148 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2149 		simple_unlock(&vnode_free_list_slock);
2150 	}
2151 	simple_unlock(&vp->v_interlock);
2152 }
2153 
2154 /*
2155  * Replace the current vnode associated with the buffer, if any,
2156  * with a new vnode.
2157  *
2158  * If an output I/O is pending on the buffer, the old vnode's
2159  * I/O count is adjusted.
2160  *
2161  * Ignores vnode buffer queues. Must be called at splbio().
2162  */
2163 void
2164 buf_replacevnode(bp, newvp)
2165 	struct buf *bp;
2166 	struct vnode *newvp;
2167 {
2168 	struct vnode *oldvp = bp->b_vp;
2169 
2170 	if (oldvp)
2171 		brelvp(bp);
2172 
2173 	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
2174 		newvp->v_numoutput++;	/* put it on swapdev */
2175 		vwakeup(oldvp);
2176 	}
2177 
2178 	bgetvp(newvp, bp);
2179 	bufremvn(bp);
2180 }
2181 
2182 /*
2183  * Used to assign buffers to the appropriate clean or dirty list on
2184  * the vnode and to add newly dirty vnodes to the appropriate
2185  * filesystem syncer list.
2186  *
2187  * Manipulates vnode buffer queues. Must be called at splbio().
2188  */
2189 void
2190 reassignbuf(bp)
2191 	struct buf *bp;
2192 {
2193 	struct buflists *listheadp;
2194 	int delay;
2195 	struct vnode *vp = bp->b_vp;
2196 
2197 	/*
2198 	 * Delete from old vnode list, if on one.
2199 	 */
2200 	if (bp->b_vnbufs.le_next != NOLIST)
2201 		bufremvn(bp);
2202 	/*
2203 	 * If dirty, put on list of dirty buffers;
2204 	 * otherwise insert onto list of clean buffers.
2205 	 */
2206 	if ((bp->b_flags & B_DELWRI) == 0) {
2207 		listheadp = &vp->v_cleanblkhd;
2208 		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
2209 		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
2210 			vp->v_bioflag &= ~VBIOONSYNCLIST;
2211 			LIST_REMOVE(vp, v_synclist);
2212 		}
2213 	} else {
2214 		listheadp = &vp->v_dirtyblkhd;
2215 		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
2216 			switch (vp->v_type) {
2217 			case VDIR:
2218 				delay = syncdelay / 2;
2219 				break;
2220 			case VBLK:
2221 				if (vp->v_specmountpoint != NULL) {
2222 					delay = syncdelay / 3;
2223 					break;
2224 				}
2225 				/* fall through */
2226 			default:
2227 				delay = syncdelay;
2228 			}
2229 			vn_syncer_add_to_worklist(vp, delay);
2230 		}
2231 	}
2232 	bufinsvn(bp, listheadp);
2233 }
2234 
2235 int
2236 vfs_register(vfs)
2237 	struct vfsconf *vfs;
2238 {
2239 	struct vfsconf *vfsp;
2240 	struct vfsconf **vfspp;
2241 
2242 #ifdef DIAGNOSTIC
2243 	/* Paranoia? */
2244 	if (vfs->vfc_refcount != 0)
2245 		printf("vfs_register called with vfc_refcount > 0\n");
2246 #endif
2247 
2248 	/* Check if filesystem already known */
2249 	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
2250 	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
2251 		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
2252 			return (EEXIST);
2253 
2254 	if (vfs->vfc_typenum > maxvfsconf)
2255 		maxvfsconf = vfs->vfc_typenum;
2256 
2257 	vfs->vfc_next = NULL;
2258 
2259 	/* Add to the end of the list */
2260 	*vfspp = vfs;
2261 
2262 	/* Call vfs_init() */
2263 	if (vfs->vfc_vfsops->vfs_init)
2264 		(*(vfs->vfc_vfsops->vfs_init))(vfs);
2265 
2266 	return 0;
2267 }
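
/*
 * A file system registers itself with a vfsconf entry it owns; a rough
 * sketch (the "examplefs" names are hypothetical, and the initializer
 * assumes the field order of struct vfsconf as used in this file):
 *
 *	static struct vfsconf examplefs_vfsconf = {
 *		&examplefs_vfsops, "examplefs", 0, 0, 0, NULL, NULL
 *	};
 *
 *	error = vfs_register(&examplefs_vfsconf);
 */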
2268 
2269 int
2270 vfs_unregister(vfs)
2271 	struct vfsconf *vfs;
2272 {
2273 	struct vfsconf *vfsp;
2274 	struct vfsconf **vfspp;
2275 	int maxtypenum;
2276 
2277 	/* Find our vfsconf struct */
2278 	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
2279 	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
2280 		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
2281 			break;
2282 	}
2283 
2284 	if (!vfsp)			/* Not found */
2285 		return (ENOENT);
2286 
2287 	if (vfsp->vfc_refcount)		/* In use */
2288 		return (EBUSY);
2289 
2290 	/* Remove from list and free */
2291 	*vfspp = vfsp->vfc_next;
2292 
2293 	maxtypenum = 0;
2294 
2295 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2296 		if (vfsp->vfc_typenum > maxtypenum)
2297 			maxtypenum = vfsp->vfc_typenum;
2298 
2299 	maxvfsconf = maxtypenum;
2300 	return 0;
2301 }
2302 
2303 /*
2304  * Check if vnode represents a disk device
2305  */
2306 int
2307 vn_isdisk(vp, errp)
2308 	struct vnode *vp;
2309 	int *errp;
2310 {
2311 	if (vp->v_type != VBLK && vp->v_type != VCHR)
2312 		return (0);
2313 
2314 	return (1);
2315 }
2316