xref: /openbsd-src/sys/kern/vfs_subr.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: vfs_subr.c,v 1.65 2001/08/02 08:16:45 assar Exp $	*/
2 /*	$NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
42  */
43 
44 /*
45  * External virtual filesystem routines
46  */
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/proc.h>
51 #include <sys/mount.h>
52 #include <sys/time.h>
53 #include <sys/fcntl.h>
54 #include <sys/kernel.h>
55 #include <sys/vnode.h>
56 #include <sys/stat.h>
57 #include <sys/namei.h>
58 #include <sys/ucred.h>
59 #include <sys/buf.h>
60 #include <sys/errno.h>
61 #include <sys/malloc.h>
62 #include <sys/domain.h>
63 #include <sys/mbuf.h>
64 #include <sys/syscallargs.h>
65 #include <sys/pool.h>
66 
67 #include <vm/vm.h>
68 #include <sys/sysctl.h>
69 
70 #include <miscfs/specfs/specdev.h>
71 
72 #include <uvm/uvm_extern.h>
73 
74 enum vtype iftovt_tab[16] = {
75 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
76 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
77 };
78 int	vttoif_tab[9] = {
79 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
80 	S_IFSOCK, S_IFIFO, S_IFMT,
81 };
82 
83 int doforce = 1;		/* 1 => permit forcible unmounting */
84 int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
85 int suid_clear = 1;		/* 1 => clear SUID / SGID on owner change */
86 
87 /*
88  * Insq/Remq for the vnode usage lists.
89  */
90 #define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
91 #define	bufremvn(bp) {							\
92 	LIST_REMOVE(bp, b_vnbufs);					\
93 	(bp)->b_vnbufs.le_next = NOLIST;				\
94 }
95 
96 struct freelst vnode_hold_list;   /* list of vnodes referencing buffers */
97 struct freelst vnode_free_list;   /* vnode free list */
98 
99 struct mntlist mountlist;			/* mounted filesystem list */
100 struct simplelock mountlist_slock;
101 static struct simplelock mntid_slock;
102 struct simplelock mntvnode_slock;
103 struct simplelock vnode_free_list_slock;
104 struct simplelock spechash_slock;
105 
106 void	vclean __P((struct vnode *, int, struct proc *));
107 
108 void insmntque __P((struct vnode *, struct mount *));
109 int getdevvp __P((dev_t, struct vnode **, enum vtype));
110 
111 int vfs_hang_addrlist __P((struct mount *, struct netexport *,
112 				  struct export_args *));
113 int vfs_free_netcred __P((struct radix_node *, void *));
114 void vfs_free_addrlist __P((struct netexport *));
115 static __inline__ void vputonfreelist __P((struct vnode *));
116 
117 #ifdef DEBUG
118 void printlockedvnodes __P((void));
119 #endif
120 
121 #define VN_KNOTE(vp, b) \
122 	KNOTE((struct klist *)&vp->v_selectinfo.vsi_selinfo.si_note, (b))
123 
124 struct pool vnode_pool;
125 
126 /*
127  * Initialize the vnode management data structures.
128  */
129 void
130 vntblinit()
131 {
132 
133 	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
134 		0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
135 	simple_lock_init(&mntvnode_slock);
136 	simple_lock_init(&mntid_slock);
137 	simple_lock_init(&spechash_slock);
138 	TAILQ_INIT(&vnode_hold_list);
139 	TAILQ_INIT(&vnode_free_list);
140 	simple_lock_init(&vnode_free_list_slock);
141 	CIRCLEQ_INIT(&mountlist);
142 	simple_lock_init(&mountlist_slock);
143 	/*
144 	 * Initialize the filesystem syncer.
145 	 */
146 	vn_initialize_syncerd();
147 }
148 
149 
150 /*
151  * Mark a mount point as busy. Used to synchronize access and to delay
152  * unmounting. Interlock is not released on failure.
153  */
154 
155 int
156 vfs_busy(mp, flags, interlkp, p)
157 	struct mount *mp;
158 	int flags;
159 	struct simplelock *interlkp;
160 	struct proc *p;
161 {
162 	int lkflags;
163 
164 	if (mp->mnt_flag & MNT_UNMOUNT) {
165 		if (flags & LK_NOWAIT)
166 			return (ENOENT);
167 		mp->mnt_flag |= MNT_MWAIT;
168 		if (interlkp)
169 			simple_unlock(interlkp);
170 		/*
171 		 * Since all busy locks are shared except the exclusive
172 		 * lock granted when unmounting, the only place that a
173 		 * wakeup needs to be done is at the release of the
174 		 * exclusive lock at the end of dounmount.
175 		 */
176  		sleep((caddr_t)mp, PVFS);
177 		if (interlkp)
178 			simple_lock(interlkp);
179 		return (ENOENT);
180 	}
181 	lkflags = LK_SHARED;
182 	if (interlkp)
183 		lkflags |= LK_INTERLOCK;
184 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
185 		panic("vfs_busy: unexpected lock failure");
186 	return (0);
187 }
188 
189 
190 /*
191  * Free a busy file system
192  */
193 void
194 vfs_unbusy(mp, p)
195 	struct mount *mp;
196 	struct proc *p;
197 {
198 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
199 }
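
/*
 * Illustrative caller pattern (a sketch, not part of the original
 * source): vfs_busy() and vfs_unbusy() bracket work on a mount point
 * while walking the mount list, as printlockedvnodes() and
 * sysctl_vnode() do later in this file:
 *
 *	struct proc *p = curproc;
 *	struct mount *mp;
 *
 *	simple_lock(&mountlist_slock);
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p) == 0) {
 *		...operate on the busied mount point...
 *		vfs_unbusy(mp, p);
 *	}
 */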
200 
201 /*
202  * Lookup a filesystem type, and if found allocate and initialize
203  * a mount structure for it.
204  *
205  * Devname is usually updated by mount(8) after booting.
206  */
207 
208 int
209 vfs_rootmountalloc(fstypename, devname, mpp)
210 	char *fstypename;
211 	char *devname;
212 	struct mount **mpp;
213 {
214 	struct proc *p = curproc;	/* XXX */
215 	struct vfsconf *vfsp;
216 	struct mount *mp;
217 
218 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
219 		if (!strcmp(vfsp->vfc_name, fstypename))
220 			break;
221 	if (vfsp == NULL)
222 		return (ENODEV);
223 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
224 	bzero((char *)mp, (u_long)sizeof(struct mount));
225 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
226 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
227 	LIST_INIT(&mp->mnt_vnodelist);
228 	mp->mnt_vfc = vfsp;
229 	mp->mnt_op = vfsp->vfc_vfsops;
230 	mp->mnt_flag = MNT_RDONLY;
231 	mp->mnt_vnodecovered = NULLVP;
232 	vfsp->vfc_refcount++;
233 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
234 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
235 	mp->mnt_stat.f_mntonname[0] = '/';
236 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
237 	*mpp = mp;
238  	return (0);
239  }
240 
241 /*
242  * Find an appropriate filesystem to use for the root. If a filesystem
243  * has not been preselected, walk through the list of known filesystems
244  * trying those that have mountroot routines, and try them until one
245  * works or we have tried them all.
246   */
247 int
248 vfs_mountroot()
249 {
250 	struct vfsconf *vfsp;
251 	extern int (*mountroot)(void);
252 	int error;
253 
254 	if (mountroot != NULL)
255 		return ((*mountroot)());
256 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
257 		if (vfsp->vfc_mountroot == NULL)
258 			continue;
259 		if ((error = (*vfsp->vfc_mountroot)()) == 0)
260 			return (0);
261 		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
262  	}
263 	return (ENODEV);
264 }
265 
266 /*
267  * Lookup a mount point by filesystem identifier.
268  */
269 struct mount *
270 vfs_getvfs(fsid)
271 	fsid_t *fsid;
272 {
273 	register struct mount *mp;
274 
275 	simple_lock(&mountlist_slock);
276 	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
277 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
278 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
279 			simple_unlock(&mountlist_slock);
280 			return (mp);
281 		}
282 	}
283 	simple_unlock(&mountlist_slock);
284 	return ((struct mount *)0);
285 }
286 
287 
288 /*
289  * Get a new unique fsid
290  */
291 void
292 vfs_getnewfsid(mp)
293 	struct mount *mp;
294 {
295 	static u_short xxxfs_mntid;
296 
297 	fsid_t tfsid;
298 	int mtype;
299 
300 	simple_lock(&mntid_slock);
301 	mtype = mp->mnt_vfc->vfc_typenum;
302 	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
303 	mp->mnt_stat.f_fsid.val[1] = mtype;
304 	if (xxxfs_mntid == 0)
305 		++xxxfs_mntid;
306 	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
307 	tfsid.val[1] = mtype;
308 	if (!CIRCLEQ_EMPTY(&mountlist)) {
309 		while (vfs_getvfs(&tfsid)) {
310 			tfsid.val[0]++;
311 			xxxfs_mntid++;
312 		}
313 	}
314 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
315 	simple_unlock(&mntid_slock);
316 }
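
/*
 * Worked example (illustrative): for a filesystem with vfc_typenum 3
 * on its first mount (xxxfs_mntid bumped to 1), the candidate fsid is
 *
 *	tfsid.val[0] = makedev(nblkdev + 3, 1);
 *	tfsid.val[1] = 3;
 *
 * and val[0] is incremented until vfs_getvfs(&tfsid) no longer finds
 * a mounted filesystem already using that identifier.
 */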
317 
318 /*
319  * Make a 'unique' number from a mount type name.
320  * Note that this is no longer used for ffs which
321  * now has an on-disk filesystem id.
322  */
323 long
324 makefstype(type)
325 	char *type;
326 {
327 	long rv;
328 
329 	for (rv = 0; *type; type++) {
330 		rv <<= 2;
331 		rv ^= *type;
332 	}
333 	return rv;
334 }
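
/*
 * Worked example (illustrative): makefstype("ffs") shifts the
 * accumulator left two bits before folding in each byte:
 *
 *	rv = (0x000 << 2) ^ 'f' (0x66) = 0x066
 *	rv = (0x066 << 2) ^ 'f' (0x66) = 0x1fe
 *	rv = (0x1fe << 2) ^ 's' (0x73) = 0x78b
 */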
335 
336 /*
337  * Set vnode attributes to VNOVAL
338  */
339 void
340 vattr_null(vap)
341 	register struct vattr *vap;
342 {
343 
344 	vap->va_type = VNON;
345 	/* XXX These next two used to be one line, but were split to work around a GCC bug. */
346 	vap->va_size = VNOVAL;
347 	vap->va_bytes = VNOVAL;
348 	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
349 		vap->va_fsid = vap->va_fileid =
350 		vap->va_blocksize = vap->va_rdev =
351 		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
352 		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
353 		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
354 		vap->va_flags = vap->va_gen = VNOVAL;
355 	vap->va_vaflags = 0;
356 }
357 
358 /*
359  * Routines having to do with the management of the vnode table.
360  */
361 extern int (**dead_vnodeop_p) __P((void *));
362 long numvnodes;
363 
364 /*
365  * Return the next vnode from the free list.
366  */
367 int
368 getnewvnode(tag, mp, vops, vpp)
369 	enum vtagtype tag;
370 	struct mount *mp;
371 	int (**vops) __P((void *));
372 	struct vnode **vpp;
373 {
374 	struct proc *p = curproc;			/* XXX */
375 	struct freelst *listhd;
376 	static int toggle;
377 	struct vnode *vp;
378 	int s;
379 
380 	/*
381 	 * We must choose whether to allocate a new vnode or recycle an
382 	 * existing one. The criterion for allocating a new one is that
383 	 * the total number of vnodes is less than the number desired or
384 	 * there are no vnodes on either free list. Generally we only
385 	 * want to recycle vnodes that have no buffers associated with
386 	 * them, so we look first on the vnode_free_list. If it is empty,
387 	 * we next consider vnodes with referencing buffers on the
388 	 * vnode_hold_list. The toggle ensures that half the time we
389 	 * will use a vnode from the vnode_hold_list, and half the time
390 	 * we will allocate a new one unless the list has grown to twice
391 	 * the desired size. We are reluctant to recycle vnodes from the
392 	 * vnode_hold_list because we will lose the identity of all their
393 	 * referencing buffers.
394 	 */
395 	toggle ^= 1;
396 	if (numvnodes > 2 * desiredvnodes)
397 		toggle = 0;
398 
399 	simple_lock(&vnode_free_list_slock);
400 	s = splbio();
401 	if ((numvnodes < desiredvnodes) ||
402 	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
403 	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
404 		splx(s);
405 		simple_unlock(&vnode_free_list_slock);
406 		vp = pool_get(&vnode_pool, PR_WAITOK);
407 		bzero((char *)vp, sizeof *vp);
408 		numvnodes++;
409 	} else {
410 		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
411 		    vp = TAILQ_NEXT(vp, v_freelist)) {
412 			if (simple_lock_try(&vp->v_interlock))
413 				break;
414 		}
415 		/*
416 		 * Unless this is a bad time of the month, at most
417 		 * the first NCPUS items on the free list are
418 		 * locked, so this is close enough to being empty.
419 		 */
420 		if (vp == NULLVP) {
421 			splx(s);
422 			simple_unlock(&vnode_free_list_slock);
423 			tablefull("vnode");
424 			*vpp = 0;
425 			return (ENFILE);
426 		}
427 		if (vp->v_usecount) {
428 			vprint("free vnode", vp);
429 			panic("free vnode isn't");
430 		}
431 
432 		TAILQ_REMOVE(listhd, vp, v_freelist);
433 		vp->v_bioflag &= ~VBIOONFREELIST;
434 		splx(s);
435 
436 		simple_unlock(&vnode_free_list_slock);
437 		if (vp->v_type != VBAD)
438 			vgonel(vp, p);
439 		else
440 			simple_unlock(&vp->v_interlock);
441 #ifdef DIAGNOSTIC
442 		if (vp->v_data) {
443 			vprint("cleaned vnode", vp);
444 			panic("cleaned vnode isn't");
445 		}
446 		s = splbio();
447 		if (vp->v_numoutput)
448 			panic("Clean vnode has pending I/O's");
449 		splx(s);
450 #endif
451 		vp->v_flag = 0;
452 		vp->v_bioflag = 0;
453 		vp->v_socket = 0;
454 	}
455 	vp->v_type = VNON;
456 	cache_purge(vp);
457 	vp->v_tag = tag;
458 	vp->v_op = vops;
459 	insmntque(vp, mp);
460 	*vpp = vp;
461 	vp->v_usecount = 1;
462 	vp->v_data = 0;
463 	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
464 	return (0);
465 }
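
/*
 * Illustrative use (a sketch): a filesystem's vget-style routine
 * allocates a fresh vnode, attaches its private data and sets the
 * type. The vnodeop vector and node structure named below are
 * hypothetical placeholders:
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if ((error = getnewvnode(VT_NON, mp, xfs_vnodeop_p, &vp)) != 0)
 *		return (error);
 *	vp->v_data = node;	(hypothetical filesystem-private state)
 *	vp->v_type = VREG;
 */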
466 
467 /*
468  * Move a vnode from one mount queue to another.
469  */
470 void
471 insmntque(vp, mp)
472 	register struct vnode *vp;
473 	register struct mount *mp;
474 {
475 	simple_lock(&mntvnode_slock);
476 	/*
477 	 * Delete from old mount point vnode list, if on one.
478 	 */
479 
480 	if (vp->v_mount != NULL)
481 		LIST_REMOVE(vp, v_mntvnodes);
482 	/*
483 	 * Insert into list of vnodes for the new mount point, if available.
484 	 */
485 	if ((vp->v_mount = mp) != NULL)
486 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
487 	simple_unlock(&mntvnode_slock);
488 }
489 
490 
491 /*
492  * Create a vnode for a block device.
493  * Used for root filesystem, argdev, and swap areas.
494  * Also used for memory file system special devices.
495  */
496 int
497 bdevvp(dev, vpp)
498 	dev_t dev;
499 	struct vnode **vpp;
500 {
501 
502 	return (getdevvp(dev, vpp, VBLK));
503 }
504 
505 /*
506  * Create a vnode for a character device.
507  * Used for kernfs and some console handling.
508  */
509 int
510 cdevvp(dev, vpp)
511 	dev_t dev;
512 	struct vnode **vpp;
513 {
514 
515 	return (getdevvp(dev, vpp, VCHR));
516 }
517 
518 /*
519  * Create a vnode for a device.
520  * Used by bdevvp (block device) for root file system etc.,
521  * and by cdevvp (character device) for console and kernfs.
522  */
523 int
524 getdevvp(dev, vpp, type)
525 	dev_t dev;
526 	struct vnode **vpp;
527 	enum vtype type;
528 {
529 	register struct vnode *vp;
530 	struct vnode *nvp;
531 	int error;
532 
533 	if (dev == NODEV) {
534 		*vpp = NULLVP;
535 		return (0);
536 	}
537 	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
538 	if (error) {
539 		*vpp = NULLVP;
540 		return (error);
541 	}
542 	vp = nvp;
543 	vp->v_type = type;
544 	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
545 		vput(vp);
546 		vp = nvp;
547 	}
548 	*vpp = vp;
549 	return (0);
550 }
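
/*
 * Illustrative call (a sketch): a mountroot routine typically obtains
 * the root device vnode this way, in the style of ffs_mountroot():
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("mountroot: can't setup bdevvp for root");
 */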
551 
552 /*
553  * Check to see if the new vnode represents a special device
554  * for which we already have a vnode (either because of
555  * bdevvp() or because of a different vnode representing
556  * the same block device). If such an alias exists, deallocate
557  * the existing contents and return the aliased vnode. The
558  * caller is responsible for filling it with its new contents.
559  */
560 struct vnode *
561 checkalias(nvp, nvp_rdev, mp)
562 	register struct vnode *nvp;
563 	dev_t nvp_rdev;
564 	struct mount *mp;
565 {
566 	struct proc *p = curproc;
567 	register struct vnode *vp;
568 	struct vnode **vpp;
569 
570 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
571 		return (NULLVP);
572 
573 	vpp = &speclisth[SPECHASH(nvp_rdev)];
574 loop:
575 	simple_lock(&spechash_slock);
576 	for (vp = *vpp; vp; vp = vp->v_specnext) {
577 		simple_lock(&vp->v_interlock);
578 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
579 			simple_unlock(&vp->v_interlock);
580 			continue;
581 		}
582 		/*
583 		 * Alias, but not in use, so flush it out.
584 		 */
585 		if (vp->v_usecount == 0) {
586 			simple_unlock(&spechash_slock);
587 			vgonel(vp, p);
588 			goto loop;
589 		}
590 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
591 			simple_unlock(&spechash_slock);
592 			goto loop;
593 		}
594 		break;
595 	}
596 
597 	/*
598 	 * Common case is actually in the if statement
599 	 */
600 	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
601 		MALLOC(nvp->v_specinfo, struct specinfo *,
602 			sizeof(struct specinfo), M_VNODE, M_WAITOK);
603 		nvp->v_rdev = nvp_rdev;
604 		nvp->v_hashchain = vpp;
605 		nvp->v_specnext = *vpp;
606 		nvp->v_specmountpoint = NULL;
607 		nvp->v_speclockf = NULL;
608 		simple_unlock(&spechash_slock);
609 		*vpp = nvp;
610 		if (vp != NULLVP) {
611 			nvp->v_flag |= VALIASED;
612 			vp->v_flag |= VALIASED;
613 			vput(vp);
614 		}
615 		return (NULLVP);
616 	}
617 
618 	/*
619 	 * This code is the uncommon case. It is reached when we have
620 	 * found an alias whose tag is VT_NON and whose type is VBLK.
621 	 * This means we found a block device that was created
622 	 * using bdevvp.
623 	 * An example of such a vnode is the root partition device vnode
624 	 * created in ffs_mountroot.
625 	 *
626 	 * The vnodes created by bdevvp should not be aliased (why?).
627 	 */
628 
629 	simple_unlock(&spechash_slock);
630 	VOP_UNLOCK(vp, 0, p);
631 	simple_lock(&vp->v_interlock);
632 	vclean(vp, 0, p);
633 	vp->v_op = nvp->v_op;
634 	vp->v_tag = nvp->v_tag;
635 	nvp->v_type = VNON;
636 	insmntque(vp, mp);
637 	return (vp);
638 }
639 
640 /*
641  * Grab a particular vnode from the free list, increment its
642  * reference count and lock it. The vnode lock bit is set the
643  * vnode is being eliminated in vgone. The process is awakened
644  * when the transition is completed, and an error returned to
645  * indicate that the vnode is no longer usable (possibly having
646  * been changed to a new file system type).
647  */
648 int
649 vget(vp, flags, p)
650 	struct vnode *vp;
651 	int flags;
652 	struct proc *p;
653 {
654 	int error;
655 	int s;
656 	/*
657 	 * If the vnode is in the process of being cleaned out for
658 	 * another use, we wait for the cleaning to finish and then
659 	 * return failure. Cleaning is determined by checking that
660 	 * the VXLOCK flag is set.
661 	 */
662 	if ((flags & LK_INTERLOCK) == 0) {
663 		simple_lock(&vp->v_interlock);
664 		flags |= LK_INTERLOCK;
665 	}
666 	if (vp->v_flag & VXLOCK) {
667  		vp->v_flag |= VXWANT;
668 		simple_unlock(&vp->v_interlock);
669 		tsleep((caddr_t)vp, PINOD, "vget", 0);
670 		return (ENOENT);
671  	}
672 	if (vp->v_usecount == 0 &&
673 	    (vp->v_bioflag & VBIOONFREELIST)) {
674 		s = splbio();
675 		simple_lock(&vnode_free_list_slock);
676 		if (vp->v_holdcnt > 0)
677 			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
678 		else
679 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
680 		simple_unlock(&vnode_free_list_slock);
681 		vp->v_bioflag &= ~VBIOONFREELIST;
682 		splx(s);
683 	}
684  	vp->v_usecount++;
685 	if (flags & LK_TYPE_MASK) {
686 		if ((error = vn_lock(vp, flags, p)) != 0) {
687 			vp->v_usecount--;
688 			if (vp->v_usecount == 0)
689 				vputonfreelist(vp);
690 
691 			simple_unlock(&vp->v_interlock);
692 		}
693 		return (error);
694 	}
695 	simple_unlock(&vp->v_interlock);
696 	return (0);
697 }
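
/*
 * Illustrative pairing (a sketch): a reference taken with the vnode
 * locked is released with vput(), which unlocks and vrele()s; a
 * reference taken without a lock is dropped with vrele():
 *
 *	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
 *		...use the locked, referenced vnode...
 *		vput(vp);
 *	}
 */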
698 
699 
700 #ifdef DIAGNOSTIC
701 /*
702  * Vnode reference.
703  */
704 void
705 vref(vp)
706 	struct vnode *vp;
707 {
708 	simple_lock(&vp->v_interlock);
709 	if (vp->v_usecount == 0)
710 		panic("vref used where vget required");
711 	vp->v_usecount++;
712 	simple_unlock(&vp->v_interlock);
713 }
714 #endif /* DIAGNOSTIC */
715 
716 static __inline__ void
717 vputonfreelist(vp)
718 	struct vnode *vp;
719 {
720 	int s;
721 	struct freelst *lst;
722 
723 	s = splbio();
724 #ifdef DIAGNOSTIC
725 	if (vp->v_usecount != 0)
726 		panic("Use count is not zero!");
727 
728 	if (vp->v_bioflag & VBIOONFREELIST) {
729 		vprint("vnode already on free list: ", vp);
730 		panic("vnode already on free list");
731 	}
732 #endif
733 
734 	vp->v_bioflag |= VBIOONFREELIST;
735 
736 	if (vp->v_holdcnt > 0)
737 		lst = &vnode_hold_list;
738 	else
739 		lst = &vnode_free_list;
740 
741 	if (vp->v_type == VBAD)
742 		TAILQ_INSERT_HEAD(lst, vp, v_freelist);
743 	else
744 		TAILQ_INSERT_TAIL(lst, vp, v_freelist);
745 
746 	splx(s);
747 }
748 
749 /*
750  * vput(), just unlock and vrele()
751  */
752 void
753 vput(vp)
754 	register struct vnode *vp;
755 {
756 	struct proc *p = curproc;	/* XXX */
757 
758 #ifdef DIAGNOSTIC
759 	if (vp == NULL)
760 		panic("vput: null vp");
761 #endif
762 	simple_lock(&vp->v_interlock);
763 
764 #ifdef DIAGNOSTIC
765 	if (vp->v_usecount == 0) {
766 		vprint("vput: bad ref count", vp);
767 		panic("vput: ref cnt");
768 	}
769 #endif
770 	vp->v_usecount--;
771 	if (vp->v_usecount > 0) {
772 		simple_unlock(&vp->v_interlock);
773 		VOP_UNLOCK(vp, 0, p);
774 		return;
775 	}
776 
777 #ifdef DIAGNOSTIC
778 	if (vp->v_writecount != 0) {
779 		vprint("vput: bad writecount", vp);
780 		panic("vput: v_writecount != 0");
781 	}
782 #endif
783 	vputonfreelist(vp);
784 
785 	simple_unlock(&vp->v_interlock);
786 
787 	VOP_INACTIVE(vp, p);
788 }
789 
790 /*
791  * Vnode release - use for active VNODES.
792  * If count drops to zero, call inactive routine and return to freelist.
793  */
794 void
795 vrele(vp)
796 	register struct vnode *vp;
797 {
798 	struct proc *p = curproc;	/* XXX */
799 
800 #ifdef DIAGNOSTIC
801 	if (vp == NULL)
802 		panic("vrele: null vp");
803 #endif
804 	simple_lock(&vp->v_interlock);
805 #ifdef DIAGNOSTIC
806 	if (vp->v_usecount == 0) {
807 		vprint("vrele: bad ref count", vp);
808 		panic("vrele: ref cnt");
809 	}
810 #endif
811 	vp->v_usecount--;
812 	if (vp->v_usecount > 0) {
813 		simple_unlock(&vp->v_interlock);
814 		return;
815 	}
816 
817 #ifdef DIAGNOSTIC
818 	if (vp->v_writecount != 0) {
819 		vprint("vrele: bad writecount", vp);
820 		panic("vrele: v_writecount != 0");
821 	}
822 #endif
823 	vputonfreelist(vp);
824 
825 	if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0)
826 		VOP_INACTIVE(vp, p);
827 }
828 
829 void vhold __P((struct vnode *vp));
830 
831 /*
832  * Page or buffer structure gets a reference.
833  */
834 void
835 vhold(vp)
836 	register struct vnode *vp;
837 {
838 
839 	/*
840 	 * If it is on the freelist and the hold count is currently
841 	 * zero, move it to the hold list.
842 	 */
843   	simple_lock(&vp->v_interlock);
844 	if ((vp->v_bioflag & VBIOONFREELIST) &&
845 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
846 		simple_lock(&vnode_free_list_slock);
847 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
848 		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
849 		simple_unlock(&vnode_free_list_slock);
850 	}
851 	vp->v_holdcnt++;
852 	simple_unlock(&vp->v_interlock);
853 }
854 
855 /*
856  * Remove any vnodes in the vnode table belonging to mount point mp.
857  *
858  * If MNT_NOFORCE is specified, there should not be any active ones,
859  * return error if any are found (nb: this is a user error, not a
860  * system error). If MNT_FORCE is specified, detach any active vnodes
861  * that are found.
862  */
863 #ifdef DEBUG
864 int busyprt = 0;	/* print out busy vnodes */
865 struct ctldebug debug1 = { "busyprt", &busyprt };
866 #endif
867 
868 int
869 vflush(mp, skipvp, flags)
870 	struct mount *mp;
871 	struct vnode *skipvp;
872 	int flags;
873 {
874 	struct proc *p = curproc;
875 	register struct vnode *vp, *nvp;
876 	int busy = 0;
877 
878 	simple_lock(&mntvnode_slock);
879 loop:
880 	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
881 		if (vp->v_mount != mp)
882 			goto loop;
883 		nvp = vp->v_mntvnodes.le_next;
884 		/*
885 		 * Skip over a selected vnode.
886 		 */
887 		if (vp == skipvp)
888 			continue;
889 
890 		simple_lock(&vp->v_interlock);
891 		/*
892 		 * Skip over vnodes marked VSYSTEM.
893 		 */
894 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
895 			simple_unlock(&vp->v_interlock);
896 			continue;
897 		}
898 		/*
899 		 * If WRITECLOSE is set, only flush out regular file
900 		 * vnodes open for writing.
901 		 */
902 		if ((flags & WRITECLOSE) &&
903 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
904 			simple_unlock(&vp->v_interlock);
905 			continue;
906 		}
907 		/*
908 		 * With v_usecount == 0, all we need to do is clear
909 		 * out the vnode data structures and we are done.
910 		 */
911 		if (vp->v_usecount == 0) {
912 			simple_unlock(&mntvnode_slock);
913 			vgonel(vp, p);
914 			simple_lock(&mntvnode_slock);
915 			continue;
916 		}
917 		/*
918 		 * If FORCECLOSE is set, forcibly close the vnode.
919 		 * For block or character devices, revert to an
920 		 * anonymous device. For all other files, just kill them.
921 		 */
922 		if (flags & FORCECLOSE) {
923 			simple_unlock(&mntvnode_slock);
924 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
925 				vgonel(vp, p);
926 			} else {
927 				vclean(vp, 0, p);
928 				vp->v_op = spec_vnodeop_p;
929 				insmntque(vp, (struct mount *)0);
930 			}
931 			simple_lock(&mntvnode_slock);
932 			continue;
933 		}
934 #ifdef DEBUG
935 		if (busyprt)
936 			vprint("vflush: busy vnode", vp);
937 #endif
938 		simple_unlock(&vp->v_interlock);
939 		busy++;
940 	}
941 	simple_unlock(&mntvnode_slock);
942 	if (busy)
943 		return (EBUSY);
944 	return (0);
945 }
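
/*
 * Illustrative use (a sketch): a filesystem's unmount routine flushes
 * every vnode on the mount before tearing it down, passing FORCECLOSE
 * only for forced unmounts (mntflags as passed to the unmount entry
 * point):
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	if ((error = vflush(mp, NULLVP, flags)) != 0)
 *		return (error);
 */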
946 
947 /*
948  * Disassociate the underlying file system from a vnode.
949  * The vnode interlock is held on entry.
950  */
951 void
952 vclean(vp, flags, p)
953 	register struct vnode *vp;
954 	int flags;
955 	struct proc *p;
956 {
957 	int active;
958 
959 	/*
960 	 * Check to see if the vnode is in use.
961 	 * If so we have to reference it before we clean it out
962 	 * so that its count cannot fall to zero and generate a
963 	 * race against ourselves to recycle it.
964 	 */
965 	if ((active = vp->v_usecount) != 0)
966 		vp->v_usecount++;
967 
968 	/*
969 	 * Prevent the vnode from being recycled or
970 	 * brought into use while we clean it out.
971 	 */
972 	if (vp->v_flag & VXLOCK)
973 		panic("vclean: deadlock");
974 	vp->v_flag |= VXLOCK;
975 	/*
976 	 * Even if the count is zero, the VOP_INACTIVE routine may still
977 	 * have the object locked while it cleans it out. The VOP_LOCK
978 	 * ensures that the VOP_INACTIVE routine is done with its work.
979 	 * For active vnodes, it ensures that no other activity can
980 	 * occur while the underlying object is being cleaned out.
981 	 */
982 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
983 
984 	/*
985 	 * clean out any VM data associated with the vnode.
986 	 */
987 	uvm_vnp_terminate(vp);
988 	/*
989 	 * Clean out any buffers associated with the vnode.
990 	 */
991 	if (flags & DOCLOSE)
992 		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
993 	/*
994 	 * If purging an active vnode, it must be closed and
995 	 * deactivated before being reclaimed. Note that
996 	 * VOP_INACTIVE will unlock the vnode.
997 	 */
998 	if (active) {
999 		if (flags & DOCLOSE)
1000 			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
1001 		VOP_INACTIVE(vp, p);
1002 	} else {
1003 		/*
1004 		 * Any other processes trying to obtain this lock must first
1005 		 * wait for VXLOCK to clear, then call the new lock operation.
1006 		 */
1007 		VOP_UNLOCK(vp, 0, p);
1008 	}
1009 
1010 	/*
1011 	 * Reclaim the vnode.
1012 	 */
1013 	if (VOP_RECLAIM(vp, p))
1014 		panic("vclean: cannot reclaim");
1015 	if (active) {
1016 		simple_lock(&vp->v_interlock);
1017 
1018 		vp->v_usecount--;
1019 		if (vp->v_usecount == 0) {
1020 			if (vp->v_holdcnt > 0)
1021 				panic("vclean: not clean");
1022 			vputonfreelist(vp);
1023 		}
1024 
1025 		simple_unlock(&vp->v_interlock);
1026 	}
1027 	cache_purge(vp);
1028 	if (vp->v_vnlock) {
1029 		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1030 			vprint("vclean: lock not drained", vp);
1031 		FREE(vp->v_vnlock, M_VNODE);
1032 		vp->v_vnlock = NULL;
1033 	}
1034 
1035 	/*
1036 	 * Done with purge, notify sleepers of the grim news.
1037 	 */
1038 	vp->v_op = dead_vnodeop_p;
1039 	simple_lock(&vp->v_selectinfo.vsi_lock);
1040 	VN_KNOTE(vp, NOTE_REVOKE);
1041 	simple_unlock(&vp->v_selectinfo.vsi_lock);
1042 	vp->v_tag = VT_NON;
1043 	vp->v_flag &= ~VXLOCK;
1044 #ifdef DIAGNOSTIC
1045 	vp->v_flag &= ~VLOCKSWORK;
1046 #endif
1047 	if (vp->v_flag & VXWANT) {
1048 		vp->v_flag &= ~VXWANT;
1049 		wakeup((caddr_t)vp);
1050 	}
1051 }
1052 
1053 
1054 
1055 /*
1056  * Recycle an unused vnode to the front of the free list.
1057  * Release the passed interlock if the vnode will be recycled.
1058  */
1059 int
1060 vrecycle(vp, inter_lkp, p)
1061 	struct vnode *vp;
1062 	struct simplelock *inter_lkp;
1063 	struct proc *p;
1064 {
1065 
1066 	simple_lock(&vp->v_interlock);
1067 	if (vp->v_usecount == 0) {
1068 		if (inter_lkp)
1069 			simple_unlock(inter_lkp);
1070 		vgonel(vp, p);
1071 		return (1);
1072 	}
1073 	simple_unlock(&vp->v_interlock);
1074 	return (0);
1075 }
1076 
1077 
1078 /*
1079  * Eliminate all activity associated with a vnode
1080  * in preparation for reuse.
1081  */
1082 void
1083 vgone(vp)
1084 	register struct vnode *vp;
1085 {
1086 	struct proc *p = curproc;
1087 
1088 	simple_lock (&vp->v_interlock);
1089 	vgonel(vp, p);
1090 }
1091 
1092 /*
1093  * vgone, with the vp interlock held.
1094  */
1095 void
1096 vgonel(vp, p)
1097 	struct vnode *vp;
1098 	struct proc *p;
1099 {
1100 	register struct vnode *vq;
1101 	struct vnode *vx;
1102 
1103 	/*
1104 	 * If a vgone (or vclean) is already in progress,
1105 	 * wait until it is done and return.
1106 	 */
1107 	if (vp->v_flag & VXLOCK) {
1108 		vp->v_flag |= VXWANT;
1109 		simple_unlock(&vp->v_interlock);
1110 		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1111 		return;
1112 	}
1113 	/*
1114 	 * Clean out the filesystem specific data.
1115 	 */
1116 	vclean(vp, DOCLOSE, p);
1117 	/*
1118 	 * Delete from old mount point vnode list, if on one.
1119 	 */
1120 	if (vp->v_mount != NULL)
1121 		insmntque(vp, (struct mount *)0);
1122 	/*
1123 	 * If special device, remove it from special device alias list
1124 	 * if it is on one.
1125 	 */
1126 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1127 		simple_lock(&spechash_slock);
1128 		if (*vp->v_hashchain == vp) {
1129 			*vp->v_hashchain = vp->v_specnext;
1130 		} else {
1131 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1132 				if (vq->v_specnext != vp)
1133 					continue;
1134 				vq->v_specnext = vp->v_specnext;
1135 				break;
1136 			}
1137 			if (vq == NULL)
1138 				panic("missing bdev");
1139 		}
1140 		if (vp->v_flag & VALIASED) {
1141 			vx = NULL;
1142 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1143 				if (vq->v_rdev != vp->v_rdev ||
1144 				    vq->v_type != vp->v_type)
1145 					continue;
1146 				if (vx)
1147 					break;
1148 				vx = vq;
1149 			}
1150 			if (vx == NULL)
1151 				panic("missing alias");
1152 			if (vq == NULL)
1153 				vx->v_flag &= ~VALIASED;
1154 			vp->v_flag &= ~VALIASED;
1155 		}
1156 		simple_unlock(&spechash_slock);
1157 		FREE(vp->v_specinfo, M_VNODE);
1158 		vp->v_specinfo = NULL;
1159 	}
1160 	/*
1161 	 * If it is on the freelist and not already at the head,
1162 	 * move it to the head of the list.
1163 	 */
1164 	vp->v_type = VBAD;
1165 
1166 	/*
1167 	 * Move onto the free list, unless we were called from
1168 	 * getnewvnode and we're not on any free list
1169 	 */
1170 	if (vp->v_usecount == 0 &&
1171 	    (vp->v_bioflag & VBIOONFREELIST)) {
1172 		int s;
1173 
1174 		simple_lock(&vnode_free_list_slock);
1175 		s = splbio();
1176 
1177 		if (vp->v_holdcnt > 0)
1178 			panic("vgonel: not clean");
1179 
1180 		if (TAILQ_FIRST(&vnode_free_list) != vp) {
1181 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1182 			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1183 		}
1184 		splx(s);
1185 		simple_unlock(&vnode_free_list_slock);
1186 	}
1187 }
1188 
1189 /*
1190  * Lookup a vnode by device number.
1191  */
1192 int
1193 vfinddev(dev, type, vpp)
1194 	dev_t dev;
1195 	enum vtype type;
1196 	struct vnode **vpp;
1197 {
1198 	register struct vnode *vp;
1199 	int rc = 0;
1200 
1201 	simple_lock(&spechash_slock);
1202 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1203 		if (dev != vp->v_rdev || type != vp->v_type)
1204 			continue;
1205 		*vpp = vp;
1206 		rc = 1;
1207 		break;
1208 	}
1209 	simple_unlock(&spechash_slock);
1210 	return (rc);
1211 }
1212 
1213 /*
1214  * Revoke all the vnodes corresponding to the specified minor number
1215  * range (endpoints inclusive) of the specified major.
1216  */
1217 void
1218 vdevgone(maj, minl, minh, type)
1219 	int maj, minl, minh;
1220 	enum vtype type;
1221 {
1222 	struct vnode *vp;
1223 	int mn;
1224 
1225 	for (mn = minl; mn <= minh; mn++)
1226 		if (vfinddev(makedev(maj, mn), type, &vp))
1227 			VOP_REVOKE(vp, REVOKEALL);
1228 }
1229 
1230 /*
1231  * Calculate the total number of references to a special device.
1232  */
1233 int
1234 vcount(vp)
1235 	struct vnode *vp;
1236 {
1237 	struct vnode *vq, *vnext;
1238 	int count;
1239 
1240 loop:
1241 	if ((vp->v_flag & VALIASED) == 0)
1242 		return (vp->v_usecount);
1243 	simple_lock(&spechash_slock);
1244 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1245 		vnext = vq->v_specnext;
1246 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1247 			continue;
1248 		/*
1249 		 * Alias, but not in use, so flush it out.
1250 		 */
1251 		if (vq->v_usecount == 0 && vq != vp) {
1252 			simple_unlock(&spechash_slock);
1253 			vgone(vq);
1254 			goto loop;
1255 		}
1256 		count += vq->v_usecount;
1257 	}
1258 	simple_unlock(&spechash_slock);
1259 	return (count);
1260 }
1261 
1262 /*
1263  * Print out a description of a vnode.
1264  */
1265 static char *typename[] =
1266    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1267 
1268 void
1269 vprint(label, vp)
1270 	char *label;
1271 	register struct vnode *vp;
1272 {
1273 	char buf[64];
1274 
1275 	if (label != NULL)
1276 		printf("%s: ", label);
1277 	printf("type %s, usecount %u, writecount %u, holdcount %u,",
1278 		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1279 		vp->v_holdcnt);
1280 	buf[0] = '\0';
1281 	if (vp->v_flag & VROOT)
1282 		strcat(buf, "|VROOT");
1283 	if (vp->v_flag & VTEXT)
1284 		strcat(buf, "|VTEXT");
1285 	if (vp->v_flag & VSYSTEM)
1286 		strcat(buf, "|VSYSTEM");
1287 	if (vp->v_flag & VXLOCK)
1288 		strcat(buf, "|VXLOCK");
1289 	if (vp->v_flag & VXWANT)
1290 		strcat(buf, "|VXWANT");
1291 	if (vp->v_bioflag & VBIOWAIT)
1292 		strcat(buf, "|VBIOWAIT");
1293 	if (vp->v_flag & VALIASED)
1294 		strcat(buf, "|VALIASED");
1295 	if (buf[0] != '\0')
1296 		printf(" flags (%s)", &buf[1]);
1297 	if (vp->v_data == NULL) {
1298 		printf("\n");
1299 	} else {
1300 		printf("\n\t");
1301 		VOP_PRINT(vp);
1302 	}
1303 }
1304 
1305 #ifdef DEBUG
1306 /*
1307  * List all of the locked vnodes in the system.
1308  * Called when debugging the kernel.
1309  */
1310 void
1311 printlockedvnodes()
1312 {
1313 	struct proc *p = curproc;
1314 	register struct mount *mp, *nmp;
1315 	register struct vnode *vp;
1316 
1317 	printf("Locked vnodes\n");
1318 	simple_lock(&mountlist_slock);
1319 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1320 	    mp = nmp) {
1321 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1322 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
1323 			continue;
1324 		}
1325 		for (vp = mp->mnt_vnodelist.lh_first; vp;
1326 		    vp = vp->v_mntvnodes.le_next) {
1327 			if (VOP_ISLOCKED(vp))
1328 				vprint((char *)0, vp);
1329 		}
1330 		simple_lock(&mountlist_slock);
1331 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
1332 		vfs_unbusy(mp, p);
1333  	}
1334 	simple_unlock(&mountlist_slock);
1335 
1336 }
1337 #endif
1338 
1339 /*
1340  * Top level filesystem related information gathering.
1341  */
1342 int
1343 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1344 	int *name;
1345 	u_int namelen;
1346 	void *oldp;
1347 	size_t *oldlenp;
1348 	void *newp;
1349 	size_t newlen;
1350 	struct proc *p;
1351 {
1352 	struct vfsconf *vfsp;
1353 
1354 	/* all sysctl names at this level are at least name and field */
1355 	if (namelen < 2)
1356 		return (ENOTDIR);		/* overloaded */
1357 	if (name[0] != VFS_GENERIC) {
1358 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1359 			if (vfsp->vfc_typenum == name[0])
1360 				break;
1361 		if (vfsp == NULL)
1362 			return (EOPNOTSUPP);
1363 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
1364 		    oldp, oldlenp, newp, newlen, p));
1365 	}
1366 	switch (name[1]) {
1367 	case VFS_MAXTYPENUM:
1368 		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
1369 	case VFS_CONF:
1370 		if (namelen < 3)
1371 			return (ENOTDIR);	/* overloaded */
1372 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1373 			if (vfsp->vfc_typenum == name[2])
1374 				break;
1375 		if (vfsp == NULL)
1376 			return (EOPNOTSUPP);
1377 		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
1378 		    sizeof(struct vfsconf)));
1379 	}
1380 	return (EOPNOTSUPP);
1381 }
1382 
1383 
1384 int kinfo_vdebug = 1;
1385 int kinfo_vgetfailed;
1386 #define KINFO_VNODESLOP	10
1387 /*
1388  * Dump vnode list (via sysctl).
1389  * Copyout address of vnode followed by vnode.
1390  */
1391 /* ARGSUSED */
1392 int
1393 sysctl_vnode(where, sizep, p)
1394 	char *where;
1395 	size_t *sizep;
1396 	struct proc *p;
1397 {
1398 	register struct mount *mp, *nmp;
1399 	struct vnode *vp, *nvp;
1400 	register char *bp = where, *savebp;
1401 	char *ewhere;
1402 	int error;
1403 
1404 #define VPTRSZ	sizeof (struct vnode *)
1405 #define VNODESZ	sizeof (struct vnode)
1406 	if (where == NULL) {
1407 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1408 		return (0);
1409 	}
1410 	ewhere = where + *sizep;
1411 
1412 	simple_lock(&mountlist_slock);
1413 	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1414 	    mp = nmp) {
1415 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1416 			nmp = CIRCLEQ_NEXT(mp, mnt_list);
1417 			continue;
1418 		}
1419 		savebp = bp;
1420 again:
1421 		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
1422 		    vp = nvp) {
1423 			/*
1424 			 * Check that the vp is still associated with
1425 			 * this filesystem.  RACE: could have been
1426 			 * recycled onto the same filesystem.
1427 			 */
1428 			if (vp->v_mount != mp) {
1429 				simple_unlock(&mntvnode_slock);
1430 				if (kinfo_vdebug)
1431 					printf("kinfo: vp changed\n");
1432 				bp = savebp;
1433 				goto again;
1434 			}
1435 			nvp = vp->v_mntvnodes.le_next;
1436 			if (bp + VPTRSZ + VNODESZ > ewhere) {
1437 				simple_unlock(&mntvnode_slock);
1438 				*sizep = bp - where;
1439 				return (ENOMEM);
1440 			}
1441 			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
1442 			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
1443 				return (error);
1444 			bp += VPTRSZ + VNODESZ;
1445 			simple_lock(&mntvnode_slock);
1446 		}
1447 
1448 		simple_unlock(&mntvnode_slock);
1449 		simple_lock(&mountlist_slock);
1450 		nmp = CIRCLEQ_NEXT(mp, mnt_list);
1451 		vfs_unbusy(mp, p);
1452 	}
1453 
1454 	simple_unlock(&mountlist_slock);
1455 
1456 	*sizep = bp - where;
1457 	return (0);
1458 }
1459 
1460 /*
1461  * Check to see if a filesystem is mounted on a block device.
1462  */
1463 int
1464 vfs_mountedon(vp)
1465 	register struct vnode *vp;
1466 {
1467 	register struct vnode *vq;
1468 	int error = 0;
1469 
1470  	if (vp->v_specmountpoint != NULL)
1471 		return (EBUSY);
1472 	if (vp->v_flag & VALIASED) {
1473 		simple_lock(&spechash_slock);
1474 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1475 			if (vq->v_rdev != vp->v_rdev ||
1476 			    vq->v_type != vp->v_type)
1477 				continue;
1478 			if (vq->v_specmountpoint != NULL) {
1479 				error = EBUSY;
1480 				break;
1481 			}
1482  		}
1483 		simple_unlock(&spechash_slock);
1484 	}
1485 	return (error);
1486 }
1487 
1488 /*
1489  * Build hash lists of net addresses and hang them off the mount point.
1490  * Called by ufs_mount() to set up the lists of export addresses.
1491  */
1492 int
1493 vfs_hang_addrlist(mp, nep, argp)
1494 	struct mount *mp;
1495 	struct netexport *nep;
1496 	struct export_args *argp;
1497 {
1498 	register struct netcred *np;
1499 	register struct radix_node_head *rnh;
1500 	register int i;
1501 	struct radix_node *rn;
1502 	struct sockaddr *saddr, *smask = 0;
1503 	struct domain *dom;
1504 	int error;
1505 
1506 	if (argp->ex_addrlen == 0) {
1507 		if (mp->mnt_flag & MNT_DEFEXPORTED)
1508 			return (EPERM);
1509 		np = &nep->ne_defexported;
1510 		np->netc_exflags = argp->ex_flags;
1511 		np->netc_anon = argp->ex_anon;
1512 		np->netc_anon.cr_ref = 1;
1513 		mp->mnt_flag |= MNT_DEFEXPORTED;
1514 		return (0);
1515 	}
1516 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1517 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1518 	bzero((caddr_t)np, i);
1519 	saddr = (struct sockaddr *)(np + 1);
1520 	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
1521 	if (error)
1522 		goto out;
1523 	if (saddr->sa_len > argp->ex_addrlen)
1524 		saddr->sa_len = argp->ex_addrlen;
1525 	if (argp->ex_masklen) {
1526 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1527 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1528 		if (error)
1529 			goto out;
1530 		if (smask->sa_len > argp->ex_masklen)
1531 			smask->sa_len = argp->ex_masklen;
1532 	}
1533 	i = saddr->sa_family;
1534 	if ((rnh = nep->ne_rtable[i]) == 0) {
1535 		/*
1536 		 * Seems silly to initialize every AF when most are not
1537 		 * used, do so on demand here
1538 		 */
1539 		for (dom = domains; dom; dom = dom->dom_next)
1540 			if (dom->dom_family == i && dom->dom_rtattach) {
1541 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
1542 					dom->dom_rtoffset);
1543 				break;
1544 			}
1545 		if ((rnh = nep->ne_rtable[i]) == 0) {
1546 			error = ENOBUFS;
1547 			goto out;
1548 		}
1549 	}
1550 	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1551 		np->netc_rnodes);
1552 	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1553 		error = EPERM;
1554 		goto out;
1555 	}
1556 	np->netc_exflags = argp->ex_flags;
1557 	np->netc_anon = argp->ex_anon;
1558 	np->netc_anon.cr_ref = 1;
1559 	return (0);
1560 out:
1561 	free(np, M_NETADDR);
1562 	return (error);
1563 }
1564 
1565 /* ARGSUSED */
1566 int
1567 vfs_free_netcred(rn, w)
1568 	struct radix_node *rn;
1569 	void *w;
1570 {
1571 	register struct radix_node_head *rnh = (struct radix_node_head *)w;
1572 
1573 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1574 	free((caddr_t)rn, M_NETADDR);
1575 	return (0);
1576 }
1577 
1578 /*
1579  * Free the net address hash lists that are hanging off the mount points.
1580  */
1581 void
1582 vfs_free_addrlist(nep)
1583 	struct netexport *nep;
1584 {
1585 	register int i;
1586 	register struct radix_node_head *rnh;
1587 
1588 	for (i = 0; i <= AF_MAX; i++)
1589 		if ((rnh = nep->ne_rtable[i]) != NULL) {
1590 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1591 			free((caddr_t)rnh, M_RTABLE);
1592 			nep->ne_rtable[i] = 0;
1593 		}
1594 }
1595 
1596 int
1597 vfs_export(mp, nep, argp)
1598 	struct mount *mp;
1599 	struct netexport *nep;
1600 	struct export_args *argp;
1601 {
1602 	int error;
1603 
1604 	if (argp->ex_flags & MNT_DELEXPORT) {
1605 		vfs_free_addrlist(nep);
1606 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1607 	}
1608 	if (argp->ex_flags & MNT_EXPORTED) {
1609 		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1610 			return (error);
1611 		mp->mnt_flag |= MNT_EXPORTED;
1612 	}
1613 	return (0);
1614 }
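
/*
 * Illustrative use (a sketch): a filesystem's mount routine passes
 * export arguments through to vfs_export() when updating an export;
 * "ump->um_export" names a filesystem-private netexport and is a
 * hypothetical placeholder:
 *
 *	if (args.fspec == 0)
 *		return (vfs_export(mp, &ump->um_export, &args.export));
 */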
1615 
1616 struct netcred *
1617 vfs_export_lookup(mp, nep, nam)
1618 	register struct mount *mp;
1619 	struct netexport *nep;
1620 	struct mbuf *nam;
1621 {
1622 	register struct netcred *np;
1623 	register struct radix_node_head *rnh;
1624 	struct sockaddr *saddr;
1625 
1626 	np = NULL;
1627 	if (mp->mnt_flag & MNT_EXPORTED) {
1628 		/*
1629 		 * Lookup in the export list first.
1630 		 */
1631 		if (nam != NULL) {
1632 			saddr = mtod(nam, struct sockaddr *);
1633 			rnh = nep->ne_rtable[saddr->sa_family];
1634 			if (rnh != NULL) {
1635 				np = (struct netcred *)
1636 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
1637 					    rnh);
1638 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1639 					np = NULL;
1640 			}
1641 		}
1642 		/*
1643 		 * If no address match, use the default if it exists.
1644 		 */
1645 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1646 			np = &nep->ne_defexported;
1647 	}
1648 	return (np);
1649 }
1650 
1651 /*
1652  * Do the usual access checking.
1653  * file_mode, uid and gid are from the vnode in question,
1654  * while acc_mode and cred are from the VOP_ACCESS parameter list
1655  */
1656 int
1657 vaccess(file_mode, uid, gid, acc_mode, cred)
1658 	mode_t file_mode;
1659 	uid_t uid;
1660 	gid_t gid;
1661 	mode_t acc_mode;
1662 	struct ucred *cred;
1663 {
1664 	mode_t mask;
1665 
1666 	/* User id 0 always gets access. */
1667 	if (cred->cr_uid == 0)
1668 		return 0;
1669 
1670 	mask = 0;
1671 
1672 	/* Otherwise, check the owner. */
1673 	if (cred->cr_uid == uid) {
1674 		if (acc_mode & VEXEC)
1675 			mask |= S_IXUSR;
1676 		if (acc_mode & VREAD)
1677 			mask |= S_IRUSR;
1678 		if (acc_mode & VWRITE)
1679 			mask |= S_IWUSR;
1680 		return (file_mode & mask) == mask ? 0 : EACCES;
1681 	}
1682 
1683 	/* Otherwise, check the groups. */
1684 	if (cred->cr_gid == gid || groupmember(gid, cred)) {
1685 		if (acc_mode & VEXEC)
1686 			mask |= S_IXGRP;
1687 		if (acc_mode & VREAD)
1688 			mask |= S_IRGRP;
1689 		if (acc_mode & VWRITE)
1690 			mask |= S_IWGRP;
1691 		return (file_mode & mask) == mask ? 0 : EACCES;
1692 	}
1693 
1694 	/* Otherwise, check everyone else. */
1695 	if (acc_mode & VEXEC)
1696 		mask |= S_IXOTH;
1697 	if (acc_mode & VREAD)
1698 		mask |= S_IROTH;
1699 	if (acc_mode & VWRITE)
1700 		mask |= S_IWOTH;
1701 	return (file_mode & mask) == mask ? 0 : EACCES;
1702 }
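
/*
 * Worked example (illustrative): with file_mode 0644, owner uid 100,
 * and a credential for uid 200 that is not in the file's group,
 * VREAD builds mask = S_IROTH (0004); 0644 & 0004 == 0004, so access
 * is granted. VWRITE builds mask = S_IWOTH (0002) and fails with
 * EACCES, since 0644 & 0002 == 0.
 */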
1703 
1704 /*
1705  * Unmount all file systems.
1706  * We traverse the list in reverse order under the assumption that doing so
1707  * will avoid needing to worry about dependencies.
1708  */
1709 void
1710 vfs_unmountall()
1711 {
1712 	register struct mount *mp, *nmp;
1713 	int allerror, error, again = 1;
1714 
1715  retry:
1716 	allerror = 0;
1717 	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
1718 	    mp = nmp) {
1719 		nmp = CIRCLEQ_PREV(mp, mnt_list);
1720 		if ((error = dounmount(mp, MNT_FORCE, curproc)) != 0) {
1721 			printf("unmount of %s failed with error %d\n",
1722 			    mp->mnt_stat.f_mntonname, error);
1723 			allerror = 1;
1724 		}
1725 	}
1726 
1727 	if (allerror) {
1728 		printf("WARNING: some file systems would not unmount\n");
1729 		if (again) {
1730 			printf("retrying\n");
1731 			again = 0;
1732 			goto retry;
1733 		}
1734 	}
1735 }
1736 
1737 /*
1738  * Sync and unmount file systems before shutting down.
1739  */
1740 void
1741 vfs_shutdown()
1742 {
1743 	/* XXX Should suspend scheduling. */
1744 	(void) spl0();
1745 
1746 	printf("syncing disks... ");
1747 
1748 	if (panicstr == 0) {
1749 		/* Sync before unmount, in case we hang on something. */
1750 		sys_sync(&proc0, (void *)0, (register_t *)0);
1751 
1752 		/* Unmount file systems. */
1753 		vfs_unmountall();
1754 	}
1755 
1756 	if (vfs_syncwait(1))
1757 		printf("giving up\n");
1758 	else
1759 		printf("done\n");
1760 }
1761 
1762 /*
1763  * Perform a sync() operation and wait for buffers to flush.
1764  * Assumptions: called w/ scheduler disabled and physical I/O enabled,
1765  * for now called at spl0() XXX
1766  */
1767 int
1768 vfs_syncwait(verbose)
1769 	int verbose;
1770 {
1771 	register struct buf *bp;
1772 	int iter, nbusy, dcount, s;
1773 	struct proc *p;
1774 
1775 	p = curproc ? curproc : &proc0;
1776 	sys_sync(p, (void *)0, (register_t *)0);
1777 
1778 	/* Wait for sync to finish. */
1779 	dcount = 10000;
1780 	for (iter = 0; iter < 20; iter++) {
1781 		nbusy = 0;
1782 		for (bp = &buf[nbuf]; --bp >= buf; ) {
1783 			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
1784 				nbusy++;
1785 			/*
1786 			 * With soft updates, some buffers that are
1787 			 * written will be remarked as dirty until other
1788 			 * buffers are written.
1789 			 */
1790 			if (bp->b_flags & B_DELWRI) {
1791 				s = splbio();
1792 				bremfree(bp);
1793 				bp->b_flags |= B_BUSY;
1794 				splx(s);
1795 				nbusy++;
1796 				bawrite(bp);
1797 				if (dcount-- <= 0) {
1798 					if (verbose)
1799 						printf("softdep ");
1800 					return 1;
1801 				}
1802 			}
1803 		}
1804 		if (nbusy == 0)
1805 			break;
1806 		if (verbose)
1807 			printf("%d ", nbusy);
1808 		DELAY(40000 * iter);
1809 	}
1810 
1811 	return nbusy;
1812 }
1813 
1814 /*
1815  * posix file system related system variables.
1816  */
1817 int
1818 fs_posix_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1819 	int *name;
1820 	u_int namelen;
1821 	void *oldp;
1822 	size_t *oldlenp;
1823 	void *newp;
1824 	size_t newlen;
1825 	struct proc *p;
1826 {
1827 	/* all sysctl names at this level are terminal */
1828 	if (namelen != 1)
1829 		return (ENOTDIR);
1830 
1831 	switch (name[0]) {
1832 	case FS_POSIX_SETUID:
1833 		if (newp && securelevel > 0)
1834 			return (EPERM);
1835 		return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
1836 	default:
1837 		return (EOPNOTSUPP);
1838 	}
1839 	/* NOTREACHED */
1840 }
1841 
1842 /*
1843  * file system related system variables.
1844  */
1845 int
1846 fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1847 	int *name;
1848 	u_int namelen;
1849 	void *oldp;
1850 	size_t *oldlenp;
1851 	void *newp;
1852 	size_t newlen;
1853 	struct proc *p;
1854 {
1855 	sysctlfn *fn;
1856 
1857 	switch (name[0]) {
1858 	case FS_POSIX:
1859 		fn = fs_posix_sysctl;
1860 		break;
1861 	default:
1862 		return (EOPNOTSUPP);
1863 	}
1864 	return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
1865 }
1866 
1867 
1868 /*
1869  * Routines dealing with vnodes and buffers
1870  */
1871 
1872 /*
1873  * Wait for all outstanding I/Os to complete
1874  *
1875  * Manipulates v_numoutput. Must be called at splbio()
1876  */
1877 int
1878 vwaitforio(vp, slpflag, wmesg, timeo)
1879 	struct vnode *vp;
1880 	int slpflag, timeo;
1881 	char *wmesg;
1882 {
1883 	int error = 0;
1884 
1885 	while (vp->v_numoutput) {
1886 		vp->v_bioflag |= VBIOWAIT;
1887 		error = tsleep((caddr_t)&vp->v_numoutput,
1888 		    slpflag | (PRIBIO + 1), wmesg, timeo);
1889 		if (error)
1890 			break;
1891 	}
1892 
1893 	return (error);
1894 }
1895 
1896 
1897 /*
1898  * Update outstanding I/O count and do wakeup if requested.
1899  *
1900  * Manipulates v_numoutput. Must be called at splbio()
1901  */
1902 void
1903 vwakeup(vp)
1904 	struct vnode *vp;
1905 {
1906 	if (vp != NULL) {
1907 		if (vp->v_numoutput-- == 0)
1908 			panic("vwakeup: neg numoutput");
1909 		if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
1910 			vp->v_bioflag &= ~VBIOWAIT;
1911 			wakeup((caddr_t)&vp->v_numoutput);
1912 		}
1913 	}
1914 }
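
/*
 * Illustrative discipline (a sketch): both routines above manipulate
 * v_numoutput, so callers bracket them with splbio(), as vinvalbuf()
 * does below (the wmesg string is arbitrary):
 *
 *	s = splbio();
 *	vwaitforio(vp, 0, "example", 0);
 *	splx(s);
 */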
1915 
1916 /*
1917  * Flush out and invalidate all buffers associated with a vnode.
1918  * Called with the underlying object locked.
1919  */
1920 int
1921 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
1922 	register struct vnode *vp;
1923 	int flags;
1924 	struct ucred *cred;
1925 	struct proc *p;
1926 	int slpflag, slptimeo;
1927 {
1928 	register struct buf *bp;
1929 	struct buf *nbp, *blist;
1930 	int s, error;
1931 
1932 	if (flags & V_SAVE) {
1933 		s = splbio();
1934 		vwaitforio(vp, 0, "vinvalbuf", 0);
1935 		if (vp->v_dirtyblkhd.lh_first != NULL) {
1936 			splx(s);
1937 			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
1938 				return (error);
1939 			s = splbio();
1940 			if (vp->v_numoutput > 0 ||
1941 			    vp->v_dirtyblkhd.lh_first != NULL)
1942 				panic("vinvalbuf: dirty bufs");
1943 		}
1944 		splx(s);
1945 	}
1946 loop:
1947 	s = splbio();
1948 	for (;;) {
1949 		if ((blist = vp->v_cleanblkhd.lh_first) &&
1950 		    (flags & V_SAVEMETA))
1951 			while (blist && blist->b_lblkno < 0)
1952 				blist = blist->b_vnbufs.le_next;
1953 		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
1954 		    (flags & V_SAVEMETA))
1955 			while (blist && blist->b_lblkno < 0)
1956 				blist = blist->b_vnbufs.le_next;
1957 		if (!blist)
1958 			break;
1959 
1960 		for (bp = blist; bp; bp = nbp) {
1961 			nbp = bp->b_vnbufs.le_next;
1962 			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
1963 				continue;
1964 			if (bp->b_flags & B_BUSY) {
1965 				bp->b_flags |= B_WANTED;
1966 				error = tsleep((caddr_t)bp,
1967 					slpflag | (PRIBIO + 1), "vinvalbuf",
1968 					slptimeo);
1969 				if (error) {
1970 					splx(s);
1971 					return (error);
1972 				}
1973 				break;
1974 			}
1975 			bp->b_flags |= B_BUSY | B_VFLUSH;
1976 			/*
1977 			 * XXX Since there are no node locks for NFS, I believe
1978 			 * there is a slight chance that a delayed write will
1979 			 * occur while sleeping just above, so check for it.
1980 			 */
1981 			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
1982 				splx(s);
1983 				(void) VOP_BWRITE(bp);
1984 				goto loop;
1985 			}
1986 			bp->b_flags |= B_INVAL;
1987 			brelse(bp);
1988 		}
1989 	}
1990 	if (!(flags & V_SAVEMETA) &&
1991 	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
1992 		panic("vinvalbuf: flush failed");
1993 	splx(s);
1994 	return (0);
1995 }
1996 
1997 void
1998 vflushbuf(vp, sync)
1999 	register struct vnode *vp;
2000 	int sync;
2001 {
2002 	register struct buf *bp, *nbp;
2003 	int s;
2004 
2005 loop:
2006 	s = splbio();
2007 	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
2008 		nbp = bp->b_vnbufs.le_next;
2009 		if ((bp->b_flags & B_BUSY))
2010 			continue;
2011 		if ((bp->b_flags & B_DELWRI) == 0)
2012 			panic("vflushbuf: not dirty");
2013 		bp->b_flags |= B_BUSY | B_VFLUSH;
2014 		splx(s);
2015 		/*
2016 		 * Wait for I/O associated with indirect blocks to complete,
2017 		 * since there is no way to quickly wait for them below.
2018 		 */
2019 		if (bp->b_vp == vp || sync == 0)
2020 			(void) bawrite(bp);
2021 		else
2022 			(void) bwrite(bp);
2023 		goto loop;
2024 	}
2025 	if (sync == 0) {
2026 		splx(s);
2027 		return;
2028 	}
2029 	vwaitforio(vp, 0, "vflushbuf", 0);
2030 	if (vp->v_dirtyblkhd.lh_first != NULL) {
2031 		splx(s);
2032 		vprint("vflushbuf: dirty", vp);
2033 		goto loop;
2034 	}
2035 	splx(s);
2036 }
2037 
2038 /*
2039  * Associate a buffer with a vnode.
2040  *
2041  * Manipulates buffer vnode queues. Must be called at splbio().
2042  */
2043 void
2044 bgetvp(vp, bp)
2045 	register struct vnode *vp;
2046 	register struct buf *bp;
2047 {
2048 
2049 	if (bp->b_vp)
2050 		panic("bgetvp: not free");
2051 	vhold(vp);
2052 	bp->b_vp = vp;
2053 	if (vp->v_type == VBLK || vp->v_type == VCHR)
2054 		bp->b_dev = vp->v_rdev;
2055 	else
2056 		bp->b_dev = NODEV;
2057 	/*
2058 	 * Insert onto list for new vnode.
2059 	 */
2060 	bufinsvn(bp, &vp->v_cleanblkhd);
2061 }
2062 
2063 /*
2064  * Disassociate a buffer from a vnode.
2065  *
2066  * Manipulates vnode buffer queues. Must be called at splbio().
2067  */
2068 void
2069 brelvp(bp)
2070 	register struct buf *bp;
2071 {
2072 	struct vnode *vp;
2073 
2074 	if ((vp = bp->b_vp) == (struct vnode *) 0)
2075 		panic("brelvp: NULL");
2076 	/*
2077 	 * Delete from old vnode list, if on one.
2078 	 */
2079 	if (bp->b_vnbufs.le_next != NOLIST)
2080 		bufremvn(bp);
2081 	if ((vp->v_bioflag & VBIOONSYNCLIST) &&
2082 	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
2083 		vp->v_bioflag &= ~VBIOONSYNCLIST;
2084 		LIST_REMOVE(vp, v_synclist);
2085 	}
2086 	bp->b_vp = (struct vnode *) 0;
2087 
2088 	simple_lock(&vp->v_interlock);
2089 #ifdef DIAGNOSTIC
2090 	if (vp->v_holdcnt == 0)
2091 		panic("brelvp: holdcnt");
2092 #endif
2093 	vp->v_holdcnt--;
2094 
2095 	/*
2096 	 * If it is on the holdlist and the hold count drops to
2097 	 * zero, move it to the free list.
2098 	 */
2099 	if ((vp->v_bioflag & VBIOONFREELIST) &&
2100 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
2101 		simple_lock(&vnode_free_list_slock);
2102 		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
2103 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
2104 		simple_unlock(&vnode_free_list_slock);
2105 	}
2106 	simple_unlock(&vp->v_interlock);
2107 }
2108 
2109 /*
2110  * Replaces the current vnode associated with the buffer, if any,
2111  * with a new vnode.
2112  *
2113  * If an output I/O is pending on the buffer, the old vnode's
2114  * I/O count is adjusted.
2115  *
2116  * Ignores vnode buffer queues. Must be called at splbio().
2117  */
2118 void
2119 buf_replacevnode(bp, newvp)
2120 	struct buf *bp;
2121 	struct vnode *newvp;
2122 {
2123 	struct vnode *oldvp = bp->b_vp;
2124 
2125 	if (oldvp)
2126 		brelvp(bp);
2127 
2128 	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
2129 		newvp->v_numoutput++;	/* put it on swapdev */
2130 		vwakeup(oldvp);
2131 	}
2132 
2133 	bgetvp(newvp, bp);
2134 	bufremvn(bp);
2135 }
2136 
2137 /*
2138  * Used to assign buffers to the appropriate clean or dirty list on
2139  * the vnode and to add newly dirty vnodes to the appropriate
2140  * filesystem syncer list.
2141  *
2142  * Manipulates vnode buffer queues. Must be called at splbio().
2143  */
2144 void
2145 reassignbuf(bp)
2146 	register struct buf *bp;
2147 {
2148 	struct buflists *listheadp;
2149 	int delay;
2150 	struct vnode *vp = bp->b_vp;
2151 
2152 	if (vp == NULL) {
2153 		printf("reassignbuf: NULL\n");
2154 		return;
2155 	}
2156 	/*
2157 	 * Delete from old vnode list, if on one.
2158 	 */
2159 	if (bp->b_vnbufs.le_next != NOLIST)
2160 		bufremvn(bp);
2161 	/*
2162 	 * If dirty, put on list of dirty buffers;
2163 	 * otherwise insert onto list of clean buffers.
2164 	 */
2165 	if ((bp->b_flags & B_DELWRI) == 0) {
2166 		listheadp = &vp->v_cleanblkhd;
2167 		if ((vp->v_bioflag & VBIOONSYNCLIST) &&
2168 		    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
2169 			vp->v_bioflag &= ~VBIOONSYNCLIST;
2170 			LIST_REMOVE(vp, v_synclist);
2171 		}
2172 	} else {
2173 		listheadp = &vp->v_dirtyblkhd;
2174 		if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
2175 			switch (vp->v_type) {
2176 			case VDIR:
2177 				delay = syncdelay / 2;
2178 				break;
2179 			case VBLK:
2180 				if (vp->v_specmountpoint != NULL) {
2181 					delay = syncdelay / 3;
2182 					break;
2183 				}
2184 				/* fall through */
2185 			default:
2186 				delay = syncdelay;
2187 			}
2188 			vn_syncer_add_to_worklist(vp, delay);
2189 		}
2190 	}
2191 	bufinsvn(bp, listheadp);
2192 }
2193 
2194 int
2195 vfs_register(vfs)
2196 	struct vfsconf *vfs;
2197 {
2198 	struct vfsconf *vfsp;
2199 	struct vfsconf **vfspp;
2200 
2201 #ifdef DIAGNOSTIC
2202 	/* Paranoia? */
2203 	if (vfs->vfc_refcount != 0)
2204 		printf("vfs_register called with vfc_refcount > 0\n");
2205 #endif
2206 
2207 	/* Check if filesystem already known */
2208 	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
2209 	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
2210 		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
2211 			return (EEXIST);
2212 
2213 	if (vfs->vfc_typenum > maxvfsconf)
2214 		maxvfsconf = vfs->vfc_typenum;
2215 
2216 	vfs->vfc_next = NULL;
2217 
2218 	/* Add to the end of the list */
2219 	*vfspp = vfs;
2220 
2221 	/* Call vfs_init() */
2222 	if (vfs->vfc_vfsops->vfs_init)
2223 		(*(vfs->vfc_vfsops->vfs_init))(vfs);
2224 
2225 	return 0;
2226 }
2227 
2228 int
2229 vfs_unregister(vfs)
2230 	struct vfsconf *vfs;
2231 {
2232 	struct vfsconf *vfsp;
2233 	struct vfsconf **vfspp;
2234 	int maxtypenum;
2235 
2236 	/* Find our vfsconf struct */
2237 	for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
2238 	    vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
2239 		if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
2240 			break;
2241 	}
2242 
2243 	if (!vfsp)			/* Not found */
2244 		return (ENOENT);
2245 
2246 	if (vfsp->vfc_refcount)		/* In use */
2247 		return (EBUSY);
2248 
2249 	/* Remove from list and free */
2250 	*vfspp = vfsp->vfc_next;
2251 
2252 	maxtypenum = 0;
2253 
2254 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2255 		if (vfsp->vfc_typenum > maxtypenum)
2256 			maxtypenum = vfsp->vfc_typenum;
2257 
2258 	maxvfsconf = maxtypenum;
2259 	return 0;
2260 }
2261 
2262 /*
2263  * Check if vnode represents a disk device
2264  */
2265 int
2266 vn_isdisk(vp, errp)
2267 	struct vnode *vp;
2268 	int *errp;
2269 {
2270 	if (vp->v_type != VBLK && vp->v_type != VCHR)
2271 		return (0);
2272 
2273 	return (1);
2274 }
2275