xref: /netbsd-src/sys/kern/vfs_subr.c (revision 7c7c171d130af9949261bc7dce2150a03c3d239c)
1 /*	$NetBSD: vfs_subr.c,v 1.83 1998/03/04 09:13:48 fvdl Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1989, 1993
42  *	The Regents of the University of California.  All rights reserved.
43  * (c) UNIX System Laboratories, Inc.
44  * All or some portions of this file are derived from material licensed
45  * to the University of California by American Telephone and Telegraph
46  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47  * the permission of UNIX System Laboratories, Inc.
48  *
49  * Redistribution and use in source and binary forms, with or without
50  * modification, are permitted provided that the following conditions
51  * are met:
52  * 1. Redistributions of source code must retain the above copyright
53  *    notice, this list of conditions and the following disclaimer.
54  * 2. Redistributions in binary form must reproduce the above copyright
55  *    notice, this list of conditions and the following disclaimer in the
56  *    documentation and/or other materials provided with the distribution.
57  * 3. All advertising materials mentioning features or use of this software
58  *    must display the following acknowledgement:
59  *	This product includes software developed by the University of
60  *	California, Berkeley and its contributors.
61  * 4. Neither the name of the University nor the names of its contributors
62  *    may be used to endorse or promote products derived from this software
63  *    without specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
66  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
69  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
70  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
71  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
72  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
73  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
74  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
75  * SUCH DAMAGE.
76  *
77  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
78  */
79 
80 /*
81  * External virtual filesystem routines
82  */
83 
84 #include "opt_uvm.h"
85 
86 #include <sys/param.h>
87 #include <sys/systm.h>
88 #include <sys/proc.h>
89 #include <sys/mount.h>
90 #include <sys/time.h>
91 #include <sys/fcntl.h>
92 #include <sys/vnode.h>
93 #include <sys/stat.h>
94 #include <sys/namei.h>
95 #include <sys/ucred.h>
96 #include <sys/buf.h>
97 #include <sys/errno.h>
98 #include <sys/malloc.h>
99 #include <sys/domain.h>
100 #include <sys/mbuf.h>
101 #include <sys/syscallargs.h>
102 #include <sys/device.h>
103 #include <sys/dirent.h>
104 
105 #include <vm/vm.h>
106 #include <sys/sysctl.h>
107 
108 #include <miscfs/specfs/specdev.h>
109 
110 #if defined(UVM)
111 #include <uvm/uvm_extern.h>
112 #endif
113 
114 enum vtype iftovt_tab[16] = {
115 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
116 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
117 };
118 int	vttoif_tab[9] = {
119 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
120 	S_IFSOCK, S_IFIFO, S_IFMT,
121 };
122 
123 int doforce = 1;		/* 1 => permit forcible unmounting */
124 int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
125 
126 /*
127  * Insq/Remq for the vnode usage lists.
128  */
129 #define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
130 #define	bufremvn(bp) {							\
131 	LIST_REMOVE(bp, b_vnbufs);					\
132 	(bp)->b_vnbufs.le_next = NOLIST;				\
133 }
134 TAILQ_HEAD(freelst, vnode) vnode_free_list =	/* vnode free list */
135     TAILQ_HEAD_INITIALIZER(vnode_free_list);
136 struct mntlist mountlist =			/* mounted filesystem list */
137     CIRCLEQ_HEAD_INITIALIZER(mountlist);
138 struct vfs_list_head vfs_list =			/* vfs list */
139 	 LIST_HEAD_INITIALIZER(vfs_list);
140 
141 struct nfs_public nfs_pub;			/* publicly exported FS */
142 
143 struct simplelock mountlist_slock;
144 static struct simplelock mntid_slock;
145 struct simplelock mntvnode_slock;
146 struct simplelock vnode_free_list_slock;
147 struct simplelock spechash_slock;
148 
149 /*
150  * These define the root filesystem and device.
151  */
152 struct mount *rootfs;
153 struct vnode *rootvnode;
154 struct device *root_device;			/* root device */
155 
156 struct mount *vfs_getvfs __P((fsid_t *));
157 void vattr_null __P((struct vattr *));
158 int getnewvnode __P((enum vtagtype, struct mount *, int (**)(void *),
159 		     struct vnode **));
160 void insmntque __P((struct vnode *, struct mount *));
161 int vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *, int,
162 		   int));
163 void vflushbuf __P((struct vnode *, int));
164 void brelvp __P((struct buf *));
165 int bdevvp __P((dev_t, struct vnode **));
166 int cdevvp __P((dev_t, struct vnode **));
167 int getdevvp __P((dev_t, struct vnode **, enum vtype));
168 struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *));
169 void vput __P((struct vnode *));
170 void vrele __P((struct vnode *));
171 int vflush __P((struct mount *, struct vnode *, int));
172 void vgoneall __P((struct vnode *));
173 void vgone __P((struct vnode *));
174 void vgonel __P((struct vnode *vp, struct proc *p));
175 int vcount __P((struct vnode *));
176 void vprint __P((char *, struct vnode *));
177 int vfs_mountedon __P((struct vnode *));
178 int vfs_export __P((struct mount *, struct netexport *, struct export_args *));
179 struct netcred *vfs_export_lookup __P((struct mount *, struct netexport *,
180 				       struct mbuf *));
181 int vaccess __P((enum vtype, mode_t, uid_t, gid_t, mode_t, struct ucred *));
182 void vfs_unmountall __P((void));
183 void vfs_shutdown __P((void));
184 
185 static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
186 				  struct export_args *));
187 static int vfs_free_netcred __P((struct radix_node *, void *));
188 static void vfs_free_addrlist __P((struct netexport *));
189 
190 #ifdef DEBUG
191 void printlockedvnodes __P((void));
192 #endif
193 
194 /*
195  * Initialize the vnode management data structures.
196  */
197 void
198 vntblinit()
199 {
200 
201 	simple_lock_init(&mntvnode_slock);
202 	simple_lock_init(&mntid_slock);
203 	simple_lock_init(&spechash_slock);
204 	simple_lock_init(&vnode_free_list_slock);
205 }
206 
207 /*
208  * Mark a mount point as busy. Used to synchronize access and to delay
209  * unmounting. Interlock is not released on failure.
210  */
211 int
212 vfs_busy(mp, flags, interlkp)
213 	struct mount *mp;
214 	int flags;
215 	struct simplelock *interlkp;
216 {
217 	int lkflags;
218 
219 	if (mp->mnt_flag & MNT_UNMOUNT) {
220 		if (flags & LK_NOWAIT)
221 			return (ENOENT);
222 		mp->mnt_flag |= MNT_MWAIT;
223 		if (interlkp)
224 			simple_unlock(interlkp);
225 		/*
226 		 * Since all busy locks are shared except the exclusive
227 		 * lock granted when unmounting, the only place that a
228 		 * wakeup needs to be done is at the release of the
229 		 * exclusive lock at the end of dounmount.
230 		 */
231 		sleep((caddr_t)mp, PVFS);
232 		if (interlkp)
233 			simple_lock(interlkp);
234 		return (ENOENT);
235 	}
236 	lkflags = LK_SHARED;
237 	if (interlkp)
238 		lkflags |= LK_INTERLOCK;
239 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
240 		panic("vfs_busy: unexpected lock failure");
241 	return (0);
242 }
243 
244 /*
245  * Free a busy filesystem.
246  */
247 void
248 vfs_unbusy(mp)
249 	struct mount *mp;
250 {
251 
252 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
253 }
254 
255 /*
256  * Lookup a filesystem type, and if found allocate and initialize
257  * a mount structure for it.
258  *
259  * Devname is usually updated by mount(8) after booting.
260  */
261 int
262 vfs_rootmountalloc(fstypename, devname, mpp)
263 	char *fstypename;
264 	char *devname;
265 	struct mount **mpp;
266 {
267 	struct vfsops *vfsp = NULL;
268 	struct mount *mp;
269 
270 	for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL;
271 	     vfsp = LIST_NEXT(vfsp, vfs_list))
272 		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
273 			break;
274 
275 	if (vfsp == NULL)
276 		return (ENODEV);
277 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
278 	bzero((char *)mp, (u_long)sizeof(struct mount));
279 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
280 	(void)vfs_busy(mp, LK_NOWAIT, 0);
281 	LIST_INIT(&mp->mnt_vnodelist);
282 	mp->mnt_op = vfsp;
283 	mp->mnt_flag = MNT_RDONLY;
284 	mp->mnt_vnodecovered = NULLVP;
285 	vfsp->vfs_refcount++;
286 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
287 	mp->mnt_stat.f_mntonname[0] = '/';
288 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
289 	*mpp = mp;
290 	return (0);
291 }
292 
293 /*
294  * Lookup a mount point by filesystem identifier.
295  */
296 struct mount *
297 vfs_getvfs(fsid)
298 	fsid_t *fsid;
299 {
300 	register struct mount *mp;
301 
302 	simple_lock(&mountlist_slock);
303 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
304 	     mp = mp->mnt_list.cqe_next) {
305 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
306 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
307 			simple_unlock(&mountlist_slock);
308 			return (mp);
309 		}
310 	}
311 	simple_unlock(&mountlist_slock);
312 	return ((struct mount *)0);
313 }
314 
315 /*
316  * Get a new unique fsid
317  */
318 void
319 vfs_getnewfsid(mp, fstypename)
320 	struct mount *mp;
321 	char *fstypename;
322 {
323 	static u_short xxxfs_mntid;
324 	fsid_t tfsid;
325 	int mtype;
326 
327 	simple_lock(&mntid_slock);
328 	mtype = makefstype(fstypename);
329 	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
330 	mp->mnt_stat.f_fsid.val[1] = mtype;
331 	if (xxxfs_mntid == 0)
332 		++xxxfs_mntid;
333 	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
334 	tfsid.val[1] = mtype;
335 	if (mountlist.cqh_first != (void *)&mountlist) {
336 		while (vfs_getvfs(&tfsid)) {
337 			tfsid.val[0]++;
338 			xxxfs_mntid++;
339 		}
340 	}
341 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
342 	simple_unlock(&mntid_slock);
343 }
344 
345 /*
346  * Make a 'unique' number from a mount type name.
347  */
348 long
349 makefstype(type)
350 	char *type;
351 {
352 	long rv;
353 
354 	for (rv = 0; *type; type++) {
355 		rv <<= 2;
356 		rv ^= *type;
357 	}
358 	return rv;
359 }
360 
361 
362 /*
363  * Set vnode attributes to VNOVAL
364  */
365 void
366 vattr_null(vap)
367 	register struct vattr *vap;
368 {
369 
370 	vap->va_type = VNON;
371 
372 	/*
373 	 * Assign individually so that it is safe even if size and
374 	 * sign of each member are varied.
375 	 */
376 	vap->va_mode = VNOVAL;
377 	vap->va_nlink = VNOVAL;
378 	vap->va_uid = VNOVAL;
379 	vap->va_gid = VNOVAL;
380 	vap->va_fsid = VNOVAL;
381 	vap->va_fileid = VNOVAL;
382 	vap->va_size = VNOVAL;
383 	vap->va_blocksize = VNOVAL;
384 	vap->va_atime.tv_sec =
385 	    vap->va_mtime.tv_sec =
386 	    vap->va_ctime.tv_sec = VNOVAL;
387 	vap->va_atime.tv_nsec =
388 	    vap->va_mtime.tv_nsec =
389 	    vap->va_ctime.tv_nsec = VNOVAL;
390 	vap->va_gen = VNOVAL;
391 	vap->va_flags = VNOVAL;
392 	vap->va_rdev = VNOVAL;
393 	vap->va_bytes = VNOVAL;
394 	vap->va_vaflags = 0;
395 }
396 
397 /*
398  * Routines having to do with the management of the vnode table.
399  */
400 extern int (**dead_vnodeop_p) __P((void *));
401 long numvnodes;
402 
403 /*
404  * Return the next vnode from the free list.
405  */
406 int
407 getnewvnode(tag, mp, vops, vpp)
408 	enum vtagtype tag;
409 	struct mount *mp;
410 	int (**vops) __P((void *));
411 	struct vnode **vpp;
412 {
413 	struct proc *p = curproc;	/* XXX */
414 	struct vnode *vp;
415 #ifdef DIAGNOSTIC
416 	int s;
417 #endif
418 
419 	simple_lock(&vnode_free_list_slock);
420 	if ((vnode_free_list.tqh_first == NULL &&
421 	     numvnodes < 2 * desiredvnodes) ||
422 	    numvnodes < desiredvnodes) {
423 		simple_unlock(&vnode_free_list_slock);
424 		vp = (struct vnode *)malloc((u_long)sizeof *vp,
425 		    M_VNODE, M_WAITOK);
426 		bzero((char *)vp, sizeof *vp);
427 		numvnodes++;
428 	} else {
429 		for (vp = vnode_free_list.tqh_first;
430 				vp != NULLVP; vp = vp->v_freelist.tqe_next) {
431 			if (simple_lock_try(&vp->v_interlock))
432 				break;
433 		}
434 		/*
435 		 * Unless this is a bad time of the month, at most
436 		 * the first NCPUS items on the free list are
437 		 * locked, so this is close enough to being empty.
438 		 */
439 		if (vp == NULLVP) {
440 			simple_unlock(&vnode_free_list_slock);
441 			tablefull("vnode");
442 			*vpp = 0;
443 			return (ENFILE);
444 		}
445 		if (vp->v_usecount)
446 			panic("free vnode isn't");
447 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
448 		/* see comment on why 0xdeadb is set at end of vgone (below) */
449 		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
450 		simple_unlock(&vnode_free_list_slock);
451 		vp->v_lease = NULL;
452 		if (vp->v_type != VBAD)
453 			vgonel(vp, p);
454 		else
455 			simple_unlock(&vp->v_interlock);
456 #ifdef DIAGNOSTIC
457 		if (vp->v_data)
458 			panic("cleaned vnode isn't");
459 		s = splbio();
460 		if (vp->v_numoutput)
461 			panic("Clean vnode has pending I/O's");
462 		splx(s);
463 #endif
464 		vp->v_flag = 0;
465 		vp->v_lastr = 0;
466 		vp->v_ralen = 0;
467 		vp->v_maxra = 0;
468 		vp->v_lastw = 0;
469 		vp->v_lasta = 0;
470 		vp->v_cstart = 0;
471 		vp->v_clen = 0;
472 		vp->v_socket = 0;
473 	}
474 	vp->v_type = VNON;
475 	cache_purge(vp);
476 	vp->v_tag = tag;
477 	vp->v_op = vops;
478 	insmntque(vp, mp);
479 	*vpp = vp;
480 	vp->v_usecount = 1;
481 	vp->v_data = 0;
482 #ifdef UVM
483 	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
484 #endif
485 	return (0);
486 }
487 
488 /*
489  * Move a vnode from one mount queue to another.
490  */
491 void
492 insmntque(vp, mp)
493 	register struct vnode *vp;
494 	register struct mount *mp;
495 {
496 
497 	simple_lock(&mntvnode_slock);
498 	/*
499 	 * Delete from old mount point vnode list, if on one.
500 	 */
501 	if (vp->v_mount != NULL)
502 		LIST_REMOVE(vp, v_mntvnodes);
503 	/*
504 	 * Insert into list of vnodes for the new mount point, if available.
505 	 */
506 	if ((vp->v_mount = mp) != NULL)
507 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
508 	simple_unlock(&mntvnode_slock);
509 }
510 
511 /*
512  * Update outstanding I/O count and do wakeup if requested.
513  */
514 void
515 vwakeup(bp)
516 	register struct buf *bp;
517 {
518 	register struct vnode *vp;
519 
520 	bp->b_flags &= ~B_WRITEINPROG;
521 	if ((vp = bp->b_vp) != NULL) {
522 		if (--vp->v_numoutput < 0)
523 			panic("vwakeup: neg numoutput");
524 		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
525 			vp->v_flag &= ~VBWAIT;
526 			wakeup((caddr_t)&vp->v_numoutput);
527 		}
528 	}
529 }
530 
531 /*
532  * Flush out and invalidate all buffers associated with a vnode.
533  * Called with the underlying object locked.
534  */
535 int
536 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
537 	register struct vnode *vp;
538 	int flags;
539 	struct ucred *cred;
540 	struct proc *p;
541 	int slpflag, slptimeo;
542 {
543 	register struct buf *bp;
544 	struct buf *nbp, *blist;
545 	int s, error;
546 
547 	if (flags & V_SAVE) {
548 		if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
549 			return (error);
550 		if (vp->v_dirtyblkhd.lh_first != NULL)
551 			panic("vinvalbuf: dirty bufs");
552 	}
553 	for (;;) {
554 		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
555 			while (blist && blist->b_lblkno < 0)
556 				blist = blist->b_vnbufs.le_next;
557 		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
558 		    (flags & V_SAVEMETA))
559 			while (blist && blist->b_lblkno < 0)
560 				blist = blist->b_vnbufs.le_next;
561 		if (!blist)
562 			break;
563 
564 		for (bp = blist; bp; bp = nbp) {
565 			nbp = bp->b_vnbufs.le_next;
566 			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
567 				continue;
568 			s = splbio();
569 			if (bp->b_flags & B_BUSY) {
570 				bp->b_flags |= B_WANTED;
571 				error = tsleep((caddr_t)bp,
572 					slpflag | (PRIBIO + 1), "vinvalbuf",
573 					slptimeo);
574 				splx(s);
575 				if (error)
576 					return (error);
577 				break;
578 			}
579 			bp->b_flags |= B_BUSY | B_VFLUSH;
580 			splx(s);
581 			/*
582 			 * XXX Since there are no node locks for NFS, I believe
583 			 * there is a slight chance that a delayed write will
584 			 * occur while sleeping just above, so check for it.
585 			 */
586 			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
587 				(void) VOP_BWRITE(bp);
588 				break;
589 			}
590 			bp->b_flags |= B_INVAL;
591 			brelse(bp);
592 		}
593 	}
594 	if (!(flags & V_SAVEMETA) &&
595 	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
596 		panic("vinvalbuf: flush failed");
597 	return (0);
598 }
599 
/*
 * Write out all dirty buffers associated with a vnode.
 * If sync is non-zero, wait for the writes to complete and retry
 * until the dirty list is empty; otherwise issue async writes and
 * return immediately.
 */
void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		/* The list may have changed while we wrote; rescan. */
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	/* Synchronous flush: wait for all outstanding writes to drain. */
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		/* New dirty buffers appeared while we slept; go again. */
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
642 
643 /*
644  * Associate a buffer with a vnode.
645  */
646 void
647 bgetvp(vp, bp)
648 	register struct vnode *vp;
649 	register struct buf *bp;
650 {
651 
652 	if (bp->b_vp)
653 		panic("bgetvp: not free");
654 	VHOLD(vp);
655 	bp->b_vp = vp;
656 	if (vp->v_type == VBLK || vp->v_type == VCHR)
657 		bp->b_dev = vp->v_rdev;
658 	else
659 		bp->b_dev = NODEV;
660 	/*
661 	 * Insert onto list for new vnode.
662 	 */
663 	bufinsvn(bp, &vp->v_cleanblkhd);
664 }
665 
666 /*
667  * Disassociate a buffer from a vnode.
668  */
669 void
670 brelvp(bp)
671 	register struct buf *bp;
672 {
673 	struct vnode *vp;
674 
675 	if (bp->b_vp == (struct vnode *) 0)
676 		panic("brelvp: NULL");
677 	/*
678 	 * Delete from old vnode list, if on one.
679 	 */
680 	if (bp->b_vnbufs.le_next != NOLIST)
681 		bufremvn(bp);
682 	vp = bp->b_vp;
683 	bp->b_vp = (struct vnode *) 0;
684 	HOLDRELE(vp);
685 }
686 
687 /*
688  * Reassign a buffer from one vnode to another.
689  * Used to assign file specific control information
690  * (indirect blocks) to the vnode to which they belong.
691  */
692 void
693 reassignbuf(bp, newvp)
694 	register struct buf *bp;
695 	register struct vnode *newvp;
696 {
697 	register struct buflists *listheadp;
698 
699 	if (newvp == NULL) {
700 		printf("reassignbuf: NULL");
701 		return;
702 	}
703 	/*
704 	 * Delete from old vnode list, if on one.
705 	 */
706 	if (bp->b_vnbufs.le_next != NOLIST)
707 		bufremvn(bp);
708 	/*
709 	 * If dirty, put on list of dirty buffers;
710 	 * otherwise insert onto list of clean buffers.
711 	 */
712 	if (bp->b_flags & B_DELWRI)
713 		listheadp = &newvp->v_dirtyblkhd;
714 	else
715 		listheadp = &newvp->v_cleanblkhd;
716 	bufinsvn(bp, listheadp);
717 }
718 
719 /*
720  * Create a vnode for a block device.
721  * Used for root filesystem and swap areas.
722  * Also used for memory file system special devices.
723  */
724 int
725 bdevvp(dev, vpp)
726 	dev_t dev;
727 	struct vnode **vpp;
728 {
729 
730 	return (getdevvp(dev, vpp, VBLK));
731 }
732 
733 /*
734  * Create a vnode for a character device.
735  * Used for kernfs and some console handling.
736  */
737 int
738 cdevvp(dev, vpp)
739 	dev_t dev;
740 	struct vnode **vpp;
741 {
742 
743 	return (getdevvp(dev, vpp, VCHR));
744 }
745 
746 /*
747  * Create a vnode for a device.
748  * Used by bdevvp (block device) for root file system etc.,
749  * and by cdevvp (character device) for console and kernfs.
750  */
751 int
752 getdevvp(dev, vpp, type)
753 	dev_t dev;
754 	struct vnode **vpp;
755 	enum vtype type;
756 {
757 	register struct vnode *vp;
758 	struct vnode *nvp;
759 	int error;
760 
761 	if (dev == NODEV) {
762 		*vpp = NULLVP;
763 		return (0);
764 	}
765 	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
766 	if (error) {
767 		*vpp = NULLVP;
768 		return (error);
769 	}
770 	vp = nvp;
771 	vp->v_type = type;
772 	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
773 		vput(vp);
774 		vp = nvp;
775 	}
776 	*vpp = vp;
777 	return (0);
778 }
779 
780 /*
781  * Check to see if the new vnode represents a special device
782  * for which we already have a vnode (either because of
783  * bdevvp() or because of a different vnode representing
784  * the same block device). If such an alias exists, deallocate
785  * the existing contents and return the aliased vnode. The
786  * caller is responsible for filling it with its new contents.
787  */
788 struct vnode *
789 checkalias(nvp, nvp_rdev, mp)
790 	register struct vnode *nvp;
791 	dev_t nvp_rdev;
792 	struct mount *mp;
793 {
794 	struct proc *p = curproc;       /* XXX */
795 	register struct vnode *vp;
796 	struct vnode **vpp;
797 
798 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
799 		return (NULLVP);
800 
801 	vpp = &speclisth[SPECHASH(nvp_rdev)];
802 loop:
803 	simple_lock(&spechash_slock);
804 	for (vp = *vpp; vp; vp = vp->v_specnext) {
805 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
806 			continue;
807 		/*
808 		 * Alias, but not in use, so flush it out.
809 		 */
810 		simple_lock(&vp->v_interlock);
811 		if (vp->v_usecount == 0) {
812 			simple_unlock(&spechash_slock);
813 			vgonel(vp, p);
814 			goto loop;
815 		}
816 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
817 			simple_unlock(&spechash_slock);
818 			goto loop;
819 		}
820 		break;
821 	}
822 	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
823 		MALLOC(nvp->v_specinfo, struct specinfo *,
824 			sizeof(struct specinfo), M_VNODE, M_WAITOK);
825 		nvp->v_rdev = nvp_rdev;
826 		nvp->v_hashchain = vpp;
827 		nvp->v_specnext = *vpp;
828 		nvp->v_specflags = 0;
829 		simple_unlock(&spechash_slock);
830 		nvp->v_speclockf = NULL;
831 		*vpp = nvp;
832 		if (vp != NULLVP) {
833 			nvp->v_flag |= VALIASED;
834 			vp->v_flag |= VALIASED;
835 			vput(vp);
836 		}
837 		return (NULLVP);
838 	}
839 	simple_unlock(&spechash_slock);
840 	VOP_UNLOCK(vp, 0);
841 	simple_lock(&vp->v_interlock);
842 	vclean(vp, 0, p);
843 	vp->v_op = nvp->v_op;
844 	vp->v_tag = nvp->v_tag;
845 	nvp->v_type = VNON;
846 	insmntque(vp, mp);
847 	return (vp);
848 }
849 
850 /*
851  * Grab a particular vnode from the free list, increment its
852  * reference count and lock it. If the vnode lock bit is set the
853  * vnode is being eliminated in vgone. In that case, we can not
854  * grab the vnode, so the process is awakened when the transition is
855  * completed, and an error returned to indicate that the vnode is no
856  * longer usable (possibly having been changed to a new file system type).
857  */
858 int
859 vget(vp, flags)
860 	struct vnode *vp;
861 	int flags;
862 {
863 	int error;
864 
865 	/*
866 	 * If the vnode is in the process of being cleaned out for
867 	 * another use, we wait for the cleaning to finish and then
868 	 * return failure. Cleaning is determined by checking that
869 	 * the VXLOCK flag is set.
870 	 */
871 	if ((flags & LK_INTERLOCK) == 0)
872 		simple_lock(&vp->v_interlock);
873 	if (vp->v_flag & VXLOCK) {
874 		vp->v_flag |= VXWANT;
875 		simple_unlock(&vp->v_interlock);
876 		tsleep((caddr_t)vp, PINOD, "vget", 0);
877 		return (ENOENT);
878 	}
879 	if (vp->v_usecount == 0) {
880 		simple_lock(&vnode_free_list_slock);
881 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
882 		simple_unlock(&vnode_free_list_slock);
883 	}
884 	vp->v_usecount++;
885 	if (flags & LK_TYPE_MASK) {
886 		if ((error = vn_lock(vp, flags | LK_INTERLOCK)))
887 			vrele(vp);
888 		return (error);
889 	}
890 	simple_unlock(&vp->v_interlock);
891 	return (0);
892 }
893 
894 /*
895  * vput(), just unlock and vrele()
896  */
897 void
898 vput(vp)
899 	struct vnode *vp;
900 {
901 	struct proc *p = curproc;	/* XXX */
902 
903 #ifdef DIGANOSTIC
904 	if (vp == NULL)
905 		panic("vput: null vp");
906 #endif
907 	simple_lock(&vp->v_interlock);
908 	vp->v_usecount--;
909 	if (vp->v_usecount > 0) {
910 		simple_unlock(&vp->v_interlock);
911 		VOP_UNLOCK(vp, 0);
912 		return;
913 	}
914 #ifdef DIAGNOSTIC
915 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
916 		vprint("vput: bad ref count", vp);
917 		panic("vput: ref cnt");
918 	}
919 #endif
920 	/*
921 	 * insert at tail of LRU list
922 	 */
923 	simple_lock(&vnode_free_list_slock);
924 	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
925 	simple_unlock(&vnode_free_list_slock);
926 	simple_unlock(&vp->v_interlock);
927 	VOP_INACTIVE(vp, p);
928 }
929 
930 /*
931  * Vnode release.
932  * If count drops to zero, call inactive routine and return to freelist.
933  */
934 void
935 vrele(vp)
936 	struct vnode *vp;
937 {
938 	struct proc *p = curproc;	/* XXX */
939 
940 #ifdef DIAGNOSTIC
941 	if (vp == NULL)
942 		panic("vrele: null vp");
943 #endif
944 	simple_lock(&vp->v_interlock);
945 	vp->v_usecount--;
946 	if (vp->v_usecount > 0) {
947 		simple_unlock(&vp->v_interlock);
948 		return;
949 	}
950 #ifdef DIAGNOSTIC
951 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
952 		vprint("vrele: bad ref count", vp);
953 		panic("vrele: ref cnt");
954 	}
955 #endif
956 	/*
957 	 * insert at tail of LRU list
958 	 */
959 	simple_lock(&vnode_free_list_slock);
960 	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
961 	simple_unlock(&vnode_free_list_slock);
962 	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
963 		VOP_INACTIVE(vp, p);
964 }
965 
966 #ifdef DIAGNOSTIC
967 /*
968  * Page or buffer structure gets a reference.
969  */
970 void
971 vhold(vp)
972 	register struct vnode *vp;
973 {
974 
975 	simple_lock(&vp->v_interlock);
976 	vp->v_holdcnt++;
977 	simple_unlock(&vp->v_interlock);
978 }
979 
980 /*
981  * Page or buffer structure frees a reference.
982  */
983 void
984 holdrele(vp)
985 	register struct vnode *vp;
986 {
987 
988 	simple_lock(&vp->v_interlock);
989 	if (vp->v_holdcnt <= 0)
990 		panic("holdrele: holdcnt");
991 	vp->v_holdcnt--;
992 	simple_unlock(&vp->v_interlock);
993 }
994 
995 /*
996  * Vnode reference.
997  */
998 void
999 vref(vp)
1000 	struct vnode *vp;
1001 {
1002 
1003 	simple_lock(&vp->v_interlock);
1004 	if (vp->v_usecount <= 0)
1005 		panic("vref used where vget required");
1006 	vp->v_usecount++;
1007 	simple_unlock(&vp->v_interlock);
1008 }
1009 #endif /* DIAGNOSTIC */
1010 
1011 /*
1012  * Remove any vnodes in the vnode table belonging to mount point mp.
1013  *
1014  * If MNT_NOFORCE is specified, there should not be any active ones,
1015  * return error if any are found (nb: this is a user error, not a
1016  * system error). If MNT_FORCE is specified, detach any active vnodes
1017  * that are found.
1018  */
1019 #ifdef DEBUG
1020 int busyprt = 0;	/* print out busy vnodes */
1021 struct ctldebug debug1 = { "busyprt", &busyprt };
1022 #endif
1023 
1024 int
1025 vflush(mp, skipvp, flags)
1026 	struct mount *mp;
1027 	struct vnode *skipvp;
1028 	int flags;
1029 {
1030 	struct proc *p = curproc;	/* XXX */
1031 	register struct vnode *vp, *nvp;
1032 	int busy = 0;
1033 
1034 	simple_lock(&mntvnode_slock);
1035 loop:
1036 	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1037 		if (vp->v_mount != mp)
1038 			goto loop;
1039 		nvp = vp->v_mntvnodes.le_next;
1040 		/*
1041 		 * Skip over a selected vnode.
1042 		 */
1043 		if (vp == skipvp)
1044 			continue;
1045 		simple_lock(&vp->v_interlock);
1046 		/*
1047 		 * Skip over a vnodes marked VSYSTEM.
1048 		 */
1049 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1050 			simple_unlock(&vp->v_interlock);
1051 			continue;
1052 		}
1053 		/*
1054 		 * If WRITECLOSE is set, only flush out regular file
1055 		 * vnodes open for writing.
1056 		 */
1057 		if ((flags & WRITECLOSE) &&
1058 		    (vp->v_writecount == 0 || vp->v_type != VREG))
1059 			continue;
1060 		/*
1061 		 * With v_usecount == 0, all we need to do is clear
1062 		 * out the vnode data structures and we are done.
1063 		 */
1064 		if (vp->v_usecount == 0) {
1065 			simple_unlock(&mntvnode_slock);
1066 			vgonel(vp, p);
1067 			simple_lock(&mntvnode_slock);
1068 			continue;
1069 		}
1070 		/*
1071 		 * If FORCECLOSE is set, forcibly close the vnode.
1072 		 * For block or character devices, revert to an
1073 		 * anonymous device. For all other files, just kill them.
1074 		 */
1075 		if (flags & FORCECLOSE) {
1076 			simple_unlock(&mntvnode_slock);
1077 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1078 				vgonel(vp, p);
1079 			} else {
1080 				vclean(vp, 0, p);
1081 				vp->v_op = spec_vnodeop_p;
1082 				insmntque(vp, (struct mount *)0);
1083 			}
1084 			simple_lock(&mntvnode_slock);
1085 			continue;
1086 		}
1087 #ifdef DEBUG
1088 		if (busyprt)
1089 			vprint("vflush: busy vnode", vp);
1090 #endif
1091 		simple_unlock(&vp->v_interlock);
1092 		busy++;
1093 	}
1094 	simple_unlock(&mntvnode_slock);
1095 	if (busy)
1096 		return (EBUSY);
1097 	return (0);
1098 }
1099 
1100 /*
1101  * Disassociate the underlying file system from a vnode.
1102  */
1103 void
1104 vclean(vp, flags, p)
1105 	register struct vnode *vp;
1106 	int flags;
1107 	struct proc *p;
1108 {
1109 	int active;
1110 
1111 	/*
1112 	 * Check to see if the vnode is in use.
1113 	 * If so we have to reference it before we clean it out
1114 	 * so that its count cannot fall to zero and generate a
1115 	 * race against ourselves to recycle it.
1116 	 */
1117 	if ((active = vp->v_usecount) != 0)
1118 		VREF(vp);
1119 	/*
1120 	 * Prevent the vnode from being recycled or
1121 	 * brought into use while we clean it out.
1122 	 */
1123 	if (vp->v_flag & VXLOCK)
1124 		panic("vclean: deadlock");
1125 	vp->v_flag |= VXLOCK;
1126 #ifdef UVM
1127 	/*
1128 	 * clean out any VM data associated with the vnode.
1129 	 */
1130 	uvm_vnp_terminate(vp);
1131 #endif
1132 	/*
1133 	 * Even if the count is zero, the VOP_INACTIVE routine may still
1134 	 * have the object locked while it cleans it out. The VOP_LOCK
1135 	 * ensures that the VOP_INACTIVE routine is done with its work.
1136 	 * For active vnodes, it ensures that no other activity can
1137 	 * occur while the underlying object is being cleaned out.
1138 	 */
1139 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
1140 
1141 	/*
1142 	 * Clean out any buffers associated with the vnode.
1143 	 */
1144 	if (flags & DOCLOSE)
1145 		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1146 
1147 	/*
1148 	 * If purging an active vnode, it must be closed and
1149 	 * deactivated before being reclaimed. Note that the
1150 	 * VOP_INACTIVE will unlock the vnode.
1151 	 */
1152 	if (active) {
1153 		if (flags & DOCLOSE)
1154 			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
1155 		VOP_INACTIVE(vp, p);
1156 	} else {
1157 		/*
1158 		 * Any other processes trying to obtain this lock must first
1159 		 * wait for VXLOCK to clear, then call the new lock operation.
1160 		 */
1161 		VOP_UNLOCK(vp, 0);
1162 	}
1163 	/*
1164 	 * Reclaim the vnode.
1165 	 */
1166 	if (VOP_RECLAIM(vp, p))
1167 		panic("vclean: cannot reclaim");
1168 	if (active)
1169 		vrele(vp);
1170 
1171 	cache_purge(vp);
1172 	if (vp->v_vnlock) {
1173 		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1174 			vprint("vclean: lock not drained", vp);
1175 		FREE(vp->v_vnlock, M_VNODE);
1176 		vp->v_vnlock = NULL;
1177 	}
1178 
1179 	/*
1180 	 * Done with purge, notify sleepers of the grim news.
1181 	 */
1182 	vp->v_op = dead_vnodeop_p;
1183 	vp->v_tag = VT_NON;
1184 	vp->v_flag &= ~VXLOCK;
1185 	if (vp->v_flag & VXWANT) {
1186 		vp->v_flag &= ~VXWANT;
1187 		wakeup((caddr_t)vp);
1188 	}
1189 }
1190 
1191 /*
1192  * Recycle an unused vnode to the front of the free list.
1193  * Release the passed interlock if the vnode will be recycled.
1194  */
1195 int
1196 vrecycle(vp, inter_lkp, p)
1197 	struct vnode *vp;
1198 	struct simplelock *inter_lkp;
1199 	struct proc *p;
1200 {
1201 
1202 	simple_lock(&vp->v_interlock);
1203 	if (vp->v_usecount == 0) {
1204 		if (inter_lkp)
1205 			simple_unlock(inter_lkp);
1206 		vgonel(vp, p);
1207 		return (1);
1208 	}
1209 	simple_unlock(&vp->v_interlock);
1210 	return (0);
1211 }
1212 
1213 /*
1214  * Eliminate all activity associated with a vnode
1215  * in preparation for reuse.
1216  */
1217 void
1218 vgone(vp)
1219 	struct vnode *vp;
1220 {
1221 	struct proc *p = curproc;	/* XXX */
1222 
1223 	simple_lock(&vp->v_interlock);
1224 	vgonel(vp, p);
1225 }
1226 
1227 /*
1228  * vgone, with the vp interlock held.
1229  */
1230 void
1231 vgonel(vp, p)
1232 	register struct vnode *vp;
1233 	struct proc *p;
1234 {
1235 	struct vnode *vq;
1236 	struct vnode *vx;
1237 
1238 	/*
1239 	 * If a vgone (or vclean) is already in progress,
1240 	 * wait until it is done and return.
1241 	 */
1242 	if (vp->v_flag & VXLOCK) {
1243 		vp->v_flag |= VXWANT;
1244 		simple_unlock(&vp->v_interlock);
1245 		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1246 		return;
1247 	}
1248 	/*
1249 	 * Clean out the filesystem specific data.
1250 	 */
1251 	vclean(vp, DOCLOSE, p);
1252 	/*
1253 	 * Delete from old mount point vnode list, if on one.
1254 	 */
1255 	if (vp->v_mount != NULL)
1256 		insmntque(vp, (struct mount *)0);
1257 	/*
1258 	 * If special device, remove it from special device alias list.
1259 	 * if it is on one.
1260 	 */
1261 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1262 		simple_lock(&spechash_slock);
1263 		if (*vp->v_hashchain == vp) {
1264 			*vp->v_hashchain = vp->v_specnext;
1265 		} else {
1266 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1267 				if (vq->v_specnext != vp)
1268 					continue;
1269 				vq->v_specnext = vp->v_specnext;
1270 				break;
1271 			}
1272 			if (vq == NULL)
1273 				panic("missing bdev");
1274 		}
1275 		if (vp->v_flag & VALIASED) {
1276 			vx = NULL;
1277 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1278 				if (vq->v_rdev != vp->v_rdev ||
1279 				    vq->v_type != vp->v_type)
1280 					continue;
1281 				if (vx)
1282 					break;
1283 				vx = vq;
1284 			}
1285 			if (vx == NULL)
1286 				panic("missing alias");
1287 			if (vq == NULL)
1288 				vx->v_flag &= ~VALIASED;
1289 			vp->v_flag &= ~VALIASED;
1290 		}
1291 		simple_unlock(&spechash_slock);
1292 		FREE(vp->v_specinfo, M_VNODE);
1293 		vp->v_specinfo = NULL;
1294 	}
1295 	/*
1296 	 * If it is on the freelist and not already at the head,
1297 	 * move it to the head of the list. The test of the back
1298 	 * pointer and the reference count of zero is because
1299 	 * it will be removed from the free list by getnewvnode,
1300 	 * but will not have its reference count incremented until
1301 	 * after calling vgone. If the reference count were
1302 	 * incremented first, vgone would (incorrectly) try to
1303 	 * close the previous instance of the underlying object.
1304 	 * So, the back pointer is explicitly set to `0xdeadb' in
1305 	 * getnewvnode after removing it from the freelist to ensure
1306 	 * that we do not try to move it here.
1307 	 */
1308 	if (vp->v_usecount == 0) {
1309 		simple_lock(&vnode_free_list_slock);
1310 		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
1311 		    vnode_free_list.tqh_first != vp) {
1312 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1313 			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1314 		}
1315 		simple_unlock(&vnode_free_list_slock);
1316 	}
1317 	vp->v_type = VBAD;
1318 }
1319 
1320 /*
1321  * Lookup a vnode by device number.
1322  */
1323 int
1324 vfinddev(dev, type, vpp)
1325 	dev_t dev;
1326 	enum vtype type;
1327 	struct vnode **vpp;
1328 {
1329 	struct vnode *vp;
1330 	int rc = 0;
1331 
1332 	simple_lock(&spechash_slock);
1333 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1334 		if (dev != vp->v_rdev || type != vp->v_type)
1335 			continue;
1336 		*vpp = vp;
1337 		rc = 1;
1338 		break;
1339 	}
1340 	simple_unlock(&spechash_slock);
1341 	return (rc);
1342 }
1343 
1344 /*
1345  * Calculate the total number of references to a special device.
1346  */
1347 int
1348 vcount(vp)
1349 	register struct vnode *vp;
1350 {
1351 	register struct vnode *vq, *vnext;
1352 	int count;
1353 
1354 loop:
1355 	if ((vp->v_flag & VALIASED) == 0)
1356 		return (vp->v_usecount);
1357 	simple_lock(&spechash_slock);
1358 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1359 		vnext = vq->v_specnext;
1360 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1361 			continue;
1362 		/*
1363 		 * Alias, but not in use, so flush it out.
1364 		 */
1365 		if (vq->v_usecount == 0 && vq != vp) {
1366 			simple_unlock(&spechash_slock);
1367 			vgone(vq);
1368 			goto loop;
1369 		}
1370 		count += vq->v_usecount;
1371 	}
1372 	simple_unlock(&spechash_slock);
1373 	return (count);
1374 }
1375 
1376 /*
1377  * Print out a description of a vnode.
1378  */
1379 static char *typename[] =
1380    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1381 
1382 void
1383 vprint(label, vp)
1384 	char *label;
1385 	register struct vnode *vp;
1386 {
1387 	char buf[64];
1388 
1389 	if (label != NULL)
1390 		printf("%s: ", label);
1391 	printf("type %s, usecount %d, writecount %d, refcount %ld,",
1392 	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1393 	    vp->v_holdcnt);
1394 	buf[0] = '\0';
1395 	if (vp->v_flag & VROOT)
1396 		strcat(buf, "|VROOT");
1397 	if (vp->v_flag & VTEXT)
1398 		strcat(buf, "|VTEXT");
1399 	if (vp->v_flag & VSYSTEM)
1400 		strcat(buf, "|VSYSTEM");
1401 	if (vp->v_flag & VXLOCK)
1402 		strcat(buf, "|VXLOCK");
1403 	if (vp->v_flag & VXWANT)
1404 		strcat(buf, "|VXWANT");
1405 	if (vp->v_flag & VBWAIT)
1406 		strcat(buf, "|VBWAIT");
1407 	if (vp->v_flag & VALIASED)
1408 		strcat(buf, "|VALIASED");
1409 	if (buf[0] != '\0')
1410 		printf(" flags (%s)", &buf[1]);
1411 	if (vp->v_data == NULL) {
1412 		printf("\n");
1413 	} else {
1414 		printf("\n\t");
1415 		VOP_PRINT(vp);
1416 	}
1417 }
1418 
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		/*
		 * Skip mounts we cannot busy; on failure mountlist_slock
		 * is still held, so reading the next pointer is safe.
		 */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		/*
		 * Re-take the list lock before advancing; vfs_busy()
		 * presumably released it on success (same pattern as
		 * sysctl_vnode) -- confirm against vfs_busy().
		 */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
1449 #endif
1450 
1451 extern const char *mountcompatnames[];
1452 extern const int nmountcompatnames;
1453 
1454 /*
1455  * Top level filesystem related information gathering.
1456  */
1457 int
1458 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1459 	int *name;
1460 	u_int namelen;
1461 	void *oldp;
1462 	size_t *oldlenp;
1463 	void *newp;
1464 	size_t newlen;
1465 	struct proc *p;
1466 {
1467 /*
1468  * XXX needs work. Old interface (Lite2) used fs type numbers, can't
1469  * do that anymore. It should work with names. Provide some compat
1470  * code.
1471  */
1472 #ifdef COMPAT_44
1473 	struct vfsconf vfc;
1474 	struct vfsops *vfsp;
1475 
1476 	/* all sysctl names at this level are at least name and field */
1477 	if (namelen < 2)
1478 		return (ENOTDIR);		/* overloaded */
1479 	if (name[0] != VFS_GENERIC) {
1480 		if (name[0] >= nmountcompatnames || name[0] < 0 ||
1481 		    mountcompatnames[name[0]] == NULL)
1482 			return (EOPNOTSUPP);
1483 		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
1484 		if (vfsp == NULL)
1485 			return (EINVAL);
1486 		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
1487 		    oldp, oldlenp, newp, newlen, p));
1488 	}
1489 	switch (name[1]) {
1490 	case VFS_MAXTYPENUM:
1491 		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
1492 	case VFS_CONF:
1493 		if (namelen < 3)
1494 			return (ENOTDIR);	/* overloaded */
1495 		if (name[2] >= nmountcompatnames || name[2] < 0 ||
1496 		    mountcompatnames[name[2]] == NULL)
1497 			return (EOPNOTSUPP);
1498 		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
1499 		if (vfsp == NULL)
1500 			return (EINVAL);
1501 		vfc.vfc_vfsops = NULL;	/* XXX point to vfsops->vfs_mount? */
1502 		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
1503 		vfc.vfc_typenum = name[2];
1504 		vfc.vfc_flags = 0;
1505 		vfc.vfc_mountroot = vfsp->vfs_mountroot;
1506 		vfc.vfc_next = NULL;
1507 		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
1508 		    sizeof(struct vfsconf)));
1509 	}
1510 #endif
1511 	return (EOPNOTSUPP);
1512 }
1513 
int kinfo_vdebug = 1;		/* log "vp changed" rescans in sysctl_vnode() */
int kinfo_vgetfailed;		/* NOTE(review): never updated in this file;
				   presumably a failure counter -- confirm users */
#define KINFO_VNODESLOP	10	/* headroom so vnodes created mid-dump fit */
1517 /*
1518  * Dump vnode list (via sysctl).
1519  * Copyout address of vnode followed by vnode.
1520  */
1521 /* ARGSUSED */
1522 int
1523 sysctl_vnode(where, sizep, p)
1524 	char *where;
1525 	size_t *sizep;
1526 	struct proc *p;
1527 {
1528 	struct mount *mp, *nmp;
1529 	struct vnode *nvp, *vp;
1530 	char *bp = where, *savebp;
1531 	char *ewhere;
1532 	int error;
1533 
1534 #define VPTRSZ	sizeof (struct vnode *)
1535 #define VNODESZ	sizeof (struct vnode)
1536 	if (where == NULL) {
1537 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1538 		return (0);
1539 	}
1540 	ewhere = where + *sizep;
1541 
1542 	simple_lock(&mountlist_slock);
1543 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1544 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1545 			nmp = mp->mnt_list.cqe_next;
1546 			continue;
1547 		}
1548 		savebp = bp;
1549 again:
1550 		simple_lock(&mntvnode_slock);
1551 		for (vp = mp->mnt_vnodelist.lh_first;
1552 		     vp != NULL;
1553 		     vp = nvp) {
1554 			/*
1555 			 * Check that the vp is still associated with
1556 			 * this filesystem.  RACE: could have been
1557 			 * recycled onto the same filesystem.
1558 			 */
1559 			if (vp->v_mount != mp) {
1560 				simple_unlock(&mntvnode_slock);
1561 				if (kinfo_vdebug)
1562 					printf("kinfo: vp changed\n");
1563 				bp = savebp;
1564 				goto again;
1565 			}
1566 			nvp = vp->v_mntvnodes.le_next;
1567 			if (bp + VPTRSZ + VNODESZ > ewhere) {
1568 				simple_unlock(&mntvnode_slock);
1569 				*sizep = bp - where;
1570 				return (ENOMEM);
1571 			}
1572 			simple_unlock(&mntvnode_slock);
1573 			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
1574 			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
1575 				return (error);
1576 			bp += VPTRSZ + VNODESZ;
1577 			simple_lock(&mntvnode_slock);
1578 		}
1579 		simple_unlock(&mntvnode_slock);
1580 		simple_lock(&mountlist_slock);
1581 		nmp = mp->mnt_list.cqe_next;
1582 		vfs_unbusy(mp);
1583 	}
1584 	simple_unlock(&mountlist_slock);
1585 
1586 	*sizep = bp - where;
1587 	return (0);
1588 }
1589 
1590 /*
1591  * Check to see if a filesystem is mounted on a block device.
1592  */
1593 int
1594 vfs_mountedon(vp)
1595 	struct vnode *vp;
1596 {
1597 	struct vnode *vq;
1598 	int error = 0;
1599 
1600 	if (vp->v_specflags & SI_MOUNTEDON)
1601 		return (EBUSY);
1602 	if (vp->v_flag & VALIASED) {
1603 		simple_lock(&spechash_slock);
1604 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1605 			if (vq->v_rdev != vp->v_rdev ||
1606 			    vq->v_type != vp->v_type)
1607 				continue;
1608 			if (vq->v_specflags & SI_MOUNTEDON) {
1609 				error = EBUSY;
1610 				break;
1611 			}
1612 		}
1613 		simple_unlock(&spechash_slock);
1614 	}
1615 	return (error);
1616 }
1617 
1618 /*
1619  * Build hash lists of net addresses and hang them off the mount point.
1620  * Called by ufs_mount() to set up the lists of export addresses.
1621  */
1622 static int
1623 vfs_hang_addrlist(mp, nep, argp)
1624 	struct mount *mp;
1625 	struct netexport *nep;
1626 	struct export_args *argp;
1627 {
1628 	register struct netcred *np, *enp;
1629 	register struct radix_node_head *rnh;
1630 	register int i;
1631 	struct radix_node *rn;
1632 	struct sockaddr *saddr, *smask = 0;
1633 	struct domain *dom;
1634 	int error;
1635 
1636 	if (argp->ex_addrlen == 0) {
1637 		if (mp->mnt_flag & MNT_DEFEXPORTED)
1638 			return (EPERM);
1639 		np = &nep->ne_defexported;
1640 		np->netc_exflags = argp->ex_flags;
1641 		np->netc_anon = argp->ex_anon;
1642 		np->netc_anon.cr_ref = 1;
1643 		mp->mnt_flag |= MNT_DEFEXPORTED;
1644 		return (0);
1645 	}
1646 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1647 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1648 	bzero((caddr_t)np, i);
1649 	saddr = (struct sockaddr *)(np + 1);
1650 	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
1651 	if (error)
1652 		goto out;
1653 	if (saddr->sa_len > argp->ex_addrlen)
1654 		saddr->sa_len = argp->ex_addrlen;
1655 	if (argp->ex_masklen) {
1656 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1657 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1658 		if (error)
1659 			goto out;
1660 		if (smask->sa_len > argp->ex_masklen)
1661 			smask->sa_len = argp->ex_masklen;
1662 	}
1663 	i = saddr->sa_family;
1664 	if ((rnh = nep->ne_rtable[i]) == 0) {
1665 		/*
1666 		 * Seems silly to initialize every AF when most are not
1667 		 * used, do so on demand here
1668 		 */
1669 		for (dom = domains; dom; dom = dom->dom_next)
1670 			if (dom->dom_family == i && dom->dom_rtattach) {
1671 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
1672 					dom->dom_rtoffset);
1673 				break;
1674 			}
1675 		if ((rnh = nep->ne_rtable[i]) == 0) {
1676 			error = ENOBUFS;
1677 			goto out;
1678 		}
1679 	}
1680 	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1681 		np->netc_rnodes);
1682 	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1683 		if (rn == 0) {
1684 			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
1685 				smask, rnh);
1686 			if (enp == 0) {
1687 				error = EPERM;
1688 				goto out;
1689 			}
1690 		} else
1691 			enp = (struct netcred *)rn;
1692 
1693 		if (enp->netc_exflags != argp->ex_flags ||
1694 		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
1695 		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
1696 		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
1697 		    bcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
1698 			enp->netc_anon.cr_ngroups))
1699 				error = EPERM;
1700 		else
1701 			error = 0;
1702 		goto out;
1703 	}
1704 	np->netc_exflags = argp->ex_flags;
1705 	np->netc_anon = argp->ex_anon;
1706 	np->netc_anon.cr_ref = 1;
1707 	return (0);
1708 out:
1709 	free(np, M_NETADDR);
1710 	return (error);
1711 }
1712 
1713 /* ARGSUSED */
1714 static int
1715 vfs_free_netcred(rn, w)
1716 	struct radix_node *rn;
1717 	void *w;
1718 {
1719 	register struct radix_node_head *rnh = (struct radix_node_head *)w;
1720 
1721 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1722 	free((caddr_t)rn, M_NETADDR);
1723 	return (0);
1724 }
1725 
1726 /*
1727  * Free the net address hash lists that are hanging off the mount points.
1728  */
1729 static void
1730 vfs_free_addrlist(nep)
1731 	struct netexport *nep;
1732 {
1733 	register int i;
1734 	register struct radix_node_head *rnh;
1735 
1736 	for (i = 0; i <= AF_MAX; i++)
1737 		if ((rnh = nep->ne_rtable[i]) != NULL) {
1738 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1739 			free((caddr_t)rnh, M_RTABLE);
1740 			nep->ne_rtable[i] = 0;
1741 		}
1742 }
1743 
1744 int
1745 vfs_export(mp, nep, argp)
1746 	struct mount *mp;
1747 	struct netexport *nep;
1748 	struct export_args *argp;
1749 {
1750 	int error;
1751 
1752 	if (argp->ex_flags & MNT_DELEXPORT) {
1753 		if (mp->mnt_flag & MNT_EXPUBLIC) {
1754 			vfs_setpublicfs(NULL, NULL, NULL);
1755 			mp->mnt_flag &= ~MNT_EXPUBLIC;
1756 		}
1757 		vfs_free_addrlist(nep);
1758 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1759 	}
1760 	if (argp->ex_flags & MNT_EXPORTED) {
1761 		if (argp->ex_flags & MNT_EXPUBLIC) {
1762 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
1763 				return (error);
1764 			mp->mnt_flag |= MNT_EXPUBLIC;
1765 		}
1766 		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1767 			return (error);
1768 		mp->mnt_flag |= MNT_EXPORTED;
1769 	}
1770 	return (0);
1771 }
1772 
1773 /*
1774  * Set the publicly exported filesystem (WebNFS). Currently, only
1775  * one public filesystem is possible in the spec (RFC 2054 and 2055)
1776  */
1777 int
1778 vfs_setpublicfs(mp, nep, argp)
1779 	struct mount *mp;
1780 	struct netexport *nep;
1781 	struct export_args *argp;
1782 {
1783 	int error;
1784 	struct vnode *rvp;
1785 	char *cp;
1786 
1787 	/*
1788 	 * mp == NULL -> invalidate the current info, the FS is
1789 	 * no longer exported. May be called from either vfs_export
1790 	 * or unmount, so check if it hasn't already been done.
1791 	 */
1792 	if (mp == NULL) {
1793 		if (nfs_pub.np_valid) {
1794 			nfs_pub.np_valid = 0;
1795 			if (nfs_pub.np_index != NULL) {
1796 				FREE(nfs_pub.np_index, M_TEMP);
1797 				nfs_pub.np_index = NULL;
1798 			}
1799 		}
1800 		return (0);
1801 	}
1802 
1803 	/*
1804 	 * Only one allowed at a time.
1805 	 */
1806 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
1807 		return (EBUSY);
1808 
1809 	/*
1810 	 * Get real filehandle for root of exported FS.
1811 	 */
1812 	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
1813 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
1814 
1815 	if ((error = VFS_ROOT(mp, &rvp)))
1816 		return (error);
1817 
1818 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
1819 		return (error);
1820 
1821 	vput(rvp);
1822 
1823 	/*
1824 	 * If an indexfile was specified, pull it in.
1825 	 */
1826 	if (argp->ex_indexfile != NULL) {
1827 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
1828 		    M_WAITOK);
1829 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
1830 		    MAXNAMLEN, (size_t *)0);
1831 		if (!error) {
1832 			/*
1833 			 * Check for illegal filenames.
1834 			 */
1835 			for (cp = nfs_pub.np_index; *cp; cp++) {
1836 				if (*cp == '/') {
1837 					error = EINVAL;
1838 					break;
1839 				}
1840 			}
1841 		}
1842 		if (error) {
1843 			FREE(nfs_pub.np_index, M_TEMP);
1844 			return (error);
1845 		}
1846 	}
1847 
1848 	nfs_pub.np_mount = mp;
1849 	nfs_pub.np_valid = 1;
1850 	return (0);
1851 }
1852 
1853 struct netcred *
1854 vfs_export_lookup(mp, nep, nam)
1855 	register struct mount *mp;
1856 	struct netexport *nep;
1857 	struct mbuf *nam;
1858 {
1859 	register struct netcred *np;
1860 	register struct radix_node_head *rnh;
1861 	struct sockaddr *saddr;
1862 
1863 	np = NULL;
1864 	if (mp->mnt_flag & MNT_EXPORTED) {
1865 		/*
1866 		 * Lookup in the export list first.
1867 		 */
1868 		if (nam != NULL) {
1869 			saddr = mtod(nam, struct sockaddr *);
1870 			rnh = nep->ne_rtable[saddr->sa_family];
1871 			if (rnh != NULL) {
1872 				np = (struct netcred *)
1873 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
1874 							      rnh);
1875 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1876 					np = NULL;
1877 			}
1878 		}
1879 		/*
1880 		 * If no address match, use the default if it exists.
1881 		 */
1882 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1883 			np = &nep->ne_defexported;
1884 	}
1885 	return (np);
1886 }
1887 
1888 /*
1889  * Do the usual access checking.
1890  * file_mode, uid and gid are from the vnode in question,
1891  * while acc_mode and cred are from the VOP_ACCESS parameter list
1892  */
1893 int
1894 vaccess(type, file_mode, uid, gid, acc_mode, cred)
1895 	enum vtype type;
1896 	mode_t file_mode;
1897 	uid_t uid;
1898 	gid_t gid;
1899 	mode_t acc_mode;
1900 	struct ucred *cred;
1901 {
1902 	mode_t mask;
1903 
1904 	/*
1905 	 * Super-user always gets read/write access, but execute access depends
1906 	 * on at least one execute bit being set.
1907 	 */
1908 	if (cred->cr_uid == 0) {
1909 		if ((acc_mode & VEXEC) && type != VDIR &&
1910 		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
1911 			return (EACCES);
1912 		return (0);
1913 	}
1914 
1915 	mask = 0;
1916 
1917 	/* Otherwise, check the owner. */
1918 	if (cred->cr_uid == uid) {
1919 		if (acc_mode & VEXEC)
1920 			mask |= S_IXUSR;
1921 		if (acc_mode & VREAD)
1922 			mask |= S_IRUSR;
1923 		if (acc_mode & VWRITE)
1924 			mask |= S_IWUSR;
1925 		return ((file_mode & mask) == mask ? 0 : EACCES);
1926 	}
1927 
1928 	/* Otherwise, check the groups. */
1929 	if (cred->cr_gid == gid || groupmember(gid, cred)) {
1930 		if (acc_mode & VEXEC)
1931 			mask |= S_IXGRP;
1932 		if (acc_mode & VREAD)
1933 			mask |= S_IRGRP;
1934 		if (acc_mode & VWRITE)
1935 			mask |= S_IWGRP;
1936 		return ((file_mode & mask) == mask ? 0 : EACCES);
1937 	}
1938 
1939 	/* Otherwise, check everyone else. */
1940 	if (acc_mode & VEXEC)
1941 		mask |= S_IXOTH;
1942 	if (acc_mode & VREAD)
1943 		mask |= S_IROTH;
1944 	if (acc_mode & VWRITE)
1945 		mask |= S_IWOTH;
1946 	return ((file_mode & mask) == mask ? 0 : EACCES);
1947 }
1948 
1949 /*
1950  * Unmount all file systems.
1951  * We traverse the list in reverse order under the assumption that doing so
1952  * will avoid needing to worry about dependencies.
1953  */
1954 void
1955 vfs_unmountall()
1956 {
1957 	register struct mount *mp, *nmp;
1958 	int allerror, error;
1959 	struct proc *p = curproc;	/* XXX */
1960 
1961 	for (allerror = 0,
1962 	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
1963 		nmp = mp->mnt_list.cqe_prev;
1964 #ifdef DEBUG
1965 		printf("unmounting %s (%s)...\n",
1966 		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1967 #endif
1968 		if (vfs_busy(mp, 0, 0))
1969 			continue;
1970 		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
1971 			printf("unmount of %s failed with error %d\n",
1972 			    mp->mnt_stat.f_mntonname, error);
1973 			allerror = 1;
1974 		}
1975 	}
1976 	if (allerror)
1977 		printf("WARNING: some file systems would not unmount\n");
1978 }
1979 
1980 /*
1981  * Sync and unmount file systems before shutting down.
1982  */
1983 void
1984 vfs_shutdown()
1985 {
1986 	register struct buf *bp;
1987 	int iter, nbusy, unmountem;
1988 
1989 	/*
1990 	 * If we've panic'd, don't make the situation potentially
1991 	 * worse by unmounting the file systems; just attempt to
1992 	 * sync.
1993 	 */
1994 	if (panicstr != NULL)
1995 		unmountem = 0;
1996 	else
1997 		unmountem = 1;
1998 
1999 	printf("syncing disks... ");
2000 
2001 	/* XXX Should suspend scheduling. */
2002 	(void) spl0();
2003 
2004 	sys_sync(&proc0, (void *)0, (register_t *)0);
2005 
2006 	/* Wait for sync to finish. */
2007 	for (iter = 0; iter < 20; iter++) {
2008 		nbusy = 0;
2009 		for (bp = &buf[nbuf]; --bp >= buf; )
2010 			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
2011 				nbusy++;
2012 		if (nbusy == 0)
2013 			break;
2014 		printf("%d ", nbusy);
2015 		DELAY(40000 * iter);
2016 	}
2017 	if (nbusy) {
2018 		printf("giving up\n");
2019 		unmountem = 0;
2020 	} else
2021 		printf("done\n");
2022 
2023 	if (unmountem) {
2024 		/* Release inodes held by texts before update. */
2025 #if !defined(UVM)
2026 		vnode_pager_umount(NULL);
2027 #endif
2028 #ifdef notdef
2029 		vnshutdown();
2030 #endif
2031 		/* Unmount file systems. */
2032 		vfs_unmountall();
2033 	}
2034 }
2035 
2036 /*
2037  * Mount the root file system.  If the operator didn't specify a
2038  * file system to use, try all possible file systems until one
2039  * succeeds.
2040  */
2041 int
2042 vfs_mountroot()
2043 {
2044 	extern int (*mountroot) __P((void));
2045 	struct vfsops *v;
2046 
2047 	if (root_device == NULL)
2048 		panic("vfs_mountroot: root device unknown");
2049 
2050 	switch (root_device->dv_class) {
2051 	case DV_IFNET:
2052 		if (rootdev != NODEV)
2053 			panic("vfs_mountroot: rootdev set for DV_IFNET");
2054 		break;
2055 
2056 	case DV_DISK:
2057 		if (rootdev == NODEV)
2058 			panic("vfs_mountroot: rootdev not set for DV_DISK");
2059 		break;
2060 
2061 	default:
2062 		printf("%s: inappropriate for root file system\n",
2063 		    root_device->dv_xname);
2064 		return (ENODEV);
2065 	}
2066 
2067 	/*
2068 	 * If user specified a file system, use it.
2069 	 */
2070 	if (mountroot != NULL)
2071 		return ((*mountroot)());
2072 
2073 	/*
2074 	 * Try each file system currently configured into the kernel.
2075 	 */
2076 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2077 		if (v->vfs_mountroot == NULL)
2078 			continue;
2079 #ifdef DEBUG
2080 		printf("mountroot: trying %s...\n", v->vfs_name);
2081 #endif
2082 		if ((*v->vfs_mountroot)() == 0) {
2083 			printf("root file system type: %s\n", v->vfs_name);
2084 			break;
2085 		}
2086 	}
2087 
2088 	if (v == NULL) {
2089 		printf("no file system for %s", root_device->dv_xname);
2090 		if (root_device->dv_class == DV_DISK)
2091 			printf(" (dev 0x%x)", rootdev);
2092 		printf("\n");
2093 		return (EFTYPE);
2094 	}
2095 	return (0);
2096 }
2097 
2098 /*
2099  * Given a file system name, look up the vfsops for that
2100  * file system, or return NULL if file system isn't present
2101  * in the kernel.
2102  */
2103 struct vfsops *
2104 vfs_getopsbyname(name)
2105 	const char *name;
2106 {
2107 	struct vfsops *v;
2108 
2109 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2110 		if (strcmp(v->vfs_name, name) == 0)
2111 			break;
2112 	}
2113 
2114 	return (v);
2115 }
2116 
2117 /*
2118  * Establish a file system and initialize it.
2119  */
2120 int
2121 vfs_attach(vfs)
2122 	struct vfsops *vfs;
2123 {
2124 	struct vfsops *v;
2125 	int error = 0;
2126 
2127 
2128 	/*
2129 	 * Make sure this file system doesn't already exist.
2130 	 */
2131 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2132 		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2133 			error = EEXIST;
2134 			goto out;
2135 		}
2136 	}
2137 
2138 	/*
2139 	 * Initialize the vnode operations for this file system.
2140 	 */
2141 	vfs_opv_init(vfs->vfs_opv_descs);
2142 
2143 	/*
2144 	 * Now initialize the file system itself.
2145 	 */
2146 	(*vfs->vfs_init)();
2147 
2148 	/*
2149 	 * ...and link it into the kernel's list.
2150 	 */
2151 	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2152 
2153 	/*
2154 	 * Sanity: make sure the reference count is 0.
2155 	 */
2156 	vfs->vfs_refcount = 0;
2157 
2158  out:
2159 	return (error);
2160 }
2161 
2162 /*
2163  * Remove a file system from the kernel.
2164  */
2165 int
2166 vfs_detach(vfs)
2167 	struct vfsops *vfs;
2168 {
2169 	struct vfsops *v;
2170 
2171 	/*
2172 	 * Make sure no one is using the filesystem.
2173 	 */
2174 	if (vfs->vfs_refcount != 0)
2175 		return (EBUSY);
2176 
2177 	/*
2178 	 * ...and remove it from the kernel's list.
2179 	 */
2180 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2181 		if (v == vfs) {
2182 			LIST_REMOVE(v, vfs_list);
2183 			break;
2184 		}
2185 	}
2186 
2187 	if (v == NULL)
2188 		return (ESRCH);
2189 
2190 	/*
2191 	 * Free the vnode operations vector.
2192 	 */
2193 	vfs_opv_free(vfs->vfs_opv_descs);
2194 	return (0);
2195 }
2196