1 /*	$NetBSD: vfs_subr.c,v 1.112 1999/10/01 22:03:17 mycroft Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1989, 1993
42  *	The Regents of the University of California.  All rights reserved.
43  * (c) UNIX System Laboratories, Inc.
44  * All or some portions of this file are derived from material licensed
45  * to the University of California by American Telephone and Telegraph
46  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47  * the permission of UNIX System Laboratories, Inc.
48  *
49  * Redistribution and use in source and binary forms, with or without
50  * modification, are permitted provided that the following conditions
51  * are met:
52  * 1. Redistributions of source code must retain the above copyright
53  *    notice, this list of conditions and the following disclaimer.
54  * 2. Redistributions in binary form must reproduce the above copyright
55  *    notice, this list of conditions and the following disclaimer in the
56  *    documentation and/or other materials provided with the distribution.
57  * 3. All advertising materials mentioning features or use of this software
58  *    must display the following acknowledgement:
59  *	This product includes software developed by the University of
60  *	California, Berkeley and its contributors.
61  * 4. Neither the name of the University nor the names of its contributors
62  *    may be used to endorse or promote products derived from this software
63  *    without specific prior written permission.
64  *
65  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
66  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
69  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
70  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
71  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
72  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
73  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
74  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
75  * SUCH DAMAGE.
76  *
77  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
78  */
79 
80 /*
81  * External virtual filesystem routines
82  */
83 
84 #include "opt_compat_netbsd.h"
85 #include "opt_compat_43.h"
86 
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/proc.h>
90 #include <sys/mount.h>
91 #include <sys/time.h>
92 #include <sys/fcntl.h>
93 #include <sys/vnode.h>
94 #include <sys/stat.h>
95 #include <sys/namei.h>
96 #include <sys/ucred.h>
97 #include <sys/buf.h>
98 #include <sys/errno.h>
99 #include <sys/malloc.h>
100 #include <sys/domain.h>
101 #include <sys/mbuf.h>
102 #include <sys/syscallargs.h>
103 #include <sys/device.h>
104 #include <sys/dirent.h>
105 
106 #include <vm/vm.h>
107 #include <sys/sysctl.h>
108 
109 #include <miscfs/specfs/specdev.h>
110 
111 #include <uvm/uvm_extern.h>
112 
113 enum vtype iftovt_tab[16] = {
114 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
115 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
116 };
117 int	vttoif_tab[9] = {
118 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
119 	S_IFSOCK, S_IFIFO, S_IFMT,
120 };
121 
122 int doforce = 1;		/* 1 => permit forcible unmounting */
123 int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
124 
125 /*
126  * Insq/Remq for the vnode usage lists.
127  */
128 #define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
129 #define	bufremvn(bp) {							\
130 	LIST_REMOVE(bp, b_vnbufs);					\
131 	(bp)->b_vnbufs.le_next = NOLIST;				\
132 }
133 TAILQ_HEAD(freelst, vnode) vnode_free_list =	/* vnode free list */
134     TAILQ_HEAD_INITIALIZER(vnode_free_list);
135 struct mntlist mountlist =			/* mounted filesystem list */
136     CIRCLEQ_HEAD_INITIALIZER(mountlist);
137 struct vfs_list_head vfs_list =			/* vfs list */
138 	 LIST_HEAD_INITIALIZER(vfs_list);
139 
140 struct nfs_public nfs_pub;			/* publicly exported FS */
141 
142 struct simplelock mountlist_slock;
143 static struct simplelock mntid_slock;
144 struct simplelock mntvnode_slock;
145 struct simplelock vnode_free_list_slock;
146 struct simplelock spechash_slock;
147 
148 /*
149  * These define the root filesystem and device.
150  */
151 struct mount *rootfs;
152 struct vnode *rootvnode;
153 struct device *root_device;			/* root device */
154 
155 struct pool vnode_pool;				/* memory pool for vnodes */
156 
157 /*
158  * Local declarations.
159  */
160 void insmntque __P((struct vnode *, struct mount *));
161 int getdevvp __P((dev_t, struct vnode **, enum vtype));
162 void vgoneall __P((struct vnode *));
163 
164 static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
165 				  struct export_args *));
166 static int vfs_free_netcred __P((struct radix_node *, void *));
167 static void vfs_free_addrlist __P((struct netexport *));
168 
169 #ifdef DEBUG
170 void printlockedvnodes __P((void));
171 #endif
172 
173 /*
174  * Initialize the vnode management data structures.
175  */
176 void
177 vntblinit()
178 {
179 
180 	simple_lock_init(&mntvnode_slock);
181 	simple_lock_init(&mntid_slock);
182 	simple_lock_init(&spechash_slock);
183 	simple_lock_init(&vnode_free_list_slock);
184 
185 	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
186 	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
187 }
188 
189 /*
190  * Mark a mount point as busy. Used to synchronize access and to delay
191  * unmounting. Interlock is not released on failure.
192  */
193 int
194 vfs_busy(mp, flags, interlkp)
195 	struct mount *mp;
196 	int flags;
197 	struct simplelock *interlkp;
198 {
199 	int lkflags;
200 
201 	while (mp->mnt_flag & MNT_UNMOUNT) {
202 		int gone;
203 
204 		if (flags & LK_NOWAIT)
205 			return (ENOENT);
206 		if (interlkp)
207 			simple_unlock(interlkp);
208 		/*
209 		 * Since all busy locks are shared except the exclusive
210 		 * lock granted when unmounting, the only place that a
211 		 * wakeup needs to be done is at the release of the
212 		 * exclusive lock at the end of dounmount.
213 		 *
214 		 * XXX MP: add spinlock protecting mnt_wcnt here once you
215 		 * can atomically unlock-and-sleep.
216 		 */
217 		mp->mnt_wcnt++;
218 		sleep((caddr_t)mp, PVFS);
219 		mp->mnt_wcnt--;
220 		gone = mp->mnt_flag & MNT_GONE;
221 
222 		if (mp->mnt_wcnt == 0)
223 			wakeup(&mp->mnt_wcnt);
224 		if (interlkp)
225 			simple_lock(interlkp);
226 		if (gone)
227 			return (ENOENT);
228 	}
229 	lkflags = LK_SHARED;
230 	if (interlkp)
231 		lkflags |= LK_INTERLOCK;
232 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
233 		panic("vfs_busy: unexpected lock failure");
234 	return (0);
235 }
236 
237 /*
238  * Free a busy filesystem.
239  */
240 void
241 vfs_unbusy(mp)
242 	struct mount *mp;
243 {
244 
245 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
246 }
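
/*
 * A typical caller pattern (a sketch, modeled on printlockedvnodes()
 * and sysctl_vnode() below): walk the mount list with mountlist_slock
 * as the interlock, skip mount points that cannot be busied, and drop
 * the busy reference when done with each one.
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
 *			nmp = mp->mnt_list.cqe_next;
 *			continue;
 *		}
 *		... examine mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = mp->mnt_list.cqe_next;
 *		vfs_unbusy(mp);
 *	}
 *	simple_unlock(&mountlist_slock);
 */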
247 
248 /*
249  * Lookup a filesystem type, and if found allocate and initialize
250  * a mount structure for it.
251  *
252  * Devname is usually updated by mount(8) after booting.
253  */
254 int
255 vfs_rootmountalloc(fstypename, devname, mpp)
256 	char *fstypename;
257 	char *devname;
258 	struct mount **mpp;
259 {
260 	struct vfsops *vfsp = NULL;
261 	struct mount *mp;
262 
263 	for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL;
264 	     vfsp = LIST_NEXT(vfsp, vfs_list))
265 		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
266 			break;
267 
268 	if (vfsp == NULL)
269 		return (ENODEV);
270 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
271 	memset((char *)mp, 0, (u_long)sizeof(struct mount));
272 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
273 	(void)vfs_busy(mp, LK_NOWAIT, 0);
274 	LIST_INIT(&mp->mnt_vnodelist);
275 	mp->mnt_op = vfsp;
276 	mp->mnt_flag = MNT_RDONLY;
277 	mp->mnt_vnodecovered = NULLVP;
278 	vfsp->vfs_refcount++;
279 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
280 	mp->mnt_stat.f_mntonname[0] = '/';
281 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
282 	*mpp = mp;
283 	return (0);
284 }
285 
286 /*
287  * Lookup a mount point by filesystem identifier.
288  */
289 struct mount *
290 vfs_getvfs(fsid)
291 	fsid_t *fsid;
292 {
293 	register struct mount *mp;
294 
295 	simple_lock(&mountlist_slock);
296 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
297 	     mp = mp->mnt_list.cqe_next) {
298 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
299 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
300 			simple_unlock(&mountlist_slock);
301 			return (mp);
302 		}
303 	}
304 	simple_unlock(&mountlist_slock);
305 	return ((struct mount *)0);
306 }
307 
308 /*
309  * Get a new unique fsid
310  */
311 void
312 vfs_getnewfsid(mp, fstypename)
313 	struct mount *mp;
314 	char *fstypename;
315 {
316 	static u_short xxxfs_mntid;
317 	fsid_t tfsid;
318 	int mtype;
319 
320 	simple_lock(&mntid_slock);
321 	mtype = makefstype(fstypename);
322 	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
323 	mp->mnt_stat.f_fsid.val[1] = mtype;
324 	if (xxxfs_mntid == 0)
325 		++xxxfs_mntid;
326 	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
327 	tfsid.val[1] = mtype;
328 	if (mountlist.cqh_first != (void *)&mountlist) {
329 		while (vfs_getvfs(&tfsid)) {
330 			tfsid.val[0]++;
331 			xxxfs_mntid++;
332 		}
333 	}
334 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
335 	simple_unlock(&mntid_slock);
336 }
337 
338 /*
339  * Make a 'unique' number from a mount type name.
340  */
341 long
342 makefstype(type)
343 	char *type;
344 {
345 	long rv;
346 
347 	for (rv = 0; *type; type++) {
348 		rv <<= 2;
349 		rv ^= *type;
350 	}
351 	return rv;
352 }
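
/*
 * A worked example of the fold above, for the name "ffs":
 *
 *	rv = (0x000 << 2) ^ 'f' (0x66) = 0x066
 *	rv = (0x066 << 2) ^ 'f' (0x66) = 0x1fe
 *	rv = (0x1fe << 2) ^ 's' (0x73) = 0x78b
 *
 * so makefstype("ffs") == 0x78b.  Different names can still collide,
 * hence the quotes around 'unique' in the comment above.
 */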
353 
354 
355 /*
356  * Set vnode attributes to VNOVAL
357  */
358 void
359 vattr_null(vap)
360 	register struct vattr *vap;
361 {
362 
363 	vap->va_type = VNON;
364 
365 	/*
366 	 * Assign individually so that it is safe even if size and
367 	 * sign of each member are varied.
368 	 */
369 	vap->va_mode = VNOVAL;
370 	vap->va_nlink = VNOVAL;
371 	vap->va_uid = VNOVAL;
372 	vap->va_gid = VNOVAL;
373 	vap->va_fsid = VNOVAL;
374 	vap->va_fileid = VNOVAL;
375 	vap->va_size = VNOVAL;
376 	vap->va_blocksize = VNOVAL;
377 	vap->va_atime.tv_sec =
378 	    vap->va_mtime.tv_sec =
379 	    vap->va_ctime.tv_sec = VNOVAL;
380 	vap->va_atime.tv_nsec =
381 	    vap->va_mtime.tv_nsec =
382 	    vap->va_ctime.tv_nsec = VNOVAL;
383 	vap->va_gen = VNOVAL;
384 	vap->va_flags = VNOVAL;
385 	vap->va_rdev = VNOVAL;
386 	vap->va_bytes = VNOVAL;
387 	vap->va_vaflags = 0;
388 }
389 
390 /*
391  * Routines having to do with the management of the vnode table.
392  */
393 extern int (**dead_vnodeop_p) __P((void *));
394 long numvnodes;
395 
396 /*
397  * Return the next vnode from the free list, or allocate a new one.
398  */
399 int
400 getnewvnode(tag, mp, vops, vpp)
401 	enum vtagtype tag;
402 	struct mount *mp;
403 	int (**vops) __P((void *));
404 	struct vnode **vpp;
405 {
406 	struct proc *p = curproc;	/* XXX */
407 	struct vnode *vp;
408 	int error;
409 #ifdef DIAGNOSTIC
410 	int s;
411 #endif
412 	if (mp) {
413 		/*
414 		 * Mark filesystem busy while we're creating a vnode.
415 		 * If unmount is in progress, this will wait; if the
416 		 * unmount succeeds (only if umount -f), this will
417 		 * return an error.  If the unmount fails, we'll keep
418 		 * going afterwards.
419 		 * (This puts the per-mount vnode list logically under
420 		 * the protection of the vfs_busy lock).
421 		 */
422 		error = vfs_busy(mp, 0, 0);
423 		if (error)
424 			return error;
425 	}
426 
427 	simple_lock(&vnode_free_list_slock);
428 	if ((vnode_free_list.tqh_first == NULL &&
429 	     numvnodes < 2 * desiredvnodes) ||
430 	    numvnodes < desiredvnodes) {
431 		simple_unlock(&vnode_free_list_slock);
432 		vp = pool_get(&vnode_pool, PR_WAITOK);
433 		memset((char *)vp, 0, sizeof(*vp));
434 		simple_lock_init(&vp->v_interlock);
435 		numvnodes++;
436 	} else {
437 		for (vp = vnode_free_list.tqh_first;
438 				vp != NULLVP; vp = vp->v_freelist.tqe_next) {
439 			if (simple_lock_try(&vp->v_interlock)) {
440 				if ((vp->v_flag & VLAYER) == 0) {
441 					break;
442 				}
443 				if (VOP_ISLOCKED(vp) == 0)
444 					break;
445 				else
446 					simple_unlock(&vp->v_interlock);
447 			}
448 		}
449 		/*
450 		 * Unless this is a bad time of the month, at most
451 		 * the first NCPUS items on the free list are
452 		 * locked, so this is close enough to being empty.
453 		 */
454 		if (vp == NULLVP) {
455 			simple_unlock(&vnode_free_list_slock);
456 			if (mp) vfs_unbusy(mp);
457 			tablefull("vnode");
458 			*vpp = 0;
459 			return (ENFILE);
460 		}
461 		if (vp->v_usecount)
462 			panic("free vnode isn't");
463 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
464 		/* see comment on why 0xdeadb is set at end of vgone (below) */
465 		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
466 		simple_unlock(&vnode_free_list_slock);
467 		vp->v_lease = NULL;
468 		if (vp->v_type != VBAD)
469 			vgonel(vp, p);
470 		else
471 			simple_unlock(&vp->v_interlock);
472 #ifdef DIAGNOSTIC
473 		if (vp->v_data)
474 			panic("cleaned vnode isn't");
475 		s = splbio();
476 		if (vp->v_numoutput)
477 			panic("Clean vnode has pending I/O's");
478 		splx(s);
479 #endif
480 		vp->v_flag = 0;
481 		vp->v_lastr = 0;
482 		vp->v_ralen = 0;
483 		vp->v_maxra = 0;
484 		vp->v_lastw = 0;
485 		vp->v_lasta = 0;
486 		vp->v_cstart = 0;
487 		vp->v_clen = 0;
488 		vp->v_socket = 0;
489 	}
490 	vp->v_type = VNON;
491 	vp->v_vnlock = &vp->v_lock;
492 	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
493 	cache_purge(vp);
494 	vp->v_tag = tag;
495 	vp->v_op = vops;
496 	insmntque(vp, mp);
497 	*vpp = vp;
498 	vp->v_usecount = 1;
499 	vp->v_data = 0;
500 	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
501 	if (mp) vfs_unbusy(mp);
502 	return (0);
503 }
504 
505 /*
506  * Move a vnode from one mount queue to another.
507  */
508 void
509 insmntque(vp, mp)
510 	register struct vnode *vp;
511 	register struct mount *mp;
512 {
513 
514 #ifdef DIAGNOSTIC
515 	if ((mp != NULL) &&
516 	    (mp->mnt_flag & MNT_UNMOUNT)) {
517 		panic("insmntque into dying filesystem");
518 	}
519 #endif
520 
521 	simple_lock(&mntvnode_slock);
522 	/*
523 	 * Delete from old mount point vnode list, if on one.
524 	 */
525 	if (vp->v_mount != NULL)
526 		LIST_REMOVE(vp, v_mntvnodes);
527 	/*
528 	 * Insert into list of vnodes for the new mount point, if available.
529 	 */
530 	if ((vp->v_mount = mp) != NULL)
531 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
532 	simple_unlock(&mntvnode_slock);
533 }
534 
535 /*
536  * Update outstanding I/O count and do wakeup if requested.
537  */
538 void
539 vwakeup(bp)
540 	register struct buf *bp;
541 {
542 	register struct vnode *vp;
543 
544 	bp->b_flags &= ~B_WRITEINPROG;
545 	if ((vp = bp->b_vp) != NULL) {
546 		if (--vp->v_numoutput < 0)
547 			panic("vwakeup: neg numoutput");
548 		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
549 			vp->v_flag &= ~VBWAIT;
550 			wakeup((caddr_t)&vp->v_numoutput);
551 		}
552 	}
553 }
554 
555 /*
556  * Flush out and invalidate all buffers associated with a vnode.
557  * Called with the underlying object locked.
558  */
559 int
560 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
561 	register struct vnode *vp;
562 	int flags;
563 	struct ucred *cred;
564 	struct proc *p;
565 	int slpflag, slptimeo;
566 {
567 	register struct buf *bp;
568 	struct buf *nbp, *blist;
569 	int s, error;
570 
571 	if (flags & V_SAVE) {
572 		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, p);
573 		if (error != 0)
574 			return (error);
575 		if (vp->v_dirtyblkhd.lh_first != NULL)
576 			panic("vinvalbuf: dirty bufs");
577 	}
578 	for (;;) {
579 		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
580 			while (blist && blist->b_lblkno < 0)
581 				blist = blist->b_vnbufs.le_next;
582 		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
583 		    (flags & V_SAVEMETA))
584 			while (blist && blist->b_lblkno < 0)
585 				blist = blist->b_vnbufs.le_next;
586 		if (!blist)
587 			break;
588 
589 		for (bp = blist; bp; bp = nbp) {
590 			nbp = bp->b_vnbufs.le_next;
591 			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
592 				continue;
593 			s = splbio();
594 			if (bp->b_flags & B_BUSY) {
595 				bp->b_flags |= B_WANTED;
596 				error = tsleep((caddr_t)bp,
597 					slpflag | (PRIBIO + 1), "vinvalbuf",
598 					slptimeo);
599 				splx(s);
600 				if (error)
601 					return (error);
602 				break;
603 			}
604 			bp->b_flags |= B_BUSY | B_VFLUSH;
605 			splx(s);
606 			/*
607 			 * XXX Since there are no node locks for NFS, I believe
608 			 * there is a slight chance that a delayed write will
609 			 * occur while sleeping just above, so check for it.
610 			 */
611 			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
612 				(void) VOP_BWRITE(bp);
613 				break;
614 			}
615 			bp->b_flags |= B_INVAL;
616 			brelse(bp);
617 		}
618 	}
619 	if (!(flags & V_SAVEMETA) &&
620 	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
621 		panic("vinvalbuf: flush failed");
622 	return (0);
623 }
624 
625 void
626 vflushbuf(vp, sync)
627 	register struct vnode *vp;
628 	int sync;
629 {
630 	register struct buf *bp, *nbp;
631 	int s;
632 
633 loop:
634 	s = splbio();
635 	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
636 		nbp = bp->b_vnbufs.le_next;
637 		if ((bp->b_flags & B_BUSY))
638 			continue;
639 		if ((bp->b_flags & B_DELWRI) == 0)
640 			panic("vflushbuf: not dirty");
641 		bp->b_flags |= B_BUSY | B_VFLUSH;
642 		splx(s);
643 		/*
644 		 * Wait for I/O associated with indirect blocks to complete,
645 		 * since there is no way to quickly wait for them below.
646 		 */
647 		if (bp->b_vp == vp || sync == 0)
648 			(void) bawrite(bp);
649 		else
650 			(void) bwrite(bp);
651 		goto loop;
652 	}
653 	if (sync == 0) {
654 		splx(s);
655 		return;
656 	}
657 	while (vp->v_numoutput) {
658 		vp->v_flag |= VBWAIT;
659 		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
660 	}
661 	splx(s);
662 	if (vp->v_dirtyblkhd.lh_first != NULL) {
663 		vprint("vflushbuf: dirty", vp);
664 		goto loop;
665 	}
666 }
667 
668 /*
669  * Associate a buffer with a vnode.
670  */
671 void
672 bgetvp(vp, bp)
673 	register struct vnode *vp;
674 	register struct buf *bp;
675 {
676 
677 	if (bp->b_vp)
678 		panic("bgetvp: not free");
679 	VHOLD(vp);
680 	bp->b_vp = vp;
681 	if (vp->v_type == VBLK || vp->v_type == VCHR)
682 		bp->b_dev = vp->v_rdev;
683 	else
684 		bp->b_dev = NODEV;
685 	/*
686 	 * Insert onto list for new vnode.
687 	 */
688 	bufinsvn(bp, &vp->v_cleanblkhd);
689 }
690 
691 /*
692  * Disassociate a buffer from a vnode.
693  */
694 void
695 brelvp(bp)
696 	register struct buf *bp;
697 {
698 	struct vnode *vp;
699 
700 	if (bp->b_vp == (struct vnode *) 0)
701 		panic("brelvp: NULL");
702 	/*
703 	 * Delete from old vnode list, if on one.
704 	 */
705 	if (bp->b_vnbufs.le_next != NOLIST)
706 		bufremvn(bp);
707 	vp = bp->b_vp;
708 	bp->b_vp = (struct vnode *) 0;
709 	HOLDRELE(vp);
710 }
711 
712 /*
713  * Reassign a buffer from one vnode to another.
714  * Used to assign file specific control information
715  * (indirect blocks) to the vnode to which they belong.
716  */
717 void
718 reassignbuf(bp, newvp)
719 	register struct buf *bp;
720 	register struct vnode *newvp;
721 {
722 	register struct buflists *listheadp;
723 
724 	if (newvp == NULL) {
725 		printf("reassignbuf: NULL");
726 		return;
727 	}
728 	/*
729 	 * Delete from old vnode list, if on one.
730 	 */
731 	if (bp->b_vnbufs.le_next != NOLIST)
732 		bufremvn(bp);
733 	/*
734 	 * If dirty, put on list of dirty buffers;
735 	 * otherwise insert onto list of clean buffers.
736 	 */
737 	if (bp->b_flags & B_DELWRI)
738 		listheadp = &newvp->v_dirtyblkhd;
739 	else
740 		listheadp = &newvp->v_cleanblkhd;
741 	bufinsvn(bp, listheadp);
742 }
743 
744 /*
745  * Create a vnode for a block device.
746  * Used for root filesystem and swap areas.
747  * Also used for memory file system special devices.
748  */
749 int
750 bdevvp(dev, vpp)
751 	dev_t dev;
752 	struct vnode **vpp;
753 {
754 
755 	return (getdevvp(dev, vpp, VBLK));
756 }
757 
758 /*
759  * Create a vnode for a character device.
760  * Used for kernfs and some console handling.
761  */
762 int
763 cdevvp(dev, vpp)
764 	dev_t dev;
765 	struct vnode **vpp;
766 {
767 
768 	return (getdevvp(dev, vpp, VCHR));
769 }
770 
771 /*
772  * Create a vnode for a device.
773  * Used by bdevvp (block device) for root file system etc.,
774  * and by cdevvp (character device) for console and kernfs.
775  */
776 int
777 getdevvp(dev, vpp, type)
778 	dev_t dev;
779 	struct vnode **vpp;
780 	enum vtype type;
781 {
782 	register struct vnode *vp;
783 	struct vnode *nvp;
784 	int error;
785 
786 	if (dev == NODEV) {
787 		*vpp = NULLVP;
788 		return (0);
789 	}
790 	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
791 	if (error) {
792 		*vpp = NULLVP;
793 		return (error);
794 	}
795 	vp = nvp;
796 	vp->v_type = type;
797 	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
798 		vput(vp);
799 		vp = nvp;
800 	}
801 	*vpp = vp;
802 	return (0);
803 }
804 
805 /*
806  * Check to see if the new vnode represents a special device
807  * for which we already have a vnode (either because of
808  * bdevvp() or because of a different vnode representing
809  * the same block device). If such an alias exists, deallocate
810  * the existing contents and return the aliased vnode. The
811  * caller is responsible for filling it with its new contents.
812  */
813 struct vnode *
814 checkalias(nvp, nvp_rdev, mp)
815 	register struct vnode *nvp;
816 	dev_t nvp_rdev;
817 	struct mount *mp;
818 {
819 	struct proc *p = curproc;       /* XXX */
820 	register struct vnode *vp;
821 	struct vnode **vpp;
822 
823 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
824 		return (NULLVP);
825 
826 	vpp = &speclisth[SPECHASH(nvp_rdev)];
827 loop:
828 	simple_lock(&spechash_slock);
829 	for (vp = *vpp; vp; vp = vp->v_specnext) {
830 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
831 			continue;
832 		/*
833 		 * Alias, but not in use, so flush it out.
834 		 */
835 		simple_lock(&vp->v_interlock);
836 		if (vp->v_usecount == 0) {
837 			simple_unlock(&spechash_slock);
838 			vgonel(vp, p);
839 			goto loop;
840 		}
841 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
842 			simple_unlock(&spechash_slock);
843 			goto loop;
844 		}
845 		break;
846 	}
847 	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
848 		MALLOC(nvp->v_specinfo, struct specinfo *,
849 			sizeof(struct specinfo), M_VNODE, M_WAITOK);
850 		nvp->v_rdev = nvp_rdev;
851 		nvp->v_hashchain = vpp;
852 		nvp->v_specnext = *vpp;
853 		nvp->v_specflags = 0;
854 		simple_unlock(&spechash_slock);
855 		nvp->v_speclockf = NULL;
856 		*vpp = nvp;
857 		if (vp != NULLVP) {
858 			nvp->v_flag |= VALIASED;
859 			vp->v_flag |= VALIASED;
860 			vput(vp);
861 		}
862 		return (NULLVP);
863 	}
864 	simple_unlock(&spechash_slock);
865 	VOP_UNLOCK(vp, 0);
866 	simple_lock(&vp->v_interlock);
867 	vclean(vp, 0, p);
868 	vp->v_op = nvp->v_op;
869 	vp->v_tag = nvp->v_tag;
870 	vp->v_vnlock = &vp->v_lock;
871 	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
872 	nvp->v_type = VNON;
873 	insmntque(vp, mp);
874 	return (vp);
875 }
876 
877 /*
878  * Grab a particular vnode from the free list, increment its
879  * reference count and lock it. If the vnode lock bit is set the
880  * vnode is being eliminated in vgone. In that case, we cannot
881  * grab the vnode, so the process is awakened when the transition is
882  * completed, and an error returned to indicate that the vnode is no
883  * longer usable (possibly having been changed to a new file system type).
884  */
885 int
886 vget(vp, flags)
887 	struct vnode *vp;
888 	int flags;
889 {
890 	int error;
891 
892 	/*
893 	 * If the vnode is in the process of being cleaned out for
894 	 * another use, we wait for the cleaning to finish and then
895 	 * return failure. Cleaning is determined by checking that
896 	 * the VXLOCK flag is set.
897 	 */
898 	if ((flags & LK_INTERLOCK) == 0)
899 		simple_lock(&vp->v_interlock);
900 	if (vp->v_flag & VXLOCK) {
901 		vp->v_flag |= VXWANT;
902 		simple_unlock(&vp->v_interlock);
903 		tsleep((caddr_t)vp, PINOD, "vget", 0);
904 		return (ENOENT);
905 	}
906 	if (vp->v_usecount == 0) {
907 		simple_lock(&vnode_free_list_slock);
908 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
909 		simple_unlock(&vnode_free_list_slock);
910 	}
911 	vp->v_usecount++;
912 #ifdef DIAGNOSTIC
913 	if (vp->v_usecount == 0) {
914 		vprint("vget", vp);
915 		panic("vget: usecount overflow");
916 	}
917 #endif
918 	if (flags & LK_TYPE_MASK) {
919 		if ((error = vn_lock(vp, flags | LK_INTERLOCK)))
920 			vrele(vp);
921 		return (error);
922 	}
923 	simple_unlock(&vp->v_interlock);
924 	return (0);
925 }
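
/*
 * A minimal caller sketch: with a lock type in `flags', vget() returns
 * the vnode referenced and locked, so it pairs with vput(); a plain
 * vget(vp, 0) takes only a reference and pairs with vrele().
 *
 *	if (vget(vp, LK_EXCLUSIVE) == 0) {
 *		... use the referenced, locked vnode ...
 *		vput(vp);
 *	}
 */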
926 
927 /*
928  * vput(), just unlock and vrele()
929  */
930 void
931 vput(vp)
932 	struct vnode *vp;
933 {
934 	struct proc *p = curproc;	/* XXX */
935 
936 #ifdef DIAGNOSTIC
937 	if (vp == NULL)
938 		panic("vput: null vp");
939 #endif
940 	simple_lock(&vp->v_interlock);
941 	vp->v_usecount--;
942 	if (vp->v_usecount > 0) {
943 		simple_unlock(&vp->v_interlock);
944 		VOP_UNLOCK(vp, 0);
945 		return;
946 	}
947 #ifdef DIAGNOSTIC
948 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
949 		vprint("vput: bad ref count", vp);
950 		panic("vput: ref cnt");
951 	}
952 #endif
953 	/*
954 	 * Insert at tail of LRU list.
955 	 */
956 	simple_lock(&vnode_free_list_slock);
957 	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
958 	simple_unlock(&vnode_free_list_slock);
959 	simple_unlock(&vp->v_interlock);
960 	VOP_INACTIVE(vp, p);
961 }
962 
963 /*
964  * Vnode release.
965  * If count drops to zero, call inactive routine and return to freelist.
966  */
967 void
968 vrele(vp)
969 	struct vnode *vp;
970 {
971 	struct proc *p = curproc;	/* XXX */
972 
973 #ifdef DIAGNOSTIC
974 	if (vp == NULL)
975 		panic("vrele: null vp");
976 #endif
977 	simple_lock(&vp->v_interlock);
978 	vp->v_usecount--;
979 	if (vp->v_usecount > 0) {
980 		simple_unlock(&vp->v_interlock);
981 		return;
982 	}
983 #ifdef DIAGNOSTIC
984 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
985 		vprint("vrele: bad ref count", vp);
986 		panic("vrele: ref cnt");
987 	}
988 #endif
989 	/*
990 	 * Insert at tail of LRU list.
991 	 */
992 	simple_lock(&vnode_free_list_slock);
993 	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
994 	simple_unlock(&vnode_free_list_slock);
995 	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
996 		VOP_INACTIVE(vp, p);
997 }
998 
999 #ifdef DIAGNOSTIC
1000 /*
1001  * Page or buffer structure gets a reference.
1002  */
1003 void
1004 vhold(vp)
1005 	register struct vnode *vp;
1006 {
1007 
1008 	simple_lock(&vp->v_interlock);
1009 	vp->v_holdcnt++;
1010 	simple_unlock(&vp->v_interlock);
1011 }
1012 
1013 /*
1014  * Page or buffer structure frees a reference.
1015  */
1016 void
1017 holdrele(vp)
1018 	register struct vnode *vp;
1019 {
1020 
1021 	simple_lock(&vp->v_interlock);
1022 	if (vp->v_holdcnt <= 0)
1023 		panic("holdrele: holdcnt");
1024 	vp->v_holdcnt--;
1025 	simple_unlock(&vp->v_interlock);
1026 }
1027 
1028 /*
1029  * Vnode reference.
1030  */
1031 void
1032 vref(vp)
1033 	struct vnode *vp;
1034 {
1035 
1036 	simple_lock(&vp->v_interlock);
1037 	if (vp->v_usecount <= 0)
1038 		panic("vref used where vget required");
1039 	vp->v_usecount++;
1040 #ifdef DIAGNOSTIC
1041 	if (vp->v_usecount == 0) {
1042 		vprint("vref", vp);
1043 		panic("vref: usecount overflow");
1044 	}
1045 #endif
1046 	simple_unlock(&vp->v_interlock);
1047 }
1048 #endif /* DIAGNOSTIC */
1049 
1050 /*
1051  * Remove any vnodes in the vnode table belonging to mount point mp.
1052  *
1053  * If MNT_NOFORCE is specified, there should not be any active ones;
1054  * return an error if any are found (nb: this is a user error, not a
1055  * system error). If MNT_FORCE is specified, detach any active vnodes
1056  * that are found.
1057  */
1058 #ifdef DEBUG
1059 int busyprt = 0;	/* print out busy vnodes */
1060 struct ctldebug debug1 = { "busyprt", &busyprt };
1061 #endif
1062 
1063 int
1064 vflush(mp, skipvp, flags)
1065 	struct mount *mp;
1066 	struct vnode *skipvp;
1067 	int flags;
1068 {
1069 	struct proc *p = curproc;	/* XXX */
1070 	register struct vnode *vp, *nvp;
1071 	int busy = 0;
1072 
1073 	simple_lock(&mntvnode_slock);
1074 loop:
1075 	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1076 		if (vp->v_mount != mp)
1077 			goto loop;
1078 		nvp = vp->v_mntvnodes.le_next;
1079 		/*
1080 		 * Skip over a selected vnode.
1081 		 */
1082 		if (vp == skipvp)
1083 			continue;
1084 		simple_lock(&vp->v_interlock);
1085 		/*
1086 		 * Skip over vnodes marked VSYSTEM.
1087 		 */
1088 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1089 			simple_unlock(&vp->v_interlock);
1090 			continue;
1091 		}
1092 		/*
1093 		 * If WRITECLOSE is set, only flush out regular file
1094 		 * vnodes open for writing.
1095 		 */
1096 		if ((flags & WRITECLOSE) &&
1097 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1098 			simple_unlock(&vp->v_interlock);
1099 			continue;
1100 		}
1101 		/*
1102 		 * With v_usecount == 0, all we need to do is clear
1103 		 * out the vnode data structures and we are done.
1104 		 */
1105 		if (vp->v_usecount == 0) {
1106 			simple_unlock(&mntvnode_slock);
1107 			vgonel(vp, p);
1108 			simple_lock(&mntvnode_slock);
1109 			continue;
1110 		}
1111 		/*
1112 		 * If FORCECLOSE is set, forcibly close the vnode.
1113 		 * For block or character devices, revert to an
1114 		 * anonymous device. For all other files, just kill them.
1115 		 */
1116 		if (flags & FORCECLOSE) {
1117 			simple_unlock(&mntvnode_slock);
1118 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
1119 				vgonel(vp, p);
1120 			} else {
1121 				vclean(vp, 0, p);
1122 				vp->v_op = spec_vnodeop_p;
1123 				insmntque(vp, (struct mount *)0);
1124 			}
1125 			simple_lock(&mntvnode_slock);
1126 			continue;
1127 		}
1128 #ifdef DEBUG
1129 		if (busyprt)
1130 			vprint("vflush: busy vnode", vp);
1131 #endif
1132 		simple_unlock(&vp->v_interlock);
1133 		busy++;
1134 	}
1135 	simple_unlock(&mntvnode_slock);
1136 	if (busy)
1137 		return (EBUSY);
1138 	return (0);
1139 }
1140 
1141 /*
1142  * Disassociate the underlying file system from a vnode.
1143  */
1144 void
1145 vclean(vp, flags, p)
1146 	register struct vnode *vp;
1147 	int flags;
1148 	struct proc *p;
1149 {
1150 	int active;
1151 
1152 	/*
1153 	 * Check to see if the vnode is in use.
1154 	 * If so we have to reference it before we clean it out
1155 	 * so that its count cannot fall to zero and generate a
1156 	 * race against ourselves to recycle it.
1157 	 */
1158 	if ((active = vp->v_usecount) != 0) {
1159 		/* We have the vnode interlock. */
1160 		vp->v_usecount++;
1161 #ifdef DIAGNOSTIC
1162 		if (vp->v_usecount == 0) {
1163 			vprint("vclean", vp);
1164 			panic("vclean: usecount overflow");
1165 		}
1166 #endif
1167 	}
1168 
1169 	/*
1170 	 * Prevent the vnode from being recycled or
1171 	 * brought into use while we clean it out.
1172 	 */
1173 	if (vp->v_flag & VXLOCK)
1174 		panic("vclean: deadlock");
1175 	vp->v_flag |= VXLOCK;
1176 	/*
1177 	 * Even if the count is zero, the VOP_INACTIVE routine may still
1178 	 * have the object locked while it cleans it out. The VOP_LOCK
1179 	 * ensures that the VOP_INACTIVE routine is done with its work.
1180 	 * For active vnodes, it ensures that no other activity can
1181 	 * occur while the underlying object is being cleaned out.
1182 	 */
1183 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
1184 
1185 	/*
1186 	 * Clean out any VM data associated with the vnode.
1187 	 */
1188 	uvm_vnp_terminate(vp);
1189 	/*
1190 	 * Clean out any buffers associated with the vnode.
1191 	 */
1192 	if (flags & DOCLOSE)
1193 		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
1194 
1195 	/*
1196 	 * If purging an active vnode, it must be closed and
1197 	 * deactivated before being reclaimed. Note that the
1198 	 * VOP_INACTIVE will unlock the vnode.
1199 	 */
1200 	if (active) {
1201 		if (flags & DOCLOSE)
1202 			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1203 		VOP_INACTIVE(vp, p);
1204 	} else {
1205 		/*
1206 		 * Any other processes trying to obtain this lock must first
1207 		 * wait for VXLOCK to clear, then call the new lock operation.
1208 		 */
1209 		VOP_UNLOCK(vp, 0);
1210 	}
1211 	/*
1212 	 * Reclaim the vnode.
1213 	 */
1214 	if (VOP_RECLAIM(vp, p))
1215 		panic("vclean: cannot reclaim");
1216 
1217 	if (active) {
1218 		/*
1219 		 * Inline copy of vrele() since VOP_INACTIVE
1220 		 * has already been called.
1221 		 */
1222 		simple_lock(&vp->v_interlock);
1223 		if (--vp->v_usecount <= 0) {
1224 #ifdef DIAGNOSTIC
1225 			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1226 				vprint("vclean: bad ref count", vp);
1227 				panic("vclean: ref cnt");
1228 			}
1229 #endif
1230 			/*
1231 			 * Insert at tail of LRU list.
1232 			 */
1233 			simple_lock(&vnode_free_list_slock);
1234 #ifdef DIAGNOSTIC
1235 			if (vp->v_vnlock) {
1236 				if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1237 					vprint("vclean: lock not drained", vp);
1238 			}
1239 #endif
1240 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1241 			simple_unlock(&vnode_free_list_slock);
1242 		}
1243 		simple_unlock(&vp->v_interlock);
1244 	}
1245 
1246 	cache_purge(vp);
1247 
1248 	/*
1249 	 * Done with purge, notify sleepers of the grim news.
1250 	 */
1251 	vp->v_op = dead_vnodeop_p;
1252 	vp->v_tag = VT_NON;
1253 	vp->v_flag &= ~VXLOCK;
1254 	if (vp->v_flag & VXWANT) {
1255 		vp->v_flag &= ~VXWANT;
1256 		wakeup((caddr_t)vp);
1257 	}
1258 }
1259 
1260 /*
1261  * Recycle an unused vnode to the front of the free list.
1262  * Release the passed interlock if the vnode will be recycled.
1263  */
1264 int
1265 vrecycle(vp, inter_lkp, p)
1266 	struct vnode *vp;
1267 	struct simplelock *inter_lkp;
1268 	struct proc *p;
1269 {
1270 
1271 	simple_lock(&vp->v_interlock);
1272 	if (vp->v_usecount == 0) {
1273 		if (inter_lkp)
1274 			simple_unlock(inter_lkp);
1275 		vgonel(vp, p);
1276 		return (1);
1277 	}
1278 	simple_unlock(&vp->v_interlock);
1279 	return (0);
1280 }
1281 
1282 /*
1283  * Eliminate all activity associated with a vnode
1284  * in preparation for reuse.
1285  */
1286 void
1287 vgone(vp)
1288 	struct vnode *vp;
1289 {
1290 	struct proc *p = curproc;	/* XXX */
1291 
1292 	simple_lock(&vp->v_interlock);
1293 	vgonel(vp, p);
1294 }
1295 
1296 /*
1297  * vgone, with the vp interlock held.
1298  */
1299 void
1300 vgonel(vp, p)
1301 	register struct vnode *vp;
1302 	struct proc *p;
1303 {
1304 	struct vnode *vq;
1305 	struct vnode *vx;
1306 
1307 	/*
1308 	 * If a vgone (or vclean) is already in progress,
1309 	 * wait until it is done and return.
1310 	 */
1311 	if (vp->v_flag & VXLOCK) {
1312 		vp->v_flag |= VXWANT;
1313 		simple_unlock(&vp->v_interlock);
1314 		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1315 		return;
1316 	}
1317 	/*
1318 	 * Clean out the filesystem specific data.
1319 	 */
1320 	vclean(vp, DOCLOSE, p);
1321 	/*
1322 	 * Delete from old mount point vnode list, if on one.
1323 	 */
1324 	if (vp->v_mount != NULL)
1325 		insmntque(vp, (struct mount *)0);
1326 	/*
1327 	 * If special device, remove it from the special device alias list,
1328 	 * if it is on one.
1329 	 */
1330 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1331 		simple_lock(&spechash_slock);
1332 		if (vp->v_hashchain != NULL) {
1333 			if (*vp->v_hashchain == vp) {
1334 				*vp->v_hashchain = vp->v_specnext;
1335 			} else {
1336 				for (vq = *vp->v_hashchain; vq;
1337 							vq = vq->v_specnext) {
1338 					if (vq->v_specnext != vp)
1339 						continue;
1340 					vq->v_specnext = vp->v_specnext;
1341 					break;
1342 				}
1343 				if (vq == NULL)
1344 					panic("missing bdev");
1345 			}
1346 			if (vp->v_flag & VALIASED) {
1347 				vx = NULL;
1348 				for (vq = *vp->v_hashchain; vq;
1349 							vq = vq->v_specnext) {
1350 					if (vq->v_rdev != vp->v_rdev ||
1351 					    vq->v_type != vp->v_type)
1352 						continue;
1353 					if (vx)
1354 						break;
1355 					vx = vq;
1356 				}
1357 				if (vx == NULL)
1358 					panic("missing alias");
1359 				if (vq == NULL)
1360 					vx->v_flag &= ~VALIASED;
1361 				vp->v_flag &= ~VALIASED;
1362 			}
1363 		}
1364 		simple_unlock(&spechash_slock);
1365 		FREE(vp->v_specinfo, M_VNODE);
1366 		vp->v_specinfo = NULL;
1367 	}
1368 	/*
1369 	 * If it is on the freelist and not already at the head,
1370 	 * move it to the head of the list. The test of the back
1371 	 * pointer and the reference count of zero is because
1372 	 * it will be removed from the free list by getnewvnode,
1373 	 * but will not have its reference count incremented until
1374 	 * after calling vgone. If the reference count were
1375 	 * incremented first, vgone would (incorrectly) try to
1376 	 * close the previous instance of the underlying object.
1377 	 * So, the back pointer is explicitly set to `0xdeadb' in
1378 	 * getnewvnode after removing it from the freelist to ensure
1379 	 * that we do not try to move it here.
1380 	 */
1381 	if (vp->v_usecount == 0) {
1382 		simple_lock(&vnode_free_list_slock);
1383 		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
1384 		    vnode_free_list.tqh_first != vp) {
1385 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1386 			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1387 		}
1388 		simple_unlock(&vnode_free_list_slock);
1389 	}
1390 	vp->v_type = VBAD;
1391 }
1392 
1393 /*
1394  * Lookup a vnode by device number.
1395  */
1396 int
1397 vfinddev(dev, type, vpp)
1398 	dev_t dev;
1399 	enum vtype type;
1400 	struct vnode **vpp;
1401 {
1402 	struct vnode *vp;
1403 	int rc = 0;
1404 
1405 	simple_lock(&spechash_slock);
1406 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1407 		if (dev != vp->v_rdev || type != vp->v_type)
1408 			continue;
1409 		*vpp = vp;
1410 		rc = 1;
1411 		break;
1412 	}
1413 	simple_unlock(&spechash_slock);
1414 	return (rc);
1415 }
1416 
1417 /*
1418  * Revoke all the vnodes corresponding to the specified minor number
1419  * range (endpoints inclusive) of the specified major.
1420  */
1421 void
1422 vdevgone(maj, minl, minh, type)
1423 	int maj, minl, minh;
1424 	enum vtype type;
1425 {
1426 	struct vnode *vp;
1427 	int mn;
1428 
1429 	for (mn = minl; mn <= minh; mn++)
1430 		if (vfinddev(makedev(maj, mn), type, &vp))
1431 			VOP_REVOKE(vp, REVOKEALL);
1432 }
1433 
1434 /*
1435  * Calculate the total number of references to a special device.
1436  */
1437 int
1438 vcount(vp)
1439 	register struct vnode *vp;
1440 {
1441 	register struct vnode *vq, *vnext;
1442 	int count;
1443 
1444 loop:
1445 	if ((vp->v_flag & VALIASED) == 0)
1446 		return (vp->v_usecount);
1447 	simple_lock(&spechash_slock);
1448 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1449 		vnext = vq->v_specnext;
1450 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1451 			continue;
1452 		/*
1453 		 * Alias, but not in use, so flush it out.
1454 		 */
1455 		if (vq->v_usecount == 0 && vq != vp) {
1456 			simple_unlock(&spechash_slock);
1457 			vgone(vq);
1458 			goto loop;
1459 		}
1460 		count += vq->v_usecount;
1461 	}
1462 	simple_unlock(&spechash_slock);
1463 	return (count);
1464 }
1465 
1466 /*
1467  * Print out a description of a vnode.
1468  */
1469 static char *typename[] =
1470    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1471 
1472 void
1473 vprint(label, vp)
1474 	char *label;
1475 	register struct vnode *vp;
1476 {
1477 	char buf[64];
1478 
1479 	if (label != NULL)
1480 		printf("%s: ", label);
1481 	printf("type %s, usecount %ld, writecount %ld, refcount %ld,",
1482 	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1483 	    vp->v_holdcnt);
1484 	buf[0] = '\0';
1485 	if (vp->v_flag & VROOT)
1486 		strcat(buf, "|VROOT");
1487 	if (vp->v_flag & VTEXT)
1488 		strcat(buf, "|VTEXT");
1489 	if (vp->v_flag & VSYSTEM)
1490 		strcat(buf, "|VSYSTEM");
1491 	if (vp->v_flag & VXLOCK)
1492 		strcat(buf, "|VXLOCK");
1493 	if (vp->v_flag & VXWANT)
1494 		strcat(buf, "|VXWANT");
1495 	if (vp->v_flag & VBWAIT)
1496 		strcat(buf, "|VBWAIT");
1497 	if (vp->v_flag & VALIASED)
1498 		strcat(buf, "|VALIASED");
1499 	if (buf[0] != '\0')
1500 		printf(" flags (%s)", &buf[1]);
1501 	if (vp->v_data == NULL) {
1502 		printf("\n");
1503 	} else {
1504 		printf("\n\t");
1505 		VOP_PRINT(vp);
1506 	}
1507 }
1508 
1509 #ifdef DEBUG
1510 /*
1511  * List all of the locked vnodes in the system.
1512  * Called when debugging the kernel.
1513  */
1514 void
1515 printlockedvnodes()
1516 {
1517 	struct mount *mp, *nmp;
1518 	struct vnode *vp;
1519 
1520 	printf("Locked vnodes\n");
1521 	simple_lock(&mountlist_slock);
1522 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1523 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1524 			nmp = mp->mnt_list.cqe_next;
1525 			continue;
1526 		}
1527 		for (vp = mp->mnt_vnodelist.lh_first;
1528 		     vp != NULL;
1529 		     vp = vp->v_mntvnodes.le_next) {
1530 			if (VOP_ISLOCKED(vp))
1531 				vprint((char *)0, vp);
1532 		}
1533 		simple_lock(&mountlist_slock);
1534 		nmp = mp->mnt_list.cqe_next;
1535 		vfs_unbusy(mp);
1536 	}
1537 	simple_unlock(&mountlist_slock);
1538 }
1539 #endif
1540 
1541 extern const char *mountcompatnames[];
1542 extern const int nmountcompatnames;
1543 
1544 /*
1545  * Top level filesystem related information gathering.
1546  */
1547 int
1548 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1549 	int *name;
1550 	u_int namelen;
1551 	void *oldp;
1552 	size_t *oldlenp;
1553 	void *newp;
1554 	size_t newlen;
1555 	struct proc *p;
1556 {
1557 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1558 	struct vfsconf vfc;
1559 #endif
1560 	struct vfsops *vfsp;
1561 
1562 	/* all sysctl names at this level are at least name and field */
1563 	if (namelen < 2)
1564 		return (ENOTDIR);		/* overloaded */
1565 
1566 	/* Not generic: goes to file system. */
1567 	if (name[0] != VFS_GENERIC) {
1568 		if (name[0] >= nmountcompatnames || name[0] < 0 ||
1569 		    mountcompatnames[name[0]] == NULL)
1570 			return (EOPNOTSUPP);
1571 		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
1572 		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
1573 			return (EOPNOTSUPP);
1574 		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
1575 		    oldp, oldlenp, newp, newlen, p));
1576 	}
1577 
1578 	/* The rest are generic vfs sysctls. */
1579 	switch (name[1]) {
1580 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
1581 	case VFS_MAXTYPENUM:
1582 		/*
1583 		 * Provided for 4.4BSD-Lite2 compatibility.
1584 		 */
1585 		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
1586 	case VFS_CONF:
1587 		/*
1588 		 * Special: a node, next is a file system name.
1589 		 * Provided for 4.4BSD-Lite2 compatibility.
1590 		 */
1591 		if (namelen < 3)
1592 			return (ENOTDIR);	/* overloaded */
1593 		if (name[2] >= nmountcompatnames || name[2] < 0 ||
1594 		    mountcompatnames[name[2]] == NULL)
1595 			return (EOPNOTSUPP);
1596 		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
1597 		if (vfsp == NULL)
1598 			return (EOPNOTSUPP);
1599 		vfc.vfc_vfsops = vfsp;
1600 		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
1601 		vfc.vfc_typenum = name[2];
1602 		vfc.vfc_refcount = vfsp->vfs_refcount;
1603 		vfc.vfc_flags = 0;
1604 		vfc.vfc_mountroot = vfsp->vfs_mountroot;
1605 		vfc.vfc_next = NULL;
1606 		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
1607 		    sizeof(struct vfsconf)));
1608 #endif
1609 	default:
1610 		break;
1611 	}
1612 	return (EOPNOTSUPP);
1613 }
1614 
1615 int kinfo_vdebug = 1;
1616 int kinfo_vgetfailed;
1617 #define KINFO_VNODESLOP	10
1618 /*
1619  * Dump vnode list (via sysctl).
1620  * Copyout address of vnode followed by vnode.
1621  */
1622 /* ARGSUSED */
1623 int
1624 sysctl_vnode(where, sizep, p)
1625 	char *where;
1626 	size_t *sizep;
1627 	struct proc *p;
1628 {
1629 	struct mount *mp, *nmp;
1630 	struct vnode *nvp, *vp;
1631 	char *bp = where, *savebp;
1632 	char *ewhere;
1633 	int error;
1634 
1635 #define VPTRSZ	sizeof(struct vnode *)
1636 #define VNODESZ	sizeof(struct vnode)
1637 	if (where == NULL) {
1638 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
1639 		return (0);
1640 	}
1641 	ewhere = where + *sizep;
1642 
1643 	simple_lock(&mountlist_slock);
1644 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1645 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
1646 			nmp = mp->mnt_list.cqe_next;
1647 			continue;
1648 		}
1649 		savebp = bp;
1650 again:
1651 		simple_lock(&mntvnode_slock);
1652 		for (vp = mp->mnt_vnodelist.lh_first;
1653 		     vp != NULL;
1654 		     vp = nvp) {
1655 			/*
1656 			 * Check that the vp is still associated with
1657 			 * this filesystem.  RACE: could have been
1658 			 * recycled onto the same filesystem.
1659 			 */
1660 			if (vp->v_mount != mp) {
1661 				simple_unlock(&mntvnode_slock);
1662 				if (kinfo_vdebug)
1663 					printf("kinfo: vp changed\n");
1664 				bp = savebp;
1665 				goto again;
1666 			}
1667 			nvp = vp->v_mntvnodes.le_next;
1668 			if (bp + VPTRSZ + VNODESZ > ewhere) {
1669 				simple_unlock(&mntvnode_slock);
1670 				*sizep = bp - where;
1671 				return (ENOMEM);
1672 			}
1673 			simple_unlock(&mntvnode_slock);
1674 			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
1675 			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
1676 				return (error);
1677 			bp += VPTRSZ + VNODESZ;
1678 			simple_lock(&mntvnode_slock);
1679 		}
1680 		simple_unlock(&mntvnode_slock);
1681 		simple_lock(&mountlist_slock);
1682 		nmp = mp->mnt_list.cqe_next;
1683 		vfs_unbusy(mp);
1684 	}
1685 	simple_unlock(&mountlist_slock);
1686 
1687 	*sizep = bp - where;
1688 	return (0);
1689 }
1690 
1691 /*
1692  * Check to see if a filesystem is mounted on a block device.
1693  */
1694 int
1695 vfs_mountedon(vp)
1696 	struct vnode *vp;
1697 {
1698 	struct vnode *vq;
1699 	int error = 0;
1700 
1701 	if (vp->v_specflags & SI_MOUNTEDON)
1702 		return (EBUSY);
1703 	if (vp->v_flag & VALIASED) {
1704 		simple_lock(&spechash_slock);
1705 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1706 			if (vq->v_rdev != vp->v_rdev ||
1707 			    vq->v_type != vp->v_type)
1708 				continue;
1709 			if (vq->v_specflags & SI_MOUNTEDON) {
1710 				error = EBUSY;
1711 				break;
1712 			}
1713 		}
1714 		simple_unlock(&spechash_slock);
1715 	}
1716 	return (error);
1717 }
1718 
1719 /*
1720  * Build hash lists of net addresses and hang them off the mount point.
1721  * Called by ufs_mount() to set up the lists of export addresses.
1722  */
1723 static int
1724 vfs_hang_addrlist(mp, nep, argp)
1725 	struct mount *mp;
1726 	struct netexport *nep;
1727 	struct export_args *argp;
1728 {
1729 	register struct netcred *np, *enp;
1730 	register struct radix_node_head *rnh;
1731 	register int i;
1732 	struct radix_node *rn;
1733 	struct sockaddr *saddr, *smask = 0;
1734 	struct domain *dom;
1735 	int error;
1736 
1737 	if (argp->ex_addrlen == 0) {
1738 		if (mp->mnt_flag & MNT_DEFEXPORTED)
1739 			return (EPERM);
1740 		np = &nep->ne_defexported;
1741 		np->netc_exflags = argp->ex_flags;
1742 		np->netc_anon = argp->ex_anon;
1743 		np->netc_anon.cr_ref = 1;
1744 		mp->mnt_flag |= MNT_DEFEXPORTED;
1745 		return (0);
1746 	}
1747 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1748 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
1749 	memset((caddr_t)np, 0, i);
1750 	saddr = (struct sockaddr *)(np + 1);
1751 	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
1752 	if (error)
1753 		goto out;
1754 	if (saddr->sa_len > argp->ex_addrlen)
1755 		saddr->sa_len = argp->ex_addrlen;
1756 	if (argp->ex_masklen) {
1757 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
1758 		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
1759 		if (error)
1760 			goto out;
1761 		if (smask->sa_len > argp->ex_masklen)
1762 			smask->sa_len = argp->ex_masklen;
1763 	}
1764 	i = saddr->sa_family;
1765 	if ((rnh = nep->ne_rtable[i]) == 0) {
1766 		/*
1767 		 * It seems silly to initialize every AF when most are not
1768 		 * used, so do it on demand here.
1769 		 */
1770 		for (dom = domains; dom; dom = dom->dom_next)
1771 			if (dom->dom_family == i && dom->dom_rtattach) {
1772 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
1773 					dom->dom_rtoffset);
1774 				break;
1775 			}
1776 		if ((rnh = nep->ne_rtable[i]) == 0) {
1777 			error = ENOBUFS;
1778 			goto out;
1779 		}
1780 	}
1781 	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
1782 		np->netc_rnodes);
1783 	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
1784 		if (rn == 0) {
1785 			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
1786 				smask, rnh);
1787 			if (enp == 0) {
1788 				error = EPERM;
1789 				goto out;
1790 			}
1791 		} else
1792 			enp = (struct netcred *)rn;
1793 
1794 		if (enp->netc_exflags != argp->ex_flags ||
1795 		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
1796 		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
1797 		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
1798 		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
1799 			enp->netc_anon.cr_ngroups))
1800 				error = EPERM;
1801 		else
1802 			error = 0;
1803 		goto out;
1804 	}
1805 	np->netc_exflags = argp->ex_flags;
1806 	np->netc_anon = argp->ex_anon;
1807 	np->netc_anon.cr_ref = 1;
1808 	return (0);
1809 out:
1810 	free(np, M_NETADDR);
1811 	return (error);
1812 }
1813 
1814 /* ARGSUSED */
1815 static int
1816 vfs_free_netcred(rn, w)
1817 	struct radix_node *rn;
1818 	void *w;
1819 {
1820 	register struct radix_node_head *rnh = (struct radix_node_head *)w;
1821 
1822 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
1823 	free((caddr_t)rn, M_NETADDR);
1824 	return (0);
1825 }
1826 
1827 /*
1828  * Free the net address hash lists that are hanging off the mount points.
1829  */
1830 static void
1831 vfs_free_addrlist(nep)
1832 	struct netexport *nep;
1833 {
1834 	register int i;
1835 	register struct radix_node_head *rnh;
1836 
1837 	for (i = 0; i <= AF_MAX; i++)
1838 		if ((rnh = nep->ne_rtable[i]) != NULL) {
1839 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
1840 			free((caddr_t)rnh, M_RTABLE);
1841 			nep->ne_rtable[i] = 0;
1842 		}
1843 }
1844 
1845 int
1846 vfs_export(mp, nep, argp)
1847 	struct mount *mp;
1848 	struct netexport *nep;
1849 	struct export_args *argp;
1850 {
1851 	int error;
1852 
1853 	if (argp->ex_flags & MNT_DELEXPORT) {
1854 		if (mp->mnt_flag & MNT_EXPUBLIC) {
1855 			vfs_setpublicfs(NULL, NULL, NULL);
1856 			mp->mnt_flag &= ~MNT_EXPUBLIC;
1857 		}
1858 		vfs_free_addrlist(nep);
1859 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1860 	}
1861 	if (argp->ex_flags & MNT_EXPORTED) {
1862 		if (argp->ex_flags & MNT_EXPUBLIC) {
1863 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
1864 				return (error);
1865 			mp->mnt_flag |= MNT_EXPUBLIC;
1866 		}
1867 		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
1868 			return (error);
1869 		mp->mnt_flag |= MNT_EXPORTED;
1870 	}
1871 	return (0);
1872 }
1873 
1874 /*
1875  * Set the publicly exported filesystem (WebNFS). Currently, only
1876  * one public filesystem is possible in the spec (RFC 2054 and 2055).
1877  */
1878 int
1879 vfs_setpublicfs(mp, nep, argp)
1880 	struct mount *mp;
1881 	struct netexport *nep;
1882 	struct export_args *argp;
1883 {
1884 	int error;
1885 	struct vnode *rvp;
1886 	char *cp;
1887 
1888 	/*
1889 	 * mp == NULL -> invalidate the current info, the FS is
1890 	 * no longer exported. May be called from either vfs_export
1891 	 * or unmount, so check if it hasn't already been done.
1892 	 */
1893 	if (mp == NULL) {
1894 		if (nfs_pub.np_valid) {
1895 			nfs_pub.np_valid = 0;
1896 			if (nfs_pub.np_index != NULL) {
1897 				FREE(nfs_pub.np_index, M_TEMP);
1898 				nfs_pub.np_index = NULL;
1899 			}
1900 		}
1901 		return (0);
1902 	}
1903 
1904 	/*
1905 	 * Only one allowed at a time.
1906 	 */
1907 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
1908 		return (EBUSY);
1909 
1910 	/*
1911 	 * Get real filehandle for root of exported FS.
1912 	 */
1913 	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
1914 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
1915 
1916 	if ((error = VFS_ROOT(mp, &rvp)))
1917 		return (error);
1918 
1919 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
1920 		return (error);
1921 
1922 	vput(rvp);
1923 
1924 	/*
1925 	 * If an indexfile was specified, pull it in.
1926 	 */
1927 	if (argp->ex_indexfile != NULL) {
1928 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
1929 		    M_WAITOK);
1930 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
1931 		    MAXNAMLEN, (size_t *)0);
1932 		if (!error) {
1933 			/*
1934 			 * Check for illegal filenames.
1935 			 */
1936 			for (cp = nfs_pub.np_index; *cp; cp++) {
1937 				if (*cp == '/') {
1938 					error = EINVAL;
1939 					break;
1940 				}
1941 			}
1942 		}
1943 		if (error) {
1944 			FREE(nfs_pub.np_index, M_TEMP);
1945 			return (error);
1946 		}
1947 	}
1948 
1949 	nfs_pub.np_mount = mp;
1950 	nfs_pub.np_valid = 1;
1951 	return (0);
1952 }
1953 
1954 struct netcred *
1955 vfs_export_lookup(mp, nep, nam)
1956 	register struct mount *mp;
1957 	struct netexport *nep;
1958 	struct mbuf *nam;
1959 {
1960 	register struct netcred *np;
1961 	register struct radix_node_head *rnh;
1962 	struct sockaddr *saddr;
1963 
1964 	np = NULL;
1965 	if (mp->mnt_flag & MNT_EXPORTED) {
1966 		/*
1967 		 * Lookup in the export list first.
1968 		 */
1969 		if (nam != NULL) {
1970 			saddr = mtod(nam, struct sockaddr *);
1971 			rnh = nep->ne_rtable[saddr->sa_family];
1972 			if (rnh != NULL) {
1973 				np = (struct netcred *)
1974 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
1975 							      rnh);
1976 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1977 					np = NULL;
1978 			}
1979 		}
1980 		/*
1981 		 * If no address match, use the default if it exists.
1982 		 */
1983 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1984 			np = &nep->ne_defexported;
1985 	}
1986 	return (np);
1987 }
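
/*
 * A sketch of the intended use of vfs_export_lookup() (e.g. by an NFS
 * server validating a client request; `nam' holds the client address):
 *
 *	np = vfs_export_lookup(mp, nep, nam);
 *	if (np == NULL)
 *		... reject the request: address not exported ...
 *	else
 *		... apply np->netc_exflags and &np->netc_anon ...
 */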
1988 
1989 /*
1990  * Do the usual access checking.
1991  * file_mode, uid and gid are from the vnode in question,
1992  * while acc_mode and cred are from the VOP_ACCESS parameter list.
1993  */
1994 int
1995 vaccess(type, file_mode, uid, gid, acc_mode, cred)
1996 	enum vtype type;
1997 	mode_t file_mode;
1998 	uid_t uid;
1999 	gid_t gid;
2000 	mode_t acc_mode;
2001 	struct ucred *cred;
2002 {
2003 	mode_t mask;
2004 
2005 	/*
2006 	 * Super-user always gets read/write access; execute requires at
2007 	 * least one execute bit set, except on directories (see VDIR test).
2008 	 */
2009 	if (cred->cr_uid == 0) {
2010 		if ((acc_mode & VEXEC) && type != VDIR &&
2011 		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
2012 			return (EACCES);
2013 		return (0);
2014 	}
2015 
2016 	mask = 0;
2017 
2018 	/* Otherwise, check the owner. */
2019 	if (cred->cr_uid == uid) {
2020 		if (acc_mode & VEXEC)
2021 			mask |= S_IXUSR;
2022 		if (acc_mode & VREAD)
2023 			mask |= S_IRUSR;
2024 		if (acc_mode & VWRITE)
2025 			mask |= S_IWUSR;
2026 		return ((file_mode & mask) == mask ? 0 : EACCES);
2027 	}
2028 
2029 	/* Otherwise, check the groups. */
2030 	if (cred->cr_gid == gid || groupmember(gid, cred)) {
2031 		if (acc_mode & VEXEC)
2032 			mask |= S_IXGRP;
2033 		if (acc_mode & VREAD)
2034 			mask |= S_IRGRP;
2035 		if (acc_mode & VWRITE)
2036 			mask |= S_IWGRP;
2037 		return ((file_mode & mask) == mask ? 0 : EACCES);
2038 	}
2039 
2040 	/* Otherwise, check everyone else. */
2041 	if (acc_mode & VEXEC)
2042 		mask |= S_IXOTH;
2043 	if (acc_mode & VREAD)
2044 		mask |= S_IROTH;
2045 	if (acc_mode & VWRITE)
2046 		mask |= S_IWOTH;
2047 	return ((file_mode & mask) == mask ? 0 : EACCES);
2048 }
2049 
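/*
 * Typical use (sketch modeled on the ufs code; the inode field names
 * are assumed, not taken from this tree): a VOP_ACCESS implementation
 * ends by handing the vnode's mode, uid and gid to vaccess():
 *
 *	return (vaccess(vp->v_type, ip->i_mode & ALLPERMS, ip->i_uid,
 *	    ip->i_gid, mode, cred));
 */
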
2050 /*
2051  * Unmount all file systems, in reverse order: later file systems
2052  * may be mounted on top of earlier ones, so unmounting back-to-front
2053  * avoids dependency problems.
2054  */
2055 void
2056 vfs_unmountall()
2057 {
2058 	register struct mount *mp, *nmp;
2059 	int allerror, error;
2060 	struct proc *p = curproc;	/* XXX */
2061 
2062 	/*
2063 	 * Unmounting a file system blocks the requesting process.
2064 	 * However, it's possible for this routine to be called when
2065 	 * curproc is NULL (e.g. panic situation, or via the debugger).
2066 	 * If we get stuck in this situation, just abort, since any
2067 	 * attempts to sleep will fault.
2068 	 */
2069 	if (p == NULL) {
2070 		printf("vfs_unmountall: no context, aborting\n");
2071 		return;
2072 	}
2073 
2074 	for (allerror = 0,
2075 	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
2076 		nmp = mp->mnt_list.cqe_prev;
2077 #ifdef DEBUG
2078 		printf("unmounting %s (%s)...\n",
2079 		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
2080 #endif
2081 		if (vfs_busy(mp, 0, 0))
2082 			continue;
2083 		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
2084 			printf("unmount of %s failed with error %d\n",
2085 			    mp->mnt_stat.f_mntonname, error);
2086 			allerror = 1;
2087 		}
2088 	}
2089 	if (allerror)
2090 		printf("WARNING: some file systems would not unmount\n");
2091 }
2092 
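/*
 * The raw cqh_last/cqe_prev walk above is just a reverse CIRCLEQ
 * traversal; where <sys/queue.h> provides the reverse macros it could
 * be written as (sketch):
 *
 *	for (mp = CIRCLEQ_LAST(&mountlist); mp != (void *)&mountlist;
 *	     mp = nmp) {
 *		nmp = CIRCLEQ_PREV(mp, mnt_list);
 *		...
 *	}
 */
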
2093 /*
2094  * Sync and unmount file systems before shutting down.
2095  */
2096 void
2097 vfs_shutdown()
2098 {
2099 	register struct buf *bp;
2100 	int iter, nbusy;
2101 
2102 	printf("syncing disks... ");
2103 
2104 	/* XXX Should suspend scheduling. */
2105 	(void) spl0();
2106 
2107 	sys_sync(&proc0, (void *)0, (register_t *)0);
2108 
2109 	/* Wait for sync to finish. */
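	/* (At most 20 passes; the DELAY()s below total ~7.6 seconds.) */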
2110 	for (iter = 0; iter < 20; iter++) {
2111 		nbusy = 0;
2112 		for (bp = &buf[nbuf]; --bp >= buf; )
2113 			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
2114 				nbusy++;
2115 		if (nbusy == 0)
2116 			break;
2117 		printf("%d ", nbusy);
2118 		DELAY(40000 * iter);
2119 	}
2120 	if (nbusy) {
2121 #ifdef DEBUG
2122 		printf("giving up\nPrinting vnodes for busy buffers\n");
2123 		for (bp = &buf[nbuf]; --bp >= buf; )
2124 			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
2125 				vprint(NULL, bp->b_vp);
2126 #else
2127 		printf("giving up\n");
2128 #endif
2129 		return;
2130 	} else
2131 		printf("done\n");
2132 
2133 	/*
2134 	 * If we've panic'd, don't make the situation potentially
2135 	 * worse by unmounting the file systems.
2136 	 */
2137 	if (panicstr != NULL)
2138 		return;
2139 
2140 	/* Release inodes held by texts before update. */
2141 #ifdef notdef
2142 	vnshutdown();
2143 #endif
2144 	/* Unmount file systems. */
2145 	vfs_unmountall();
2146 }
2147 
2148 /*
2149  * Mount the root file system.  If the operator didn't specify a
2150  * file system to use, try all possible file systems until one
2151  * succeeds.
2152  */
2153 int
2154 vfs_mountroot()
2155 {
2156 	extern int (*mountroot) __P((void));
2157 	struct vfsops *v;
2158 
2159 	if (root_device == NULL)
2160 		panic("vfs_mountroot: root device unknown");
2161 
2162 	switch (root_device->dv_class) {
2163 	case DV_IFNET:
2164 		if (rootdev != NODEV)
2165 			panic("vfs_mountroot: rootdev set for DV_IFNET");
2166 		break;
2167 
2168 	case DV_DISK:
2169 		if (rootdev == NODEV)
2170 			panic("vfs_mountroot: rootdev not set for DV_DISK");
2171 		break;
2172 
2173 	default:
2174 		printf("%s: inappropriate for root file system\n",
2175 		    root_device->dv_xname);
2176 		return (ENODEV);
2177 	}
2178 
2179 	/*
2180 	 * If user specified a file system, use it.
2181 	 */
2182 	if (mountroot != NULL)
2183 		return ((*mountroot)());
2184 
2185 	/*
2186 	 * Try each file system currently configured into the kernel.
2187 	 */
2188 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2189 		if (v->vfs_mountroot == NULL)
2190 			continue;
2191 #ifdef DEBUG
2192 		printf("mountroot: trying %s...\n", v->vfs_name);
2193 #endif
2194 		if ((*v->vfs_mountroot)() == 0) {
2195 			printf("root file system type: %s\n", v->vfs_name);
2196 			break;
2197 		}
2198 	}
2199 
2200 	if (v == NULL) {
2201 		printf("no file system for %s", root_device->dv_xname);
2202 		if (root_device->dv_class == DV_DISK)
2203 			printf(" (dev 0x%x)", rootdev);
2204 		printf("\n");
2205 		return (EFTYPE);
2206 	}
2207 	return (0);
2208 }
2209 
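#ifdef notdef
/*
 * Sketch only: machine-dependent or config-generated code can force a
 * particular root file system by initializing the `mountroot' hook
 * before vfs_mountroot() runs, e.g. to the ffs root-mount routine:
 */
extern int ffs_mountroot __P((void));
int (*mountroot) __P((void)) = ffs_mountroot;
#endif
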
2210 /*
2211  * Given a file system name, look up the vfsops for that
2212  * file system, or return NULL if the file system isn't present
2213  * in the kernel.
2214  */
2215 struct vfsops *
2216 vfs_getopsbyname(name)
2217 	const char *name;
2218 {
2219 	struct vfsops *v;
2220 
2221 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2222 		if (strcmp(v->vfs_name, name) == 0)
2223 			break;
2224 	}
2225 
2226 	return (v);
2227 }
2228 
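#ifdef notdef
	/*
	 * Illustrative use only ("fstypename" stands for the caller's
	 * file system name argument): the mount path resolves the name
	 * to a vfsops before going any further.
	 */
	struct vfsops *vfsp;

	if ((vfsp = vfs_getopsbyname(fstypename)) == NULL)
		return (ENODEV);	/* not configured into the kernel */
#endif
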
2229 /*
2230  * Establish a file system and initialize it.
2231  */
2232 int
2233 vfs_attach(vfs)
2234 	struct vfsops *vfs;
2235 {
2236 	struct vfsops *v;
2237 	int error = 0;
2238 
2240 	/*
2241 	 * Make sure this file system doesn't already exist.
2242 	 */
2243 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2244 		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
2245 			error = EEXIST;
2246 			goto out;
2247 		}
2248 	}
2249 
2250 	/*
2251 	 * Initialize the vnode operations for this file system.
2252 	 */
2253 	vfs_opv_init(vfs->vfs_opv_descs);
2254 
2255 	/*
2256 	 * Now initialize the file system itself.
2257 	 */
2258 	(*vfs->vfs_init)();
2259 
2260 	/*
2261 	 * ...and link it into the kernel's list.
2262 	 */
2263 	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
2264 
2265 	/*
2266 	 * Sanity: make sure the reference count is 0.
2267 	 */
2268 	vfs->vfs_refcount = 0;
2269 
2270  out:
2271 	return (error);
2272 }
2273 
2274 /*
2275  * Remove a file system from the kernel.
2276  */
2277 int
2278 vfs_detach(vfs)
2279 	struct vfsops *vfs;
2280 {
2281 	struct vfsops *v;
2282 
2283 	/*
2284 	 * Make sure no one is using the filesystem.
2285 	 */
2286 	if (vfs->vfs_refcount != 0)
2287 		return (EBUSY);
2288 
2289 	/*
2290 	 * ...and remove it from the kernel's list.
2291 	 */
2292 	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
2293 		if (v == vfs) {
2294 			LIST_REMOVE(v, vfs_list);
2295 			break;
2296 		}
2297 	}
2298 
2299 	if (v == NULL)
2300 		return (ESRCH);
2301 
2302 	/*
2303 	 * Free the vnode operations vector.
2304 	 */
2305 	vfs_opv_free(vfs->vfs_opv_descs);
2306 	return (0);
2307 }
2308
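#ifdef notdef
/*
 * Sketch of the attach/detach pairing a loadable file system module
 * would follow; "example_vfsops" is a hypothetical vfsops supplied by
 * the module itself.
 */
extern struct vfsops example_vfsops;

int
example_fs_load()
{

	return (vfs_attach(&example_vfsops));
}

int
example_fs_unload()
{

	return (vfs_detach(&example_vfsops));
}
#endif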