xref: /csrg-svn/sys/kern/vfs_subr.c (revision 69605)
137488Smckusick /*
263180Sbostic  * Copyright (c) 1989, 1993
363180Sbostic  *	The Regents of the University of California.  All rights reserved.
465771Sbostic  * (c) UNIX System Laboratories, Inc.
565771Sbostic  * All or some portions of this file are derived from material licensed
665771Sbostic  * to the University of California by American Telephone and Telegraph
765771Sbostic  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
865771Sbostic  * the permission of UNIX System Laboratories, Inc.
937488Smckusick  *
1044458Sbostic  * %sccs.include.redist.c%
1137488Smckusick  *
12*69605Smckusick  *	@(#)vfs_subr.c	8.30 (Berkeley) 05/22/95
1337488Smckusick  */
1437488Smckusick 
1537488Smckusick /*
1637488Smckusick  * External virtual filesystem routines
1737488Smckusick  */
1837488Smckusick 
1951460Sbostic #include <sys/param.h>
2053829Spendry #include <sys/systm.h>
2151460Sbostic #include <sys/proc.h>
2251460Sbostic #include <sys/mount.h>
2351460Sbostic #include <sys/time.h>
2451460Sbostic #include <sys/vnode.h>
2552415Smckusick #include <sys/stat.h>
2651460Sbostic #include <sys/namei.h>
2751460Sbostic #include <sys/ucred.h>
2851460Sbostic #include <sys/buf.h>
2951460Sbostic #include <sys/errno.h>
3051460Sbostic #include <sys/malloc.h>
3165679Shibler #include <sys/domain.h>
3265679Shibler #include <sys/mbuf.h>
3337488Smckusick 
3460930Smckusick #include <vm/vm.h>
3560930Smckusick #include <sys/sysctl.h>
3660930Smckusick 
3755050Spendry #include <miscfs/specfs/specdev.h>
3855050Spendry 
/*
 * Translation tables between inode format (IFMT) codes and vnode types.
 * iftovt_tab is indexed by the 16 possible IFMT values (presumably
 * mode >> 12 as used by the IFTOVT macro -- confirm against sys/vnode.h);
 * vttoif_tab maps each enum vtype back to its S_IF* mode bits.
 */
3952415Smckusick enum vtype iftovt_tab[16] = {
4052415Smckusick 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
4152415Smckusick 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
4252415Smckusick };
4352415Smckusick int	vttoif_tab[9] = {
4452415Smckusick 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
4552415Smckusick 	S_IFSOCK, S_IFIFO, S_IFMT,
4652415Smckusick };
4752415Smckusick 
4837488Smckusick /*
4956608Smckusick  * Insq/Remq for the vnode usage lists.
5056608Smckusick  */
/*
 * Insert/remove a buffer on a vnode's clean or dirty buffer list.
 * bufremvn leaves b_vnbufs.le_next set to NOLIST so that brelvp()
 * and reassignbuf() can tell the buffer is on no vnode list at all.
 */
5165260Smckusick #define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
5268319Scgd #define	bufremvn(bp) {							\
5368319Scgd 	LIST_REMOVE(bp, b_vnbufs);					\
5468319Scgd 	(bp)->b_vnbufs.le_next = NOLIST;				\
5565260Smckusick }
5665260Smckusick TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
5765260Smckusick struct mntlist mountlist;			/* mounted filesystem list */
/*
 * Simple locks protecting the global lists above and related state;
 * the static ones are private to this file.
 */
5869578Smckusick struct simplelock mountlist_slock;
5969408Smckusick static struct simplelock mntid_slock;
6069408Smckusick struct simplelock mntvnode_slock;
6169408Smckusick static struct simplelock spechash_slock;
6269408Smckusick static struct simplelock vnode_free_list_slock;
6365260Smckusick 
6456608Smckusick /*
6565260Smckusick  * Initialize the vnode management data structures.
6637488Smckusick  */
6768319Scgd void
6865260Smckusick vntblinit()
6937488Smckusick {
7037488Smckusick 
7169408Smckusick 	simple_lock_init(&mntvnode_slock);
7269408Smckusick 	simple_lock_init(&mntid_slock);
7369408Smckusick 	simple_lock_init(&spechash_slock);
7465260Smckusick 	TAILQ_INIT(&vnode_free_list);
7569408Smckusick 	simple_lock_init(&vnode_free_list_slock);
7669325Smckusick 	CIRCLEQ_INIT(&mountlist);
7737488Smckusick }
7837488Smckusick 
7937488Smckusick /*
8069578Smckusick  * Mark a mount point as busy. Used to synchronize access and to delay
8169578Smckusick  * unmounting. Interlock is not released on failure.
8237488Smckusick  */
8368319Scgd int
8469578Smckusick vfs_busy(mp, flags, interlkp, p)
8569578Smckusick 	struct mount *mp;
8669578Smckusick 	int flags;
8769578Smckusick 	struct simplelock *interlkp;
8869578Smckusick 	struct proc *p;
8937488Smckusick {
9069578Smckusick 	int lkflags;
9137488Smckusick 
	/*
	 * If an unmount is already under way, do not start a new busy
	 * reference.  Either fail immediately (LK_NOWAIT) or wait for
	 * the unmount to finish and then report that the mount is gone.
	 * The caller's interlock, if supplied, is dropped across the
	 * sleep and reacquired before returning (per the header comment,
	 * it is NOT released on this failure path's return).
	 */
9269578Smckusick 	if (mp->mnt_flag & MNT_UNMOUNT) {
9369578Smckusick 		if (flags & LK_NOWAIT)
9469578Smckusick 			return (ENOENT);
9541400Smckusick 		mp->mnt_flag |= MNT_MWAIT;
96*69605Smckusick 		if (interlkp)
97*69605Smckusick 			simple_unlock(interlkp);
98*69605Smckusick 		/*
99*69605Smckusick 		 * Since all busy locks are shared except the exclusive
100*69605Smckusick 		 * lock granted when unmounting, the only place that a
101*69605Smckusick 		 * wakeup needs to be done is at the release of the
102*69605Smckusick 		 * exclusive lock at the end of dounmount.
103*69605Smckusick 		 */
10469578Smckusick 		sleep((caddr_t)mp, PVFS);
105*69605Smckusick 		if (interlkp)
106*69605Smckusick 			simple_lock(interlkp);
10769578Smckusick 		return (ENOENT);
10839045Smckusick 	}
	/*
	 * Take a shared busy lock on the mount; lockmgr atomically
	 * releases the caller's interlock when LK_INTERLOCK is set.
	 * A shared request can only fail here in unexpected ways,
	 * hence the panic rather than an error return.
	 */
10969578Smckusick 	lkflags = LK_SHARED;
11069578Smckusick 	if (interlkp)
11169578Smckusick 		lkflags |= LK_INTERLOCK;
11269578Smckusick 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
11369578Smckusick 		panic("vfs_busy: unexpected lock failure");
11437488Smckusick 	return (0);
11537488Smckusick }
11637488Smckusick 
11737488Smckusick /*
11841300Smckusick  * Free a busy filesystem.
11941300Smckusick  */
12068319Scgd void
12169578Smckusick vfs_unbusy(mp, p)
12269578Smckusick 	struct mount *mp;
12369578Smckusick 	struct proc *p;
12441300Smckusick {
12541300Smckusick 
12669578Smckusick 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
12741300Smckusick }
12841300Smckusick 
12941300Smckusick /*
13069378Smckusick  * Lookup a filesystem type, and if found allocate and initialize
13169378Smckusick  * a mount structure for it.
13269378Smckusick  *
13369378Smckusick  * Devname is usually updated by mount(8) after booting.
13469378Smckusick  */
13569378Smckusick int
13669378Smckusick vfs_rootmountalloc(fstypename, devname, mpp)
13769378Smckusick 	char *fstypename;
13869378Smckusick 	char *devname;
13969378Smckusick 	struct mount **mpp;
14069378Smckusick {
14169578Smckusick 	struct proc *p = curproc;	/* XXX */
14269378Smckusick 	struct vfsconf *vfsp;
14369378Smckusick 	struct mount *mp;
14469378Smckusick 
14569378Smckusick 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
14669378Smckusick 		if (!strcmp(vfsp->vfc_name, fstypename))
14769378Smckusick 			break;
14869378Smckusick 	if (vfsp == NULL)
14969378Smckusick 		return (ENODEV);
15069378Smckusick 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
15169378Smckusick 	bzero((char *)mp, (u_long)sizeof(struct mount));
15269578Smckusick 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
15369578Smckusick 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
15469378Smckusick 	LIST_INIT(&mp->mnt_vnodelist);
15569378Smckusick 	mp->mnt_vfc = vfsp;
15669378Smckusick 	mp->mnt_op = vfsp->vfc_vfsops;
15769378Smckusick 	mp->mnt_flag = MNT_RDONLY;
15869378Smckusick 	mp->mnt_vnodecovered = NULLVP;
15969378Smckusick 	vfsp->vfc_refcount++;
16069378Smckusick 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
16169378Smckusick 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
16269378Smckusick 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
16369378Smckusick 	mp->mnt_stat.f_mntonname[0] = '/';
16469378Smckusick 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
16569378Smckusick 	*mpp = mp;
16669378Smckusick 	return (0);
16769378Smckusick }
16869378Smckusick 
16969378Smckusick /*
17069378Smckusick  * Find an appropriate filesystem to use for the root. If a filesystem
17169378Smckusick  * has not been preselected, walk through the list of known filesystems
17269378Smckusick  * trying those that have mountroot routines, and try them until one
17369378Smckusick  * works or we have tried them all.
17469378Smckusick  */
17569378Smckusick int
17669378Smckusick vfs_mountroot()
17769378Smckusick {
17869378Smckusick 	struct vfsconf *vfsp;
17969378Smckusick 	extern int (*mountroot)(void);
18069378Smckusick 	int error;
18169378Smckusick 
18269378Smckusick 	if (mountroot != NULL)
18369537Smckusick 		return ((*mountroot)());
18469378Smckusick 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
18569378Smckusick 		if (vfsp->vfc_mountroot == NULL)
18669378Smckusick 			continue;
18769378Smckusick 		if ((error = (*vfsp->vfc_mountroot)()) == 0)
18869378Smckusick 			return (0);
18969378Smckusick 		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
19069378Smckusick 	}
19169378Smckusick 	return (ENODEV);
19269378Smckusick }
19369378Smckusick 
19469378Smckusick /*
19537488Smckusick  * Lookup a mount point by filesystem identifier.
19637488Smckusick  */
19737488Smckusick struct mount *
19868659Smckusick vfs_getvfs(fsid)
19937488Smckusick 	fsid_t *fsid;
20037488Smckusick {
20137488Smckusick 	register struct mount *mp;
20237488Smckusick 
20369578Smckusick 	simple_lock(&mountlist_slock);
20469325Smckusick 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
20569325Smckusick 	     mp = mp->mnt_list.cqe_next) {
20641400Smckusick 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
20769578Smckusick 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
20869578Smckusick 			simple_unlock(&mountlist_slock);
20938288Smckusick 			return (mp);
21069578Smckusick 		}
21165260Smckusick 	}
21269578Smckusick 	simple_unlock(&mountlist_slock);
21338288Smckusick 	return ((struct mount *)0);
21437488Smckusick }
21537488Smckusick 
21637488Smckusick /*
21753829Spendry  * Get a new unique fsid
21853829Spendry  */
21953829Spendry void
22068659Smckusick vfs_getnewfsid(mp)
22153829Spendry 	struct mount *mp;
22253829Spendry {
22353829Spendry static u_short xxxfs_mntid;
22453829Spendry 
22553829Spendry 	fsid_t tfsid;
22668659Smckusick 	int mtype;
22753829Spendry 
	/*
	 * val[0] is built from a fake device number (filesystem types
	 * are numbered past the real block devices) plus a per-call
	 * minor counter; val[1] is the filesystem type itself.
	 * xxxfs_mntid persists across calls and is never allowed to
	 * be zero once a fsid has been handed out.
	 */
22869408Smckusick 	simple_lock(&mntid_slock);
22968659Smckusick 	mtype = mp->mnt_vfc->vfc_typenum;
23065507Spendry 	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
23153829Spendry 	mp->mnt_stat.f_fsid.val[1] = mtype;
23253829Spendry 	if (xxxfs_mntid == 0)
23353829Spendry 		++xxxfs_mntid;
23465507Spendry 	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
23553829Spendry 	tfsid.val[1] = mtype;
	/* Keep bumping the candidate until no mounted fs already uses it. */
23669325Smckusick 	if (mountlist.cqh_first != (void *)&mountlist) {
23768659Smckusick 		while (vfs_getvfs(&tfsid)) {
23853936Spendry 			tfsid.val[0]++;
23953936Spendry 			xxxfs_mntid++;
24053936Spendry 		}
24153829Spendry 	}
24253829Spendry 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
24369408Smckusick 	simple_unlock(&mntid_slock);
24453829Spendry }
24553829Spendry 
24653829Spendry /*
24737488Smckusick  * Set vnode attributes to VNOVAL
24837488Smckusick  */
24968319Scgd void
25068319Scgd vattr_null(vap)
25137488Smckusick 	register struct vattr *vap;
25237488Smckusick {
25337488Smckusick 
25437488Smckusick 	vap->va_type = VNON;
25552005Smckusick 	vap->va_size = vap->va_bytes = VNOVAL;
25637488Smckusick 	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
25752005Smckusick 		vap->va_fsid = vap->va_fileid =
25852005Smckusick 		vap->va_blocksize = vap->va_rdev =
25954347Smckusick 		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
26054347Smckusick 		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
26154347Smckusick 		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
26238258Smckusick 		vap->va_flags = vap->va_gen = VNOVAL;
26358548Sbostic 	vap->va_vaflags = 0;
26437488Smckusick }
26538265Smckusick 
26638265Smckusick /*
26739397Smckusick  * Routines having to do with the management of the vnode table.
26839397Smckusick  */
26953547Sheideman extern int (**dead_vnodeop_p)();
/* Forward declarations of the vnode reclamation helpers used below. */
27069408Smckusick static void vclean __P((struct vnode *vp, int flag, struct proc *p));
27169408Smckusick extern void vgonel __P((struct vnode *vp, struct proc *p));
/* Count of vnodes currently allocated (see getnewvnode for its limits). */
27240883Smckusick long numvnodes;
27353493Sheideman extern struct vattr va_null;
27439397Smckusick 
27539397Smckusick /*
27639397Smckusick  * Return the next vnode from the free list.
27739397Smckusick  */
27868319Scgd int
27939397Smckusick getnewvnode(tag, mp, vops, vpp)
28039397Smckusick 	enum vtagtype tag;
28139397Smckusick 	struct mount *mp;
28253495Sheideman 	int (**vops)();
28339397Smckusick 	struct vnode **vpp;
28439397Smckusick {
28569408Smckusick 	struct proc *p = curproc;	/* XXX */
28669408Smckusick 	struct vnode *vp;
28757042Smargo 	int s;
28869408Smckusick 	int cnt;
28939397Smckusick 
29069408Smckusick top:
29169408Smckusick 	simple_lock(&vnode_free_list_slock);
29265260Smckusick 	if ((vnode_free_list.tqh_first == NULL &&
29365260Smckusick 	     numvnodes < 2 * desiredvnodes) ||
29454347Smckusick 	    numvnodes < desiredvnodes) {
29569408Smckusick 		simple_unlock(&vnode_free_list_slock);
29645118Smckusick 		vp = (struct vnode *)malloc((u_long)sizeof *vp,
29745118Smckusick 		    M_VNODE, M_WAITOK);
29840883Smckusick 		bzero((char *)vp, sizeof *vp);
29940883Smckusick 		numvnodes++;
30040883Smckusick 	} else {
30169408Smckusick 		for (vp = vnode_free_list.tqh_first;
30269408Smckusick 				vp != NULLVP; vp = vp->v_freelist.tqe_next) {
30369408Smckusick 			if (simple_lock_try(&vp->v_interlock))
30469408Smckusick 				break;
30569408Smckusick 		}
30669408Smckusick 		/*
30769408Smckusick 		 * Unless this is a bad time of the month, at most
30869408Smckusick 		 * the first NCPUS items on the free list are
30969408Smckusick 		 * locked, so this is close enough to being empty.
31069408Smckusick 		 */
31169408Smckusick 		if (vp == NULLVP) {
31269408Smckusick 			simple_unlock(&vnode_free_list_slock);
31340883Smckusick 			tablefull("vnode");
31440883Smckusick 			*vpp = 0;
31540883Smckusick 			return (ENFILE);
31640883Smckusick 		}
31740883Smckusick 		if (vp->v_usecount)
31840883Smckusick 			panic("free vnode isn't");
31965260Smckusick 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
32065505Smckusick 		/* see comment on why 0xdeadb is set at end of vgone (below) */
32165505Smckusick 		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
32269408Smckusick 		simple_unlock(&vnode_free_list_slock);
32352190Smckusick 		vp->v_lease = NULL;
32440883Smckusick 		if (vp->v_type != VBAD)
32569408Smckusick 			vgonel(vp, p);
32669408Smckusick 		else
32769408Smckusick 			simple_unlock(&vp->v_interlock);
32857042Smargo #ifdef DIAGNOSTIC
32952006Smckusick 		if (vp->v_data)
33052006Smckusick 			panic("cleaned vnode isn't");
33157042Smargo 		s = splbio();
33257042Smargo 		if (vp->v_numoutput)
33357042Smargo 			panic("Clean vnode has pending I/O's");
33457042Smargo 		splx(s);
33557042Smargo #endif
33640883Smckusick 		vp->v_flag = 0;
33740883Smckusick 		vp->v_lastr = 0;
33865745Shibler 		vp->v_ralen = 0;
33965745Shibler 		vp->v_maxra = 0;
34057042Smargo 		vp->v_lastw = 0;
34157042Smargo 		vp->v_lasta = 0;
34257042Smargo 		vp->v_cstart = 0;
34357042Smargo 		vp->v_clen = 0;
34440883Smckusick 		vp->v_socket = 0;
34539397Smckusick 	}
34639512Smckusick 	vp->v_type = VNON;
34739397Smckusick 	cache_purge(vp);
34839397Smckusick 	vp->v_tag = tag;
34939433Smckusick 	vp->v_op = vops;
35039397Smckusick 	insmntque(vp, mp);
35139397Smckusick 	*vpp = vp;
35265505Smckusick 	vp->v_usecount = 1;
35365260Smckusick 	vp->v_data = 0;
35439397Smckusick 	return (0);
35539397Smckusick }
35665679Shibler 
35739397Smckusick /*
35839397Smckusick  * Move a vnode from one mount queue to another.
35939397Smckusick  */
36068319Scgd void
36139397Smckusick insmntque(vp, mp)
36269408Smckusick 	struct vnode *vp;
36369408Smckusick 	struct mount *mp;
36439397Smckusick {
36539397Smckusick 
36669408Smckusick 	simple_lock(&mntvnode_slock);
36739397Smckusick 	/*
36839397Smckusick 	 * Delete from old mount point vnode list, if on one.
36939397Smckusick 	 */
37065679Shibler 	if (vp->v_mount != NULL)
37165260Smckusick 		LIST_REMOVE(vp, v_mntvnodes);
37239397Smckusick 	/*
37339397Smckusick 	 * Insert into list of vnodes for the new mount point, if available.
37439397Smckusick 	 */
37569408Smckusick 	if ((vp->v_mount = mp) != NULL)
37669408Smckusick 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
37769408Smckusick 	simple_unlock(&mntvnode_slock);
37839397Smckusick }
37939397Smckusick 
38039397Smckusick /*
38149232Smckusick  * Update outstanding I/O count and do wakeup if requested.
38249232Smckusick  */
38368319Scgd void
38449232Smckusick vwakeup(bp)
38549232Smckusick 	register struct buf *bp;
38649232Smckusick {
38749232Smckusick 	register struct vnode *vp;
38849232Smckusick 
38957810Smckusick 	bp->b_flags &= ~B_WRITEINPROG;
39049232Smckusick 	if (vp = bp->b_vp) {
39168319Scgd 		if (--vp->v_numoutput < 0)
39257042Smargo 			panic("vwakeup: neg numoutput");
39349232Smckusick 		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
39449232Smckusick 			if (vp->v_numoutput < 0)
39568319Scgd 				panic("vwakeup: neg numoutput 2");
39649232Smckusick 			vp->v_flag &= ~VBWAIT;
39749232Smckusick 			wakeup((caddr_t)&vp->v_numoutput);
39849232Smckusick 		}
39949232Smckusick 	}
40049232Smckusick }
40149232Smckusick 
40249232Smckusick /*
40349232Smckusick  * Flush out and invalidate all buffers associated with a vnode.
40449232Smckusick  * Called with the underlying object locked.
40549232Smckusick  */
40654442Smckusick int
40757792Smckusick vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
40849232Smckusick 	register struct vnode *vp;
40956459Smargo 	int flags;
41054442Smckusick 	struct ucred *cred;
41154442Smckusick 	struct proc *p;
41257792Smckusick 	int slpflag, slptimeo;
41349232Smckusick {
41449232Smckusick 	register struct buf *bp;
41549232Smckusick 	struct buf *nbp, *blist;
41654442Smckusick 	int s, error;
41749232Smckusick 
	/*
	 * V_SAVE: push dirty data to disk first; after a successful
	 * fsync no dirty buffers should remain.
	 */
41856459Smargo 	if (flags & V_SAVE) {
41954442Smckusick 		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
42054442Smckusick 			return (error);
42165260Smckusick 		if (vp->v_dirtyblkhd.lh_first != NULL)
42254442Smckusick 			panic("vinvalbuf: dirty bufs");
42354442Smckusick 	}
	/*
	 * Repeatedly pick a buffer list to flush (clean first, then
	 * dirty).  With V_SAVEMETA, buffers with negative logical
	 * block numbers (indirect-block metadata) are skipped.
	 */
42449232Smckusick 	for (;;) {
42565260Smckusick 		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
42656459Smargo 			while (blist && blist->b_lblkno < 0)
42765260Smckusick 				blist = blist->b_vnbufs.le_next;
42868319Scgd 		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
42956608Smckusick 		    (flags & V_SAVEMETA))
43056459Smargo 			while (blist && blist->b_lblkno < 0)
43165260Smckusick 				blist = blist->b_vnbufs.le_next;
43256459Smargo 		if (!blist)
43349232Smckusick 			break;
43456459Smargo 
43549232Smckusick 		for (bp = blist; bp; bp = nbp) {
43665260Smckusick 			nbp = bp->b_vnbufs.le_next;
43756459Smargo 			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
43856459Smargo 				continue;
			/*
			 * A busy buffer belongs to someone else: wait for
			 * it and restart the scan of this list, since the
			 * lists may have changed while we slept.
			 */
43949232Smckusick 			s = splbio();
44049232Smckusick 			if (bp->b_flags & B_BUSY) {
44149232Smckusick 				bp->b_flags |= B_WANTED;
44257792Smckusick 				error = tsleep((caddr_t)bp,
44357792Smckusick 					slpflag | (PRIBIO + 1), "vinvalbuf",
44457792Smckusick 					slptimeo);
44549232Smckusick 				splx(s);
44657792Smckusick 				if (error)
44757792Smckusick 					return (error);
44849232Smckusick 				break;
44949232Smckusick 			}
45049232Smckusick 			bremfree(bp);
45149232Smckusick 			bp->b_flags |= B_BUSY;
45249232Smckusick 			splx(s);
45357792Smckusick 			/*
45457792Smckusick 			 * XXX Since there are no node locks for NFS, I believe
45557792Smckusick 			 * there is a slight chance that a delayed write will
45657792Smckusick 			 * occur while sleeping just above, so check for it.
45757792Smckusick 			 */
45857792Smckusick 			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
45957792Smckusick 				(void) VOP_BWRITE(bp);
46057792Smckusick 				break;
46157792Smckusick 			}
46256459Smargo 			bp->b_flags |= B_INVAL;
46349232Smckusick 			brelse(bp);
46449232Smckusick 		}
46549232Smckusick 	}
	/* Unless metadata was deliberately spared, both lists must be empty. */
46656608Smckusick 	if (!(flags & V_SAVEMETA) &&
46765260Smckusick 	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
46849232Smckusick 		panic("vinvalbuf: flush failed");
46954442Smckusick 	return (0);
47049232Smckusick }
47149232Smckusick 
47249232Smckusick /*
47349232Smckusick  * Associate a buffer with a vnode.
47449232Smckusick  */
47568319Scgd void
47649232Smckusick bgetvp(vp, bp)
47749232Smckusick 	register struct vnode *vp;
47849232Smckusick 	register struct buf *bp;
47949232Smckusick {
48049232Smckusick 
48149232Smckusick 	if (bp->b_vp)
48249232Smckusick 		panic("bgetvp: not free");
48349232Smckusick 	VHOLD(vp);
48449232Smckusick 	bp->b_vp = vp;
48549232Smckusick 	if (vp->v_type == VBLK || vp->v_type == VCHR)
48649232Smckusick 		bp->b_dev = vp->v_rdev;
48749232Smckusick 	else
48849232Smckusick 		bp->b_dev = NODEV;
48949232Smckusick 	/*
49049232Smckusick 	 * Insert onto list for new vnode.
49149232Smckusick 	 */
49256608Smckusick 	bufinsvn(bp, &vp->v_cleanblkhd);
49349232Smckusick }
49449232Smckusick 
49549232Smckusick /*
49649232Smckusick  * Disassociate a buffer from a vnode.
49749232Smckusick  */
49868319Scgd void
49949232Smckusick brelvp(bp)
50049232Smckusick 	register struct buf *bp;
50149232Smckusick {
50249232Smckusick 	struct vnode *vp;
50349232Smckusick 
50449232Smckusick 	if (bp->b_vp == (struct vnode *) 0)
50549232Smckusick 		panic("brelvp: NULL");
50649232Smckusick 	/*
50749232Smckusick 	 * Delete from old vnode list, if on one.
50849232Smckusick 	 */
50965260Smckusick 	if (bp->b_vnbufs.le_next != NOLIST)
51056608Smckusick 		bufremvn(bp);
51149232Smckusick 	vp = bp->b_vp;
51249232Smckusick 	bp->b_vp = (struct vnode *) 0;
51349232Smckusick 	HOLDRELE(vp);
51449232Smckusick }
51549232Smckusick 
51649232Smckusick /*
51749232Smckusick  * Reassign a buffer from one vnode to another.
51849232Smckusick  * Used to assign file specific control information
51949232Smckusick  * (indirect blocks) to the vnode to which they belong.
52049232Smckusick  */
52168319Scgd void
52249232Smckusick reassignbuf(bp, newvp)
52349232Smckusick 	register struct buf *bp;
52449232Smckusick 	register struct vnode *newvp;
52549232Smckusick {
52665260Smckusick 	register struct buflists *listheadp;
52749232Smckusick 
52852655Smckusick 	if (newvp == NULL) {
52952655Smckusick 		printf("reassignbuf: NULL");
53052655Smckusick 		return;
53152655Smckusick 	}
53249232Smckusick 	/*
53349232Smckusick 	 * Delete from old vnode list, if on one.
53449232Smckusick 	 */
53565260Smckusick 	if (bp->b_vnbufs.le_next != NOLIST)
53656608Smckusick 		bufremvn(bp);
53749232Smckusick 	/*
53849232Smckusick 	 * If dirty, put on list of dirty buffers;
53949232Smckusick 	 * otherwise insert onto list of clean buffers.
54049232Smckusick 	 */
54149232Smckusick 	if (bp->b_flags & B_DELWRI)
54249232Smckusick 		listheadp = &newvp->v_dirtyblkhd;
54349232Smckusick 	else
54449232Smckusick 		listheadp = &newvp->v_cleanblkhd;
54556608Smckusick 	bufinsvn(bp, listheadp);
54649232Smckusick }
54749232Smckusick 
54849232Smckusick /*
54939433Smckusick  * Create a vnode for a block device.
55039433Smckusick  * Used for root filesystem, argdev, and swap areas.
55139433Smckusick  * Also used for memory file system special devices.
55239397Smckusick  */
55368319Scgd int
55439433Smckusick bdevvp(dev, vpp)
55539433Smckusick 	dev_t dev;
55639433Smckusick 	struct vnode **vpp;
55739433Smckusick {
55839433Smckusick 	register struct vnode *vp;
55939433Smckusick 	struct vnode *nvp;
56039433Smckusick 	int error;
56139433Smckusick 
56269378Smckusick 	if (dev == NODEV) {
56369378Smckusick 		*vpp = NULLVP;
56469378Smckusick 		return (ENODEV);
56569378Smckusick 	}
56653547Sheideman 	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
56739433Smckusick 	if (error) {
56868319Scgd 		*vpp = NULLVP;
56939433Smckusick 		return (error);
57039433Smckusick 	}
57139433Smckusick 	vp = nvp;
57239433Smckusick 	vp->v_type = VBLK;
57339615Smckusick 	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
57439433Smckusick 		vput(vp);
57539433Smckusick 		vp = nvp;
57639433Smckusick 	}
57739433Smckusick 	*vpp = vp;
57839433Smckusick 	return (0);
57939433Smckusick }
58039433Smckusick 
58139433Smckusick /*
58239433Smckusick  * Check to see if the new vnode represents a special device
58339433Smckusick  * for which we already have a vnode (either because of
58439433Smckusick  * bdevvp() or because of a different vnode representing
58539433Smckusick  * the same block device). If such an alias exists, deallocate
58639509Smckusick  * the existing contents and return the aliased vnode. The
58739433Smckusick  * caller is responsible for filling it with its new contents.
58839433Smckusick  */
58939433Smckusick struct vnode *
59039615Smckusick checkalias(nvp, nvp_rdev, mp)
59139433Smckusick 	register struct vnode *nvp;
59239615Smckusick 	dev_t nvp_rdev;
59339433Smckusick 	struct mount *mp;
59439433Smckusick {
59569408Smckusick 	struct proc *p = curproc;	/* XXX */
59669408Smckusick 	struct vnode *vp;
59739615Smckusick 	struct vnode **vpp;
59839433Smckusick 
	/* Only device vnodes can have aliases. */
59939433Smckusick 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
60041400Smckusick 		return (NULLVP);
60139615Smckusick 
	/*
	 * Scan the device hash chain for a vnode of the same type and
	 * device number.  The chain is re-scanned from the top after
	 * any operation that may have slept.
	 */
60239615Smckusick 	vpp = &speclisth[SPECHASH(nvp_rdev)];
60339433Smckusick loop:
60469408Smckusick 	simple_lock(&spechash_slock);
60539615Smckusick 	for (vp = *vpp; vp; vp = vp->v_specnext) {
60639615Smckusick 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
60739433Smckusick 			continue;
60839615Smckusick 		/*
60939615Smckusick 		 * Alias, but not in use, so flush it out.
61039615Smckusick 		 */
61169408Smckusick 		simple_lock(&vp->v_interlock);
61239809Smckusick 		if (vp->v_usecount == 0) {
61369408Smckusick 			simple_unlock(&spechash_slock);
61469408Smckusick 			vgonel(vp, p);
61539615Smckusick 			goto loop;
61639615Smckusick 		}
		/* vget consumes the interlock; failure means retry. */
61769408Smckusick 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
61869408Smckusick 			simple_unlock(&spechash_slock);
61939633Smckusick 			goto loop;
62069408Smckusick 		}
62139433Smckusick 		break;
62239433Smckusick 	}
	/*
	 * No usable alias (none found, or the one found has a real
	 * identity): give nvp its own specinfo, link it on the hash
	 * chain, and mark both vnodes VALIASED if an in-use alias
	 * exists.  Return NULLVP so the caller keeps using nvp.
	 */
62339615Smckusick 	if (vp == NULL || vp->v_tag != VT_NON) {
62439615Smckusick 		MALLOC(nvp->v_specinfo, struct specinfo *,
62539615Smckusick 			sizeof(struct specinfo), M_VNODE, M_WAITOK);
62639615Smckusick 		nvp->v_rdev = nvp_rdev;
62739809Smckusick 		nvp->v_hashchain = vpp;
62839615Smckusick 		nvp->v_specnext = *vpp;
62942152Smckusick 		nvp->v_specflags = 0;
63069408Smckusick 		simple_unlock(&spechash_slock);
63139615Smckusick 		*vpp = nvp;
63269408Smckusick 		if (vp != NULLVP) {
63340640Smckusick 			nvp->v_flag |= VALIASED;
63440640Smckusick 			vp->v_flag |= VALIASED;
63540640Smckusick 			vput(vp);
63640640Smckusick 		}
63741400Smckusick 		return (NULLVP);
63839433Smckusick 	}
	/*
	 * Found an anonymous (VT_NON) alias: clean it out and hand it
	 * back to the caller, which takes over its identity.
	 */
63969408Smckusick 	simple_unlock(&spechash_slock);
64069408Smckusick 	VOP_UNLOCK(vp, 0, p);
64169408Smckusick 	simple_lock(&vp->v_interlock);
64269408Smckusick 	vclean(vp, 0, p);
64339433Smckusick 	vp->v_op = nvp->v_op;
64439433Smckusick 	vp->v_tag = nvp->v_tag;
64539433Smckusick 	nvp->v_type = VNON;
64639433Smckusick 	insmntque(vp, mp);
64739433Smckusick 	return (vp);
64839433Smckusick }
64939433Smckusick 
65039433Smckusick /*
65139433Smckusick  * Grab a particular vnode from the free list, increment its
65239433Smckusick  * reference count and lock it. The vnode lock bit is set the
65339433Smckusick  * vnode is being eliminated in vgone. The process is awakened
65439433Smckusick  * when the transition is completed, and an error returned to
65539433Smckusick  * indicate that the vnode is no longer usable (possibly having
65639433Smckusick  * been changed to a new file system type).
65739433Smckusick  */
65868319Scgd int
65969408Smckusick vget(vp, flags, p)
66069408Smckusick 	struct vnode *vp;
66169408Smckusick 	int flags;
66269408Smckusick 	struct proc *p;
66339397Smckusick {
66469546Smckusick 	int error;
66539397Smckusick 
66666897Smckusick 	/*
66766897Smckusick 	 * If the vnode is in the process of being cleaned out for
66866897Smckusick 	 * another use, we wait for the cleaning to finish and then
66969408Smckusick 	 * return failure. Cleaning is determined by checking that
67069408Smckusick 	 * the VXLOCK flag is set.
67166897Smckusick 	 */
67269408Smckusick 	if ((flags & LK_INTERLOCK) == 0)
67369408Smckusick 		simple_lock(&vp->v_interlock);
67469408Smckusick 	if (vp->v_flag & VXLOCK) {
67539433Smckusick 		vp->v_flag |= VXWANT;
67669408Smckusick 		simple_unlock(&vp->v_interlock);
67768319Scgd 		tsleep((caddr_t)vp, PINOD, "vget", 0);
67869408Smckusick 		return (ENOENT);
67939433Smckusick 	}
	/* A zero use count means the vnode is on the free list: remove it. */
68069408Smckusick 	if (vp->v_usecount == 0) {
68169408Smckusick 		simple_lock(&vnode_free_list_slock);
68265260Smckusick 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
68369408Smckusick 		simple_unlock(&vnode_free_list_slock);
68469408Smckusick 	}
68559450Smckusick 	vp->v_usecount++;
	/*
	 * If a lock type was requested, acquire it via vn_lock (which
	 * also releases the interlock).  On lock failure the reference
	 * just gained is dropped again before returning the error.
	 */
68669546Smckusick 	if (flags & LK_TYPE_MASK) {
68769546Smckusick 		if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
68869546Smckusick 			vrele(vp);
68969546Smckusick 		return (error);
69069546Smckusick 	}
69169408Smckusick 	simple_unlock(&vp->v_interlock);
69239433Smckusick 	return (0);
69339397Smckusick }
69439397Smckusick 
69539397Smckusick /*
69669408Smckusick  * Stubs to use when there is no locking to be done on the underlying object.
697*69605Smckusick  * A minimal shared lock is necessary to ensure that the underlying object
698*69605Smckusick  * is not revoked while an operation is in progress. So, an active shared
699*69605Smckusick  * count is maintained in an auxillary vnode lock structure.
70039397Smckusick  */
70169408Smckusick int
70269408Smckusick vop_nolock(ap)
70369408Smckusick 	struct vop_lock_args /* {
70469408Smckusick 		struct vnode *a_vp;
70569408Smckusick 		int a_flags;
70669408Smckusick 		struct proc *a_p;
70769408Smckusick 	} */ *ap;
70869408Smckusick {
709*69605Smckusick #ifdef notyet
710*69605Smckusick 	/*
711*69605Smckusick 	 * This code cannot be used until all the non-locking filesystems
712*69605Smckusick 	 * (notably NFS) are converted to properly lock and release nodes.
713*69605Smckusick 	 * Also, certain vnode operations change the locking state within
714*69605Smckusick 	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
715*69605Smckusick 	 * and symlink). Ideally these operations should not change the
716*69605Smckusick 	 * lock state, but should be changed to let the caller of the
717*69605Smckusick 	 * function unlock them. Otherwise all intermediate vnode layers
718*69605Smckusick 	 * (such as union, umapfs, etc) must catch these functions to do
719*69605Smckusick 	 * the necessary locking at their layer. Note that the inactive
720*69605Smckusick 	 * and lookup operations also change their lock state, but this
721*69605Smckusick 	 * cannot be avoided, so these two operations will always need
722*69605Smckusick 	 * to be handled in intermediate layers.
723*69605Smckusick 	 */
72469408Smckusick 	struct vnode *vp = ap->a_vp;
725*69605Smckusick 	int vnflags, flags = ap->a_flags;
72669408Smckusick 
	/*
	 * Allocate the auxillary lock lazily; an initial LK_DRAIN on a
	 * vnode that never had the lock trivially succeeds.
	 */
727*69605Smckusick 	if (vp->v_vnlock == NULL) {
728*69605Smckusick 		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
729*69605Smckusick 			return (0);
730*69605Smckusick 		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
731*69605Smckusick 		    M_VNODE, M_WAITOK);
732*69605Smckusick 		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
733*69605Smckusick 	}
	/*
	 * Map every lock request onto a shared lock (exclusive requests
	 * included), so only revocation (LK_DRAIN) ever excludes users.
	 */
734*69605Smckusick 	switch (flags & LK_TYPE_MASK) {
735*69605Smckusick 	case LK_DRAIN:
736*69605Smckusick 		vnflags = LK_DRAIN;
737*69605Smckusick 		break;
738*69605Smckusick 	case LK_EXCLUSIVE:
739*69605Smckusick 	case LK_SHARED:
740*69605Smckusick 		vnflags = LK_SHARED;
741*69605Smckusick 		break;
742*69605Smckusick 	case LK_UPGRADE:
743*69605Smckusick 	case LK_EXCLUPGRADE:
744*69605Smckusick 	case LK_DOWNGRADE:
745*69605Smckusick 		return (0);
746*69605Smckusick 	case LK_RELEASE:
747*69605Smckusick 	default:
748*69605Smckusick 		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
749*69605Smckusick 	}
750*69605Smckusick 	if (flags & LK_INTERLOCK)
751*69605Smckusick 		vnflags |= LK_INTERLOCK;
752*69605Smckusick 	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
753*69605Smckusick #else /* for now */
75469408Smckusick 	/*
75569408Smckusick 	 * Since we are not using the lock manager, we must clear
75669408Smckusick 	 * the interlock here.
75769408Smckusick 	 */
75869408Smckusick 	if (ap->a_flags & LK_INTERLOCK)
759*69605Smckusick 		simple_unlock(&ap->a_vp->v_interlock);
76069408Smckusick 	return (0);
761*69605Smckusick #endif
76269408Smckusick }
76369408Smckusick 
76469408Smckusick /*
765*69605Smckusick  * Decrement the active use count.
76669408Smckusick  */
76769408Smckusick int
76869408Smckusick vop_nounlock(ap)
76969408Smckusick 	struct vop_unlock_args /* {
77069408Smckusick 		struct vnode *a_vp;
77169408Smckusick 		int a_flags;
77269408Smckusick 		struct proc *a_p;
77369408Smckusick 	} */ *ap;
77469408Smckusick {
775*69605Smckusick 	struct vnode *vp = ap->a_vp;
77669408Smckusick 
777*69605Smckusick 	if (vp->v_vnlock == NULL)
778*69605Smckusick 		return (0);
779*69605Smckusick 	return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
78069408Smckusick }
78169408Smckusick 
78269408Smckusick /*
783*69605Smckusick  * Return whether or not the node is in use.
78469408Smckusick  */
78569408Smckusick int
78669408Smckusick vop_noislocked(ap)
78769408Smckusick 	struct vop_islocked_args /* {
78869408Smckusick 		struct vnode *a_vp;
78969408Smckusick 	} */ *ap;
79069408Smckusick {
791*69605Smckusick 	struct vnode *vp = ap->a_vp;
79269408Smckusick 
793*69605Smckusick 	if (vp->v_vnlock == NULL)
794*69605Smckusick 		return (0);
795*69605Smckusick 	return (lockstatus(vp->v_vnlock));
79669408Smckusick }
79769408Smckusick 
79869408Smckusick /*
79969408Smckusick  * Vnode reference.
80069408Smckusick  */
80168319Scgd void
80268319Scgd vref(vp)
80339397Smckusick 	struct vnode *vp;
80439397Smckusick {
80539397Smckusick 
80669408Smckusick 	simple_lock(&vp->v_interlock);
80759450Smckusick 	if (vp->v_usecount <= 0)
80859450Smckusick 		panic("vref used where vget required");
80939809Smckusick 	vp->v_usecount++;
81069408Smckusick 	simple_unlock(&vp->v_interlock);
81139397Smckusick }
81239397Smckusick 
81339397Smckusick /*
81439397Smckusick  * vput(), just unlock and vrele()
81539397Smckusick  */
81668319Scgd void
81768319Scgd vput(vp)
81869408Smckusick 	struct vnode *vp;
81939397Smckusick {
82069408Smckusick 	struct proc *p = curproc;	/* XXX */
82152416Storek 
82269523Spendry #ifdef DIGANOSTIC
82369523Spendry 	if (vp == NULL)
82469523Spendry 		panic("vput: null vp");
82569523Spendry #endif
82669523Spendry 	simple_lock(&vp->v_interlock);
82769523Spendry 	vp->v_usecount--;
82869523Spendry 	if (vp->v_usecount > 0) {
82969523Spendry 		simple_unlock(&vp->v_interlock);
83069523Spendry 		VOP_UNLOCK(vp, 0, p);
83169523Spendry 		return;
83269523Spendry 	}
83369523Spendry #ifdef DIAGNOSTIC
83469523Spendry 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
83569523Spendry 		vprint("vput: bad ref count", vp);
83669523Spendry 		panic("vput: ref cnt");
83769523Spendry 	}
83869523Spendry #endif
83969523Spendry 	/*
84069523Spendry 	 * insert at tail of LRU list
84169523Spendry 	 */
84269523Spendry 	simple_lock(&vnode_free_list_slock);
84369523Spendry 	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
84469523Spendry 	simple_unlock(&vnode_free_list_slock);
84569523Spendry 	simple_unlock(&vp->v_interlock);
84669523Spendry 	VOP_INACTIVE(vp, p);
84739397Smckusick }
84839397Smckusick 
/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		/* Others still hold references; nothing more to do. */
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	/*
	 * The vnode lock is acquired with the interlock still held
	 * (LK_INTERLOCK); the inactive routine is only invoked when
	 * the lock could be obtained.
	 */
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}
88439433Smckusick 
88569408Smckusick #ifdef DIAGNOSTIC
88639433Smckusick /*
88739809Smckusick  * Page or buffer structure gets a reference.
88839809Smckusick  */
88968319Scgd void
89068319Scgd vhold(vp)
89139809Smckusick 	register struct vnode *vp;
89239809Smckusick {
89339809Smckusick 
89469408Smckusick 	simple_lock(&vp->v_interlock);
89539809Smckusick 	vp->v_holdcnt++;
89669408Smckusick 	simple_unlock(&vp->v_interlock);
89739809Smckusick }
89839809Smckusick 
89939809Smckusick /*
90039809Smckusick  * Page or buffer structure frees a reference.
90139809Smckusick  */
90268319Scgd void
90368319Scgd holdrele(vp)
90439809Smckusick 	register struct vnode *vp;
90539809Smckusick {
90639809Smckusick 
90769408Smckusick 	simple_lock(&vp->v_interlock);
90839809Smckusick 	if (vp->v_holdcnt <= 0)
90939809Smckusick 		panic("holdrele: holdcnt");
91039809Smckusick 	vp->v_holdcnt--;
91169408Smckusick 	simple_unlock(&vp->v_interlock);
91239809Smckusick }
91369408Smckusick #endif /* DIAGNOSTIC */
91439809Smckusick 
91539809Smckusick /*
91639509Smckusick  * Remove any vnodes in the vnode table belonging to mount point mp.
91739509Smckusick  *
91839509Smckusick  * If MNT_NOFORCE is specified, there should not be any active ones,
91939509Smckusick  * return error if any are found (nb: this is a user error, not a
92039509Smckusick  * system error). If MNT_FORCE is specified, detach any active vnodes
92139509Smckusick  * that are found.
92239509Smckusick  */
92365679Shibler #ifdef DIAGNOSTIC
92460930Smckusick int busyprt = 0;	/* print out busy vnodes */
92560930Smckusick struct ctldebug debug1 = { "busyprt", &busyprt };
92665679Shibler #endif
92739509Smckusick 
int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * The vnode has moved to another mount point, so the
		 * list changed underneath us; rescan from the start.
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 * The mount-list lock is dropped around vgonel since
		 * vgonel takes other locks (interlock was passed in held).
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
100639509Smckusick 
/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)	/* assignment intended */
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);	/* drop the reference taken above */
	cache_purge(vp);
	/*
	 * Free the private lock allocated by vop_nolock(), if any;
	 * it should have been drained by the LK_DRAIN lock above.
	 */
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}
108939433Smckusick 
/*
 * Eliminate all activity associated with  the requested vnode
 * and with all vnodes aliased to the requested vnode.
 *
 * ap->a_flags must include REVOKEALL (checked under DIAGNOSTIC);
 * always returns 0.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				/* Drop the hash lock before vgone takes its own locks. */
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}
115339633Smckusick 
115439633Smckusick /*
115569408Smckusick  * Recycle an unused vnode to the front of the free list.
115669408Smckusick  * Release the passed interlock if the vnode will be recycled.
115769408Smckusick  */
115869408Smckusick int
115969408Smckusick vrecycle(vp, inter_lkp, p)
116069408Smckusick 	struct vnode *vp;
116169408Smckusick 	struct simplelock *inter_lkp;
116269408Smckusick 	struct proc *p;
116369408Smckusick {
116469408Smckusick 
116569408Smckusick 	simple_lock(&vp->v_interlock);
116669408Smckusick 	if (vp->v_usecount == 0) {
116769408Smckusick 		if (inter_lkp)
116869408Smckusick 			simple_unlock(inter_lkp);
116969408Smckusick 		vgonel(vp, p);
117069408Smckusick 		return (1);
117169408Smckusick 	}
117269408Smckusick 	simple_unlock(&vp->v_interlock);
117369408Smckusick 	return (0);
117469408Smckusick }
117569408Smckusick 
117669408Smckusick /*
117739433Smckusick  * Eliminate all activity associated with a vnode
117839433Smckusick  * in preparation for reuse.
117939433Smckusick  */
118068319Scgd void
118168319Scgd vgone(vp)
118269408Smckusick 	struct vnode *vp;
118339433Smckusick {
118469408Smckusick 	struct proc *p = curproc;	/* XXX */
118569408Smckusick 
118669408Smckusick 	simple_lock(&vp->v_interlock);
118769408Smckusick 	vgonel(vp, p);
118869408Smckusick }
118969408Smckusick 
/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			/* Unlink vp from the interior of its hash chain. */
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * Find the remaining aliases; if exactly one is
			 * left, it is no longer aliased and its VALIASED
			 * flag is cleared.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}
128239633Smckusick 
128339633Smckusick /*
128439821Smckusick  * Lookup a vnode by device number.
128539821Smckusick  */
128668319Scgd int
128739821Smckusick vfinddev(dev, type, vpp)
128839821Smckusick 	dev_t dev;
128939821Smckusick 	enum vtype type;
129039821Smckusick 	struct vnode **vpp;
129139821Smckusick {
129269408Smckusick 	struct vnode *vp;
129369408Smckusick 	int rc = 0;
129439821Smckusick 
129569408Smckusick 	simple_lock(&spechash_slock);
129639821Smckusick 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
129739821Smckusick 		if (dev != vp->v_rdev || type != vp->v_type)
129839821Smckusick 			continue;
129939821Smckusick 		*vpp = vp;
130069408Smckusick 		rc = 1;
130169408Smckusick 		break;
130239821Smckusick 	}
130369408Smckusick 	simple_unlock(&spechash_slock);
130469408Smckusick 	return (rc);
130539821Smckusick }
130639821Smckusick 
/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	/* Un-aliased device: its own use count is the total. */
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 * The hash lock is dropped for vgone and the whole
		 * scan restarted, since the alias list may change.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
133839667Smckusick 
133939667Smckusick /*
134039667Smckusick  * Print out a description of a vnode.
134139667Smckusick  */
134239667Smckusick static char *typename[] =
134340286Smckusick    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
134439667Smckusick 
134568171Scgd void
134639667Smckusick vprint(label, vp)
134739667Smckusick 	char *label;
134839667Smckusick 	register struct vnode *vp;
134939667Smckusick {
135039913Smckusick 	char buf[64];
135139667Smckusick 
135239667Smckusick 	if (label != NULL)
135339667Smckusick 		printf("%s: ", label);
135450109Smckusick 	printf("type %s, usecount %d, writecount %d, refcount %d,",
135550109Smckusick 		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
135650109Smckusick 		vp->v_holdcnt);
135739913Smckusick 	buf[0] = '\0';
135839913Smckusick 	if (vp->v_flag & VROOT)
135939913Smckusick 		strcat(buf, "|VROOT");
136039913Smckusick 	if (vp->v_flag & VTEXT)
136139913Smckusick 		strcat(buf, "|VTEXT");
136241300Smckusick 	if (vp->v_flag & VSYSTEM)
136341300Smckusick 		strcat(buf, "|VSYSTEM");
136441300Smckusick 	if (vp->v_flag & VXLOCK)
136541300Smckusick 		strcat(buf, "|VXLOCK");
136641300Smckusick 	if (vp->v_flag & VXWANT)
136741300Smckusick 		strcat(buf, "|VXWANT");
136841300Smckusick 	if (vp->v_flag & VBWAIT)
136941300Smckusick 		strcat(buf, "|VBWAIT");
137039913Smckusick 	if (vp->v_flag & VALIASED)
137139913Smckusick 		strcat(buf, "|VALIASED");
137239913Smckusick 	if (buf[0] != '\0')
137339913Smckusick 		printf(" flags (%s)", &buf[1]);
137465260Smckusick 	if (vp->v_data == NULL) {
137565260Smckusick 		printf("\n");
137665260Smckusick 	} else {
137765260Smckusick 		printf("\n\t");
137865260Smckusick 		VOP_PRINT(vp);
137965260Smckusick 	}
138039667Smckusick }
138141110Smarc 
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		/* Skip, rather than sleep on, mounts that are busy. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		/* Re-take the list lock dropped by vfs_busy before advancing. */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif
141449691Smckusick 
141568659Smckusick /*
141668659Smckusick  * Top level filesystem related information gathering.
141768659Smckusick  */
141868659Smckusick int
141968659Smckusick vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
142068659Smckusick 	int *name;
142168659Smckusick 	u_int namelen;
142268659Smckusick 	void *oldp;
142368659Smckusick 	size_t *oldlenp;
142468659Smckusick 	void *newp;
142568659Smckusick 	size_t newlen;
142668659Smckusick 	struct proc *p;
142768659Smckusick {
142868659Smckusick 	struct ctldebug *cdp;
142968659Smckusick 	struct vfsconf *vfsp;
143068659Smckusick 
143168659Smckusick 	/* all sysctl names at this level are at least name and field */
143268659Smckusick 	if (namelen < 2)
143368659Smckusick 		return (ENOTDIR);		/* overloaded */
143468659Smckusick 	if (name[0] != VFS_GENERIC) {
143568659Smckusick 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
143668659Smckusick 			if (vfsp->vfc_typenum == name[0])
143768659Smckusick 				break;
143868659Smckusick 		if (vfsp == NULL)
143968659Smckusick 			return (EOPNOTSUPP);
144068659Smckusick 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
144168659Smckusick 		    oldp, oldlenp, newp, newlen, p));
144268659Smckusick 	}
144368659Smckusick 	switch (name[1]) {
144468659Smckusick 	case VFS_MAXTYPENUM:
144568659Smckusick 		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
144668659Smckusick 	case VFS_CONF:
144768659Smckusick 		if (namelen < 3)
144868659Smckusick 			return (ENOTDIR);	/* overloaded */
144968659Smckusick 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
145068659Smckusick 			if (vfsp->vfc_typenum == name[2])
145168659Smckusick 				break;
145268659Smckusick 		if (vfsp == NULL)
145368659Smckusick 			return (EOPNOTSUPP);
145468659Smckusick 		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
145568659Smckusick 		    sizeof(struct vfsconf)));
145668659Smckusick 	}
145768659Smckusick 	return (EOPNOTSUPP);
145868659Smckusick }
145968659Smckusick 
int kinfo_vdebug = 1;		/* log restarts caused by vnode list changes */
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10	/* extra entries allowed for in size estimate */
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	/*
	 * A null buffer is a size probe; pad the estimate since the
	 * vnode count may grow before the actual copy is requested.
	 */
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		/* Skip mounts that cannot be busied without sleeping. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			/* Drop the lock across the (possibly faulting) copyout. */
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		/* Re-take the list lock dropped by vfs_busy before advancing. */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}
153565679Shibler 
153665679Shibler /*
153765679Shibler  * Check to see if a filesystem is mounted on a block device.
153865679Shibler  */
153965679Shibler int
154065679Shibler vfs_mountedon(vp)
154169408Smckusick 	struct vnode *vp;
154265679Shibler {
154369408Smckusick 	struct vnode *vq;
154469408Smckusick 	int error = 0;
154565679Shibler 
154665679Shibler 	if (vp->v_specflags & SI_MOUNTEDON)
154765679Shibler 		return (EBUSY);
154865679Shibler 	if (vp->v_flag & VALIASED) {
154969408Smckusick 		simple_lock(&spechash_slock);
155065679Shibler 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
155165679Shibler 			if (vq->v_rdev != vp->v_rdev ||
155265679Shibler 			    vq->v_type != vp->v_type)
155365679Shibler 				continue;
155469408Smckusick 			if (vq->v_specflags & SI_MOUNTEDON) {
155569408Smckusick 				error = EBUSY;
155669408Smckusick 				break;
155769408Smckusick 			}
155865679Shibler 		}
155969408Smckusick 		simple_unlock(&spechash_slock);
156065679Shibler 	}
156169408Smckusick 	return (error);
156265679Shibler }
156365679Shibler 
156465679Shibler /*
156569325Smckusick  * Unmount all filesystems. The list is traversed in reverse order
156669325Smckusick  * of mounting to avoid dependencies.
156769325Smckusick  */
156869325Smckusick void
156969325Smckusick vfs_unmountall()
157069325Smckusick {
157169325Smckusick 	struct mount *mp, *nmp;
157269578Smckusick 	struct proc *p = curproc;	/* XXX */
157369325Smckusick 
157469578Smckusick 	/*
157569578Smckusick 	 * Since this only runs when rebooting, it is not interlocked.
157669578Smckusick 	 */
157769325Smckusick 	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
157869325Smckusick 		nmp = mp->mnt_list.cqe_prev;
157969578Smckusick 		(void) dounmount(mp, MNT_FORCE, p);
158069325Smckusick 	}
158169325Smckusick }
158269325Smckusick 
158369325Smckusick /*
158465679Shibler  * Build hash lists of net addresses and hang them off the mount point.
158565679Shibler  * Called by ufs_mount() to set up the lists of export addresses.
158665679Shibler  */
158765679Shibler static int
158865679Shibler vfs_hang_addrlist(mp, nep, argp)
158965679Shibler 	struct mount *mp;
159065679Shibler 	struct netexport *nep;
159165679Shibler 	struct export_args *argp;
159265679Shibler {
159365679Shibler 	register struct netcred *np;
159465679Shibler 	register struct radix_node_head *rnh;
159565679Shibler 	register int i;
159665679Shibler 	struct radix_node *rn;
159765679Shibler 	struct sockaddr *saddr, *smask = 0;
159865679Shibler 	struct domain *dom;
159965679Shibler 	int error;
160065679Shibler 
160165679Shibler 	if (argp->ex_addrlen == 0) {
160265679Shibler 		if (mp->mnt_flag & MNT_DEFEXPORTED)
160365679Shibler 			return (EPERM);
160465679Shibler 		np = &nep->ne_defexported;
160565679Shibler 		np->netc_exflags = argp->ex_flags;
160665679Shibler 		np->netc_anon = argp->ex_anon;
160765679Shibler 		np->netc_anon.cr_ref = 1;
160865679Shibler 		mp->mnt_flag |= MNT_DEFEXPORTED;
160965679Shibler 		return (0);
161065679Shibler 	}
161165679Shibler 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
161265679Shibler 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
161365679Shibler 	bzero((caddr_t)np, i);
161465679Shibler 	saddr = (struct sockaddr *)(np + 1);
161565679Shibler 	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
161665679Shibler 		goto out;
161765679Shibler 	if (saddr->sa_len > argp->ex_addrlen)
161865679Shibler 		saddr->sa_len = argp->ex_addrlen;
161965679Shibler 	if (argp->ex_masklen) {
162065679Shibler 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
162165679Shibler 		error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
162265679Shibler 		if (error)
162365679Shibler 			goto out;
162465679Shibler 		if (smask->sa_len > argp->ex_masklen)
162565679Shibler 			smask->sa_len = argp->ex_masklen;
162665679Shibler 	}
162765679Shibler 	i = saddr->sa_family;
162865679Shibler 	if ((rnh = nep->ne_rtable[i]) == 0) {
162965679Shibler 		/*
163065679Shibler 		 * Seems silly to initialize every AF when most are not
163165679Shibler 		 * used, do so on demand here
163265679Shibler 		 */
163365679Shibler 		for (dom = domains; dom; dom = dom->dom_next)
163465679Shibler 			if (dom->dom_family == i && dom->dom_rtattach) {
163565679Shibler 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
163665679Shibler 					dom->dom_rtoffset);
163765679Shibler 				break;
163865679Shibler 			}
163965679Shibler 		if ((rnh = nep->ne_rtable[i]) == 0) {
164065679Shibler 			error = ENOBUFS;
164165679Shibler 			goto out;
164265679Shibler 		}
164365679Shibler 	}
164465679Shibler 	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
164565679Shibler 		np->netc_rnodes);
164669140Smckusick 	if (rn == 0) {
164769140Smckusick 		/*
164869140Smckusick 		 * One of the reasons that rnh_addaddr may fail is that
164969140Smckusick 		 * the entry already exists. To check for this case, we
165069140Smckusick 		 * look up the entry to see if it is there. If so, we
165169140Smckusick 		 * do not need to make a new entry but do return success.
165269140Smckusick 		 */
165369140Smckusick 		free(np, M_NETADDR);
165469140Smckusick 		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
165569140Smckusick 		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
165669140Smckusick 		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
165769140Smckusick 		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
165869140Smckusick 			    (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
165969140Smckusick 			return (0);
166069140Smckusick 		return (EPERM);
166165679Shibler 	}
166265679Shibler 	np->netc_exflags = argp->ex_flags;
166365679Shibler 	np->netc_anon = argp->ex_anon;
166465679Shibler 	np->netc_anon.cr_ref = 1;
166565679Shibler 	return (0);
166665679Shibler out:
166765679Shibler 	free(np, M_NETADDR);
166865679Shibler 	return (error);
166965679Shibler }
167065679Shibler 
167165679Shibler /* ARGSUSED */
167265679Shibler static int
167365679Shibler vfs_free_netcred(rn, w)
167465679Shibler 	struct radix_node *rn;
167565679Shibler 	caddr_t w;
167665679Shibler {
167765679Shibler 	register struct radix_node_head *rnh = (struct radix_node_head *)w;
167865679Shibler 
167965679Shibler 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
168065679Shibler 	free((caddr_t)rn, M_NETADDR);
168165679Shibler 	return (0);
168265679Shibler }
168368319Scgd 
168465679Shibler /*
168565679Shibler  * Free the net address hash lists that are hanging off the mount points.
168665679Shibler  */
168765679Shibler static void
168865679Shibler vfs_free_addrlist(nep)
168965679Shibler 	struct netexport *nep;
169065679Shibler {
169165679Shibler 	register int i;
169265679Shibler 	register struct radix_node_head *rnh;
169365679Shibler 
169465679Shibler 	for (i = 0; i <= AF_MAX; i++)
169565679Shibler 		if (rnh = nep->ne_rtable[i]) {
169665679Shibler 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
169765679Shibler 			    (caddr_t)rnh);
169865679Shibler 			free((caddr_t)rnh, M_RTABLE);
169965679Shibler 			nep->ne_rtable[i] = 0;
170065679Shibler 		}
170165679Shibler }
170265679Shibler 
170365679Shibler int
170465679Shibler vfs_export(mp, nep, argp)
170565679Shibler 	struct mount *mp;
170665679Shibler 	struct netexport *nep;
170765679Shibler 	struct export_args *argp;
170865679Shibler {
170965679Shibler 	int error;
171065679Shibler 
171165679Shibler 	if (argp->ex_flags & MNT_DELEXPORT) {
171265679Shibler 		vfs_free_addrlist(nep);
171365679Shibler 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
171465679Shibler 	}
171565679Shibler 	if (argp->ex_flags & MNT_EXPORTED) {
171665679Shibler 		if (error = vfs_hang_addrlist(mp, nep, argp))
171765679Shibler 			return (error);
171865679Shibler 		mp->mnt_flag |= MNT_EXPORTED;
171965679Shibler 	}
172065679Shibler 	return (0);
172165679Shibler }
172265679Shibler 
172365679Shibler struct netcred *
172465679Shibler vfs_export_lookup(mp, nep, nam)
172565679Shibler 	register struct mount *mp;
172665679Shibler 	struct netexport *nep;
172765679Shibler 	struct mbuf *nam;
172865679Shibler {
172965679Shibler 	register struct netcred *np;
173065679Shibler 	register struct radix_node_head *rnh;
173165679Shibler 	struct sockaddr *saddr;
173265679Shibler 
173365679Shibler 	np = NULL;
173465679Shibler 	if (mp->mnt_flag & MNT_EXPORTED) {
173565679Shibler 		/*
173665679Shibler 		 * Lookup in the export list first.
173765679Shibler 		 */
173865679Shibler 		if (nam != NULL) {
173965679Shibler 			saddr = mtod(nam, struct sockaddr *);
174065679Shibler 			rnh = nep->ne_rtable[saddr->sa_family];
174165679Shibler 			if (rnh != NULL) {
174265679Shibler 				np = (struct netcred *)
174365679Shibler 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
174465679Shibler 							      rnh);
174565679Shibler 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
174665679Shibler 					np = NULL;
174765679Shibler 			}
174865679Shibler 		}
174965679Shibler 		/*
175065679Shibler 		 * If no address match, use the default if it exists.
175165679Shibler 		 */
175265679Shibler 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
175365679Shibler 			np = &nep->ne_defexported;
175465679Shibler 	}
175565679Shibler 	return (np);
175665679Shibler }
1757