xref: /csrg-svn/sys/kern/vfs_subr.c (revision 69578)
137488Smckusick /*
263180Sbostic  * Copyright (c) 1989, 1993
363180Sbostic  *	The Regents of the University of California.  All rights reserved.
465771Sbostic  * (c) UNIX System Laboratories, Inc.
565771Sbostic  * All or some portions of this file are derived from material licensed
665771Sbostic  * to the University of California by American Telephone and Telegraph
765771Sbostic  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
865771Sbostic  * the permission of UNIX System Laboratories, Inc.
937488Smckusick  *
1044458Sbostic  * %sccs.include.redist.c%
1137488Smckusick  *
12*69578Smckusick  *	@(#)vfs_subr.c	8.29 (Berkeley) 05/20/95
1337488Smckusick  */
1437488Smckusick 
1537488Smckusick /*
1637488Smckusick  * External virtual filesystem routines
1737488Smckusick  */
1837488Smckusick 
1951460Sbostic #include <sys/param.h>
2053829Spendry #include <sys/systm.h>
2151460Sbostic #include <sys/proc.h>
2251460Sbostic #include <sys/mount.h>
2351460Sbostic #include <sys/time.h>
2451460Sbostic #include <sys/vnode.h>
2552415Smckusick #include <sys/stat.h>
2651460Sbostic #include <sys/namei.h>
2751460Sbostic #include <sys/ucred.h>
2851460Sbostic #include <sys/buf.h>
2951460Sbostic #include <sys/errno.h>
3051460Sbostic #include <sys/malloc.h>
3165679Shibler #include <sys/domain.h>
3265679Shibler #include <sys/mbuf.h>
3337488Smckusick 
3460930Smckusick #include <vm/vm.h>
3560930Smckusick #include <sys/sysctl.h>
3660930Smckusick 
3755050Spendry #include <miscfs/specfs/specdev.h>
3855050Spendry 
3952415Smckusick enum vtype iftovt_tab[16] = {
4052415Smckusick 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
4152415Smckusick 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
4252415Smckusick };
4352415Smckusick int	vttoif_tab[9] = {
4452415Smckusick 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
4552415Smckusick 	S_IFSOCK, S_IFIFO, S_IFMT,
4652415Smckusick };
4752415Smckusick 
/*
 * Insq/Remq for the vnode usage lists.
 *
 * bufinsvn(bp, dp) puts buffer bp at the head of buffer list dp.
 * bufremvn(bp) unlinks bp from the buffer list it is on and then marks
 * it as being on no list by storing NOLIST in b_vnbufs.le_next; callers
 * test for that value before removing a buffer.
 *
 * bufremvn is wrapped in do { } while (0) so that "bufremvn(bp);" is
 * exactly one statement and remains correct inside an unbraced if/else.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) do {						\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
} while (0)
5665260Smckusick TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
5765260Smckusick struct mntlist mountlist;			/* mounted filesystem list */
58*69578Smckusick struct simplelock mountlist_slock;
5969408Smckusick static struct simplelock mntid_slock;
6069408Smckusick struct simplelock mntvnode_slock;
6169408Smckusick static struct simplelock spechash_slock;
6269408Smckusick static struct simplelock vnode_free_list_slock;
6365260Smckusick 
6456608Smckusick /*
6565260Smckusick  * Initialize the vnode management data structures.
6637488Smckusick  */
6768319Scgd void
6865260Smckusick vntblinit()
6937488Smckusick {
7037488Smckusick 
7169408Smckusick 	simple_lock_init(&mntvnode_slock);
7269408Smckusick 	simple_lock_init(&mntid_slock);
7369408Smckusick 	simple_lock_init(&spechash_slock);
7465260Smckusick 	TAILQ_INIT(&vnode_free_list);
7569408Smckusick 	simple_lock_init(&vnode_free_list_slock);
7669325Smckusick 	CIRCLEQ_INIT(&mountlist);
7737488Smckusick }
7837488Smckusick 
7937488Smckusick /*
80*69578Smckusick  * Mark a mount point as busy. Used to synchronize access and to delay
81*69578Smckusick  * unmounting. Interlock is not released on failure.
8237488Smckusick  */
8368319Scgd int
84*69578Smckusick vfs_busy(mp, flags, interlkp, p)
85*69578Smckusick 	struct mount *mp;
86*69578Smckusick 	int flags;
87*69578Smckusick 	struct simplelock *interlkp;
88*69578Smckusick 	struct proc *p;
8937488Smckusick {
90*69578Smckusick 	int lkflags;
9137488Smckusick 
92*69578Smckusick 	if (mp->mnt_flag & MNT_UNMOUNT) {
93*69578Smckusick 		if (flags & LK_NOWAIT)
94*69578Smckusick 			return (ENOENT);
9541400Smckusick 		mp->mnt_flag |= MNT_MWAIT;
96*69578Smckusick 		sleep((caddr_t)mp, PVFS);
97*69578Smckusick 		return (ENOENT);
9839045Smckusick 	}
99*69578Smckusick 	lkflags = LK_SHARED;
100*69578Smckusick 	if (interlkp)
101*69578Smckusick 		lkflags |= LK_INTERLOCK;
102*69578Smckusick 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
103*69578Smckusick 		panic("vfs_busy: unexpected lock failure");
10437488Smckusick 	return (0);
10537488Smckusick }
10637488Smckusick 
10737488Smckusick /*
10841300Smckusick  * Free a busy filesystem.
10941300Smckusick  * Panic if filesystem is not busy.
11041300Smckusick  */
11168319Scgd void
112*69578Smckusick vfs_unbusy(mp, p)
113*69578Smckusick 	struct mount *mp;
114*69578Smckusick 	struct proc *p;
11541300Smckusick {
11641300Smckusick 
117*69578Smckusick 	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
11841300Smckusick }
11941300Smckusick 
12041300Smckusick /*
12169378Smckusick  * Lookup a filesystem type, and if found allocate and initialize
12269378Smckusick  * a mount structure for it.
12369378Smckusick  *
12469378Smckusick  * Devname is usually updated by mount(8) after booting.
12569378Smckusick  */
12669378Smckusick int
12769378Smckusick vfs_rootmountalloc(fstypename, devname, mpp)
12869378Smckusick 	char *fstypename;
12969378Smckusick 	char *devname;
13069378Smckusick 	struct mount **mpp;
13169378Smckusick {
132*69578Smckusick 	struct proc *p = curproc;	/* XXX */
13369378Smckusick 	struct vfsconf *vfsp;
13469378Smckusick 	struct mount *mp;
13569378Smckusick 
13669378Smckusick 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
13769378Smckusick 		if (!strcmp(vfsp->vfc_name, fstypename))
13869378Smckusick 			break;
13969378Smckusick 	if (vfsp == NULL)
14069378Smckusick 		return (ENODEV);
14169378Smckusick 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
14269378Smckusick 	bzero((char *)mp, (u_long)sizeof(struct mount));
143*69578Smckusick 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
144*69578Smckusick 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
14569378Smckusick 	LIST_INIT(&mp->mnt_vnodelist);
14669378Smckusick 	mp->mnt_vfc = vfsp;
14769378Smckusick 	mp->mnt_op = vfsp->vfc_vfsops;
14869378Smckusick 	mp->mnt_flag = MNT_RDONLY;
14969378Smckusick 	mp->mnt_vnodecovered = NULLVP;
15069378Smckusick 	vfsp->vfc_refcount++;
15169378Smckusick 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
15269378Smckusick 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
15369378Smckusick 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
15469378Smckusick 	mp->mnt_stat.f_mntonname[0] = '/';
15569378Smckusick 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
15669378Smckusick 	*mpp = mp;
15769378Smckusick 	return (0);
15869378Smckusick }
15969378Smckusick 
16069378Smckusick /*
16169378Smckusick  * Find an appropriate filesystem to use for the root. If a filesystem
16269378Smckusick  * has not been preselected, walk through the list of known filesystems
16369378Smckusick  * trying those that have mountroot routines, and try them until one
16469378Smckusick  * works or we have tried them all.
16569378Smckusick  */
16669378Smckusick int
16769378Smckusick vfs_mountroot()
16869378Smckusick {
16969378Smckusick 	struct vfsconf *vfsp;
17069378Smckusick 	extern int (*mountroot)(void);
17169378Smckusick 	int error;
17269378Smckusick 
17369378Smckusick 	if (mountroot != NULL)
17469537Smckusick 		return ((*mountroot)());
17569378Smckusick 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
17669378Smckusick 		if (vfsp->vfc_mountroot == NULL)
17769378Smckusick 			continue;
17869378Smckusick 		if ((error = (*vfsp->vfc_mountroot)()) == 0)
17969378Smckusick 			return (0);
18069378Smckusick 		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
18169378Smckusick 	}
18269378Smckusick 	return (ENODEV);
18369378Smckusick }
18469378Smckusick 
18569378Smckusick /*
18637488Smckusick  * Lookup a mount point by filesystem identifier.
18737488Smckusick  */
18837488Smckusick struct mount *
18968659Smckusick vfs_getvfs(fsid)
19037488Smckusick 	fsid_t *fsid;
19137488Smckusick {
19237488Smckusick 	register struct mount *mp;
19337488Smckusick 
194*69578Smckusick 	simple_lock(&mountlist_slock);
19569325Smckusick 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
19669325Smckusick 	     mp = mp->mnt_list.cqe_next) {
19741400Smckusick 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
198*69578Smckusick 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
199*69578Smckusick 			simple_unlock(&mountlist_slock);
20038288Smckusick 			return (mp);
201*69578Smckusick 		}
20265260Smckusick 	}
203*69578Smckusick 	simple_unlock(&mountlist_slock);
20438288Smckusick 	return ((struct mount *)0);
20537488Smckusick }
20637488Smckusick 
20737488Smckusick /*
20853829Spendry  * Get a new unique fsid
20953829Spendry  */
21053829Spendry void
21168659Smckusick vfs_getnewfsid(mp)
21253829Spendry 	struct mount *mp;
21353829Spendry {
21453829Spendry static u_short xxxfs_mntid;
21553829Spendry 
21653829Spendry 	fsid_t tfsid;
21768659Smckusick 	int mtype;
21853829Spendry 
21969408Smckusick 	simple_lock(&mntid_slock);
22068659Smckusick 	mtype = mp->mnt_vfc->vfc_typenum;
22165507Spendry 	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
22253829Spendry 	mp->mnt_stat.f_fsid.val[1] = mtype;
22353829Spendry 	if (xxxfs_mntid == 0)
22453829Spendry 		++xxxfs_mntid;
22565507Spendry 	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
22653829Spendry 	tfsid.val[1] = mtype;
22769325Smckusick 	if (mountlist.cqh_first != (void *)&mountlist) {
22868659Smckusick 		while (vfs_getvfs(&tfsid)) {
22953936Spendry 			tfsid.val[0]++;
23053936Spendry 			xxxfs_mntid++;
23153936Spendry 		}
23253829Spendry 	}
23353829Spendry 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
23469408Smckusick 	simple_unlock(&mntid_slock);
23553829Spendry }
23653829Spendry 
23753829Spendry /*
23837488Smckusick  * Set vnode attributes to VNOVAL
23937488Smckusick  */
24068319Scgd void
24168319Scgd vattr_null(vap)
24237488Smckusick 	register struct vattr *vap;
24337488Smckusick {
24437488Smckusick 
24537488Smckusick 	vap->va_type = VNON;
24652005Smckusick 	vap->va_size = vap->va_bytes = VNOVAL;
24737488Smckusick 	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
24852005Smckusick 		vap->va_fsid = vap->va_fileid =
24952005Smckusick 		vap->va_blocksize = vap->va_rdev =
25054347Smckusick 		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
25154347Smckusick 		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
25254347Smckusick 		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
25338258Smckusick 		vap->va_flags = vap->va_gen = VNOVAL;
25458548Sbostic 	vap->va_vaflags = 0;
25537488Smckusick }
25638265Smckusick 
25738265Smckusick /*
25839397Smckusick  * Routines having to do with the management of the vnode table.
25939397Smckusick  */
26053547Sheideman extern int (**dead_vnodeop_p)();
26169408Smckusick static void vclean __P((struct vnode *vp, int flag, struct proc *p));
26269408Smckusick extern void vgonel __P((struct vnode *vp, struct proc *p));
26340883Smckusick long numvnodes;
26453493Sheideman extern struct vattr va_null;
26539397Smckusick 
26639397Smckusick /*
26739397Smckusick  * Return the next vnode from the free list.
26839397Smckusick  */
26968319Scgd int
27039397Smckusick getnewvnode(tag, mp, vops, vpp)
27139397Smckusick 	enum vtagtype tag;
27239397Smckusick 	struct mount *mp;
27353495Sheideman 	int (**vops)();
27439397Smckusick 	struct vnode **vpp;
27539397Smckusick {
27669408Smckusick 	struct proc *p = curproc;	/* XXX */
27769408Smckusick 	struct vnode *vp;
27857042Smargo 	int s;
27969408Smckusick 	int cnt;
28039397Smckusick 
28169408Smckusick top:
28269408Smckusick 	simple_lock(&vnode_free_list_slock);
28365260Smckusick 	if ((vnode_free_list.tqh_first == NULL &&
28465260Smckusick 	     numvnodes < 2 * desiredvnodes) ||
28554347Smckusick 	    numvnodes < desiredvnodes) {
28669408Smckusick 		simple_unlock(&vnode_free_list_slock);
28745118Smckusick 		vp = (struct vnode *)malloc((u_long)sizeof *vp,
28845118Smckusick 		    M_VNODE, M_WAITOK);
28940883Smckusick 		bzero((char *)vp, sizeof *vp);
29040883Smckusick 		numvnodes++;
29140883Smckusick 	} else {
29269408Smckusick 		for (vp = vnode_free_list.tqh_first;
29369408Smckusick 				vp != NULLVP; vp = vp->v_freelist.tqe_next) {
29469408Smckusick 			if (simple_lock_try(&vp->v_interlock))
29569408Smckusick 				break;
29669408Smckusick 		}
29769408Smckusick 		/*
29869408Smckusick 		 * Unless this is a bad time of the month, at most
29969408Smckusick 		 * the first NCPUS items on the free list are
30069408Smckusick 		 * locked, so this is close enough to being empty.
30169408Smckusick 		 */
30269408Smckusick 		if (vp == NULLVP) {
30369408Smckusick 			simple_unlock(&vnode_free_list_slock);
30440883Smckusick 			tablefull("vnode");
30540883Smckusick 			*vpp = 0;
30640883Smckusick 			return (ENFILE);
30740883Smckusick 		}
30840883Smckusick 		if (vp->v_usecount)
30940883Smckusick 			panic("free vnode isn't");
31065260Smckusick 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
31165505Smckusick 		/* see comment on why 0xdeadb is set at end of vgone (below) */
31265505Smckusick 		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
31369408Smckusick 		simple_unlock(&vnode_free_list_slock);
31452190Smckusick 		vp->v_lease = NULL;
31540883Smckusick 		if (vp->v_type != VBAD)
31669408Smckusick 			vgonel(vp, p);
31769408Smckusick 		else
31869408Smckusick 			simple_unlock(&vp->v_interlock);
31957042Smargo #ifdef DIAGNOSTIC
32052006Smckusick 		if (vp->v_data)
32152006Smckusick 			panic("cleaned vnode isn't");
32257042Smargo 		s = splbio();
32357042Smargo 		if (vp->v_numoutput)
32457042Smargo 			panic("Clean vnode has pending I/O's");
32557042Smargo 		splx(s);
32657042Smargo #endif
32740883Smckusick 		vp->v_flag = 0;
32840883Smckusick 		vp->v_lastr = 0;
32965745Shibler 		vp->v_ralen = 0;
33065745Shibler 		vp->v_maxra = 0;
33157042Smargo 		vp->v_lastw = 0;
33257042Smargo 		vp->v_lasta = 0;
33357042Smargo 		vp->v_cstart = 0;
33457042Smargo 		vp->v_clen = 0;
33540883Smckusick 		vp->v_socket = 0;
33639397Smckusick 	}
33739512Smckusick 	vp->v_type = VNON;
33839397Smckusick 	cache_purge(vp);
33939397Smckusick 	vp->v_tag = tag;
34039433Smckusick 	vp->v_op = vops;
34139397Smckusick 	insmntque(vp, mp);
34239397Smckusick 	*vpp = vp;
34365505Smckusick 	vp->v_usecount = 1;
34465260Smckusick 	vp->v_data = 0;
34539397Smckusick 	return (0);
34639397Smckusick }
34765679Shibler 
34839397Smckusick /*
34939397Smckusick  * Move a vnode from one mount queue to another.
35039397Smckusick  */
35168319Scgd void
35239397Smckusick insmntque(vp, mp)
35369408Smckusick 	struct vnode *vp;
35469408Smckusick 	struct mount *mp;
35539397Smckusick {
35639397Smckusick 
35769408Smckusick 	simple_lock(&mntvnode_slock);
35839397Smckusick 	/*
35939397Smckusick 	 * Delete from old mount point vnode list, if on one.
36039397Smckusick 	 */
36165679Shibler 	if (vp->v_mount != NULL)
36265260Smckusick 		LIST_REMOVE(vp, v_mntvnodes);
36339397Smckusick 	/*
36439397Smckusick 	 * Insert into list of vnodes for the new mount point, if available.
36539397Smckusick 	 */
36669408Smckusick 	if ((vp->v_mount = mp) != NULL)
36769408Smckusick 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
36869408Smckusick 	simple_unlock(&mntvnode_slock);
36939397Smckusick }
37039397Smckusick 
37139397Smckusick /*
37249232Smckusick  * Update outstanding I/O count and do wakeup if requested.
37349232Smckusick  */
37468319Scgd void
37549232Smckusick vwakeup(bp)
37649232Smckusick 	register struct buf *bp;
37749232Smckusick {
37849232Smckusick 	register struct vnode *vp;
37949232Smckusick 
38057810Smckusick 	bp->b_flags &= ~B_WRITEINPROG;
38149232Smckusick 	if (vp = bp->b_vp) {
38268319Scgd 		if (--vp->v_numoutput < 0)
38357042Smargo 			panic("vwakeup: neg numoutput");
38449232Smckusick 		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
38549232Smckusick 			if (vp->v_numoutput < 0)
38668319Scgd 				panic("vwakeup: neg numoutput 2");
38749232Smckusick 			vp->v_flag &= ~VBWAIT;
38849232Smckusick 			wakeup((caddr_t)&vp->v_numoutput);
38949232Smckusick 		}
39049232Smckusick 	}
39149232Smckusick }
39249232Smckusick 
39349232Smckusick /*
39449232Smckusick  * Flush out and invalidate all buffers associated with a vnode.
39549232Smckusick  * Called with the underlying object locked.
39649232Smckusick  */
39754442Smckusick int
39857792Smckusick vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
39949232Smckusick 	register struct vnode *vp;
40056459Smargo 	int flags;
40154442Smckusick 	struct ucred *cred;
40254442Smckusick 	struct proc *p;
40357792Smckusick 	int slpflag, slptimeo;
40449232Smckusick {
40549232Smckusick 	register struct buf *bp;
40649232Smckusick 	struct buf *nbp, *blist;
40754442Smckusick 	int s, error;
40849232Smckusick 
40956459Smargo 	if (flags & V_SAVE) {
41054442Smckusick 		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
41154442Smckusick 			return (error);
41265260Smckusick 		if (vp->v_dirtyblkhd.lh_first != NULL)
41354442Smckusick 			panic("vinvalbuf: dirty bufs");
41454442Smckusick 	}
41549232Smckusick 	for (;;) {
41665260Smckusick 		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
41756459Smargo 			while (blist && blist->b_lblkno < 0)
41865260Smckusick 				blist = blist->b_vnbufs.le_next;
41968319Scgd 		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
42056608Smckusick 		    (flags & V_SAVEMETA))
42156459Smargo 			while (blist && blist->b_lblkno < 0)
42265260Smckusick 				blist = blist->b_vnbufs.le_next;
42356459Smargo 		if (!blist)
42449232Smckusick 			break;
42556459Smargo 
42649232Smckusick 		for (bp = blist; bp; bp = nbp) {
42765260Smckusick 			nbp = bp->b_vnbufs.le_next;
42856459Smargo 			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
42956459Smargo 				continue;
43049232Smckusick 			s = splbio();
43149232Smckusick 			if (bp->b_flags & B_BUSY) {
43249232Smckusick 				bp->b_flags |= B_WANTED;
43357792Smckusick 				error = tsleep((caddr_t)bp,
43457792Smckusick 					slpflag | (PRIBIO + 1), "vinvalbuf",
43557792Smckusick 					slptimeo);
43649232Smckusick 				splx(s);
43757792Smckusick 				if (error)
43857792Smckusick 					return (error);
43949232Smckusick 				break;
44049232Smckusick 			}
44149232Smckusick 			bremfree(bp);
44249232Smckusick 			bp->b_flags |= B_BUSY;
44349232Smckusick 			splx(s);
44457792Smckusick 			/*
44557792Smckusick 			 * XXX Since there are no node locks for NFS, I believe
44657792Smckusick 			 * there is a slight chance that a delayed write will
44757792Smckusick 			 * occur while sleeping just above, so check for it.
44857792Smckusick 			 */
44957792Smckusick 			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
45057792Smckusick 				(void) VOP_BWRITE(bp);
45157792Smckusick 				break;
45257792Smckusick 			}
45356459Smargo 			bp->b_flags |= B_INVAL;
45449232Smckusick 			brelse(bp);
45549232Smckusick 		}
45649232Smckusick 	}
45756608Smckusick 	if (!(flags & V_SAVEMETA) &&
45865260Smckusick 	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
45949232Smckusick 		panic("vinvalbuf: flush failed");
46054442Smckusick 	return (0);
46149232Smckusick }
46249232Smckusick 
46349232Smckusick /*
46449232Smckusick  * Associate a buffer with a vnode.
46549232Smckusick  */
46668319Scgd void
46749232Smckusick bgetvp(vp, bp)
46849232Smckusick 	register struct vnode *vp;
46949232Smckusick 	register struct buf *bp;
47049232Smckusick {
47149232Smckusick 
47249232Smckusick 	if (bp->b_vp)
47349232Smckusick 		panic("bgetvp: not free");
47449232Smckusick 	VHOLD(vp);
47549232Smckusick 	bp->b_vp = vp;
47649232Smckusick 	if (vp->v_type == VBLK || vp->v_type == VCHR)
47749232Smckusick 		bp->b_dev = vp->v_rdev;
47849232Smckusick 	else
47949232Smckusick 		bp->b_dev = NODEV;
48049232Smckusick 	/*
48149232Smckusick 	 * Insert onto list for new vnode.
48249232Smckusick 	 */
48356608Smckusick 	bufinsvn(bp, &vp->v_cleanblkhd);
48449232Smckusick }
48549232Smckusick 
48649232Smckusick /*
48749232Smckusick  * Disassociate a buffer from a vnode.
48849232Smckusick  */
48968319Scgd void
49049232Smckusick brelvp(bp)
49149232Smckusick 	register struct buf *bp;
49249232Smckusick {
49349232Smckusick 	struct vnode *vp;
49449232Smckusick 
49549232Smckusick 	if (bp->b_vp == (struct vnode *) 0)
49649232Smckusick 		panic("brelvp: NULL");
49749232Smckusick 	/*
49849232Smckusick 	 * Delete from old vnode list, if on one.
49949232Smckusick 	 */
50065260Smckusick 	if (bp->b_vnbufs.le_next != NOLIST)
50156608Smckusick 		bufremvn(bp);
50249232Smckusick 	vp = bp->b_vp;
50349232Smckusick 	bp->b_vp = (struct vnode *) 0;
50449232Smckusick 	HOLDRELE(vp);
50549232Smckusick }
50649232Smckusick 
50749232Smckusick /*
50849232Smckusick  * Reassign a buffer from one vnode to another.
50949232Smckusick  * Used to assign file specific control information
51049232Smckusick  * (indirect blocks) to the vnode to which they belong.
51149232Smckusick  */
51268319Scgd void
51349232Smckusick reassignbuf(bp, newvp)
51449232Smckusick 	register struct buf *bp;
51549232Smckusick 	register struct vnode *newvp;
51649232Smckusick {
51765260Smckusick 	register struct buflists *listheadp;
51849232Smckusick 
51952655Smckusick 	if (newvp == NULL) {
52052655Smckusick 		printf("reassignbuf: NULL");
52152655Smckusick 		return;
52252655Smckusick 	}
52349232Smckusick 	/*
52449232Smckusick 	 * Delete from old vnode list, if on one.
52549232Smckusick 	 */
52665260Smckusick 	if (bp->b_vnbufs.le_next != NOLIST)
52756608Smckusick 		bufremvn(bp);
52849232Smckusick 	/*
52949232Smckusick 	 * If dirty, put on list of dirty buffers;
53049232Smckusick 	 * otherwise insert onto list of clean buffers.
53149232Smckusick 	 */
53249232Smckusick 	if (bp->b_flags & B_DELWRI)
53349232Smckusick 		listheadp = &newvp->v_dirtyblkhd;
53449232Smckusick 	else
53549232Smckusick 		listheadp = &newvp->v_cleanblkhd;
53656608Smckusick 	bufinsvn(bp, listheadp);
53749232Smckusick }
53849232Smckusick 
53949232Smckusick /*
54039433Smckusick  * Create a vnode for a block device.
54139433Smckusick  * Used for root filesystem, argdev, and swap areas.
54239433Smckusick  * Also used for memory file system special devices.
54339397Smckusick  */
54468319Scgd int
54539433Smckusick bdevvp(dev, vpp)
54639433Smckusick 	dev_t dev;
54739433Smckusick 	struct vnode **vpp;
54839433Smckusick {
54939433Smckusick 	register struct vnode *vp;
55039433Smckusick 	struct vnode *nvp;
55139433Smckusick 	int error;
55239433Smckusick 
55369378Smckusick 	if (dev == NODEV) {
55469378Smckusick 		*vpp = NULLVP;
55569378Smckusick 		return (ENODEV);
55669378Smckusick 	}
55753547Sheideman 	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
55839433Smckusick 	if (error) {
55968319Scgd 		*vpp = NULLVP;
56039433Smckusick 		return (error);
56139433Smckusick 	}
56239433Smckusick 	vp = nvp;
56339433Smckusick 	vp->v_type = VBLK;
56439615Smckusick 	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
56539433Smckusick 		vput(vp);
56639433Smckusick 		vp = nvp;
56739433Smckusick 	}
56839433Smckusick 	*vpp = vp;
56939433Smckusick 	return (0);
57039433Smckusick }
57139433Smckusick 
57239433Smckusick /*
57339433Smckusick  * Check to see if the new vnode represents a special device
57439433Smckusick  * for which we already have a vnode (either because of
57539433Smckusick  * bdevvp() or because of a different vnode representing
57639433Smckusick  * the same block device). If such an alias exists, deallocate
57739509Smckusick  * the existing contents and return the aliased vnode. The
57839433Smckusick  * caller is responsible for filling it with its new contents.
57939433Smckusick  */
58039433Smckusick struct vnode *
58139615Smckusick checkalias(nvp, nvp_rdev, mp)
58239433Smckusick 	register struct vnode *nvp;
58339615Smckusick 	dev_t nvp_rdev;
58439433Smckusick 	struct mount *mp;
58539433Smckusick {
58669408Smckusick 	struct proc *p = curproc;	/* XXX */
58769408Smckusick 	struct vnode *vp;
58839615Smckusick 	struct vnode **vpp;
58939433Smckusick 
59039433Smckusick 	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
59141400Smckusick 		return (NULLVP);
59239615Smckusick 
59339615Smckusick 	vpp = &speclisth[SPECHASH(nvp_rdev)];
59439433Smckusick loop:
59569408Smckusick 	simple_lock(&spechash_slock);
59639615Smckusick 	for (vp = *vpp; vp; vp = vp->v_specnext) {
59739615Smckusick 		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
59839433Smckusick 			continue;
59939615Smckusick 		/*
60039615Smckusick 		 * Alias, but not in use, so flush it out.
60139615Smckusick 		 */
60269408Smckusick 		simple_lock(&vp->v_interlock);
60339809Smckusick 		if (vp->v_usecount == 0) {
60469408Smckusick 			simple_unlock(&spechash_slock);
60569408Smckusick 			vgonel(vp, p);
60639615Smckusick 			goto loop;
60739615Smckusick 		}
60869408Smckusick 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
60969408Smckusick 			simple_unlock(&spechash_slock);
61039633Smckusick 			goto loop;
61169408Smckusick 		}
61239433Smckusick 		break;
61339433Smckusick 	}
61439615Smckusick 	if (vp == NULL || vp->v_tag != VT_NON) {
61539615Smckusick 		MALLOC(nvp->v_specinfo, struct specinfo *,
61639615Smckusick 			sizeof(struct specinfo), M_VNODE, M_WAITOK);
61739615Smckusick 		nvp->v_rdev = nvp_rdev;
61839809Smckusick 		nvp->v_hashchain = vpp;
61939615Smckusick 		nvp->v_specnext = *vpp;
62042152Smckusick 		nvp->v_specflags = 0;
62169408Smckusick 		simple_unlock(&spechash_slock);
62239615Smckusick 		*vpp = nvp;
62369408Smckusick 		if (vp != NULLVP) {
62440640Smckusick 			nvp->v_flag |= VALIASED;
62540640Smckusick 			vp->v_flag |= VALIASED;
62640640Smckusick 			vput(vp);
62740640Smckusick 		}
62841400Smckusick 		return (NULLVP);
62939433Smckusick 	}
63069408Smckusick 	simple_unlock(&spechash_slock);
63169408Smckusick 	VOP_UNLOCK(vp, 0, p);
63269408Smckusick 	simple_lock(&vp->v_interlock);
63369408Smckusick 	vclean(vp, 0, p);
63439433Smckusick 	vp->v_op = nvp->v_op;
63539433Smckusick 	vp->v_tag = nvp->v_tag;
63639433Smckusick 	nvp->v_type = VNON;
63739433Smckusick 	insmntque(vp, mp);
63839433Smckusick 	return (vp);
63939433Smckusick }
64039433Smckusick 
64139433Smckusick /*
64239433Smckusick  * Grab a particular vnode from the free list, increment its
64339433Smckusick  * reference count and lock it. The vnode lock bit is set the
64439433Smckusick  * vnode is being eliminated in vgone. The process is awakened
64539433Smckusick  * when the transition is completed, and an error returned to
64639433Smckusick  * indicate that the vnode is no longer usable (possibly having
64739433Smckusick  * been changed to a new file system type).
64839433Smckusick  */
64968319Scgd int
65069408Smckusick vget(vp, flags, p)
65169408Smckusick 	struct vnode *vp;
65269408Smckusick 	int flags;
65369408Smckusick 	struct proc *p;
65439397Smckusick {
65569546Smckusick 	int error;
65639397Smckusick 
65766897Smckusick 	/*
65866897Smckusick 	 * If the vnode is in the process of being cleaned out for
65966897Smckusick 	 * another use, we wait for the cleaning to finish and then
66069408Smckusick 	 * return failure. Cleaning is determined by checking that
66169408Smckusick 	 * the VXLOCK flag is set.
66266897Smckusick 	 */
66369408Smckusick 	if ((flags & LK_INTERLOCK) == 0)
66469408Smckusick 		simple_lock(&vp->v_interlock);
66569408Smckusick 	if (vp->v_flag & VXLOCK) {
66639433Smckusick 		vp->v_flag |= VXWANT;
66769408Smckusick 		simple_unlock(&vp->v_interlock);
66868319Scgd 		tsleep((caddr_t)vp, PINOD, "vget", 0);
66969408Smckusick 		return (ENOENT);
67039433Smckusick 	}
67169408Smckusick 	if (vp->v_usecount == 0) {
67269408Smckusick 		simple_lock(&vnode_free_list_slock);
67365260Smckusick 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
67469408Smckusick 		simple_unlock(&vnode_free_list_slock);
67569408Smckusick 	}
67659450Smckusick 	vp->v_usecount++;
67769546Smckusick 	if (flags & LK_TYPE_MASK) {
67869546Smckusick 		if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
67969546Smckusick 			vrele(vp);
68069546Smckusick 		return (error);
68169546Smckusick 	}
68269408Smckusick 	simple_unlock(&vp->v_interlock);
68339433Smckusick 	return (0);
68439397Smckusick }
68539397Smckusick 
68639397Smckusick /*
68769408Smckusick  * Stubs to use when there is no locking to be done on the underlying object.
68869408Smckusick  *
68969408Smckusick  * Getting a lock just clears the interlock if necessary.
69039397Smckusick  */
69169408Smckusick int
69269408Smckusick vop_nolock(ap)
69369408Smckusick 	struct vop_lock_args /* {
69469408Smckusick 		struct vnode *a_vp;
69569408Smckusick 		int a_flags;
69669408Smckusick 		struct proc *a_p;
69769408Smckusick 	} */ *ap;
69869408Smckusick {
69969408Smckusick 	struct vnode *vp = ap->a_vp;
70069408Smckusick 
70169408Smckusick 	/*
70269408Smckusick 	 * Since we are not using the lock manager, we must clear
70369408Smckusick 	 * the interlock here.
70469408Smckusick 	 */
70569408Smckusick 	if (ap->a_flags & LK_INTERLOCK)
70669408Smckusick 		simple_unlock(&vp->v_interlock);
70769408Smckusick 	return (0);
70869408Smckusick }
70969408Smckusick 
/*
 * Unlock stub: there is nothing to release, so the arguments are
 * ignored and success (0) is reported.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	int error = 0;

	return (error);
}
72469408Smckusick 
/*
 * Lock-state stub: nothing is ever locked, so the answer is always
 * 0 (not locked) whatever the argument.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	int locked = 0;

	return (locked);
}
73769408Smckusick 
73869408Smckusick /*
73969408Smckusick  * Vnode reference.
74069408Smckusick  */
74168319Scgd void
74268319Scgd vref(vp)
74339397Smckusick 	struct vnode *vp;
74439397Smckusick {
74539397Smckusick 
74669408Smckusick 	simple_lock(&vp->v_interlock);
74759450Smckusick 	if (vp->v_usecount <= 0)
74859450Smckusick 		panic("vref used where vget required");
74939809Smckusick 	vp->v_usecount++;
75069408Smckusick 	simple_unlock(&vp->v_interlock);
75139397Smckusick }
75239397Smckusick 
75339397Smckusick /*
75439397Smckusick  * vput(), just unlock and vrele()
75539397Smckusick  */
75668319Scgd void
75768319Scgd vput(vp)
75869408Smckusick 	struct vnode *vp;
75939397Smckusick {
76069408Smckusick 	struct proc *p = curproc;	/* XXX */
76152416Storek 
76269523Spendry #ifdef DIGANOSTIC
76369523Spendry 	if (vp == NULL)
76469523Spendry 		panic("vput: null vp");
76569523Spendry #endif
76669523Spendry 	simple_lock(&vp->v_interlock);
76769523Spendry 	vp->v_usecount--;
76869523Spendry 	if (vp->v_usecount > 0) {
76969523Spendry 		simple_unlock(&vp->v_interlock);
77069523Spendry 		VOP_UNLOCK(vp, 0, p);
77169523Spendry 		return;
77269523Spendry 	}
77369523Spendry #ifdef DIAGNOSTIC
77469523Spendry 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
77569523Spendry 		vprint("vput: bad ref count", vp);
77669523Spendry 		panic("vput: ref cnt");
77769523Spendry 	}
77869523Spendry #endif
77969523Spendry 	/*
78069523Spendry 	 * insert at tail of LRU list
78169523Spendry 	 */
78269523Spendry 	simple_lock(&vnode_free_list_slock);
78369523Spendry 	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
78469523Spendry 	simple_unlock(&vnode_free_list_slock);
78569523Spendry 	simple_unlock(&vp->v_interlock);
78669523Spendry 	VOP_INACTIVE(vp, p);
78739397Smckusick }
78839397Smckusick 
78939397Smckusick /*
79039397Smckusick  * Vnode release.
79139397Smckusick  * If count drops to zero, call inactive routine and return to freelist.
79239397Smckusick  */
79368319Scgd void
79468319Scgd vrele(vp)
79569408Smckusick 	struct vnode *vp;
79639397Smckusick {
79769408Smckusick 	struct proc *p = curproc;	/* XXX */
79839397Smckusick 
79950109Smckusick #ifdef DIAGNOSTIC
80039397Smckusick 	if (vp == NULL)
80139433Smckusick 		panic("vrele: null vp");
80250109Smckusick #endif
80369408Smckusick 	simple_lock(&vp->v_interlock);
80439809Smckusick 	vp->v_usecount--;
80569408Smckusick 	if (vp->v_usecount > 0) {
80669408Smckusick 		simple_unlock(&vp->v_interlock);
80739397Smckusick 		return;
80869408Smckusick 	}
80950109Smckusick #ifdef DIAGNOSTIC
81069408Smckusick 	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
81150109Smckusick 		vprint("vrele: bad ref count", vp);
81250109Smckusick 		panic("vrele: ref cnt");
81350109Smckusick 	}
81450109Smckusick #endif
81555468Smckusick 	/*
81655468Smckusick 	 * insert at tail of LRU list
81755468Smckusick 	 */
81869408Smckusick 	simple_lock(&vnode_free_list_slock);
81965260Smckusick 	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
82069408Smckusick 	simple_unlock(&vnode_free_list_slock);
82169408Smckusick 	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
82269408Smckusick 		VOP_INACTIVE(vp, p);
82339397Smckusick }
82439433Smckusick 
82569408Smckusick #ifdef DIAGNOSTIC
82639433Smckusick /*
82739809Smckusick  * Page or buffer structure gets a reference.
82839809Smckusick  */
82968319Scgd void
83068319Scgd vhold(vp)
83139809Smckusick 	register struct vnode *vp;
83239809Smckusick {
83339809Smckusick 
83469408Smckusick 	simple_lock(&vp->v_interlock);
83539809Smckusick 	vp->v_holdcnt++;
83669408Smckusick 	simple_unlock(&vp->v_interlock);
83739809Smckusick }
83839809Smckusick 
83939809Smckusick /*
84039809Smckusick  * Page or buffer structure frees a reference.
84139809Smckusick  */
84268319Scgd void
84368319Scgd holdrele(vp)
84439809Smckusick 	register struct vnode *vp;
84539809Smckusick {
84639809Smckusick 
84769408Smckusick 	simple_lock(&vp->v_interlock);
84839809Smckusick 	if (vp->v_holdcnt <= 0)
84939809Smckusick 		panic("holdrele: holdcnt");
85039809Smckusick 	vp->v_holdcnt--;
85169408Smckusick 	simple_unlock(&vp->v_interlock);
85239809Smckusick }
85369408Smckusick #endif /* DIAGNOSTIC */
85439809Smckusick 
85539809Smckusick /*
85639509Smckusick  * Remove any vnodes in the vnode table belonging to mount point mp.
85739509Smckusick  *
85839509Smckusick  * If MNT_NOFORCE is specified, there should not be any active ones,
85939509Smckusick  * return error if any are found (nb: this is a user error, not a
86039509Smckusick  * system error). If MNT_FORCE is specified, detach any active vnodes
86139509Smckusick  * that are found.
86239509Smckusick  */
86365679Shibler #ifdef DIAGNOSTIC
86460930Smckusick int busyprt = 0;	/* print out busy vnodes */
86560930Smckusick struct ctldebug debug1 = { "busyprt", &busyprt };
86665679Shibler #endif
86739509Smckusick 
86868319Scgd int
86939509Smckusick vflush(mp, skipvp, flags)
87039509Smckusick 	struct mount *mp;
87139509Smckusick 	struct vnode *skipvp;
87239509Smckusick 	int flags;
87339509Smckusick {
87469408Smckusick 	struct proc *p = curproc;	/* XXX */
87569408Smckusick 	struct vnode *vp, *nvp;
87639509Smckusick 	int busy = 0;
87739509Smckusick 
87869408Smckusick 	simple_lock(&mntvnode_slock);
87941421Smckusick loop:
88065260Smckusick 	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
88141421Smckusick 		if (vp->v_mount != mp)
88241421Smckusick 			goto loop;
88365260Smckusick 		nvp = vp->v_mntvnodes.le_next;
88439509Smckusick 		/*
88539509Smckusick 		 * Skip over a selected vnode.
88639509Smckusick 		 */
88739509Smckusick 		if (vp == skipvp)
88839509Smckusick 			continue;
88969408Smckusick 
89069408Smckusick 		simple_lock(&vp->v_interlock);
89139509Smckusick 		/*
89241300Smckusick 		 * Skip over a vnodes marked VSYSTEM.
89341300Smckusick 		 */
89469408Smckusick 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
89569408Smckusick 			simple_unlock(&vp->v_interlock);
89641300Smckusick 			continue;
89769408Smckusick 		}
89841300Smckusick 		/*
89957040Smckusick 		 * If WRITECLOSE is set, only flush out regular file
90057040Smckusick 		 * vnodes open for writing.
90157040Smckusick 		 */
90257040Smckusick 		if ((flags & WRITECLOSE) &&
90369408Smckusick 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
90469408Smckusick 			simple_unlock(&vp->v_interlock);
90557040Smckusick 			continue;
90669408Smckusick 		}
90757040Smckusick 		/*
90839809Smckusick 		 * With v_usecount == 0, all we need to do is clear
90939509Smckusick 		 * out the vnode data structures and we are done.
91039509Smckusick 		 */
91139809Smckusick 		if (vp->v_usecount == 0) {
91269408Smckusick 			simple_unlock(&mntvnode_slock);
91369408Smckusick 			vgonel(vp, p);
91469408Smckusick 			simple_lock(&mntvnode_slock);
91539509Smckusick 			continue;
91639509Smckusick 		}
91739509Smckusick 		/*
91857040Smckusick 		 * If FORCECLOSE is set, forcibly close the vnode.
91939509Smckusick 		 * For block or character devices, revert to an
92039509Smckusick 		 * anonymous device. For all other files, just kill them.
92139509Smckusick 		 */
92241300Smckusick 		if (flags & FORCECLOSE) {
92369408Smckusick 			simple_unlock(&mntvnode_slock);
92439509Smckusick 			if (vp->v_type != VBLK && vp->v_type != VCHR) {
92569408Smckusick 				vgonel(vp, p);
92639509Smckusick 			} else {
92769408Smckusick 				vclean(vp, 0, p);
92853547Sheideman 				vp->v_op = spec_vnodeop_p;
92939509Smckusick 				insmntque(vp, (struct mount *)0);
93039509Smckusick 			}
93169408Smckusick 			simple_lock(&mntvnode_slock);
93239509Smckusick 			continue;
93339509Smckusick 		}
93465679Shibler #ifdef DIAGNOSTIC
93539509Smckusick 		if (busyprt)
93639667Smckusick 			vprint("vflush: busy vnode", vp);
93765679Shibler #endif
93869408Smckusick 		simple_unlock(&vp->v_interlock);
93939509Smckusick 		busy++;
94039509Smckusick 	}
94169408Smckusick 	simple_unlock(&mntvnode_slock);
94239509Smckusick 	if (busy)
94339509Smckusick 		return (EBUSY);
94439509Smckusick 	return (0);
94539509Smckusick }
94639509Smckusick 
94739509Smckusick /*
94839433Smckusick  * Disassociate the underlying file system from a vnode.
94969408Smckusick  * The vnode interlock is held on entry.
95039433Smckusick  */
95169408Smckusick static void
95269408Smckusick vclean(vp, flags, p)
95369408Smckusick 	struct vnode *vp;
95445118Smckusick 	int flags;
95569408Smckusick 	struct proc *p;
95639433Smckusick {
95739484Smckusick 	int active;
95839433Smckusick 
95939484Smckusick 	/*
96039484Smckusick 	 * Check to see if the vnode is in use.
96139667Smckusick 	 * If so we have to reference it before we clean it out
96239667Smckusick 	 * so that its count cannot fall to zero and generate a
96339667Smckusick 	 * race against ourselves to recycle it.
96439484Smckusick 	 */
96539809Smckusick 	if (active = vp->v_usecount)
96669408Smckusick 		vp->v_usecount++;
96739484Smckusick 	/*
96869408Smckusick 	 * Prevent the vnode from being recycled or
96969408Smckusick 	 * brought into use while we clean it out.
97069408Smckusick 	 */
97169408Smckusick 	if (vp->v_flag & VXLOCK)
97269408Smckusick 		panic("vclean: deadlock");
97369408Smckusick 	vp->v_flag |= VXLOCK;
97469408Smckusick 	/*
97556805Smckusick 	 * Even if the count is zero, the VOP_INACTIVE routine may still
97656805Smckusick 	 * have the object locked while it cleans it out. The VOP_LOCK
97756805Smckusick 	 * ensures that the VOP_INACTIVE routine is done with its work.
97856805Smckusick 	 * For active vnodes, it ensures that no other activity can
97956805Smckusick 	 * occur while the underlying object is being cleaned out.
98056805Smckusick 	 */
98169408Smckusick 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
98256805Smckusick 	/*
98356805Smckusick 	 * Clean out any buffers associated with the vnode.
98439667Smckusick 	 */
98541300Smckusick 	if (flags & DOCLOSE)
98669559Spendry 		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
98739667Smckusick 	/*
98856805Smckusick 	 * If purging an active vnode, it must be closed and
98969408Smckusick 	 * deactivated before being reclaimed. Note that the
99069408Smckusick 	 * VOP_INACTIVE will unlock the vnode.
99139433Smckusick 	 */
99239484Smckusick 	if (active) {
99356805Smckusick 		if (flags & DOCLOSE)
99469408Smckusick 			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
99569408Smckusick 		VOP_INACTIVE(vp, p);
99669408Smckusick 	} else {
99769408Smckusick 		/*
99869408Smckusick 		 * Any other processes trying to obtain this lock must first
99969408Smckusick 		 * wait for VXLOCK to clear, then call the new lock operation.
100069408Smckusick 		 */
100169408Smckusick 		VOP_UNLOCK(vp, 0, p);
100239433Smckusick 	}
100339433Smckusick 	/*
100439433Smckusick 	 * Reclaim the vnode.
100539433Smckusick 	 */
100669408Smckusick 	if (VOP_RECLAIM(vp, p))
100739433Smckusick 		panic("vclean: cannot reclaim");
100839484Smckusick 	if (active)
100939484Smckusick 		vrele(vp);
101068784Smckusick 	cache_purge(vp);
101153580Sheideman 
101239433Smckusick 	/*
101356805Smckusick 	 * Done with purge, notify sleepers of the grim news.
101439433Smckusick 	 */
101556805Smckusick 	vp->v_op = dead_vnodeop_p;
101656805Smckusick 	vp->v_tag = VT_NON;
101739433Smckusick 	vp->v_flag &= ~VXLOCK;
101839433Smckusick 	if (vp->v_flag & VXWANT) {
101939433Smckusick 		vp->v_flag &= ~VXWANT;
102039433Smckusick 		wakeup((caddr_t)vp);
102139433Smckusick 	}
102239433Smckusick }
102339433Smckusick 
102439433Smckusick /*
102539633Smckusick  * Eliminate all activity associated with  the requested vnode
102639633Smckusick  * and with all vnodes aliased to the requested vnode.
102739633Smckusick  */
102868424Smckusick int
102968424Smckusick vop_revoke(ap)
103068424Smckusick 	struct vop_revoke_args /* {
103168424Smckusick 		struct vnode *a_vp;
103268424Smckusick 		int a_flags;
103368424Smckusick 	} */ *ap;
103439633Smckusick {
103569408Smckusick 	struct vnode *vp, *vq;
103669408Smckusick 	struct proc *p = curproc;	/* XXX */
103739633Smckusick 
103869408Smckusick #ifdef DIAGNOSTIC
103969408Smckusick 	if ((ap->a_flags & REVOKEALL) == 0)
104069408Smckusick 		panic("vop_revoke");
104169408Smckusick #endif
104269408Smckusick 
104368424Smckusick 	vp = ap->a_vp;
104469408Smckusick 	simple_lock(&vp->v_interlock);
104569408Smckusick 
104669408Smckusick 	if (vp->v_flag & VALIASED) {
104740665Smckusick 		/*
104840665Smckusick 		 * If a vgone (or vclean) is already in progress,
104940665Smckusick 		 * wait until it is done and return.
105040665Smckusick 		 */
105140665Smckusick 		if (vp->v_flag & VXLOCK) {
105240665Smckusick 			vp->v_flag |= VXWANT;
105369408Smckusick 			simple_unlock(&vp->v_interlock);
105468424Smckusick 			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
105568424Smckusick 			return (0);
105639633Smckusick 		}
105740665Smckusick 		/*
105840665Smckusick 		 * Ensure that vp will not be vgone'd while we
105940665Smckusick 		 * are eliminating its aliases.
106040665Smckusick 		 */
106140665Smckusick 		vp->v_flag |= VXLOCK;
106269408Smckusick 		simple_unlock(&vp->v_interlock);
106340665Smckusick 		while (vp->v_flag & VALIASED) {
106469408Smckusick 			simple_lock(&spechash_slock);
106540665Smckusick 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
106640665Smckusick 				if (vq->v_rdev != vp->v_rdev ||
106740665Smckusick 				    vq->v_type != vp->v_type || vp == vq)
106840665Smckusick 					continue;
106969408Smckusick 				simple_unlock(&spechash_slock);
107040665Smckusick 				vgone(vq);
107140665Smckusick 				break;
107240665Smckusick 			}
107369408Smckusick 			if (vq == NULLVP)
107469408Smckusick 				simple_unlock(&spechash_slock);
107540665Smckusick 		}
107640665Smckusick 		/*
107740665Smckusick 		 * Remove the lock so that vgone below will
107840665Smckusick 		 * really eliminate the vnode after which time
107940665Smckusick 		 * vgone will awaken any sleepers.
108040665Smckusick 		 */
108169408Smckusick 		simple_lock(&vp->v_interlock);
108240665Smckusick 		vp->v_flag &= ~VXLOCK;
108339633Smckusick 	}
108469408Smckusick 	vgonel(vp, p);
108568424Smckusick 	return (0);
108639633Smckusick }
108739633Smckusick 
108839633Smckusick /*
108969408Smckusick  * Recycle an unused vnode to the front of the free list.
109069408Smckusick  * Release the passed interlock if the vnode will be recycled.
109169408Smckusick  */
109269408Smckusick int
109369408Smckusick vrecycle(vp, inter_lkp, p)
109469408Smckusick 	struct vnode *vp;
109569408Smckusick 	struct simplelock *inter_lkp;
109669408Smckusick 	struct proc *p;
109769408Smckusick {
109869408Smckusick 
109969408Smckusick 	simple_lock(&vp->v_interlock);
110069408Smckusick 	if (vp->v_usecount == 0) {
110169408Smckusick 		if (inter_lkp)
110269408Smckusick 			simple_unlock(inter_lkp);
110369408Smckusick 		vgonel(vp, p);
110469408Smckusick 		return (1);
110569408Smckusick 	}
110669408Smckusick 	simple_unlock(&vp->v_interlock);
110769408Smckusick 	return (0);
110869408Smckusick }
110969408Smckusick 
111069408Smckusick /*
111139433Smckusick  * Eliminate all activity associated with a vnode
111239433Smckusick  * in preparation for reuse.
111339433Smckusick  */
111468319Scgd void
111568319Scgd vgone(vp)
111669408Smckusick 	struct vnode *vp;
111739433Smckusick {
111869408Smckusick 	struct proc *p = curproc;	/* XXX */
111969408Smckusick 
112069408Smckusick 	simple_lock(&vp->v_interlock);
112169408Smckusick 	vgonel(vp, p);
112269408Smckusick }
112369408Smckusick 
112469408Smckusick /*
112569408Smckusick  * vgone, with the vp interlock held.
112669408Smckusick  */
112769408Smckusick void
112869408Smckusick vgonel(vp, p)
112969408Smckusick 	struct vnode *vp;
113069408Smckusick 	struct proc *p;
113169408Smckusick {
113269408Smckusick 	struct vnode *vq;
113339615Smckusick 	struct vnode *vx;
113439433Smckusick 
113539433Smckusick 	/*
113640548Smckusick 	 * If a vgone (or vclean) is already in progress,
113740548Smckusick 	 * wait until it is done and return.
113840548Smckusick 	 */
113940548Smckusick 	if (vp->v_flag & VXLOCK) {
114040548Smckusick 		vp->v_flag |= VXWANT;
114169408Smckusick 		simple_unlock(&vp->v_interlock);
114268319Scgd 		tsleep((caddr_t)vp, PINOD, "vgone", 0);
114340548Smckusick 		return;
114440548Smckusick 	}
114540548Smckusick 	/*
114639433Smckusick 	 * Clean out the filesystem specific data.
114739433Smckusick 	 */
114869408Smckusick 	vclean(vp, DOCLOSE, p);
114939433Smckusick 	/*
115039433Smckusick 	 * Delete from old mount point vnode list, if on one.
115139433Smckusick 	 */
115269408Smckusick 	if (vp->v_mount != NULL)
115369408Smckusick 		insmntque(vp, (struct mount *)0);
115439433Smckusick 	/*
115568592Smckusick 	 * If special device, remove it from special device alias list
115668592Smckusick 	 * if it is on one.
115739433Smckusick 	 */
115868592Smckusick 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
115969408Smckusick 		simple_lock(&spechash_slock);
116039809Smckusick 		if (*vp->v_hashchain == vp) {
116139809Smckusick 			*vp->v_hashchain = vp->v_specnext;
116239433Smckusick 		} else {
116339809Smckusick 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
116439615Smckusick 				if (vq->v_specnext != vp)
116539433Smckusick 					continue;
116639615Smckusick 				vq->v_specnext = vp->v_specnext;
116739433Smckusick 				break;
116839433Smckusick 			}
116939615Smckusick 			if (vq == NULL)
117039433Smckusick 				panic("missing bdev");
117139433Smckusick 		}
117239615Smckusick 		if (vp->v_flag & VALIASED) {
117352416Storek 			vx = NULL;
117439809Smckusick 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
117540108Smckusick 				if (vq->v_rdev != vp->v_rdev ||
117640108Smckusick 				    vq->v_type != vp->v_type)
117739615Smckusick 					continue;
117852416Storek 				if (vx)
117952416Storek 					break;
118039615Smckusick 				vx = vq;
118139615Smckusick 			}
118252416Storek 			if (vx == NULL)
118339615Smckusick 				panic("missing alias");
118452416Storek 			if (vq == NULL)
118539615Smckusick 				vx->v_flag &= ~VALIASED;
118639615Smckusick 			vp->v_flag &= ~VALIASED;
118739615Smckusick 		}
118869408Smckusick 		simple_unlock(&spechash_slock);
118939615Smckusick 		FREE(vp->v_specinfo, M_VNODE);
119039615Smckusick 		vp->v_specinfo = NULL;
119139433Smckusick 	}
119239433Smckusick 	/*
119356932Smckusick 	 * If it is on the freelist and not already at the head,
119465505Smckusick 	 * move it to the head of the list. The test of the back
119565505Smckusick 	 * pointer and the reference count of zero is because
119665505Smckusick 	 * it will be removed from the free list by getnewvnode,
119765505Smckusick 	 * but will not have its reference count incremented until
119865505Smckusick 	 * after calling vgone. If the reference count were
119965505Smckusick 	 * incremented first, vgone would (incorrectly) try to
120065505Smckusick 	 * close the previous instance of the underlying object.
120165505Smckusick 	 * So, the back pointer is explicitly set to `0xdeadb' in
120265505Smckusick 	 * getnewvnode after removing it from the freelist to ensure
120365505Smckusick 	 * that we do not try to move it here.
120439433Smckusick 	 */
120569408Smckusick 	if (vp->v_usecount == 0) {
120669408Smckusick 		simple_lock(&vnode_free_list_slock);
120769408Smckusick 		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
120869408Smckusick 		    vnode_free_list.tqh_first != vp) {
120969408Smckusick 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
121069408Smckusick 			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
121169408Smckusick 		}
121269408Smckusick 		simple_unlock(&vnode_free_list_slock);
121339433Smckusick 	}
121439484Smckusick 	vp->v_type = VBAD;
121539433Smckusick }
121639633Smckusick 
121739633Smckusick /*
121839821Smckusick  * Lookup a vnode by device number.
121939821Smckusick  */
122068319Scgd int
122139821Smckusick vfinddev(dev, type, vpp)
122239821Smckusick 	dev_t dev;
122339821Smckusick 	enum vtype type;
122439821Smckusick 	struct vnode **vpp;
122539821Smckusick {
122669408Smckusick 	struct vnode *vp;
122769408Smckusick 	int rc = 0;
122839821Smckusick 
122969408Smckusick 	simple_lock(&spechash_slock);
123039821Smckusick 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
123139821Smckusick 		if (dev != vp->v_rdev || type != vp->v_type)
123239821Smckusick 			continue;
123339821Smckusick 		*vpp = vp;
123469408Smckusick 		rc = 1;
123569408Smckusick 		break;
123639821Smckusick 	}
123769408Smckusick 	simple_unlock(&spechash_slock);
123869408Smckusick 	return (rc);
123939821Smckusick }
124039821Smckusick 
124139821Smckusick /*
124239633Smckusick  * Calculate the total number of references to a special device.
124339633Smckusick  */
124468319Scgd int
124539633Smckusick vcount(vp)
124669408Smckusick 	struct vnode *vp;
124739633Smckusick {
124869408Smckusick 	struct vnode *vq, *vnext;
124939633Smckusick 	int count;
125039633Smckusick 
125166742Smckusick loop:
125239633Smckusick 	if ((vp->v_flag & VALIASED) == 0)
125339809Smckusick 		return (vp->v_usecount);
125469408Smckusick 	simple_lock(&spechash_slock);
125566742Smckusick 	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
125666742Smckusick 		vnext = vq->v_specnext;
125740108Smckusick 		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
125839633Smckusick 			continue;
125939633Smckusick 		/*
126039633Smckusick 		 * Alias, but not in use, so flush it out.
126139633Smckusick 		 */
126266742Smckusick 		if (vq->v_usecount == 0 && vq != vp) {
126369408Smckusick 			simple_unlock(&spechash_slock);
126439633Smckusick 			vgone(vq);
126539633Smckusick 			goto loop;
126639633Smckusick 		}
126739809Smckusick 		count += vq->v_usecount;
126839633Smckusick 	}
126969408Smckusick 	simple_unlock(&spechash_slock);
127039633Smckusick 	return (count);
127139633Smckusick }
127239667Smckusick 
127339667Smckusick /*
127439667Smckusick  * Print out a description of a vnode.
127539667Smckusick  */
127639667Smckusick static char *typename[] =
127740286Smckusick    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
127839667Smckusick 
127968171Scgd void
128039667Smckusick vprint(label, vp)
128139667Smckusick 	char *label;
128239667Smckusick 	register struct vnode *vp;
128339667Smckusick {
128439913Smckusick 	char buf[64];
128539667Smckusick 
128639667Smckusick 	if (label != NULL)
128739667Smckusick 		printf("%s: ", label);
128850109Smckusick 	printf("type %s, usecount %d, writecount %d, refcount %d,",
128950109Smckusick 		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
129050109Smckusick 		vp->v_holdcnt);
129139913Smckusick 	buf[0] = '\0';
129239913Smckusick 	if (vp->v_flag & VROOT)
129339913Smckusick 		strcat(buf, "|VROOT");
129439913Smckusick 	if (vp->v_flag & VTEXT)
129539913Smckusick 		strcat(buf, "|VTEXT");
129641300Smckusick 	if (vp->v_flag & VSYSTEM)
129741300Smckusick 		strcat(buf, "|VSYSTEM");
129841300Smckusick 	if (vp->v_flag & VXLOCK)
129941300Smckusick 		strcat(buf, "|VXLOCK");
130041300Smckusick 	if (vp->v_flag & VXWANT)
130141300Smckusick 		strcat(buf, "|VXWANT");
130241300Smckusick 	if (vp->v_flag & VBWAIT)
130341300Smckusick 		strcat(buf, "|VBWAIT");
130439913Smckusick 	if (vp->v_flag & VALIASED)
130539913Smckusick 		strcat(buf, "|VALIASED");
130639913Smckusick 	if (buf[0] != '\0')
130739913Smckusick 		printf(" flags (%s)", &buf[1]);
130865260Smckusick 	if (vp->v_data == NULL) {
130965260Smckusick 		printf("\n");
131065260Smckusick 	} else {
131165260Smckusick 		printf("\n\t");
131265260Smckusick 		VOP_PRINT(vp);
131365260Smckusick 	}
131439667Smckusick }
131541110Smarc 
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 *
 * Walks the mount list; mount points that cannot be busied without
 * waiting are skipped.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		vp = mp->mnt_vnodelist.lh_first;
		while (vp != NULL) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
			vp = vp->v_mntvnodes.le_next;
		}
		/*
		 * Retake the mount list lock before reading the next
		 * pointer (presumably a successful vfs_busy released
		 * it), then let go of this mount point.
		 */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif
134849691Smckusick 
134968659Smckusick /*
135068659Smckusick  * Top level filesystem related information gathering.
135168659Smckusick  */
135268659Smckusick int
135368659Smckusick vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
135468659Smckusick 	int *name;
135568659Smckusick 	u_int namelen;
135668659Smckusick 	void *oldp;
135768659Smckusick 	size_t *oldlenp;
135868659Smckusick 	void *newp;
135968659Smckusick 	size_t newlen;
136068659Smckusick 	struct proc *p;
136168659Smckusick {
136268659Smckusick 	struct ctldebug *cdp;
136368659Smckusick 	struct vfsconf *vfsp;
136468659Smckusick 
136568659Smckusick 	/* all sysctl names at this level are at least name and field */
136668659Smckusick 	if (namelen < 2)
136768659Smckusick 		return (ENOTDIR);		/* overloaded */
136868659Smckusick 	if (name[0] != VFS_GENERIC) {
136968659Smckusick 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
137068659Smckusick 			if (vfsp->vfc_typenum == name[0])
137168659Smckusick 				break;
137268659Smckusick 		if (vfsp == NULL)
137368659Smckusick 			return (EOPNOTSUPP);
137468659Smckusick 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
137568659Smckusick 		    oldp, oldlenp, newp, newlen, p));
137668659Smckusick 	}
137768659Smckusick 	switch (name[1]) {
137868659Smckusick 	case VFS_MAXTYPENUM:
137968659Smckusick 		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
138068659Smckusick 	case VFS_CONF:
138168659Smckusick 		if (namelen < 3)
138268659Smckusick 			return (ENOTDIR);	/* overloaded */
138368659Smckusick 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
138468659Smckusick 			if (vfsp->vfc_typenum == name[2])
138568659Smckusick 				break;
138668659Smckusick 		if (vfsp == NULL)
138768659Smckusick 			return (EOPNOTSUPP);
138868659Smckusick 		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
138968659Smckusick 		    sizeof(struct vfsconf)));
139068659Smckusick 	}
139168659Smckusick 	return (EOPNOTSUPP);
139268659Smckusick }
139368659Smckusick 
139441110Smarc int kinfo_vdebug = 1;
139541110Smarc int kinfo_vgetfailed;
139641110Smarc #define KINFO_VNODESLOP	10
139741110Smarc /*
139857841Smckusick  * Dump vnode list (via sysctl).
139941110Smarc  * Copyout address of vnode followed by vnode.
140041110Smarc  */
140145118Smckusick /* ARGSUSED */
140268319Scgd int
1403*69578Smckusick sysctl_vnode(where, sizep, p)
140441110Smarc 	char *where;
140558465Sbostic 	size_t *sizep;
1406*69578Smckusick 	struct proc *p;
140741110Smarc {
1408*69578Smckusick 	struct mount *mp, *nmp;
140969408Smckusick 	struct vnode *nvp, *vp;
1410*69578Smckusick 	char *bp = where, *savebp;
141153818Smckusick 	char *ewhere;
141241110Smarc 	int error;
141341110Smarc 
141441110Smarc #define VPTRSZ	sizeof (struct vnode *)
141541110Smarc #define VNODESZ	sizeof (struct vnode)
141641110Smarc 	if (where == NULL) {
141757841Smckusick 		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
141841110Smarc 		return (0);
141941110Smarc 	}
142057841Smckusick 	ewhere = where + *sizep;
142141110Smarc 
1422*69578Smckusick 	simple_lock(&mountlist_slock);
142369325Smckusick 	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1424*69578Smckusick 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
1425*69578Smckusick 			nmp = mp->mnt_list.cqe_next;
142641300Smckusick 			continue;
1427*69578Smckusick 		}
142841110Smarc 		savebp = bp;
142941110Smarc again:
143069408Smckusick 		simple_lock(&mntvnode_slock);
143165260Smckusick 		for (vp = mp->mnt_vnodelist.lh_first;
143265260Smckusick 		     vp != NULL;
143369408Smckusick 		     vp = nvp) {
143441422Smckusick 			/*
143541422Smckusick 			 * Check that the vp is still associated with
143641422Smckusick 			 * this filesystem.  RACE: could have been
143741422Smckusick 			 * recycled onto the same filesystem.
143841422Smckusick 			 */
143941421Smckusick 			if (vp->v_mount != mp) {
144069408Smckusick 				simple_unlock(&mntvnode_slock);
144141421Smckusick 				if (kinfo_vdebug)
144241421Smckusick 					printf("kinfo: vp changed\n");
144341421Smckusick 				bp = savebp;
144441421Smckusick 				goto again;
144541421Smckusick 			}
144669408Smckusick 			nvp = vp->v_mntvnodes.le_next;
144757841Smckusick 			if (bp + VPTRSZ + VNODESZ > ewhere) {
144869408Smckusick 				simple_unlock(&mntvnode_slock);
144957841Smckusick 				*sizep = bp - where;
145057841Smckusick 				return (ENOMEM);
145157841Smckusick 			}
145269408Smckusick 			simple_unlock(&mntvnode_slock);
145357841Smckusick 			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
145457841Smckusick 			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
145541110Smarc 				return (error);
145641110Smarc 			bp += VPTRSZ + VNODESZ;
145769408Smckusick 			simple_lock(&mntvnode_slock);
145841110Smarc 		}
145969408Smckusick 		simple_unlock(&mntvnode_slock);
1460*69578Smckusick 		simple_lock(&mountlist_slock);
1461*69578Smckusick 		nmp = mp->mnt_list.cqe_next;
1462*69578Smckusick 		vfs_unbusy(mp, p);
146365260Smckusick 	}
1464*69578Smckusick 	simple_unlock(&mountlist_slock);
146541110Smarc 
146657841Smckusick 	*sizep = bp - where;
146741110Smarc 	return (0);
146841110Smarc }
146965679Shibler 
147065679Shibler /*
147165679Shibler  * Check to see if a filesystem is mounted on a block device.
147265679Shibler  */
147365679Shibler int
147465679Shibler vfs_mountedon(vp)
147569408Smckusick 	struct vnode *vp;
147665679Shibler {
147769408Smckusick 	struct vnode *vq;
147869408Smckusick 	int error = 0;
147965679Shibler 
148065679Shibler 	if (vp->v_specflags & SI_MOUNTEDON)
148165679Shibler 		return (EBUSY);
148265679Shibler 	if (vp->v_flag & VALIASED) {
148369408Smckusick 		simple_lock(&spechash_slock);
148465679Shibler 		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
148565679Shibler 			if (vq->v_rdev != vp->v_rdev ||
148665679Shibler 			    vq->v_type != vp->v_type)
148765679Shibler 				continue;
148869408Smckusick 			if (vq->v_specflags & SI_MOUNTEDON) {
148969408Smckusick 				error = EBUSY;
149069408Smckusick 				break;
149169408Smckusick 			}
149265679Shibler 		}
149369408Smckusick 		simple_unlock(&spechash_slock);
149465679Shibler 	}
149569408Smckusick 	return (error);
149665679Shibler }
149765679Shibler 
149865679Shibler /*
149969325Smckusick  * Unmount all filesystems. The list is traversed in reverse order
150069325Smckusick  * of mounting to avoid dependencies.
150169325Smckusick  */
150269325Smckusick void
150369325Smckusick vfs_unmountall()
150469325Smckusick {
150569325Smckusick 	struct mount *mp, *nmp;
1506*69578Smckusick 	struct proc *p = curproc;	/* XXX */
150769325Smckusick 
1508*69578Smckusick 	/*
1509*69578Smckusick 	 * Since this only runs when rebooting, it is not interlocked.
1510*69578Smckusick 	 */
151169325Smckusick 	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
151269325Smckusick 		nmp = mp->mnt_list.cqe_prev;
1513*69578Smckusick 		(void) dounmount(mp, MNT_FORCE, p);
151469325Smckusick 	}
151569325Smckusick }
151669325Smckusick 
151769325Smckusick /*
151865679Shibler  * Build hash lists of net addresses and hang them off the mount point.
151965679Shibler  * Called by ufs_mount() to set up the lists of export addresses.
152065679Shibler  */
152165679Shibler static int
152265679Shibler vfs_hang_addrlist(mp, nep, argp)
152365679Shibler 	struct mount *mp;
152465679Shibler 	struct netexport *nep;
152565679Shibler 	struct export_args *argp;
152665679Shibler {
152765679Shibler 	register struct netcred *np;
152865679Shibler 	register struct radix_node_head *rnh;
152965679Shibler 	register int i;
153065679Shibler 	struct radix_node *rn;
153165679Shibler 	struct sockaddr *saddr, *smask = 0;
153265679Shibler 	struct domain *dom;
153365679Shibler 	int error;
153465679Shibler 
153565679Shibler 	if (argp->ex_addrlen == 0) {
153665679Shibler 		if (mp->mnt_flag & MNT_DEFEXPORTED)
153765679Shibler 			return (EPERM);
153865679Shibler 		np = &nep->ne_defexported;
153965679Shibler 		np->netc_exflags = argp->ex_flags;
154065679Shibler 		np->netc_anon = argp->ex_anon;
154165679Shibler 		np->netc_anon.cr_ref = 1;
154265679Shibler 		mp->mnt_flag |= MNT_DEFEXPORTED;
154365679Shibler 		return (0);
154465679Shibler 	}
154565679Shibler 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
154665679Shibler 	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
154765679Shibler 	bzero((caddr_t)np, i);
154865679Shibler 	saddr = (struct sockaddr *)(np + 1);
154965679Shibler 	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
155065679Shibler 		goto out;
155165679Shibler 	if (saddr->sa_len > argp->ex_addrlen)
155265679Shibler 		saddr->sa_len = argp->ex_addrlen;
155365679Shibler 	if (argp->ex_masklen) {
155465679Shibler 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
155565679Shibler 		error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
155665679Shibler 		if (error)
155765679Shibler 			goto out;
155865679Shibler 		if (smask->sa_len > argp->ex_masklen)
155965679Shibler 			smask->sa_len = argp->ex_masklen;
156065679Shibler 	}
156165679Shibler 	i = saddr->sa_family;
156265679Shibler 	if ((rnh = nep->ne_rtable[i]) == 0) {
156365679Shibler 		/*
156465679Shibler 		 * Seems silly to initialize every AF when most are not
156565679Shibler 		 * used, do so on demand here
156665679Shibler 		 */
156765679Shibler 		for (dom = domains; dom; dom = dom->dom_next)
156865679Shibler 			if (dom->dom_family == i && dom->dom_rtattach) {
156965679Shibler 				dom->dom_rtattach((void **)&nep->ne_rtable[i],
157065679Shibler 					dom->dom_rtoffset);
157165679Shibler 				break;
157265679Shibler 			}
157365679Shibler 		if ((rnh = nep->ne_rtable[i]) == 0) {
157465679Shibler 			error = ENOBUFS;
157565679Shibler 			goto out;
157665679Shibler 		}
157765679Shibler 	}
157865679Shibler 	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
157965679Shibler 		np->netc_rnodes);
158069140Smckusick 	if (rn == 0) {
158169140Smckusick 		/*
158269140Smckusick 		 * One of the reasons that rnh_addaddr may fail is that
158369140Smckusick 		 * the entry already exists. To check for this case, we
158469140Smckusick 		 * look up the entry to see if it is there. If so, we
158569140Smckusick 		 * do not need to make a new entry but do return success.
158669140Smckusick 		 */
158769140Smckusick 		free(np, M_NETADDR);
158869140Smckusick 		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
158969140Smckusick 		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
159069140Smckusick 		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
159169140Smckusick 		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
159269140Smckusick 			    (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
159369140Smckusick 			return (0);
159469140Smckusick 		return (EPERM);
159565679Shibler 	}
159665679Shibler 	np->netc_exflags = argp->ex_flags;
159765679Shibler 	np->netc_anon = argp->ex_anon;
159865679Shibler 	np->netc_anon.cr_ref = 1;
159965679Shibler 	return (0);
160065679Shibler out:
160165679Shibler 	free(np, M_NETADDR);
160265679Shibler 	return (error);
160365679Shibler }
160465679Shibler 
160565679Shibler /* ARGSUSED */
160665679Shibler static int
160765679Shibler vfs_free_netcred(rn, w)
160865679Shibler 	struct radix_node *rn;
160965679Shibler 	caddr_t w;
161065679Shibler {
161165679Shibler 	register struct radix_node_head *rnh = (struct radix_node_head *)w;
161265679Shibler 
161365679Shibler 	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
161465679Shibler 	free((caddr_t)rn, M_NETADDR);
161565679Shibler 	return (0);
161665679Shibler }
161768319Scgd 
161865679Shibler /*
161965679Shibler  * Free the net address hash lists that are hanging off the mount points.
162065679Shibler  */
162165679Shibler static void
162265679Shibler vfs_free_addrlist(nep)
162365679Shibler 	struct netexport *nep;
162465679Shibler {
162565679Shibler 	register int i;
162665679Shibler 	register struct radix_node_head *rnh;
162765679Shibler 
162865679Shibler 	for (i = 0; i <= AF_MAX; i++)
162965679Shibler 		if (rnh = nep->ne_rtable[i]) {
163065679Shibler 			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
163165679Shibler 			    (caddr_t)rnh);
163265679Shibler 			free((caddr_t)rnh, M_RTABLE);
163365679Shibler 			nep->ne_rtable[i] = 0;
163465679Shibler 		}
163565679Shibler }
163665679Shibler 
163765679Shibler int
163865679Shibler vfs_export(mp, nep, argp)
163965679Shibler 	struct mount *mp;
164065679Shibler 	struct netexport *nep;
164165679Shibler 	struct export_args *argp;
164265679Shibler {
164365679Shibler 	int error;
164465679Shibler 
164565679Shibler 	if (argp->ex_flags & MNT_DELEXPORT) {
164665679Shibler 		vfs_free_addrlist(nep);
164765679Shibler 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
164865679Shibler 	}
164965679Shibler 	if (argp->ex_flags & MNT_EXPORTED) {
165065679Shibler 		if (error = vfs_hang_addrlist(mp, nep, argp))
165165679Shibler 			return (error);
165265679Shibler 		mp->mnt_flag |= MNT_EXPORTED;
165365679Shibler 	}
165465679Shibler 	return (0);
165565679Shibler }
165665679Shibler 
165765679Shibler struct netcred *
165865679Shibler vfs_export_lookup(mp, nep, nam)
165965679Shibler 	register struct mount *mp;
166065679Shibler 	struct netexport *nep;
166165679Shibler 	struct mbuf *nam;
166265679Shibler {
166365679Shibler 	register struct netcred *np;
166465679Shibler 	register struct radix_node_head *rnh;
166565679Shibler 	struct sockaddr *saddr;
166665679Shibler 
166765679Shibler 	np = NULL;
166865679Shibler 	if (mp->mnt_flag & MNT_EXPORTED) {
166965679Shibler 		/*
167065679Shibler 		 * Lookup in the export list first.
167165679Shibler 		 */
167265679Shibler 		if (nam != NULL) {
167365679Shibler 			saddr = mtod(nam, struct sockaddr *);
167465679Shibler 			rnh = nep->ne_rtable[saddr->sa_family];
167565679Shibler 			if (rnh != NULL) {
167665679Shibler 				np = (struct netcred *)
167765679Shibler 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
167865679Shibler 							      rnh);
167965679Shibler 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
168065679Shibler 					np = NULL;
168165679Shibler 			}
168265679Shibler 		}
168365679Shibler 		/*
168465679Shibler 		 * If no address match, use the default if it exists.
168565679Shibler 		 */
168665679Shibler 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
168765679Shibler 			np = &nep->ne_defexported;
168865679Shibler 	}
168965679Shibler 	return (np);
169065679Shibler }
1691