/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.30 (Berkeley) 05/22/95
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

/* Map inode format bits (IFMT >> 12) to a vnode type. */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/* Map a vnode type back to inode format (IFMT) bits. */
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
/* Insert/remove a buffer on a vnode's clean or dirty buffer list. */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */
struct simplelock mountlist_slock;		/* guards mountlist traversal */
static struct simplelock mntid_slock;		/* guards fsid generation */
struct simplelock mntvnode_slock;		/* guards per-mount vnode lists */
static struct simplelock spechash_slock;	/* guards special-device hash */
static struct simplelock vnode_free_list_slock;	/* guards vnode_free_list */

/*
 * Initialize the vnode management data structures.
 * Called once at boot, before any vnodes are created.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
8237488Smckusick */ 8368319Scgd int 8469578Smckusick vfs_busy(mp, flags, interlkp, p) 8569578Smckusick struct mount *mp; 8669578Smckusick int flags; 8769578Smckusick struct simplelock *interlkp; 8869578Smckusick struct proc *p; 8937488Smckusick { 9069578Smckusick int lkflags; 9137488Smckusick 9269578Smckusick if (mp->mnt_flag & MNT_UNMOUNT) { 9369578Smckusick if (flags & LK_NOWAIT) 9469578Smckusick return (ENOENT); 9541400Smckusick mp->mnt_flag |= MNT_MWAIT; 96*69605Smckusick if (interlkp) 97*69605Smckusick simple_unlock(interlkp); 98*69605Smckusick /* 99*69605Smckusick * Since all busy locks are shared except the exclusive 100*69605Smckusick * lock granted when unmounting, the only place that a 101*69605Smckusick * wakeup needs to be done is at the release of the 102*69605Smckusick * exclusive lock at the end of dounmount. 103*69605Smckusick */ 10469578Smckusick sleep((caddr_t)mp, PVFS); 105*69605Smckusick if (interlkp) 106*69605Smckusick simple_lock(interlkp); 10769578Smckusick return (ENOENT); 10839045Smckusick } 10969578Smckusick lkflags = LK_SHARED; 11069578Smckusick if (interlkp) 11169578Smckusick lkflags |= LK_INTERLOCK; 11269578Smckusick if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) 11369578Smckusick panic("vfs_busy: unexpected lock failure"); 11437488Smckusick return (0); 11537488Smckusick } 11637488Smckusick 11737488Smckusick /* 11841300Smckusick * Free a busy filesystem. 11941300Smckusick */ 12068319Scgd void 12169578Smckusick vfs_unbusy(mp, p) 12269578Smckusick struct mount *mp; 12369578Smckusick struct proc *p; 12441300Smckusick { 12541300Smckusick 12669578Smckusick lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); 12741300Smckusick } 12841300Smckusick 12941300Smckusick /* 13069378Smckusick * Lookup a filesystem type, and if found allocate and initialize 13169378Smckusick * a mount structure for it. 13269378Smckusick * 13369378Smckusick * Devname is usually updated by mount(8) after booting. 
13469378Smckusick */ 13569378Smckusick int 13669378Smckusick vfs_rootmountalloc(fstypename, devname, mpp) 13769378Smckusick char *fstypename; 13869378Smckusick char *devname; 13969378Smckusick struct mount **mpp; 14069378Smckusick { 14169578Smckusick struct proc *p = curproc; /* XXX */ 14269378Smckusick struct vfsconf *vfsp; 14369378Smckusick struct mount *mp; 14469378Smckusick 14569378Smckusick for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 14669378Smckusick if (!strcmp(vfsp->vfc_name, fstypename)) 14769378Smckusick break; 14869378Smckusick if (vfsp == NULL) 14969378Smckusick return (ENODEV); 15069378Smckusick mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 15169378Smckusick bzero((char *)mp, (u_long)sizeof(struct mount)); 15269578Smckusick lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 15369578Smckusick (void)vfs_busy(mp, LK_NOWAIT, 0, p); 15469378Smckusick LIST_INIT(&mp->mnt_vnodelist); 15569378Smckusick mp->mnt_vfc = vfsp; 15669378Smckusick mp->mnt_op = vfsp->vfc_vfsops; 15769378Smckusick mp->mnt_flag = MNT_RDONLY; 15869378Smckusick mp->mnt_vnodecovered = NULLVP; 15969378Smckusick vfsp->vfc_refcount++; 16069378Smckusick mp->mnt_stat.f_type = vfsp->vfc_typenum; 16169378Smckusick mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 16269378Smckusick strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 16369378Smckusick mp->mnt_stat.f_mntonname[0] = '/'; 16469378Smckusick (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 16569378Smckusick *mpp = mp; 16669378Smckusick return (0); 16769378Smckusick } 16869378Smckusick 16969378Smckusick /* 17069378Smckusick * Find an appropriate filesystem to use for the root. If a filesystem 17169378Smckusick * has not been preselected, walk through the list of known filesystems 17269378Smckusick * trying those that have mountroot routines, and try them until one 17369378Smckusick * works or we have tried them all. 
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	/* A preselected root filesystem takes precedence. */
	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 * Returns the mount point, or nil if no match is found.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	/*
	 * Probe candidate fsids until one is found that no mounted
	 * filesystem is already using (only needed when the mount
	 * list is non-empty).
	 */
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
static void vclean __P((struct vnode *vp, int flag, struct proc *p));
extern void vgonel __P((struct vnode *vp, struct proc *p));
long numvnodes;			/* count of allocated vnodes */
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 * Allocates a fresh vnode while below the desired table size;
 * otherwise recycles the first unlocked vnode on the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	int s;
	int cnt;	/* NOTE(review): cnt and the "top" label are unreferenced here */

top:
	simple_lock(&vnode_free_list_slock);
	if ((vnode_free_list.tqh_first == NULL &&
	    numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		/* Below quota (or free list empty but under hard cap): allocate. */
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		/* Recycle: find a free vnode whose interlock we can take. */
		for (vp = vnode_free_list.tqh_first;
		    vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		/* vgonel consumes the interlock; otherwise drop it ourselves. */
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		/* Reset per-vnode state left over from the previous identity. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
35939397Smckusick */ 36068319Scgd void 36139397Smckusick insmntque(vp, mp) 36269408Smckusick struct vnode *vp; 36369408Smckusick struct mount *mp; 36439397Smckusick { 36539397Smckusick 36669408Smckusick simple_lock(&mntvnode_slock); 36739397Smckusick /* 36839397Smckusick * Delete from old mount point vnode list, if on one. 36939397Smckusick */ 37065679Shibler if (vp->v_mount != NULL) 37165260Smckusick LIST_REMOVE(vp, v_mntvnodes); 37239397Smckusick /* 37339397Smckusick * Insert into list of vnodes for the new mount point, if available. 37439397Smckusick */ 37569408Smckusick if ((vp->v_mount = mp) != NULL) 37669408Smckusick LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 37769408Smckusick simple_unlock(&mntvnode_slock); 37839397Smckusick } 37939397Smckusick 38039397Smckusick /* 38149232Smckusick * Update outstanding I/O count and do wakeup if requested. 38249232Smckusick */ 38368319Scgd void 38449232Smckusick vwakeup(bp) 38549232Smckusick register struct buf *bp; 38649232Smckusick { 38749232Smckusick register struct vnode *vp; 38849232Smckusick 38957810Smckusick bp->b_flags &= ~B_WRITEINPROG; 39049232Smckusick if (vp = bp->b_vp) { 39168319Scgd if (--vp->v_numoutput < 0) 39257042Smargo panic("vwakeup: neg numoutput"); 39349232Smckusick if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 39449232Smckusick if (vp->v_numoutput < 0) 39568319Scgd panic("vwakeup: neg numoutput 2"); 39649232Smckusick vp->v_flag &= ~VBWAIT; 39749232Smckusick wakeup((caddr_t)&vp->v_numoutput); 39849232Smckusick } 39949232Smckusick } 40049232Smckusick } 40149232Smckusick 40249232Smckusick /* 40349232Smckusick * Flush out and invalidate all buffers associated with a vnode. 40449232Smckusick * Called with the underlying object locked. 
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	/* V_SAVE: write dirty data out first, then invalidate. */
	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/*
		 * Pick a starting point on the clean (then dirty) list.
		 * With V_SAVEMETA, skip leading metadata buffers
		 * (negative logical block numbers), which are preserved.
		 */
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/* Busy buffer: wait for it, then rescan. */
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
55239397Smckusick */ 55368319Scgd int 55439433Smckusick bdevvp(dev, vpp) 55539433Smckusick dev_t dev; 55639433Smckusick struct vnode **vpp; 55739433Smckusick { 55839433Smckusick register struct vnode *vp; 55939433Smckusick struct vnode *nvp; 56039433Smckusick int error; 56139433Smckusick 56269378Smckusick if (dev == NODEV) { 56369378Smckusick *vpp = NULLVP; 56469378Smckusick return (ENODEV); 56569378Smckusick } 56653547Sheideman error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 56739433Smckusick if (error) { 56868319Scgd *vpp = NULLVP; 56939433Smckusick return (error); 57039433Smckusick } 57139433Smckusick vp = nvp; 57239433Smckusick vp->v_type = VBLK; 57339615Smckusick if (nvp = checkalias(vp, dev, (struct mount *)0)) { 57439433Smckusick vput(vp); 57539433Smckusick vp = nvp; 57639433Smckusick } 57739433Smckusick *vpp = vp; 57839433Smckusick return (0); 57939433Smckusick } 58039433Smckusick 58139433Smckusick /* 58239433Smckusick * Check to see if the new vnode represents a special device 58339433Smckusick * for which we already have a vnode (either because of 58439433Smckusick * bdevvp() or because of a different vnode representing 58539433Smckusick * the same block device). If such an alias exists, deallocate 58639509Smckusick * the existing contents and return the aliased vnode. The 58739433Smckusick * caller is responsible for filling it with its new contents. 
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		/* vget may sleep; restart the scan if it fails. */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	/* No usable alias: hash the new vnode in as the device's vnode. */
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/* Take over the existing alias: clean it and adopt the new ops. */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	/* A previously-unreferenced vnode leaves the free list. */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
			vrele(vp);
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
#ifdef notyet
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink). Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	if (vp->v_vnlock == NULL) {
		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
			return (0);
		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
		    M_VNODE, M_WAITOK);
		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	}
	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
	}
	if (flags & LK_INTERLOCK)
		vnflags |= LK_INTERLOCK;
	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
#else /* for now */
	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
#endif
}

/*
 * Decrement the active use count.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
}

/*
 * Return whether or not the node is in use.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockstatus(vp->v_vnlock));
}

/*
 * Vnode reference.
80069408Smckusick */ 80168319Scgd void 80268319Scgd vref(vp) 80339397Smckusick struct vnode *vp; 80439397Smckusick { 80539397Smckusick 80669408Smckusick simple_lock(&vp->v_interlock); 80759450Smckusick if (vp->v_usecount <= 0) 80859450Smckusick panic("vref used where vget required"); 80939809Smckusick vp->v_usecount++; 81069408Smckusick simple_unlock(&vp->v_interlock); 81139397Smckusick } 81239397Smckusick 81339397Smckusick /* 81439397Smckusick * vput(), just unlock and vrele() 81539397Smckusick */ 81668319Scgd void 81768319Scgd vput(vp) 81869408Smckusick struct vnode *vp; 81939397Smckusick { 82069408Smckusick struct proc *p = curproc; /* XXX */ 82152416Storek 82269523Spendry #ifdef DIGANOSTIC 82369523Spendry if (vp == NULL) 82469523Spendry panic("vput: null vp"); 82569523Spendry #endif 82669523Spendry simple_lock(&vp->v_interlock); 82769523Spendry vp->v_usecount--; 82869523Spendry if (vp->v_usecount > 0) { 82969523Spendry simple_unlock(&vp->v_interlock); 83069523Spendry VOP_UNLOCK(vp, 0, p); 83169523Spendry return; 83269523Spendry } 83369523Spendry #ifdef DIAGNOSTIC 83469523Spendry if (vp->v_usecount < 0 || vp->v_writecount != 0) { 83569523Spendry vprint("vput: bad ref count", vp); 83669523Spendry panic("vput: ref cnt"); 83769523Spendry } 83869523Spendry #endif 83969523Spendry /* 84069523Spendry * insert at tail of LRU list 84169523Spendry */ 84269523Spendry simple_lock(&vnode_free_list_slock); 84369523Spendry TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 84469523Spendry simple_unlock(&vnode_free_list_slock); 84569523Spendry simple_unlock(&vp->v_interlock); 84669523Spendry VOP_INACTIVE(vp, p); 84739397Smckusick } 84839397Smckusick 84939397Smckusick /* 85039397Smckusick * Vnode release. 85139397Smckusick * If count drops to zero, call inactive routine and return to freelist. 
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		/* Still referenced; nothing more to do. */
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	/*
	 * Unlike vput(), the caller does not hold the vnode lock, so we
	 * must acquire it (passing the interlock in) before deactivating.
	 * If the lock cannot be obtained, VOP_INACTIVE is skipped.
	 */
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 * (Hold counts are only tracked under DIAGNOSTIC in this kernel.)
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

/*
 * Returns 0 on success or EBUSY if any vnode (other than skipvp and
 * those excluded by the flags) could not be flushed.
 */
int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * The list may have changed while the lock was dropped
		 * (vgonel below); restart if vp moved to another mount.
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 * vgonel consumes the interlock.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry and is consumed by the
 * VOP_LOCK(LK_DRAIN | LK_INTERLOCK) below.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);
	/* Free the per-vnode lock allocated by vop_nolock, if present. */
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				/* vgone takes its own locks. */
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		/* Recycled: interlock consumed by vgonel. */
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	/* Still in use; nothing was recycled and inter_lkp is untouched. */
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 * The interlock is consumed (released) before return.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 * (vclean takes over the interlock.)
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		/* Unlink vp from its device hash chain. */
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * Find the remaining aliases of this device; if
			 * exactly one (vx) is left, it is no longer aliased.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		/* Found: hand the vnode back; returns 1. */
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device,
 * summing v_usecount over all aliases of the device. Unused
 * aliases encountered along the way are flushed out with vgone,
 * which forces a rescan from the top.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	/* 64 bytes is enough for all seven flag names plus separators. */
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);	/* skip the leading '|' */
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		/* Skip mount points that are being unmounted. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
141768659Smckusick */ 141868659Smckusick int 141968659Smckusick vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 142068659Smckusick int *name; 142168659Smckusick u_int namelen; 142268659Smckusick void *oldp; 142368659Smckusick size_t *oldlenp; 142468659Smckusick void *newp; 142568659Smckusick size_t newlen; 142668659Smckusick struct proc *p; 142768659Smckusick { 142868659Smckusick struct ctldebug *cdp; 142968659Smckusick struct vfsconf *vfsp; 143068659Smckusick 143168659Smckusick /* all sysctl names at this level are at least name and field */ 143268659Smckusick if (namelen < 2) 143368659Smckusick return (ENOTDIR); /* overloaded */ 143468659Smckusick if (name[0] != VFS_GENERIC) { 143568659Smckusick for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 143668659Smckusick if (vfsp->vfc_typenum == name[0]) 143768659Smckusick break; 143868659Smckusick if (vfsp == NULL) 143968659Smckusick return (EOPNOTSUPP); 144068659Smckusick return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 144168659Smckusick oldp, oldlenp, newp, newlen, p)); 144268659Smckusick } 144368659Smckusick switch (name[1]) { 144468659Smckusick case VFS_MAXTYPENUM: 144568659Smckusick return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); 144668659Smckusick case VFS_CONF: 144768659Smckusick if (namelen < 3) 144868659Smckusick return (ENOTDIR); /* overloaded */ 144968659Smckusick for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 145068659Smckusick if (vfsp->vfc_typenum == name[2]) 145168659Smckusick break; 145268659Smckusick if (vfsp == NULL) 145368659Smckusick return (EOPNOTSUPP); 145468659Smckusick return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, 145568659Smckusick sizeof(struct vfsconf))); 145668659Smckusick } 145768659Smckusick return (EOPNOTSUPP); 145868659Smckusick } 145968659Smckusick 146041110Smarc int kinfo_vdebug = 1; 146141110Smarc int kinfo_vgetfailed; 146241110Smarc #define KINFO_VNODESLOP 10 146341110Smarc /* 146457841Smckusick * Dump vnode list (via sysctl). 
 * Copyout address of vnode followed by vnode.
 * With a NULL "where", just report an estimate of the space required.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		/* Size estimate only, padded for vnodes created meanwhile. */
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				/* Restart this mount's dump from scratch. */
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				/*
				 * NOTE(review): this return (and the copyout
				 * error return below) leaves mp vfs_busy'd —
				 * verify whether that is intentional.
				 */
				return (ENOMEM);
			}
			/* Drop the list lock across the user copyout. */
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
153865679Shibler */ 153965679Shibler int 154065679Shibler vfs_mountedon(vp) 154169408Smckusick struct vnode *vp; 154265679Shibler { 154369408Smckusick struct vnode *vq; 154469408Smckusick int error = 0; 154565679Shibler 154665679Shibler if (vp->v_specflags & SI_MOUNTEDON) 154765679Shibler return (EBUSY); 154865679Shibler if (vp->v_flag & VALIASED) { 154969408Smckusick simple_lock(&spechash_slock); 155065679Shibler for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 155165679Shibler if (vq->v_rdev != vp->v_rdev || 155265679Shibler vq->v_type != vp->v_type) 155365679Shibler continue; 155469408Smckusick if (vq->v_specflags & SI_MOUNTEDON) { 155569408Smckusick error = EBUSY; 155669408Smckusick break; 155769408Smckusick } 155865679Shibler } 155969408Smckusick simple_unlock(&spechash_slock); 156065679Shibler } 156169408Smckusick return (error); 156265679Shibler } 156365679Shibler 156465679Shibler /* 156569325Smckusick * Unmount all filesystems. The list is traversed in reverse order 156669325Smckusick * of mounting to avoid dependencies. 156769325Smckusick */ 156869325Smckusick void 156969325Smckusick vfs_unmountall() 157069325Smckusick { 157169325Smckusick struct mount *mp, *nmp; 157269578Smckusick struct proc *p = curproc; /* XXX */ 157369325Smckusick 157469578Smckusick /* 157569578Smckusick * Since this only runs when rebooting, it is not interlocked. 157669578Smckusick */ 157769325Smckusick for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 157869325Smckusick nmp = mp->mnt_list.cqe_prev; 157969578Smckusick (void) dounmount(mp, MNT_FORCE, p); 158069325Smckusick } 158169325Smckusick } 158269325Smckusick 158369325Smckusick /* 158465679Shibler * Build hash lists of net addresses and hang them off the mount point. 158565679Shibler * Called by ufs_mount() to set up the lists of export addresses. 
158665679Shibler */ 158765679Shibler static int 158865679Shibler vfs_hang_addrlist(mp, nep, argp) 158965679Shibler struct mount *mp; 159065679Shibler struct netexport *nep; 159165679Shibler struct export_args *argp; 159265679Shibler { 159365679Shibler register struct netcred *np; 159465679Shibler register struct radix_node_head *rnh; 159565679Shibler register int i; 159665679Shibler struct radix_node *rn; 159765679Shibler struct sockaddr *saddr, *smask = 0; 159865679Shibler struct domain *dom; 159965679Shibler int error; 160065679Shibler 160165679Shibler if (argp->ex_addrlen == 0) { 160265679Shibler if (mp->mnt_flag & MNT_DEFEXPORTED) 160365679Shibler return (EPERM); 160465679Shibler np = &nep->ne_defexported; 160565679Shibler np->netc_exflags = argp->ex_flags; 160665679Shibler np->netc_anon = argp->ex_anon; 160765679Shibler np->netc_anon.cr_ref = 1; 160865679Shibler mp->mnt_flag |= MNT_DEFEXPORTED; 160965679Shibler return (0); 161065679Shibler } 161165679Shibler i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 161265679Shibler np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 161365679Shibler bzero((caddr_t)np, i); 161465679Shibler saddr = (struct sockaddr *)(np + 1); 161565679Shibler if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) 161665679Shibler goto out; 161765679Shibler if (saddr->sa_len > argp->ex_addrlen) 161865679Shibler saddr->sa_len = argp->ex_addrlen; 161965679Shibler if (argp->ex_masklen) { 162065679Shibler smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 162165679Shibler error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); 162265679Shibler if (error) 162365679Shibler goto out; 162465679Shibler if (smask->sa_len > argp->ex_masklen) 162565679Shibler smask->sa_len = argp->ex_masklen; 162665679Shibler } 162765679Shibler i = saddr->sa_family; 162865679Shibler if ((rnh = nep->ne_rtable[i]) == 0) { 162965679Shibler /* 163065679Shibler * Seems silly to initialize every AF when most are not 
163165679Shibler * used, do so on demand here 163265679Shibler */ 163365679Shibler for (dom = domains; dom; dom = dom->dom_next) 163465679Shibler if (dom->dom_family == i && dom->dom_rtattach) { 163565679Shibler dom->dom_rtattach((void **)&nep->ne_rtable[i], 163665679Shibler dom->dom_rtoffset); 163765679Shibler break; 163865679Shibler } 163965679Shibler if ((rnh = nep->ne_rtable[i]) == 0) { 164065679Shibler error = ENOBUFS; 164165679Shibler goto out; 164265679Shibler } 164365679Shibler } 164465679Shibler rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 164565679Shibler np->netc_rnodes); 164669140Smckusick if (rn == 0) { 164769140Smckusick /* 164869140Smckusick * One of the reasons that rnh_addaddr may fail is that 164969140Smckusick * the entry already exists. To check for this case, we 165069140Smckusick * look up the entry to see if it is there. If so, we 165169140Smckusick * do not need to make a new entry but do return success. 165269140Smckusick */ 165369140Smckusick free(np, M_NETADDR); 165469140Smckusick rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); 165569140Smckusick if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && 165669140Smckusick ((struct netcred *)rn)->netc_exflags == argp->ex_flags && 165769140Smckusick !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon, 165869140Smckusick (caddr_t)&argp->ex_anon, sizeof(struct ucred))) 165969140Smckusick return (0); 166069140Smckusick return (EPERM); 166165679Shibler } 166265679Shibler np->netc_exflags = argp->ex_flags; 166365679Shibler np->netc_anon = argp->ex_anon; 166465679Shibler np->netc_anon.cr_ref = 1; 166565679Shibler return (0); 166665679Shibler out: 166765679Shibler free(np, M_NETADDR); 166865679Shibler return (error); 166965679Shibler } 167065679Shibler 167165679Shibler /* ARGSUSED */ 167265679Shibler static int 167365679Shibler vfs_free_netcred(rn, w) 167465679Shibler struct radix_node *rn; 167565679Shibler caddr_t w; 167665679Shibler { 167765679Shibler register struct radix_node_head *rnh 
= (struct radix_node_head *)w; 167865679Shibler 167965679Shibler (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 168065679Shibler free((caddr_t)rn, M_NETADDR); 168165679Shibler return (0); 168265679Shibler } 168368319Scgd 168465679Shibler /* 168565679Shibler * Free the net address hash lists that are hanging off the mount points. 168665679Shibler */ 168765679Shibler static void 168865679Shibler vfs_free_addrlist(nep) 168965679Shibler struct netexport *nep; 169065679Shibler { 169165679Shibler register int i; 169265679Shibler register struct radix_node_head *rnh; 169365679Shibler 169465679Shibler for (i = 0; i <= AF_MAX; i++) 169565679Shibler if (rnh = nep->ne_rtable[i]) { 169665679Shibler (*rnh->rnh_walktree)(rnh, vfs_free_netcred, 169765679Shibler (caddr_t)rnh); 169865679Shibler free((caddr_t)rnh, M_RTABLE); 169965679Shibler nep->ne_rtable[i] = 0; 170065679Shibler } 170165679Shibler } 170265679Shibler 170365679Shibler int 170465679Shibler vfs_export(mp, nep, argp) 170565679Shibler struct mount *mp; 170665679Shibler struct netexport *nep; 170765679Shibler struct export_args *argp; 170865679Shibler { 170965679Shibler int error; 171065679Shibler 171165679Shibler if (argp->ex_flags & MNT_DELEXPORT) { 171265679Shibler vfs_free_addrlist(nep); 171365679Shibler mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 171465679Shibler } 171565679Shibler if (argp->ex_flags & MNT_EXPORTED) { 171665679Shibler if (error = vfs_hang_addrlist(mp, nep, argp)) 171765679Shibler return (error); 171865679Shibler mp->mnt_flag |= MNT_EXPORTED; 171965679Shibler } 172065679Shibler return (0); 172165679Shibler } 172265679Shibler 172365679Shibler struct netcred * 172465679Shibler vfs_export_lookup(mp, nep, nam) 172565679Shibler register struct mount *mp; 172665679Shibler struct netexport *nep; 172765679Shibler struct mbuf *nam; 172865679Shibler { 172965679Shibler register struct netcred *np; 173065679Shibler register struct radix_node_head *rnh; 173165679Shibler struct sockaddr *saddr; 
173265679Shibler 173365679Shibler np = NULL; 173465679Shibler if (mp->mnt_flag & MNT_EXPORTED) { 173565679Shibler /* 173665679Shibler * Lookup in the export list first. 173765679Shibler */ 173865679Shibler if (nam != NULL) { 173965679Shibler saddr = mtod(nam, struct sockaddr *); 174065679Shibler rnh = nep->ne_rtable[saddr->sa_family]; 174165679Shibler if (rnh != NULL) { 174265679Shibler np = (struct netcred *) 174365679Shibler (*rnh->rnh_matchaddr)((caddr_t)saddr, 174465679Shibler rnh); 174565679Shibler if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 174665679Shibler np = NULL; 174765679Shibler } 174865679Shibler } 174965679Shibler /* 175065679Shibler * If no address match, use the default if it exists. 175165679Shibler */ 175265679Shibler if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 175365679Shibler np = &nep->ne_defexported; 175465679Shibler } 175565679Shibler return (np); 175665679Shibler } 1757