137488Smckusick /* 263180Sbostic * Copyright (c) 1989, 1993 363180Sbostic * The Regents of the University of California. All rights reserved. 465771Sbostic * (c) UNIX System Laboratories, Inc. 565771Sbostic * All or some portions of this file are derived from material licensed 665771Sbostic * to the University of California by American Telephone and Telegraph 765771Sbostic * Co. or Unix System Laboratories, Inc. and are reproduced herein with 865771Sbostic * the permission of UNIX System Laboratories, Inc. 937488Smckusick * 1044458Sbostic * %sccs.include.redist.c% 1137488Smckusick * 12*69325Smckusick * @(#)vfs_subr.c 8.21 (Berkeley) 05/09/95 1337488Smckusick */ 1437488Smckusick 1537488Smckusick /* 1637488Smckusick * External virtual filesystem routines 1737488Smckusick */ 1837488Smckusick 1951460Sbostic #include <sys/param.h> 2053829Spendry #include <sys/systm.h> 2151460Sbostic #include <sys/proc.h> 2251460Sbostic #include <sys/mount.h> 2351460Sbostic #include <sys/time.h> 2451460Sbostic #include <sys/vnode.h> 2552415Smckusick #include <sys/stat.h> 2651460Sbostic #include <sys/namei.h> 2751460Sbostic #include <sys/ucred.h> 2851460Sbostic #include <sys/buf.h> 2951460Sbostic #include <sys/errno.h> 3051460Sbostic #include <sys/malloc.h> 3165679Shibler #include <sys/domain.h> 3265679Shibler #include <sys/mbuf.h> 3337488Smckusick 3460930Smckusick #include <vm/vm.h> 3560930Smckusick #include <sys/sysctl.h> 3660930Smckusick 3755050Spendry #include <miscfs/specfs/specdev.h> 3855050Spendry 3952415Smckusick /* iftovt_tab: 16-entry table of vnode types (enum vtype). NOTE(review): presumably indexed by the file-type bits of a mode word; the indexing macro is not visible here -- confirm. */ enum vtype iftovt_tab[16] = { 4052415Smckusick VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 4152415Smckusick VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 4252415Smckusick }; 4352415Smckusick /* vttoif_tab: 9-entry table of S_IF* file-type bits; entry 0 is 0 and the last entry is S_IFMT. NOTE(review): entry order presumably follows enum vtype -- confirm against its declaration. */ int vttoif_tab[9] = { 4452415Smckusick 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 4552415Smckusick S_IFSOCK, S_IFIFO, S_IFMT, 4652415Smckusick }; 4752415Smckusick 4837488Smckusick /* 4956608Smckusick * Insq/Remq for the vnode usage lists. 
5056608Smckusick */ 5165260Smckusick #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 5268319Scgd #define bufremvn(bp) { \ 5368319Scgd LIST_REMOVE(bp, b_vnbufs); \ 5468319Scgd (bp)->b_vnbufs.le_next = NOLIST; \ 5565260Smckusick } 5665260Smckusick TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 5765260Smckusick struct mntlist mountlist; /* mounted filesystem list */ 5865260Smckusick 5956608Smckusick /* 6065260Smckusick * Initialize the vnode management data structures. 6137488Smckusick */ 6268319Scgd void 6365260Smckusick vntblinit() 6437488Smckusick { 6537488Smckusick 6665260Smckusick TAILQ_INIT(&vnode_free_list); 67*69325Smckusick CIRCLEQ_INIT(&mountlist); 6837488Smckusick } 6937488Smckusick 7037488Smckusick /* 7137488Smckusick * Lock a filesystem. 7237488Smckusick * Used to prevent access to it while mounting and unmounting. 7337488Smckusick */ 7468319Scgd int 7537488Smckusick vfs_lock(mp) 7637488Smckusick register struct mount *mp; 7737488Smckusick { 7837488Smckusick 7968319Scgd while (mp->mnt_flag & MNT_MLOCK) { 8041400Smckusick mp->mnt_flag |= MNT_MWAIT; 8168319Scgd tsleep((caddr_t)mp, PVFS, "vfslock", 0); 8239045Smckusick } 8341400Smckusick mp->mnt_flag |= MNT_MLOCK; 8437488Smckusick return (0); 8537488Smckusick } 8637488Smckusick 8737488Smckusick /* 8837488Smckusick * Unlock a locked filesystem. 8937488Smckusick * Panic if filesystem is not locked. 9037488Smckusick */ 9137488Smckusick void 9237488Smckusick vfs_unlock(mp) 9337488Smckusick register struct mount *mp; 9437488Smckusick { 9537488Smckusick 9641400Smckusick if ((mp->mnt_flag & MNT_MLOCK) == 0) 9741300Smckusick panic("vfs_unlock: not locked"); 9841400Smckusick mp->mnt_flag &= ~MNT_MLOCK; 9941400Smckusick if (mp->mnt_flag & MNT_MWAIT) { 10041400Smckusick mp->mnt_flag &= ~MNT_MWAIT; 10137488Smckusick wakeup((caddr_t)mp); 10237488Smckusick } 10337488Smckusick } 10437488Smckusick 10537488Smckusick /* 10641300Smckusick * Mark a mount point as busy. 
10741300Smckusick * Used to synchronize access and to delay unmounting. 10841300Smckusick */ 10968319Scgd int 11041300Smckusick vfs_busy(mp) 11141300Smckusick register struct mount *mp; 11241300Smckusick { 11341300Smckusick 11468319Scgd while (mp->mnt_flag & MNT_MPBUSY) { 11541400Smckusick mp->mnt_flag |= MNT_MPWANT; 11668319Scgd tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0); 11741300Smckusick } 11841419Smckusick if (mp->mnt_flag & MNT_UNMOUNT) 11941419Smckusick return (1); 12041400Smckusick mp->mnt_flag |= MNT_MPBUSY; 12141300Smckusick return (0); 12241300Smckusick } 12341300Smckusick 12441300Smckusick /* 12541300Smckusick * Free a busy filesystem. 12641300Smckusick * Panic if filesystem is not busy. 12741300Smckusick */ 12868319Scgd void 12941300Smckusick vfs_unbusy(mp) 13041300Smckusick register struct mount *mp; 13141300Smckusick { 13241300Smckusick 13341400Smckusick if ((mp->mnt_flag & MNT_MPBUSY) == 0) 13441300Smckusick panic("vfs_unbusy: not busy"); 13541400Smckusick mp->mnt_flag &= ~MNT_MPBUSY; 13641400Smckusick if (mp->mnt_flag & MNT_MPWANT) { 13741400Smckusick mp->mnt_flag &= ~MNT_MPWANT; 13841400Smckusick wakeup((caddr_t)&mp->mnt_flag); 13941300Smckusick } 14041300Smckusick } 14141300Smckusick 14241300Smckusick /* 14337488Smckusick * Lookup a mount point by filesystem identifier. 
14437488Smckusick */ 14537488Smckusick struct mount * 14668659Smckusick vfs_getvfs(fsid) 14737488Smckusick fsid_t *fsid; 14837488Smckusick { 14937488Smckusick register struct mount *mp; 15037488Smckusick 151*69325Smckusick for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 152*69325Smckusick mp = mp->mnt_list.cqe_next) { 15341400Smckusick if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 15465260Smckusick mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) 15538288Smckusick return (mp); 15665260Smckusick } 15738288Smckusick return ((struct mount *)0); 15837488Smckusick } 15937488Smckusick 16037488Smckusick /* 16153829Spendry * Get a new unique fsid 16253829Spendry */ 16353829Spendry void 16468659Smckusick vfs_getnewfsid(mp) 16553829Spendry struct mount *mp; 16653829Spendry { 16753829Spendry static u_short xxxfs_mntid; 16853829Spendry 16953829Spendry fsid_t tfsid; 17068659Smckusick int mtype; 17153829Spendry 17268659Smckusick mtype = mp->mnt_vfc->vfc_typenum; 17365507Spendry mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 17453829Spendry mp->mnt_stat.f_fsid.val[1] = mtype; 17553829Spendry if (xxxfs_mntid == 0) 17653829Spendry ++xxxfs_mntid; 17765507Spendry tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 17853829Spendry tfsid.val[1] = mtype; 179*69325Smckusick if (mountlist.cqh_first != (void *)&mountlist) { 18068659Smckusick while (vfs_getvfs(&tfsid)) { 18153936Spendry tfsid.val[0]++; 18253936Spendry xxxfs_mntid++; 18353936Spendry } 18453829Spendry } 18553829Spendry mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 18653829Spendry } 18753829Spendry 18853829Spendry /* 18937488Smckusick * Set vnode attributes to VNOVAL 19037488Smckusick */ 19168319Scgd void 19268319Scgd vattr_null(vap) 19337488Smckusick register struct vattr *vap; 19437488Smckusick { 19537488Smckusick 19637488Smckusick vap->va_type = VNON; 19752005Smckusick vap->va_size = vap->va_bytes = VNOVAL; 19837488Smckusick vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = 19952005Smckusick 
vap->va_fsid = vap->va_fileid = 20052005Smckusick vap->va_blocksize = vap->va_rdev = 20154347Smckusick vap->va_atime.ts_sec = vap->va_atime.ts_nsec = 20254347Smckusick vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = 20354347Smckusick vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = 20438258Smckusick vap->va_flags = vap->va_gen = VNOVAL; 20558548Sbostic vap->va_vaflags = 0; 20637488Smckusick } 20738265Smckusick 20838265Smckusick /* 20939397Smckusick * Routines having to do with the management of the vnode table. 21039397Smckusick */ 21153547Sheideman extern int (**dead_vnodeop_p)(); 21239635Smckusick extern void vclean(); 21340883Smckusick long numvnodes; 21453493Sheideman extern struct vattr va_null; 21539397Smckusick 21639397Smckusick /* 21739397Smckusick * Return the next vnode from the free list. 21839397Smckusick */ 21968319Scgd int 22039397Smckusick getnewvnode(tag, mp, vops, vpp) 22139397Smckusick enum vtagtype tag; 22239397Smckusick struct mount *mp; 22353495Sheideman int (**vops)(); 22439397Smckusick struct vnode **vpp; 22539397Smckusick { 22665511Smckusick register struct vnode *vp; 22757042Smargo int s; 22839397Smckusick 22965260Smckusick if ((vnode_free_list.tqh_first == NULL && 23065260Smckusick numvnodes < 2 * desiredvnodes) || 23154347Smckusick numvnodes < desiredvnodes) { 23245118Smckusick vp = (struct vnode *)malloc((u_long)sizeof *vp, 23345118Smckusick M_VNODE, M_WAITOK); 23440883Smckusick bzero((char *)vp, sizeof *vp); 23540883Smckusick numvnodes++; 23640883Smckusick } else { 23765260Smckusick if ((vp = vnode_free_list.tqh_first) == NULL) { 23840883Smckusick tablefull("vnode"); 23940883Smckusick *vpp = 0; 24040883Smckusick return (ENFILE); 24140883Smckusick } 24240883Smckusick if (vp->v_usecount) 24340883Smckusick panic("free vnode isn't"); 24465260Smckusick TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 24565505Smckusick /* see comment on why 0xdeadb is set at end of vgone (below) */ 24665505Smckusick vp->v_freelist.tqe_prev = (struct vnode 
**)0xdeadb; 24752190Smckusick vp->v_lease = NULL; 24840883Smckusick if (vp->v_type != VBAD) 24968424Smckusick VOP_REVOKE(vp, 0); 25057042Smargo #ifdef DIAGNOSTIC 25152006Smckusick if (vp->v_data) 25252006Smckusick panic("cleaned vnode isn't"); 25357042Smargo s = splbio(); 25457042Smargo if (vp->v_numoutput) 25557042Smargo panic("Clean vnode has pending I/O's"); 25657042Smargo splx(s); 25757042Smargo #endif 25840883Smckusick vp->v_flag = 0; 25940883Smckusick vp->v_lastr = 0; 26065745Shibler vp->v_ralen = 0; 26165745Shibler vp->v_maxra = 0; 26257042Smargo vp->v_lastw = 0; 26357042Smargo vp->v_lasta = 0; 26457042Smargo vp->v_cstart = 0; 26557042Smargo vp->v_clen = 0; 26640883Smckusick vp->v_socket = 0; 26739397Smckusick } 26839512Smckusick vp->v_type = VNON; 26939397Smckusick cache_purge(vp); 27039397Smckusick vp->v_tag = tag; 27139433Smckusick vp->v_op = vops; 27239397Smckusick insmntque(vp, mp); 27339397Smckusick *vpp = vp; 27465505Smckusick vp->v_usecount = 1; 27565260Smckusick vp->v_data = 0; 27639397Smckusick return (0); 27739397Smckusick } 27865679Shibler 27939397Smckusick /* 28039397Smckusick * Move a vnode from one mount queue to another. 28139397Smckusick */ 28268319Scgd void 28339397Smckusick insmntque(vp, mp) 28439397Smckusick register struct vnode *vp; 28539397Smckusick register struct mount *mp; 28639397Smckusick { 28739397Smckusick 28839397Smckusick /* 28939397Smckusick * Delete from old mount point vnode list, if on one. 29039397Smckusick */ 29165679Shibler if (vp->v_mount != NULL) 29265260Smckusick LIST_REMOVE(vp, v_mntvnodes); 29339397Smckusick /* 29439397Smckusick * Insert into list of vnodes for the new mount point, if available. 29539397Smckusick */ 29665260Smckusick if ((vp->v_mount = mp) == NULL) 29739397Smckusick return; 29865260Smckusick LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 29939397Smckusick } 30039397Smckusick 30139397Smckusick /* 30249232Smckusick * Update outstanding I/O count and do wakeup if requested. 
30349232Smckusick */ 30468319Scgd void 30549232Smckusick vwakeup(bp) 30649232Smckusick register struct buf *bp; 30749232Smckusick { 30849232Smckusick register struct vnode *vp; 30949232Smckusick 31057810Smckusick bp->b_flags &= ~B_WRITEINPROG; 31149232Smckusick if (vp = bp->b_vp) { 31268319Scgd if (--vp->v_numoutput < 0) 31357042Smargo panic("vwakeup: neg numoutput"); 31449232Smckusick if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 31549232Smckusick if (vp->v_numoutput < 0) 31668319Scgd panic("vwakeup: neg numoutput 2"); 31749232Smckusick vp->v_flag &= ~VBWAIT; 31849232Smckusick wakeup((caddr_t)&vp->v_numoutput); 31949232Smckusick } 32049232Smckusick } 32149232Smckusick } 32249232Smckusick 32349232Smckusick /* 32449232Smckusick * Flush out and invalidate all buffers associated with a vnode. 32549232Smckusick * Called with the underlying object locked. 32649232Smckusick */ 32754442Smckusick int 32857792Smckusick vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 32949232Smckusick register struct vnode *vp; 33056459Smargo int flags; 33154442Smckusick struct ucred *cred; 33254442Smckusick struct proc *p; 33357792Smckusick int slpflag, slptimeo; 33449232Smckusick { 33549232Smckusick register struct buf *bp; 33649232Smckusick struct buf *nbp, *blist; 33754442Smckusick int s, error; 33849232Smckusick 33956459Smargo if (flags & V_SAVE) { 34054442Smckusick if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) 34154442Smckusick return (error); 34265260Smckusick if (vp->v_dirtyblkhd.lh_first != NULL) 34354442Smckusick panic("vinvalbuf: dirty bufs"); 34454442Smckusick } 34549232Smckusick for (;;) { 34665260Smckusick if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) 34756459Smargo while (blist && blist->b_lblkno < 0) 34865260Smckusick blist = blist->b_vnbufs.le_next; 34968319Scgd if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 35056608Smckusick (flags & V_SAVEMETA)) 35156459Smargo while (blist && blist->b_lblkno < 0) 35265260Smckusick blist = 
blist->b_vnbufs.le_next; 35356459Smargo if (!blist) 35449232Smckusick break; 35556459Smargo 35649232Smckusick for (bp = blist; bp; bp = nbp) { 35765260Smckusick nbp = bp->b_vnbufs.le_next; 35856459Smargo if (flags & V_SAVEMETA && bp->b_lblkno < 0) 35956459Smargo continue; 36049232Smckusick s = splbio(); 36149232Smckusick if (bp->b_flags & B_BUSY) { 36249232Smckusick bp->b_flags |= B_WANTED; 36357792Smckusick error = tsleep((caddr_t)bp, 36457792Smckusick slpflag | (PRIBIO + 1), "vinvalbuf", 36557792Smckusick slptimeo); 36649232Smckusick splx(s); 36757792Smckusick if (error) 36857792Smckusick return (error); 36949232Smckusick break; 37049232Smckusick } 37149232Smckusick bremfree(bp); 37249232Smckusick bp->b_flags |= B_BUSY; 37349232Smckusick splx(s); 37457792Smckusick /* 37557792Smckusick * XXX Since there are no node locks for NFS, I believe 37657792Smckusick * there is a slight chance that a delayed write will 37757792Smckusick * occur while sleeping just above, so check for it. 37857792Smckusick */ 37957792Smckusick if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 38057792Smckusick (void) VOP_BWRITE(bp); 38157792Smckusick break; 38257792Smckusick } 38356459Smargo bp->b_flags |= B_INVAL; 38449232Smckusick brelse(bp); 38549232Smckusick } 38649232Smckusick } 38756608Smckusick if (!(flags & V_SAVEMETA) && 38865260Smckusick (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 38949232Smckusick panic("vinvalbuf: flush failed"); 39054442Smckusick return (0); 39149232Smckusick } 39249232Smckusick 39349232Smckusick /* 39449232Smckusick * Associate a buffer with a vnode. 
39549232Smckusick */ 39668319Scgd void 39749232Smckusick bgetvp(vp, bp) 39849232Smckusick register struct vnode *vp; 39949232Smckusick register struct buf *bp; 40049232Smckusick { 40149232Smckusick 40249232Smckusick if (bp->b_vp) 40349232Smckusick panic("bgetvp: not free"); 40449232Smckusick VHOLD(vp); 40549232Smckusick bp->b_vp = vp; 40649232Smckusick if (vp->v_type == VBLK || vp->v_type == VCHR) 40749232Smckusick bp->b_dev = vp->v_rdev; 40849232Smckusick else 40949232Smckusick bp->b_dev = NODEV; 41049232Smckusick /* 41149232Smckusick * Insert onto list for new vnode. 41249232Smckusick */ 41356608Smckusick bufinsvn(bp, &vp->v_cleanblkhd); 41449232Smckusick } 41549232Smckusick 41649232Smckusick /* 41749232Smckusick * Disassociate a buffer from a vnode. 41849232Smckusick */ 41968319Scgd void 42049232Smckusick brelvp(bp) 42149232Smckusick register struct buf *bp; 42249232Smckusick { 42349232Smckusick struct vnode *vp; 42449232Smckusick 42549232Smckusick if (bp->b_vp == (struct vnode *) 0) 42649232Smckusick panic("brelvp: NULL"); 42749232Smckusick /* 42849232Smckusick * Delete from old vnode list, if on one. 42949232Smckusick */ 43065260Smckusick if (bp->b_vnbufs.le_next != NOLIST) 43156608Smckusick bufremvn(bp); 43249232Smckusick vp = bp->b_vp; 43349232Smckusick bp->b_vp = (struct vnode *) 0; 43449232Smckusick HOLDRELE(vp); 43549232Smckusick } 43649232Smckusick 43749232Smckusick /* 43849232Smckusick * Reassign a buffer from one vnode to another. 43949232Smckusick * Used to assign file specific control information 44049232Smckusick * (indirect blocks) to the vnode to which they belong. 
44149232Smckusick */ 44268319Scgd void 44349232Smckusick reassignbuf(bp, newvp) 44449232Smckusick register struct buf *bp; 44549232Smckusick register struct vnode *newvp; 44649232Smckusick { 44765260Smckusick register struct buflists *listheadp; 44849232Smckusick 44952655Smckusick if (newvp == NULL) { 45052655Smckusick printf("reassignbuf: NULL"); 45152655Smckusick return; 45252655Smckusick } 45349232Smckusick /* 45449232Smckusick * Delete from old vnode list, if on one. 45549232Smckusick */ 45665260Smckusick if (bp->b_vnbufs.le_next != NOLIST) 45756608Smckusick bufremvn(bp); 45849232Smckusick /* 45949232Smckusick * If dirty, put on list of dirty buffers; 46049232Smckusick * otherwise insert onto list of clean buffers. 46149232Smckusick */ 46249232Smckusick if (bp->b_flags & B_DELWRI) 46349232Smckusick listheadp = &newvp->v_dirtyblkhd; 46449232Smckusick else 46549232Smckusick listheadp = &newvp->v_cleanblkhd; 46656608Smckusick bufinsvn(bp, listheadp); 46749232Smckusick } 46849232Smckusick 46949232Smckusick /* 47039433Smckusick * Create a vnode for a block device. 47139433Smckusick * Used for root filesystem, argdev, and swap areas. 47239433Smckusick * Also used for memory file system special devices. 
47339397Smckusick */ 47468319Scgd int 47539433Smckusick bdevvp(dev, vpp) 47639433Smckusick dev_t dev; 47739433Smckusick struct vnode **vpp; 47839433Smckusick { 47939433Smckusick register struct vnode *vp; 48039433Smckusick struct vnode *nvp; 48139433Smckusick int error; 48239433Smckusick 48346989Smckusick if (dev == NODEV) 48446989Smckusick return (0); 48553547Sheideman error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 48639433Smckusick if (error) { 48768319Scgd *vpp = NULLVP; 48839433Smckusick return (error); 48939433Smckusick } 49039433Smckusick vp = nvp; 49139433Smckusick vp->v_type = VBLK; 49239615Smckusick if (nvp = checkalias(vp, dev, (struct mount *)0)) { 49339433Smckusick vput(vp); 49439433Smckusick vp = nvp; 49539433Smckusick } 49639433Smckusick *vpp = vp; 49739433Smckusick return (0); 49839433Smckusick } 49939433Smckusick 50039433Smckusick /* 50139433Smckusick * Check to see if the new vnode represents a special device 50239433Smckusick * for which we already have a vnode (either because of 50339433Smckusick * bdevvp() or because of a different vnode representing 50439433Smckusick * the same block device). If such an alias exists, deallocate 50539509Smckusick * the existing contents and return the aliased vnode. The 50639433Smckusick * caller is responsible for filling it with its new contents. 
50739433Smckusick */ 50839433Smckusick struct vnode * 50939615Smckusick checkalias(nvp, nvp_rdev, mp) 51039433Smckusick register struct vnode *nvp; 51139615Smckusick dev_t nvp_rdev; 51239433Smckusick struct mount *mp; 51339433Smckusick { 51439433Smckusick register struct vnode *vp; 51539615Smckusick struct vnode **vpp; 51639433Smckusick 51739433Smckusick if (nvp->v_type != VBLK && nvp->v_type != VCHR) 51841400Smckusick return (NULLVP); 51939615Smckusick 52039615Smckusick vpp = &speclisth[SPECHASH(nvp_rdev)]; 52139433Smckusick loop: 52239615Smckusick for (vp = *vpp; vp; vp = vp->v_specnext) { 52339615Smckusick if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 52439433Smckusick continue; 52539615Smckusick /* 52639615Smckusick * Alias, but not in use, so flush it out. 52739615Smckusick */ 52839809Smckusick if (vp->v_usecount == 0) { 52939615Smckusick vgone(vp); 53039615Smckusick goto loop; 53139615Smckusick } 53265260Smckusick if (vget(vp, 1)) 53339633Smckusick goto loop; 53439433Smckusick break; 53539433Smckusick } 53639615Smckusick if (vp == NULL || vp->v_tag != VT_NON) { 53739615Smckusick MALLOC(nvp->v_specinfo, struct specinfo *, 53839615Smckusick sizeof(struct specinfo), M_VNODE, M_WAITOK); 53939615Smckusick nvp->v_rdev = nvp_rdev; 54039809Smckusick nvp->v_hashchain = vpp; 54139615Smckusick nvp->v_specnext = *vpp; 54242152Smckusick nvp->v_specflags = 0; 54339615Smckusick *vpp = nvp; 54440640Smckusick if (vp != NULL) { 54540640Smckusick nvp->v_flag |= VALIASED; 54640640Smckusick vp->v_flag |= VALIASED; 54740640Smckusick vput(vp); 54840640Smckusick } 54941400Smckusick return (NULLVP); 55039433Smckusick } 55139484Smckusick VOP_UNLOCK(vp); 55239484Smckusick vclean(vp, 0); 55339433Smckusick vp->v_op = nvp->v_op; 55439433Smckusick vp->v_tag = nvp->v_tag; 55539433Smckusick nvp->v_type = VNON; 55639433Smckusick insmntque(vp, mp); 55739433Smckusick return (vp); 55839433Smckusick } 55939433Smckusick 56039433Smckusick /* 56139433Smckusick * Grab a particular vnode 
from the free list, increment its 56239433Smckusick * reference count and lock it. The vnode lock bit is set the 56339433Smckusick * vnode is being eliminated in vgone. The process is awakened 56439433Smckusick * when the transition is completed, and an error returned to 56539433Smckusick * indicate that the vnode is no longer usable (possibly having 56639433Smckusick * been changed to a new file system type). 56739433Smckusick */ 56868319Scgd int 56965260Smckusick vget(vp, lockflag) 57039397Smckusick register struct vnode *vp; 57165260Smckusick int lockflag; 57239397Smckusick { 57339397Smckusick 57466897Smckusick /* 57566897Smckusick * If the vnode is in the process of being cleaned out for 57666897Smckusick * another use, we wait for the cleaning to finish and then 57766897Smckusick * return failure. Cleaning is determined either by checking 57866897Smckusick * that the VXLOCK flag is set, or that the use count is 57966897Smckusick * zero with the back pointer set to show that it has been 58066897Smckusick * removed from the free list by getnewvnode. The VXLOCK 58166897Smckusick * flag may not have been set yet because vclean is blocked in 58266897Smckusick * the VOP_LOCK call waiting for the VOP_INACTIVE to complete. 
58366897Smckusick */ 58466897Smckusick if ((vp->v_flag & VXLOCK) || 58566897Smckusick (vp->v_usecount == 0 && 58666897Smckusick vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) { 58739433Smckusick vp->v_flag |= VXWANT; 58868319Scgd tsleep((caddr_t)vp, PINOD, "vget", 0); 58939433Smckusick return (1); 59039433Smckusick } 59166897Smckusick if (vp->v_usecount == 0) 59265260Smckusick TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 59359450Smckusick vp->v_usecount++; 59465260Smckusick if (lockflag) 59565260Smckusick VOP_LOCK(vp); 59639433Smckusick return (0); 59739397Smckusick } 59839397Smckusick 59939397Smckusick /* 60039397Smckusick * Vnode reference, just increment the count 60139397Smckusick */ 60268319Scgd void 60368319Scgd vref(vp) 60439397Smckusick struct vnode *vp; 60539397Smckusick { 60639397Smckusick 60759450Smckusick if (vp->v_usecount <= 0) 60859450Smckusick panic("vref used where vget required"); 60939809Smckusick vp->v_usecount++; 61039397Smckusick } 61139397Smckusick 61239397Smckusick /* 61339397Smckusick * vput(), just unlock and vrele() 61439397Smckusick */ 61568319Scgd void 61668319Scgd vput(vp) 61739397Smckusick register struct vnode *vp; 61839397Smckusick { 61952416Storek 62039397Smckusick VOP_UNLOCK(vp); 62139397Smckusick vrele(vp); 62239397Smckusick } 62339397Smckusick 62439397Smckusick /* 62539397Smckusick * Vnode release. 62639397Smckusick * If count drops to zero, call inactive routine and return to freelist. 
62739397Smckusick */ 62868319Scgd void 62968319Scgd vrele(vp) 63039397Smckusick register struct vnode *vp; 63139397Smckusick { 63239397Smckusick 63350109Smckusick #ifdef DIAGNOSTIC 63439397Smckusick if (vp == NULL) 63539433Smckusick panic("vrele: null vp"); 63650109Smckusick #endif 63739809Smckusick vp->v_usecount--; 63839809Smckusick if (vp->v_usecount > 0) 63939397Smckusick return; 64050109Smckusick #ifdef DIAGNOSTIC 64150109Smckusick if (vp->v_usecount != 0 || vp->v_writecount != 0) { 64250109Smckusick vprint("vrele: bad ref count", vp); 64350109Smckusick panic("vrele: ref cnt"); 64450109Smckusick } 64550109Smckusick #endif 64655468Smckusick /* 64755468Smckusick * insert at tail of LRU list 64855468Smckusick */ 64965260Smckusick TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 65054442Smckusick VOP_INACTIVE(vp); 65139397Smckusick } 65239433Smckusick 65339433Smckusick /* 65439809Smckusick * Page or buffer structure gets a reference. 65539809Smckusick */ 65668319Scgd void 65768319Scgd vhold(vp) 65839809Smckusick register struct vnode *vp; 65939809Smckusick { 66039809Smckusick 66139809Smckusick vp->v_holdcnt++; 66239809Smckusick } 66339809Smckusick 66439809Smckusick /* 66539809Smckusick * Page or buffer structure frees a reference. 66639809Smckusick */ 66768319Scgd void 66868319Scgd holdrele(vp) 66939809Smckusick register struct vnode *vp; 67039809Smckusick { 67139809Smckusick 67239809Smckusick if (vp->v_holdcnt <= 0) 67339809Smckusick panic("holdrele: holdcnt"); 67439809Smckusick vp->v_holdcnt--; 67539809Smckusick } 67639809Smckusick 67739809Smckusick /* 67839509Smckusick * Remove any vnodes in the vnode table belonging to mount point mp. 67939509Smckusick * 68039509Smckusick * If MNT_NOFORCE is specified, there should not be any active ones, 68139509Smckusick * return error if any are found (nb: this is a user error, not a 68239509Smckusick * system error). If MNT_FORCE is specified, detach any active vnodes 68339509Smckusick * that are found. 
68439509Smckusick */ 68565679Shibler #ifdef DIAGNOSTIC 68660930Smckusick int busyprt = 0; /* print out busy vnodes */ 68760930Smckusick struct ctldebug debug1 = { "busyprt", &busyprt }; 68865679Shibler #endif 68939509Smckusick 69068319Scgd int 69139509Smckusick vflush(mp, skipvp, flags) 69239509Smckusick struct mount *mp; 69339509Smckusick struct vnode *skipvp; 69439509Smckusick int flags; 69539509Smckusick { 69639509Smckusick register struct vnode *vp, *nvp; 69739509Smckusick int busy = 0; 69839509Smckusick 69941400Smckusick if ((mp->mnt_flag & MNT_MPBUSY) == 0) 70041300Smckusick panic("vflush: not busy"); 70141421Smckusick loop: 70265260Smckusick for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 70341421Smckusick if (vp->v_mount != mp) 70441421Smckusick goto loop; 70565260Smckusick nvp = vp->v_mntvnodes.le_next; 70639509Smckusick /* 70739509Smckusick * Skip over a selected vnode. 70839509Smckusick */ 70939509Smckusick if (vp == skipvp) 71039509Smckusick continue; 71139509Smckusick /* 71241300Smckusick * Skip over a vnodes marked VSYSTEM. 71341300Smckusick */ 71441300Smckusick if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) 71541300Smckusick continue; 71641300Smckusick /* 71757040Smckusick * If WRITECLOSE is set, only flush out regular file 71857040Smckusick * vnodes open for writing. 71957040Smckusick */ 72057040Smckusick if ((flags & WRITECLOSE) && 72157040Smckusick (vp->v_writecount == 0 || vp->v_type != VREG)) 72257040Smckusick continue; 72357040Smckusick /* 72439809Smckusick * With v_usecount == 0, all we need to do is clear 72539509Smckusick * out the vnode data structures and we are done. 72639509Smckusick */ 72739809Smckusick if (vp->v_usecount == 0) { 72868424Smckusick VOP_REVOKE(vp, 0); 72939509Smckusick continue; 73039509Smckusick } 73139509Smckusick /* 73257040Smckusick * If FORCECLOSE is set, forcibly close the vnode. 73339509Smckusick * For block or character devices, revert to an 73439509Smckusick * anonymous device. 
For all other files, just kill them. 73539509Smckusick */ 73641300Smckusick if (flags & FORCECLOSE) { 73739509Smckusick if (vp->v_type != VBLK && vp->v_type != VCHR) { 73868424Smckusick VOP_REVOKE(vp, 0); 73939509Smckusick } else { 74039509Smckusick vclean(vp, 0); 74153547Sheideman vp->v_op = spec_vnodeop_p; 74239509Smckusick insmntque(vp, (struct mount *)0); 74339509Smckusick } 74439509Smckusick continue; 74539509Smckusick } 74665679Shibler #ifdef DIAGNOSTIC 74739509Smckusick if (busyprt) 74839667Smckusick vprint("vflush: busy vnode", vp); 74965679Shibler #endif 75039509Smckusick busy++; 75139509Smckusick } 75239509Smckusick if (busy) 75339509Smckusick return (EBUSY); 75439509Smckusick return (0); 75539509Smckusick } 75639509Smckusick 75739509Smckusick /* 75839433Smckusick * Disassociate the underlying file system from a vnode. 75939433Smckusick */ 76054347Smckusick void 76154347Smckusick vclean(vp, flags) 76239433Smckusick register struct vnode *vp; 76345118Smckusick int flags; 76439433Smckusick { 76539484Smckusick int active; 76639433Smckusick 76739484Smckusick /* 76839484Smckusick * Check to see if the vnode is in use. 76939667Smckusick * If so we have to reference it before we clean it out 77039667Smckusick * so that its count cannot fall to zero and generate a 77139667Smckusick * race against ourselves to recycle it. 77239484Smckusick */ 77339809Smckusick if (active = vp->v_usecount) 77439484Smckusick VREF(vp); 77539484Smckusick /* 77656805Smckusick * Even if the count is zero, the VOP_INACTIVE routine may still 77756805Smckusick * have the object locked while it cleans it out. The VOP_LOCK 77856805Smckusick * ensures that the VOP_INACTIVE routine is done with its work. 77956805Smckusick * For active vnodes, it ensures that no other activity can 78056805Smckusick * occur while the underlying object is being cleaned out. 
78156805Smckusick */ 78256805Smckusick VOP_LOCK(vp); 78356805Smckusick /* 78439484Smckusick * Prevent the vnode from being recycled or 78539484Smckusick * brought into use while we clean it out. 78639484Smckusick */ 78739667Smckusick if (vp->v_flag & VXLOCK) 78839667Smckusick panic("vclean: deadlock"); 78939433Smckusick vp->v_flag |= VXLOCK; 79039433Smckusick /* 79156805Smckusick * Clean out any buffers associated with the vnode. 79239667Smckusick */ 79341300Smckusick if (flags & DOCLOSE) 79457792Smckusick vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); 79539667Smckusick /* 79656805Smckusick * Any other processes trying to obtain this lock must first 79756805Smckusick * wait for VXLOCK to clear, then call the new lock operation. 79839433Smckusick */ 79956805Smckusick VOP_UNLOCK(vp); 80039433Smckusick /* 80156805Smckusick * If purging an active vnode, it must be closed and 80256805Smckusick * deactivated before being reclaimed. 80339433Smckusick */ 80439484Smckusick if (active) { 80556805Smckusick if (flags & DOCLOSE) 80656805Smckusick VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL); 80756805Smckusick VOP_INACTIVE(vp); 80839433Smckusick } 80939433Smckusick /* 81039433Smckusick * Reclaim the vnode. 81139433Smckusick */ 81256805Smckusick if (VOP_RECLAIM(vp)) 81339433Smckusick panic("vclean: cannot reclaim"); 81439484Smckusick if (active) 81539484Smckusick vrele(vp); 81668784Smckusick cache_purge(vp); 81753580Sheideman 81839433Smckusick /* 81956805Smckusick * Done with purge, notify sleepers of the grim news. 
82039433Smckusick */ 82156805Smckusick vp->v_op = dead_vnodeop_p; 82256805Smckusick vp->v_tag = VT_NON; 82339433Smckusick vp->v_flag &= ~VXLOCK; 82439433Smckusick if (vp->v_flag & VXWANT) { 82539433Smckusick vp->v_flag &= ~VXWANT; 82639433Smckusick wakeup((caddr_t)vp); 82739433Smckusick } 82839433Smckusick } 82939433Smckusick 83039433Smckusick /* 83139633Smckusick * Eliminate all activity associated with the requested vnode 83239633Smckusick * and with all vnodes aliased to the requested vnode. 83339633Smckusick */ 83468424Smckusick int 83568424Smckusick vop_revoke(ap) 83668424Smckusick struct vop_revoke_args /* { 83768424Smckusick struct vnode *a_vp; 83868424Smckusick int a_flags; 83968424Smckusick } */ *ap; 84039633Smckusick { 84168424Smckusick register struct vnode *vp, *vq; 84239633Smckusick 84368424Smckusick vp = ap->a_vp; 84468424Smckusick if ((ap->a_flags & REVOKEALL) && (vp->v_flag & VALIASED)) { 84540665Smckusick /* 84640665Smckusick * If a vgone (or vclean) is already in progress, 84740665Smckusick * wait until it is done and return. 84840665Smckusick */ 84940665Smckusick if (vp->v_flag & VXLOCK) { 85040665Smckusick vp->v_flag |= VXWANT; 85168424Smckusick tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 85268424Smckusick return (0); 85339633Smckusick } 85440665Smckusick /* 85540665Smckusick * Ensure that vp will not be vgone'd while we 85640665Smckusick * are eliminating its aliases. 
85740665Smckusick */ 85840665Smckusick vp->v_flag |= VXLOCK; 85940665Smckusick while (vp->v_flag & VALIASED) { 86040665Smckusick for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 86140665Smckusick if (vq->v_rdev != vp->v_rdev || 86240665Smckusick vq->v_type != vp->v_type || vp == vq) 86340665Smckusick continue; 86440665Smckusick vgone(vq); 86540665Smckusick break; 86640665Smckusick } 86740665Smckusick } 86840665Smckusick /* 86940665Smckusick * Remove the lock so that vgone below will 87040665Smckusick * really eliminate the vnode after which time 87140665Smckusick * vgone will awaken any sleepers. 87240665Smckusick */ 87340665Smckusick vp->v_flag &= ~VXLOCK; 87439633Smckusick } 87539633Smckusick vgone(vp); 87668424Smckusick return (0); 87739633Smckusick } 87839633Smckusick 87939633Smckusick /* 88039433Smckusick * Eliminate all activity associated with a vnode 88139433Smckusick * in preparation for reuse. 88239433Smckusick */ 88368319Scgd void 88468319Scgd vgone(vp) 88539433Smckusick register struct vnode *vp; 88639433Smckusick { 88739809Smckusick register struct vnode *vq; 88839615Smckusick struct vnode *vx; 88939433Smckusick 89039433Smckusick /* 89140548Smckusick * If a vgone (or vclean) is already in progress, 89240548Smckusick * wait until it is done and return. 89340548Smckusick */ 89440548Smckusick if (vp->v_flag & VXLOCK) { 89540548Smckusick vp->v_flag |= VXWANT; 89668319Scgd tsleep((caddr_t)vp, PINOD, "vgone", 0); 89740548Smckusick return; 89840548Smckusick } 89940548Smckusick /* 90039433Smckusick * Clean out the filesystem specific data. 90139433Smckusick */ 90241300Smckusick vclean(vp, DOCLOSE); 90339433Smckusick /* 90439433Smckusick * Delete from old mount point vnode list, if on one. 
90539433Smckusick */ 90665260Smckusick if (vp->v_mount != NULL) { 90765260Smckusick LIST_REMOVE(vp, v_mntvnodes); 90852311Smckusick vp->v_mount = NULL; 90939433Smckusick } 91039433Smckusick /* 91168592Smckusick * If special device, remove it from special device alias list 91268592Smckusick * if it is on one. 91339433Smckusick */ 91468592Smckusick if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 91539809Smckusick if (*vp->v_hashchain == vp) { 91639809Smckusick *vp->v_hashchain = vp->v_specnext; 91739433Smckusick } else { 91839809Smckusick for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 91939615Smckusick if (vq->v_specnext != vp) 92039433Smckusick continue; 92139615Smckusick vq->v_specnext = vp->v_specnext; 92239433Smckusick break; 92339433Smckusick } 92439615Smckusick if (vq == NULL) 92539433Smckusick panic("missing bdev"); 92639433Smckusick } 92739615Smckusick if (vp->v_flag & VALIASED) { 92852416Storek vx = NULL; 92939809Smckusick for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 93040108Smckusick if (vq->v_rdev != vp->v_rdev || 93140108Smckusick vq->v_type != vp->v_type) 93239615Smckusick continue; 93352416Storek if (vx) 93452416Storek break; 93539615Smckusick vx = vq; 93639615Smckusick } 93752416Storek if (vx == NULL) 93839615Smckusick panic("missing alias"); 93952416Storek if (vq == NULL) 94039615Smckusick vx->v_flag &= ~VALIASED; 94139615Smckusick vp->v_flag &= ~VALIASED; 94239615Smckusick } 94339615Smckusick FREE(vp->v_specinfo, M_VNODE); 94439615Smckusick vp->v_specinfo = NULL; 94539433Smckusick } 94639433Smckusick /* 94756932Smckusick * If it is on the freelist and not already at the head, 94865505Smckusick * move it to the head of the list. The test of the back 94965505Smckusick * pointer and the reference count of zero is because 95065505Smckusick * it will be removed from the free list by getnewvnode, 95165505Smckusick * but will not have its reference count incremented until 95265505Smckusick * after calling vgone. 
If the reference count were 95365505Smckusick * incremented first, vgone would (incorrectly) try to 95465505Smckusick * close the previous instance of the underlying object. 95565505Smckusick * So, the back pointer is explicitly set to `0xdeadb' in 95665505Smckusick * getnewvnode after removing it from the freelist to ensure 95765505Smckusick * that we do not try to move it here. 95839433Smckusick */ 95965505Smckusick if (vp->v_usecount == 0 && 96065505Smckusick vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && 96165505Smckusick vnode_free_list.tqh_first != vp) { 96265260Smckusick TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 96365260Smckusick TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 96439433Smckusick } 96539484Smckusick vp->v_type = VBAD; 96639433Smckusick } 96739633Smckusick 96839633Smckusick /* 96939821Smckusick * Lookup a vnode by device number. 97039821Smckusick */ 97168319Scgd int 97239821Smckusick vfinddev(dev, type, vpp) 97339821Smckusick dev_t dev; 97439821Smckusick enum vtype type; 97539821Smckusick struct vnode **vpp; 97639821Smckusick { 97739821Smckusick register struct vnode *vp; 97839821Smckusick 97939821Smckusick for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 98039821Smckusick if (dev != vp->v_rdev || type != vp->v_type) 98139821Smckusick continue; 98239821Smckusick *vpp = vp; 98359484Smckusick return (1); 98439821Smckusick } 98559484Smckusick return (0); 98639821Smckusick } 98739821Smckusick 98839821Smckusick /* 98939633Smckusick * Calculate the total number of references to a special device. 
99039633Smckusick */ 99168319Scgd int 99239633Smckusick vcount(vp) 99339633Smckusick register struct vnode *vp; 99439633Smckusick { 99566742Smckusick register struct vnode *vq, *vnext; 99639633Smckusick int count; 99739633Smckusick 99866742Smckusick loop: 99939633Smckusick if ((vp->v_flag & VALIASED) == 0) 100039809Smckusick return (vp->v_usecount); 100166742Smckusick for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 100266742Smckusick vnext = vq->v_specnext; 100340108Smckusick if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 100439633Smckusick continue; 100539633Smckusick /* 100639633Smckusick * Alias, but not in use, so flush it out. 100739633Smckusick */ 100866742Smckusick if (vq->v_usecount == 0 && vq != vp) { 100939633Smckusick vgone(vq); 101039633Smckusick goto loop; 101139633Smckusick } 101239809Smckusick count += vq->v_usecount; 101339633Smckusick } 101439633Smckusick return (count); 101539633Smckusick } 101639667Smckusick 101739667Smckusick /* 101839667Smckusick * Print out a description of a vnode. 
101939667Smckusick */ 102039667Smckusick static char *typename[] = 102140286Smckusick { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 102239667Smckusick 102368171Scgd void 102439667Smckusick vprint(label, vp) 102539667Smckusick char *label; 102639667Smckusick register struct vnode *vp; 102739667Smckusick { 102839913Smckusick char buf[64]; 102939667Smckusick 103039667Smckusick if (label != NULL) 103139667Smckusick printf("%s: ", label); 103250109Smckusick printf("type %s, usecount %d, writecount %d, refcount %d,", 103350109Smckusick typename[vp->v_type], vp->v_usecount, vp->v_writecount, 103450109Smckusick vp->v_holdcnt); 103539913Smckusick buf[0] = '\0'; 103639913Smckusick if (vp->v_flag & VROOT) 103739913Smckusick strcat(buf, "|VROOT"); 103839913Smckusick if (vp->v_flag & VTEXT) 103939913Smckusick strcat(buf, "|VTEXT"); 104041300Smckusick if (vp->v_flag & VSYSTEM) 104141300Smckusick strcat(buf, "|VSYSTEM"); 104241300Smckusick if (vp->v_flag & VXLOCK) 104341300Smckusick strcat(buf, "|VXLOCK"); 104441300Smckusick if (vp->v_flag & VXWANT) 104541300Smckusick strcat(buf, "|VXWANT"); 104641300Smckusick if (vp->v_flag & VBWAIT) 104741300Smckusick strcat(buf, "|VBWAIT"); 104839913Smckusick if (vp->v_flag & VALIASED) 104939913Smckusick strcat(buf, "|VALIASED"); 105039913Smckusick if (buf[0] != '\0') 105139913Smckusick printf(" flags (%s)", &buf[1]); 105265260Smckusick if (vp->v_data == NULL) { 105365260Smckusick printf("\n"); 105465260Smckusick } else { 105565260Smckusick printf("\n\t"); 105665260Smckusick VOP_PRINT(vp); 105765260Smckusick } 105839667Smckusick } 105941110Smarc 106049691Smckusick #ifdef DEBUG 106149691Smckusick /* 106249691Smckusick * List all of the locked vnodes in the system. 106349691Smckusick * Called when debugging the kernel. 
106449691Smckusick */ 106568319Scgd void 106649691Smckusick printlockedvnodes() 106749691Smckusick { 106849691Smckusick register struct mount *mp; 106949691Smckusick register struct vnode *vp; 107049691Smckusick 107149691Smckusick printf("Locked vnodes\n"); 1072*69325Smckusick for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 1073*69325Smckusick mp = mp->mnt_list.cqe_next) { 107465260Smckusick for (vp = mp->mnt_vnodelist.lh_first; 107565260Smckusick vp != NULL; 1076*69325Smckusick vp = vp->v_mntvnodes.le_next) { 107749691Smckusick if (VOP_ISLOCKED(vp)) 107849691Smckusick vprint((char *)0, vp); 1079*69325Smckusick } 108065260Smckusick } 108149691Smckusick } 108249691Smckusick #endif 108349691Smckusick 108468659Smckusick /* 108568659Smckusick * Top level filesystem related information gathering. 108668659Smckusick */ 108768659Smckusick int 108868659Smckusick vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 108968659Smckusick int *name; 109068659Smckusick u_int namelen; 109168659Smckusick void *oldp; 109268659Smckusick size_t *oldlenp; 109368659Smckusick void *newp; 109468659Smckusick size_t newlen; 109568659Smckusick struct proc *p; 109668659Smckusick { 109768659Smckusick struct ctldebug *cdp; 109868659Smckusick struct vfsconf *vfsp; 109968659Smckusick 110068659Smckusick /* all sysctl names at this level are at least name and field */ 110168659Smckusick if (namelen < 2) 110268659Smckusick return (ENOTDIR); /* overloaded */ 110368659Smckusick if (name[0] != VFS_GENERIC) { 110468659Smckusick for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 110568659Smckusick if (vfsp->vfc_typenum == name[0]) 110668659Smckusick break; 110768659Smckusick if (vfsp == NULL) 110868659Smckusick return (EOPNOTSUPP); 110968659Smckusick return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 111068659Smckusick oldp, oldlenp, newp, newlen, p)); 111168659Smckusick } 111268659Smckusick switch (name[1]) { 111368659Smckusick case VFS_MAXTYPENUM: 111468659Smckusick return 
(sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); 111568659Smckusick case VFS_CONF: 111668659Smckusick if (namelen < 3) 111768659Smckusick return (ENOTDIR); /* overloaded */ 111868659Smckusick for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 111968659Smckusick if (vfsp->vfc_typenum == name[2]) 112068659Smckusick break; 112168659Smckusick if (vfsp == NULL) 112268659Smckusick return (EOPNOTSUPP); 112368659Smckusick return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, 112468659Smckusick sizeof(struct vfsconf))); 112568659Smckusick } 112668659Smckusick return (EOPNOTSUPP); 112768659Smckusick } 112868659Smckusick 112941110Smarc int kinfo_vdebug = 1; 113041110Smarc int kinfo_vgetfailed; 113141110Smarc #define KINFO_VNODESLOP 10 113241110Smarc /* 113357841Smckusick * Dump vnode list (via sysctl). 113441110Smarc * Copyout address of vnode followed by vnode. 113541110Smarc */ 113645118Smckusick /* ARGSUSED */ 113768319Scgd int 113857841Smckusick sysctl_vnode(where, sizep) 113941110Smarc char *where; 114058465Sbostic size_t *sizep; 114141110Smarc { 114265260Smckusick register struct mount *mp, *nmp; 114341110Smarc struct vnode *vp; 114441110Smarc register char *bp = where, *savebp; 114553818Smckusick char *ewhere; 114641110Smarc int error; 114741110Smarc 114841110Smarc #define VPTRSZ sizeof (struct vnode *) 114941110Smarc #define VNODESZ sizeof (struct vnode) 115041110Smarc if (where == NULL) { 115157841Smckusick *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 115241110Smarc return (0); 115341110Smarc } 115457841Smckusick ewhere = where + *sizep; 115541110Smarc 1156*69325Smckusick for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1157*69325Smckusick nmp = mp->mnt_list.cqe_next; 115865260Smckusick if (vfs_busy(mp)) 115941300Smckusick continue; 116041110Smarc savebp = bp; 116141110Smarc again: 116265260Smckusick for (vp = mp->mnt_vnodelist.lh_first; 116365260Smckusick vp != NULL; 116465260Smckusick vp = vp->v_mntvnodes.le_next) { 
116541422Smckusick /* 116641422Smckusick * Check that the vp is still associated with 116741422Smckusick * this filesystem. RACE: could have been 116841422Smckusick * recycled onto the same filesystem. 116941422Smckusick */ 117041421Smckusick if (vp->v_mount != mp) { 117141421Smckusick if (kinfo_vdebug) 117241421Smckusick printf("kinfo: vp changed\n"); 117341421Smckusick bp = savebp; 117441421Smckusick goto again; 117541421Smckusick } 117657841Smckusick if (bp + VPTRSZ + VNODESZ > ewhere) { 117757841Smckusick *sizep = bp - where; 117857841Smckusick return (ENOMEM); 117957841Smckusick } 118057841Smckusick if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 118157841Smckusick (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 118241110Smarc return (error); 118341110Smarc bp += VPTRSZ + VNODESZ; 118441110Smarc } 118565260Smckusick vfs_unbusy(mp); 118665260Smckusick } 118741110Smarc 118857841Smckusick *sizep = bp - where; 118941110Smarc return (0); 119041110Smarc } 119165679Shibler 119265679Shibler /* 119365679Shibler * Check to see if a filesystem is mounted on a block device. 119465679Shibler */ 119565679Shibler int 119665679Shibler vfs_mountedon(vp) 119765679Shibler register struct vnode *vp; 119865679Shibler { 119965679Shibler register struct vnode *vq; 120065679Shibler 120165679Shibler if (vp->v_specflags & SI_MOUNTEDON) 120265679Shibler return (EBUSY); 120365679Shibler if (vp->v_flag & VALIASED) { 120465679Shibler for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 120565679Shibler if (vq->v_rdev != vp->v_rdev || 120665679Shibler vq->v_type != vp->v_type) 120765679Shibler continue; 120865679Shibler if (vq->v_specflags & SI_MOUNTEDON) 120965679Shibler return (EBUSY); 121065679Shibler } 121165679Shibler } 121265679Shibler return (0); 121365679Shibler } 121465679Shibler 121565679Shibler /* 1216*69325Smckusick * Unmount all filesystems. The list is traversed in reverse order 1217*69325Smckusick * of mounting to avoid dependencies. 
1218*69325Smckusick */ 1219*69325Smckusick void 1220*69325Smckusick vfs_unmountall() 1221*69325Smckusick { 1222*69325Smckusick struct mount *mp, *nmp; 1223*69325Smckusick 1224*69325Smckusick for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 1225*69325Smckusick nmp = mp->mnt_list.cqe_prev; 1226*69325Smckusick (void) dounmount(mp, MNT_FORCE, &proc0); 1227*69325Smckusick } 1228*69325Smckusick } 1229*69325Smckusick 1230*69325Smckusick /* 123165679Shibler * Build hash lists of net addresses and hang them off the mount point. 123265679Shibler * Called by ufs_mount() to set up the lists of export addresses. 123365679Shibler */ 123465679Shibler static int 123565679Shibler vfs_hang_addrlist(mp, nep, argp) 123665679Shibler struct mount *mp; 123765679Shibler struct netexport *nep; 123865679Shibler struct export_args *argp; 123965679Shibler { 124065679Shibler register struct netcred *np; 124165679Shibler register struct radix_node_head *rnh; 124265679Shibler register int i; 124365679Shibler struct radix_node *rn; 124465679Shibler struct sockaddr *saddr, *smask = 0; 124565679Shibler struct domain *dom; 124665679Shibler int error; 124765679Shibler 124865679Shibler if (argp->ex_addrlen == 0) { 124965679Shibler if (mp->mnt_flag & MNT_DEFEXPORTED) 125065679Shibler return (EPERM); 125165679Shibler np = &nep->ne_defexported; 125265679Shibler np->netc_exflags = argp->ex_flags; 125365679Shibler np->netc_anon = argp->ex_anon; 125465679Shibler np->netc_anon.cr_ref = 1; 125565679Shibler mp->mnt_flag |= MNT_DEFEXPORTED; 125665679Shibler return (0); 125765679Shibler } 125865679Shibler i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 125965679Shibler np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 126065679Shibler bzero((caddr_t)np, i); 126165679Shibler saddr = (struct sockaddr *)(np + 1); 126265679Shibler if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) 126365679Shibler goto out; 126465679Shibler if (saddr->sa_len > 
argp->ex_addrlen) 126565679Shibler saddr->sa_len = argp->ex_addrlen; 126665679Shibler if (argp->ex_masklen) { 126765679Shibler smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 126865679Shibler error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); 126965679Shibler if (error) 127065679Shibler goto out; 127165679Shibler if (smask->sa_len > argp->ex_masklen) 127265679Shibler smask->sa_len = argp->ex_masklen; 127365679Shibler } 127465679Shibler i = saddr->sa_family; 127565679Shibler if ((rnh = nep->ne_rtable[i]) == 0) { 127665679Shibler /* 127765679Shibler * Seems silly to initialize every AF when most are not 127865679Shibler * used, do so on demand here 127965679Shibler */ 128065679Shibler for (dom = domains; dom; dom = dom->dom_next) 128165679Shibler if (dom->dom_family == i && dom->dom_rtattach) { 128265679Shibler dom->dom_rtattach((void **)&nep->ne_rtable[i], 128365679Shibler dom->dom_rtoffset); 128465679Shibler break; 128565679Shibler } 128665679Shibler if ((rnh = nep->ne_rtable[i]) == 0) { 128765679Shibler error = ENOBUFS; 128865679Shibler goto out; 128965679Shibler } 129065679Shibler } 129165679Shibler rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 129265679Shibler np->netc_rnodes); 129369140Smckusick if (rn == 0) { 129469140Smckusick /* 129569140Smckusick * One of the reasons that rnh_addaddr may fail is that 129669140Smckusick * the entry already exists. To check for this case, we 129769140Smckusick * look up the entry to see if it is there. If so, we 129869140Smckusick * do not need to make a new entry but do return success. 
129969140Smckusick */ 130069140Smckusick free(np, M_NETADDR); 130169140Smckusick rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); 130269140Smckusick if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && 130369140Smckusick ((struct netcred *)rn)->netc_exflags == argp->ex_flags && 130469140Smckusick !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon, 130569140Smckusick (caddr_t)&argp->ex_anon, sizeof(struct ucred))) 130669140Smckusick return (0); 130769140Smckusick return (EPERM); 130865679Shibler } 130965679Shibler np->netc_exflags = argp->ex_flags; 131065679Shibler np->netc_anon = argp->ex_anon; 131165679Shibler np->netc_anon.cr_ref = 1; 131265679Shibler return (0); 131365679Shibler out: 131465679Shibler free(np, M_NETADDR); 131565679Shibler return (error); 131665679Shibler } 131765679Shibler 131865679Shibler /* ARGSUSED */ 131965679Shibler static int 132065679Shibler vfs_free_netcred(rn, w) 132165679Shibler struct radix_node *rn; 132265679Shibler caddr_t w; 132365679Shibler { 132465679Shibler register struct radix_node_head *rnh = (struct radix_node_head *)w; 132565679Shibler 132665679Shibler (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 132765679Shibler free((caddr_t)rn, M_NETADDR); 132865679Shibler return (0); 132965679Shibler } 133068319Scgd 133165679Shibler /* 133265679Shibler * Free the net address hash lists that are hanging off the mount points. 
133365679Shibler */ 133465679Shibler static void 133565679Shibler vfs_free_addrlist(nep) 133665679Shibler struct netexport *nep; 133765679Shibler { 133865679Shibler register int i; 133965679Shibler register struct radix_node_head *rnh; 134065679Shibler 134165679Shibler for (i = 0; i <= AF_MAX; i++) 134265679Shibler if (rnh = nep->ne_rtable[i]) { 134365679Shibler (*rnh->rnh_walktree)(rnh, vfs_free_netcred, 134465679Shibler (caddr_t)rnh); 134565679Shibler free((caddr_t)rnh, M_RTABLE); 134665679Shibler nep->ne_rtable[i] = 0; 134765679Shibler } 134865679Shibler } 134965679Shibler 135065679Shibler int 135165679Shibler vfs_export(mp, nep, argp) 135265679Shibler struct mount *mp; 135365679Shibler struct netexport *nep; 135465679Shibler struct export_args *argp; 135565679Shibler { 135665679Shibler int error; 135765679Shibler 135865679Shibler if (argp->ex_flags & MNT_DELEXPORT) { 135965679Shibler vfs_free_addrlist(nep); 136065679Shibler mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 136165679Shibler } 136265679Shibler if (argp->ex_flags & MNT_EXPORTED) { 136365679Shibler if (error = vfs_hang_addrlist(mp, nep, argp)) 136465679Shibler return (error); 136565679Shibler mp->mnt_flag |= MNT_EXPORTED; 136665679Shibler } 136765679Shibler return (0); 136865679Shibler } 136965679Shibler 137065679Shibler struct netcred * 137165679Shibler vfs_export_lookup(mp, nep, nam) 137265679Shibler register struct mount *mp; 137365679Shibler struct netexport *nep; 137465679Shibler struct mbuf *nam; 137565679Shibler { 137665679Shibler register struct netcred *np; 137765679Shibler register struct radix_node_head *rnh; 137865679Shibler struct sockaddr *saddr; 137965679Shibler 138065679Shibler np = NULL; 138165679Shibler if (mp->mnt_flag & MNT_EXPORTED) { 138265679Shibler /* 138365679Shibler * Lookup in the export list first. 
138465679Shibler */ 138565679Shibler if (nam != NULL) { 138665679Shibler saddr = mtod(nam, struct sockaddr *); 138765679Shibler rnh = nep->ne_rtable[saddr->sa_family]; 138865679Shibler if (rnh != NULL) { 138965679Shibler np = (struct netcred *) 139065679Shibler (*rnh->rnh_matchaddr)((caddr_t)saddr, 139165679Shibler rnh); 139265679Shibler if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 139365679Shibler np = NULL; 139465679Shibler } 139565679Shibler } 139665679Shibler /* 139765679Shibler * If no address match, use the default if it exists. 139865679Shibler */ 139965679Shibler if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 140065679Shibler np = &nep->ne_defexported; 140165679Shibler } 140265679Shibler return (np); 140365679Shibler } 1404