xref: /csrg-svn/sys/kern/vfs_subr.c (revision 49232)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)vfs_subr.c	7.54 (Berkeley) 05/06/91
8  */
9 
10 /*
11  * External virtual filesystem routines
12  */
13 
14 #include "param.h"
15 #include "proc.h"
16 #include "mount.h"
17 #include "time.h"
18 #include "vnode.h"
19 #include "specdev.h"
20 #include "namei.h"
21 #include "ucred.h"
22 #include "buf.h"
23 #include "errno.h"
24 #include "malloc.h"
25 
26 /*
27  * Remove a mount point from the list of mounted filesystems.
28  * Unmount of the root is illegal.
29  */
30 void
31 vfs_remove(mp)
32 	register struct mount *mp;
33 {
34 
35 	if (mp == rootfs)
36 		panic("vfs_remove: unmounting root");
37 	mp->mnt_prev->mnt_next = mp->mnt_next;
38 	mp->mnt_next->mnt_prev = mp->mnt_prev;
39 	mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
40 	vfs_unlock(mp);
41 }
42 
43 /*
44  * Lock a filesystem.
45  * Used to prevent access to it while mounting and unmounting.
46  */
47 vfs_lock(mp)
48 	register struct mount *mp;
49 {
50 
51 	while(mp->mnt_flag & MNT_MLOCK) {
52 		mp->mnt_flag |= MNT_MWAIT;
53 		sleep((caddr_t)mp, PVFS);
54 	}
55 	mp->mnt_flag |= MNT_MLOCK;
56 	return (0);
57 }
58 
59 /*
60  * Unlock a locked filesystem.
61  * Panic if filesystem is not locked.
62  */
63 void
64 vfs_unlock(mp)
65 	register struct mount *mp;
66 {
67 
68 	if ((mp->mnt_flag & MNT_MLOCK) == 0)
69 		panic("vfs_unlock: not locked");
70 	mp->mnt_flag &= ~MNT_MLOCK;
71 	if (mp->mnt_flag & MNT_MWAIT) {
72 		mp->mnt_flag &= ~MNT_MWAIT;
73 		wakeup((caddr_t)mp);
74 	}
75 }
76 
77 /*
78  * Mark a mount point as busy.
79  * Used to synchronize access and to delay unmounting.
80  */
81 vfs_busy(mp)
82 	register struct mount *mp;
83 {
84 
85 	while(mp->mnt_flag & MNT_MPBUSY) {
86 		mp->mnt_flag |= MNT_MPWANT;
87 		sleep((caddr_t)&mp->mnt_flag, PVFS);
88 	}
89 	if (mp->mnt_flag & MNT_UNMOUNT)
90 		return (1);
91 	mp->mnt_flag |= MNT_MPBUSY;
92 	return (0);
93 }
94 
95 /*
96  * Free a busy filesystem.
97  * Panic if filesystem is not busy.
98  */
99 vfs_unbusy(mp)
100 	register struct mount *mp;
101 {
102 
103 	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
104 		panic("vfs_unbusy: not busy");
105 	mp->mnt_flag &= ~MNT_MPBUSY;
106 	if (mp->mnt_flag & MNT_MPWANT) {
107 		mp->mnt_flag &= ~MNT_MPWANT;
108 		wakeup((caddr_t)&mp->mnt_flag);
109 	}
110 }
111 
112 /*
113  * Lookup a mount point by filesystem identifier.
114  */
115 struct mount *
116 getvfs(fsid)
117 	fsid_t *fsid;
118 {
119 	register struct mount *mp;
120 
121 	mp = rootfs;
122 	do {
123 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
124 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
125 			return (mp);
126 		}
127 		mp = mp->mnt_next;
128 	} while (mp != rootfs);
129 	return ((struct mount *)0);
130 }
131 
/*
 * Set vnode attributes to VNOVAL
 *
 * Marks every numeric attribute "no value" so that a file system
 * need only fill in the fields it actually implements.  NB: the
 * chained assignment funnels VNOVAL through each field in turn,
 * converting through each intermediate field type along the way,
 * so the statement order here is significant.
 */
void vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid = vap->va_size =
		vap->va_size_rsv = vap->va_blocksize = vap->va_rdev =
		vap->va_bytes = vap->va_bytes_rsv =
		vap->va_atime.tv_sec = vap->va_atime.tv_usec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_usec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_usec =
		vap->va_flags = vap->va_gen = VNOVAL;
}
149 
150 /*
151  * Initialize a nameidata structure
152  */
153 ndinit(ndp)
154 	register struct nameidata *ndp;
155 {
156 
157 	bzero((caddr_t)ndp, sizeof(struct nameidata));
158 	ndp->ni_iov = &ndp->ni_nd.nd_iovec;
159 	ndp->ni_iovcnt = 1;
160 	ndp->ni_base = (caddr_t)&ndp->ni_dent;
161 	ndp->ni_rw = UIO_WRITE;
162 	ndp->ni_uioseg = UIO_SYSSPACE;
163 }
164 
165 /*
166  * Duplicate a nameidata structure
167  */
168 nddup(ndp, newndp)
169 	register struct nameidata *ndp, *newndp;
170 {
171 
172 	ndinit(newndp);
173 	newndp->ni_cred = ndp->ni_cred;
174 	crhold(newndp->ni_cred);
175 }
176 
177 /*
178  * Release a nameidata structure
179  */
180 ndrele(ndp)
181 	register struct nameidata *ndp;
182 {
183 
184 	crfree(ndp->ni_cred);
185 }
186 
/*
 * Routines having to do with the management of the vnode table.
 */
struct vnode *vfreeh, **vfreet;	/* free list head / pointer to tail link */
extern struct vnodeops dead_vnodeops, spec_vnodeops;
extern void vclean();
long numvnodes;			/* current number of allocated vnodes */
struct vattr va_null;		/* template set to VNOVAL by vfsinit() */
195 
196 /*
197  * Initialize the vnode structures and initialize each file system type.
198  */
199 vfsinit()
200 {
201 	struct vfsops **vfsp;
202 
203 	/*
204 	 * Initialize the vnode name cache
205 	 */
206 	nchinit();
207 	/*
208 	 * Initialize each file system type.
209 	 */
210 	vattr_null(&va_null);
211 	for (vfsp = &vfssw[0]; vfsp <= &vfssw[MOUNT_MAXTYPE]; vfsp++) {
212 		if (*vfsp == NULL)
213 			continue;
214 		(*(*vfsp)->vfs_init)();
215 	}
216 }
217 
/*
 * Return the next vnode from the free list.
 *
 * Vnodes are allocated fresh until the table reaches desiredvnodes;
 * after that the oldest entry at the head of the free list is
 * recycled.  On success the vnode is referenced, placed on mp's
 * vnode list, and handed back through *vpp with a zero return;
 * ENFILE is returned when the table is full and the free list
 * is empty.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	struct vnodeops *vops;
	struct vnode **vpp;
{
	register struct vnode *vp, *vq;

	if (numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vfreeh) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		/* Unlink vp from the head of the free list. */
		if (vq = vp->v_freef)
			vq->v_freeb = &vfreeh;
		else
			vfreet = &vfreeh;
		vfreeh = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
		/* Sever all ties with the previous file system. */
		if (vp->v_type != VBAD)
			vgone(vp);
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);	/* flush stale name cache entries */
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	VREF(vp);
	*vpp = vp;
	return (0);
}
264 
/*
 * Move a vnode from one mount queue to another.
 *
 * A null mp leaves the vnode on no mount list at all.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	struct vnode *vq;

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	vp->v_mount = mp;
	if (mp == NULL) {
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		return;
	}
	if (mp->mnt_mounth) {
		/* Push onto the front of a non-empty list. */
		vp->v_mountf = mp->mnt_mounth;
		vp->v_mountb = &mp->mnt_mounth;
		mp->mnt_mounth->v_mountb = &vp->v_mountf;
		mp->mnt_mounth = vp;
	} else {
		/* First vnode on this mount point. */
		mp->mnt_mounth = vp;
		vp->v_mountb = &mp->mnt_mounth;
		vp->v_mountf = NULL;
	}
}
302 
/*
 * Make sure all write-behind blocks associated
 * with mount point are flushed out (from sync).
 *
 * The caller must hold the mount point busy.  The scan restarts
 * from the top whenever a vnode cannot be grabbed or has moved
 * off this mount point while we slept.
 */
mntflushbuf(mountp, flags)
	struct mount *mountp;
	int flags;
{
	register struct vnode *vp;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntflushbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		/* Locked vnodes are skipped rather than waited for. */
		if (VOP_ISLOCKED(vp))
			continue;
		if (vget(vp))
			goto loop;
		vflushbuf(vp, flags);
		vput(vp);
		/* RACE: vp may have been recycled to another mount. */
		if (vp->v_mount != mountp)
			goto loop;
	}
}
327 
/*
 * Flush all dirty buffers associated with a vnode.
 *
 * If B_SYNC is set in flags, wait for all output to drain and
 * rescan until the vnode's dirty list is empty; otherwise the
 * writes are merely started asynchronously.
 */
vflushbuf(vp, flags)
	register struct vnode *vp;
	int flags;
{
	register struct buf *bp;
	struct buf *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 * NB: This is really specific to ufs, but is done here
		 * as it is easier and quicker.
		 */
		if (bp->b_vp == vp || (flags & B_SYNC) == 0) {
			(void) bawrite(bp);
			s = splbio();
		} else {
			/* Synchronous write may sleep: restart the scan. */
			(void) bwrite(bp);
			goto loop;
		}
	}
	splx(s);
	if ((flags & B_SYNC) == 0)
		return;
	/* Wait for all outstanding writes on the vnode to drain. */
	s = splbio();
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
	}
	splx(s);
	/* Buffers may have been redirtied while we slept; start over. */
	if (vp->v_dirtyblkhd) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
378 
379 /*
380  * Update outstanding I/O count and do wakeup if requested.
381  */
382 vwakeup(bp)
383 	register struct buf *bp;
384 {
385 	register struct vnode *vp;
386 
387 	bp->b_dirtyoff = bp->b_dirtyend = 0;
388 	if (vp = bp->b_vp) {
389 		vp->v_numoutput--;
390 		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
391 			if (vp->v_numoutput < 0)
392 				panic("vwakeup: neg numoutput");
393 			vp->v_flag &= ~VBWAIT;
394 			wakeup((caddr_t)&vp->v_numoutput);
395 		}
396 	}
397 }
398 
/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * Go through the list of vnodes associated with the file system;
 * for each vnode invalidate any buffers that it holds. Normally
 * this routine is preceded by a bflush call, so that on a quiescent
 * filesystem there will be no dirty buffers when we are done. Binval
 * returns the count of dirty buffers when it is finished.
 */
mntinvalbuf(mountp)
	struct mount *mountp;
{
	register struct vnode *vp;
	int dirty = 0;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntinvalbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (vget(vp))
			goto loop;
		dirty += vinvalbuf(vp, 1);
		vput(vp);
		/* Restart if the vnode was recycled off this mount. */
		if (vp->v_mount != mountp)
			goto loop;
	}
	return (dirty);
}
427 
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * If "save" is non-zero, delayed-write buffers are written out
 * (and counted) instead of being discarded.  Returns the number
 * of dirty buffers written.
 */
vinvalbuf(vp, save)
	register struct vnode *vp;
	int save;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, dirty = 0;

	/* Drain the dirty list first, then the clean list. */
	for (;;) {
		if (blist = vp->v_dirtyblkhd)
			/* void */;
		else if (blist = vp->v_cleanblkhd)
			/* void */;
		else
			break;
		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_blockf;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/* Wait for the buffer, then rescan. */
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO + 1);
				splx(s);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			if (save && (bp->b_flags & B_DELWRI)) {
				dirty++;
				(void) bwrite(bp);
				break;
			}
			/* A buffer claiming another vnode goes back to it. */
			if (bp->b_vp != vp)
				reassignbuf(bp, bp->b_vp);
			else
				bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
		panic("vinvalbuf: flush failed");
	return (dirty);
}
475 
/*
 * Associate a buffer with a vnode.
 *
 * Takes a hold on the vnode and links the buffer onto the head of
 * the vnode's clean list; reassignbuf() moves it to the dirty list
 * later if needed.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	/* Device buffers carry the device number as well. */
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	if (vp->v_cleanblkhd) {
		bp->b_blockf = vp->v_cleanblkhd;
		bp->b_blockb = &vp->v_cleanblkhd;
		vp->v_cleanblkhd->b_blockb = &bp->b_blockf;
		vp->v_cleanblkhd = bp;
	} else {
		vp->v_cleanblkhd = bp;
		bp->b_blockb = &vp->v_cleanblkhd;
		bp->b_blockf = NULL;
	}
}
506 
/*
 * Disassociate a buffer from a vnode.
 *
 * Unlinks the buffer from the vnode's block list and releases the
 * hold taken in bgetvp().
 */
brelvp(bp)
	register struct buf *bp;
{
	struct buf *bq;
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
		bp->b_blockf = NULL;
		bp->b_blockb = NULL;
	}
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}
532 
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buf *bq, **listheadp;

	if (newvp == NULL)
		panic("reassignbuf: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
	}
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	if (*listheadp) {
		/* Insert at the head of a non-empty list. */
		bp->b_blockf = *listheadp;
		bp->b_blockb = listheadp;
		bp->b_blockf->b_blockb = &bp->b_blockf;
		*listheadp = bp;
	} else {
		/* First buffer on this list. */
		*listheadp = bp;
		bp->b_blockb = listheadp;
		bp->b_blockf = NULL;
	}
}
573 
574 /*
575  * Create a vnode for a block device.
576  * Used for root filesystem, argdev, and swap areas.
577  * Also used for memory file system special devices.
578  */
579 bdevvp(dev, vpp)
580 	dev_t dev;
581 	struct vnode **vpp;
582 {
583 	register struct vnode *vp;
584 	struct vnode *nvp;
585 	int error;
586 
587 	if (dev == NODEV)
588 		return (0);
589 	error = getnewvnode(VT_NON, (struct mount *)0, &spec_vnodeops, &nvp);
590 	if (error) {
591 		*vpp = 0;
592 		return (error);
593 	}
594 	vp = nvp;
595 	vp->v_type = VBLK;
596 	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
597 		vput(vp);
598 		vp = nvp;
599 	}
600 	*vpp = vp;
601 	return (0);
602 }
603 
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	/* Only device vnodes can be aliased. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		/* vget may sleep; the hash chain may have changed. */
		if (vget(vp))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/*
		 * No reusable alias: hook nvp into the hash chain, and
		 * mark both vnodes aliased when an in-use twin exists.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * Reuse the existing anonymous (VT_NON) device vnode: strip its
	 * old identity and adopt the caller's ops, tag, and mount point.
	 */
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
663 
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set while the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
vget(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VXLOCK) {
		/* Being cleaned out; wait for vgone/vclean to finish. */
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
	if (vp->v_usecount == 0) {
		/* Still on the free list; unlink it. */
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
	}
	VREF(vp);
	VOP_LOCK(vp);
	return (0);
}
695 
696 /*
697  * Vnode reference, just increment the count
698  */
699 void vref(vp)
700 	struct vnode *vp;
701 {
702 
703 	vp->v_usecount++;
704 }
705 
/*
 * vput(), just unlock and vrele()
 */
void vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}
715 
/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void vrele(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;		/* XXX */

	if (vp == NULL)
		panic("vrele: null vp");
	vp->v_usecount--;
	if (vp->v_usecount < 0)
		vprint("vrele: bad ref count", vp);
	if (vp->v_usecount > 0)
		return;
	/*
	 * Last reference gone: append to the tail of the free list
	 * so that this vnode is recycled after older entries.
	 */
	if (vfreeh == NULLVP) {
		/*
		 * insert into empty list
		 */
		vfreeh = vp;
		vp->v_freeb = &vfreeh;
	} else {
		/*
		 * insert at tail of list
		 */
		*vfreet = vp;
		vp->v_freeb = vfreet;
	}
	vp->v_freef = NULL;
	vfreet = &vp->v_freef;
	VOP_INACTIVE(vp, p);
}
749 
750 /*
751  * Page or buffer structure gets a reference.
752  */
753 vhold(vp)
754 	register struct vnode *vp;
755 {
756 
757 	vp->v_holdcnt++;
758 }
759 
760 /*
761  * Page or buffer structure frees a reference.
762  */
763 holdrele(vp)
764 	register struct vnode *vp;
765 {
766 
767 	if (vp->v_holdcnt <= 0)
768 		panic("holdrele: holdcnt");
769 	vp->v_holdcnt--;
770 }
771 
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
int busyprt = 0;	/* patch to print out busy vnodes */

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_mounth; vp; vp = nvp) {
		/* RACE: vp may have moved to another mount; rescan. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mountf;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over any vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = &spec_vnodeops;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
		if (busyprt)
			vprint("vflush: busy vnode", vp);
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
837 
/*
 * Disassociate the underlying file system from a vnode.
 *
 * With DOCLOSE in flags the vnode's buffers are invalidated and,
 * for an active vnode, its close routine is called before the
 * reclaim.  On return the vnode is backed by dead_vnodeops.
 */
void vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	struct vnodeops *origops;
	int active;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the buffer list is being cleaned out.
	 */
	VOP_LOCK(vp);
	if (flags & DOCLOSE)
		vinvalbuf(vp, 1);
	/*
	 * Prevent any further operations on the vnode from
	 * being passed through to the old file system.
	 */
	origops = vp->v_op;
	vp->v_op = &dead_vnodeops;
	vp->v_tag = VT_NON;
	/*
	 * If purging an active vnode, it must be unlocked, closed,
	 * and deactivated before being reclaimed.
	 * NB: the old ops vector must be used for these calls since
	 * v_op has already been switched to dead_vnodeops.
	 */
	(*(origops->vn_unlock))(vp);
	if (active) {
		if (flags & DOCLOSE)
			(*(origops->vn_close))(vp, IO_NDELAY, NOCRED, p);
		(*(origops->vn_inactive))(vp, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if ((*(origops->vn_reclaim))(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	/*
	 * Done with purge, notify sleepers in vget of the grim news.
	 */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}
907 
/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		/* vgone on the last remaining alias clears VALIASED on vp. */
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}
950 
951 /*
952  * Eliminate all activity associated with a vnode
953  * in preparation for reuse.
954  */
955 void vgone(vp)
956 	register struct vnode *vp;
957 {
958 	register struct vnode *vq;
959 	struct vnode *vx;
960 	long count;
961 
962 	/*
963 	 * If a vgone (or vclean) is already in progress,
964 	 * wait until it is done and return.
965 	 */
966 	if (vp->v_flag & VXLOCK) {
967 		vp->v_flag |= VXWANT;
968 		sleep((caddr_t)vp, PINOD);
969 		return;
970 	}
971 	/*
972 	 * Clean out the filesystem specific data.
973 	 */
974 	vclean(vp, DOCLOSE);
975 	/*
976 	 * Delete from old mount point vnode list, if on one.
977 	 */
978 	if (vp->v_mountb) {
979 		if (vq = vp->v_mountf)
980 			vq->v_mountb = vp->v_mountb;
981 		*vp->v_mountb = vq;
982 		vp->v_mountf = NULL;
983 		vp->v_mountb = NULL;
984 	}
985 	/*
986 	 * If special device, remove it from special device alias list.
987 	 */
988 	if (vp->v_type == VBLK || vp->v_type == VCHR) {
989 		if (*vp->v_hashchain == vp) {
990 			*vp->v_hashchain = vp->v_specnext;
991 		} else {
992 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
993 				if (vq->v_specnext != vp)
994 					continue;
995 				vq->v_specnext = vp->v_specnext;
996 				break;
997 			}
998 			if (vq == NULL)
999 				panic("missing bdev");
1000 		}
1001 		if (vp->v_flag & VALIASED) {
1002 			count = 0;
1003 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1004 				if (vq->v_rdev != vp->v_rdev ||
1005 				    vq->v_type != vp->v_type)
1006 					continue;
1007 				count++;
1008 				vx = vq;
1009 			}
1010 			if (count == 0)
1011 				panic("missing alias");
1012 			if (count == 1)
1013 				vx->v_flag &= ~VALIASED;
1014 			vp->v_flag &= ~VALIASED;
1015 		}
1016 		FREE(vp->v_specinfo, M_VNODE);
1017 		vp->v_specinfo = NULL;
1018 	}
1019 	/*
1020 	 * If it is on the freelist, move it to the head of the list.
1021 	 */
1022 	if (vp->v_freeb) {
1023 		if (vq = vp->v_freef)
1024 			vq->v_freeb = vp->v_freeb;
1025 		else
1026 			vfreet = vp->v_freeb;
1027 		*vp->v_freeb = vq;
1028 		vp->v_freef = vfreeh;
1029 		vp->v_freeb = &vfreeh;
1030 		vfreeh->v_freeb = &vp->v_freef;
1031 		vfreeh = vp;
1032 	}
1033 	vp->v_type = VBAD;
1034 }
1035 
1036 /*
1037  * Lookup a vnode by device number.
1038  */
1039 vfinddev(dev, type, vpp)
1040 	dev_t dev;
1041 	enum vtype type;
1042 	struct vnode **vpp;
1043 {
1044 	register struct vnode *vp;
1045 
1046 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1047 		if (dev != vp->v_rdev || type != vp->v_type)
1048 			continue;
1049 		*vpp = vp;
1050 		return (0);
1051 	}
1052 	return (1);
1053 }
1054 
/*
 * Calculate the total number of references to a special device.
 *
 * For an aliased device this sums v_usecount over every vnode on
 * the hash chain naming the same device; unused aliases found
 * along the way are flushed out, which restarts the scan.
 */
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int count;

	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
loop:
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}
1081 
1082 /*
1083  * Print out a description of a vnode.
1084  */
1085 static char *typename[] =
1086    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
1087 
1088 vprint(label, vp)
1089 	char *label;
1090 	register struct vnode *vp;
1091 {
1092 	char buf[64];
1093 
1094 	if (label != NULL)
1095 		printf("%s: ", label);
1096 	printf("type %s, usecount %d, refcount %d,", typename[vp->v_type],
1097 		vp->v_usecount, vp->v_holdcnt);
1098 	buf[0] = '\0';
1099 	if (vp->v_flag & VROOT)
1100 		strcat(buf, "|VROOT");
1101 	if (vp->v_flag & VTEXT)
1102 		strcat(buf, "|VTEXT");
1103 	if (vp->v_flag & VSYSTEM)
1104 		strcat(buf, "|VSYSTEM");
1105 	if (vp->v_flag & VXLOCK)
1106 		strcat(buf, "|VXLOCK");
1107 	if (vp->v_flag & VXWANT)
1108 		strcat(buf, "|VXWANT");
1109 	if (vp->v_flag & VBWAIT)
1110 		strcat(buf, "|VBWAIT");
1111 	if (vp->v_flag & VALIASED)
1112 		strcat(buf, "|VALIASED");
1113 	if (buf[0] != '\0')
1114 		printf(" flags (%s)", &buf[1]);
1115 	printf("\n\t");
1116 	VOP_PRINT(vp);
1117 }
1118 
int kinfo_vdebug = 1;		/* log scan restarts caused by races */
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via kinfo).
 * Copyout address of vnode followed by vnode.
 *
 * When "where" is null, only the estimated buffer size needed is
 * returned through *aneeded.  Otherwise each mount point is busied
 * in turn and its vnodes copied out as (pointer, vnode) pairs.
 */
/* ARGSUSED */
kinfo_vnode(op, where, acopysize, arg, aneeded)
	int op;
	char *where;
	int *acopysize, arg, *aneeded;
{
	register struct mount *mp = rootfs;
	struct mount *omp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere = where + *acopysize;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		/* Pad the estimate for vnodes created in the meantime. */
		*aneeded = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}

	do {
		if (vfs_busy(mp)) {
			mp = mp->mnt_next;
			continue;
		}
		savebp = bp;
again:
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			/*
			 * Copy out only while the pair still fits; the
			 * space actually needed keeps being tallied.
			 * NOTE(review): this early return leaves mp
			 * busied -- confirm against callers.
			 */
			if ((bp + VPTRSZ + VNODESZ <= ewhere) &&
			    ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			     (error = copyout((caddr_t)vp, bp + VPTRSZ,
			      VNODESZ))))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		omp = mp;
		mp = mp->mnt_next;
		vfs_unbusy(omp);
	} while (mp != rootfs);

	*aneeded = bp - where;
	if (bp > ewhere)
		*acopysize = ewhere - where;
	else
		*acopysize = bp - where;
	return (0);
}
1184