/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	7.66 (Berkeley) 01/22/92
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/specdev.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>

/*
 * Remove a mount point from the list of mounted filesystems.
 * Unmount of the root is illegal.
 */
void
vfs_remove(mp)
	register struct mount *mp;
{

	if (mp == rootfs)
		panic("vfs_remove: unmounting root");
	mp->mnt_prev->mnt_next = mp->mnt_next;
	mp->mnt_next->mnt_prev = mp->mnt_prev;
	mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
	vfs_unlock(mp);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}
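
/*
 * Illustrative sketch (not part of the original source): a caller that
 * needs the mount structure to stay stable brackets its work with
 * vfs_lock()/vfs_unlock().  The name example_mount_stable() and its
 * body are hypothetical.
 */
#ifdef notdef
static int
example_mount_stable(mp)
	struct mount *mp;
{

	(void) vfs_lock(mp);	/* sleeps until MNT_MLOCK is clear */
	/*
	 * mp cannot be unmounted here; inspect or modify it safely.
	 */
	vfs_unlock(mp);		/* wakes anyone waiting in vfs_lock() */
	return (0);
}
#endif /* notdef */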

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		sleep((caddr_t)&mp->mnt_flag, PVFS);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}
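
/*
 * Illustrative sketch (not part of the original source): vfs_busy()
 * returns 1 when an unmount is in progress, so callers must check the
 * result before touching the mount point, as kinfo_vnode() below does.
 * example_scan() is hypothetical.
 */
#ifdef notdef
static int
example_scan(mp)
	register struct mount *mp;
{

	if (vfs_busy(mp))	/* being unmounted; leave it alone */
		return (EBUSY);
	/* ... walk mp->mnt_mounth, as mntflushbuf() does ... */
	vfs_unbusy(mp);		/* wakes anyone waiting in vfs_busy() */
	return (0);
}
#endif /* notdef */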

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	mp = rootfs;
	do {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			return (mp);
		}
		mp = mp->mnt_next;
	} while (mp != rootfs);
	return ((struct mount *)0);
}
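
/*
 * Illustrative sketch (not part of the original source): a filesystem
 * that hands out fsid-based handles (NFS-style) can map one back to its
 * mount point with getvfs(), treating a null return as a stale handle.
 * example_fsid_lookup() is hypothetical.
 */
#ifdef notdef
static struct mount *
example_fsid_lookup(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	if ((mp = getvfs(fsid)) == NULL)
		return ((struct mount *)0);	/* stale or bogus handle */
	return (mp);
}
#endif /* notdef */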

/*
 * Set vnode attributes to VNOVAL
 */
void vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
#ifdef _NOQUAD
	vap->va_size_rsv = vap->va_bytes_rsv = VNOVAL;
#endif
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_usec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_usec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_usec =
		vap->va_flags = vap->va_gen = VNOVAL;
}
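
/*
 * Illustrative sketch (not part of the original source): callers build a
 * setattr request by clearing every field to VNOVAL first, then filling
 * in only the attributes to change; fields left at VNOVAL are ignored.
 * example_truncate() is hypothetical, and the exact VOP_SETATTR
 * signature is an assumption based on its use elsewhere in this kernel.
 */
#ifdef notdef
static int
example_truncate(vp, length, cred, p)
	struct vnode *vp;
	u_long length;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr va;

	vattr_null(&va);	/* everything starts as "don't change" */
	va.va_size = length;	/* change only the size */
	return (VOP_SETATTR(vp, &va, cred, p));
}
#endif /* notdef */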

/*
 * Routines having to do with the management of the vnode table.
 */
struct vnode *vfreeh, **vfreet;
extern struct vnodeops dead_vnodeops, spec_vnodeops;
extern void vclean();
long numvnodes;
struct vattr va_null;

/*
 * Initialize the vnode structures and initialize each file system type.
 */
vfsinit()
{
	struct vfsops **vfsp;

	/*
	 * Initialize the vnode name cache
	 */
	nchinit();
	/*
	 * Initialize each file system type.
	 */
	vattr_null(&va_null);
	for (vfsp = &vfssw[0]; vfsp <= &vfssw[MOUNT_MAXTYPE]; vfsp++) {
		if (*vfsp == NULL)
			continue;
		(*(*vfsp)->vfs_init)();
	}
}

/*
 * Return the next vnode from the free list.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	struct vnodeops *vops;
	struct vnode **vpp;
{
	register struct vnode *vp, *vq;

	if (numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vfreeh) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		if (vq = vp->v_freef)
			vq->v_freeb = &vfreeh;
		else
			vfreet = &vfreeh;
		vfreeh = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
		if (vp->v_data)
			panic("cleaned vnode isn't");
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	VREF(vp);
	*vpp = vp;
	return (0);
}
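
/*
 * Illustrative sketch (not part of the original source): a filesystem's
 * vget/create path allocates its vnodes through getnewvnode(), which
 * hands back the vnode already referenced (VREF) and queued on mp's
 * vnode list.  VT_NON is reused here as a placeholder tag;
 * example_vnodeops and example_alloc() are hypothetical names for a
 * real filesystem's operations vector and allocation routine.
 */
#ifdef notdef
extern struct vnodeops example_vnodeops;	/* hypothetical */

static int
example_alloc(mp, vpp)
	struct mount *mp;
	struct vnode **vpp;
{
	int error;

	if (error = getnewvnode(VT_NON, mp, &example_vnodeops, vpp))
		return (error);
	/* *vpp now has v_usecount 1; hang private data off v_data. */
	return (0);
}
#endif /* notdef */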

/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	register struct vnode *vq;

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	vp->v_mount = mp;
	if (mp == NULL) {
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		return;
	}
	if (vq = mp->mnt_mounth)
		vq->v_mountb = &vp->v_mountf;
	vp->v_mountf = vq;
	vp->v_mountb = &mp->mnt_mounth;
	mp->mnt_mounth = vp;
}

/*
 * Make sure all write-behind blocks associated
 * with mount point are flushed out (from sync).
 */
mntflushbuf(mountp, flags)
	struct mount *mountp;
	int flags;
{
	register struct vnode *vp;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntflushbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (VOP_ISLOCKED(vp))
			continue;
		if (vget(vp))
			goto loop;
		vflushbuf(vp, flags);
		vput(vp);
		if (vp->v_mount != mountp)
			goto loop;
	}
}

/*
 * Flush all dirty buffers associated with a vnode.
 */
vflushbuf(vp, flags)
	register struct vnode *vp;
	int flags;
{
	register struct buf *bp;
	struct buf *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		if (bp->b_flags & B_BUSY)
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 * NB: This is really specific to ufs, but is done here
		 * as it is easier and quicker.
		 */
		if (bp->b_vp == vp || (flags & B_SYNC) == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	splx(s);
	if ((flags & B_SYNC) == 0)
		return;
	s = splbio();
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
	}
	splx(s);
	if (vp->v_dirtyblkhd) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_dirtyoff = bp->b_dirtyend = 0;
	if (vp = bp->b_vp) {
		vp->v_numoutput--;
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Invalidate in core blocks belonging to closed or unmounted filesystem
 *
 * Go through the list of vnodes associated with the file system;
 * for each vnode invalidate any buffers that it holds. Normally
 * this routine is preceded by a bflush call, so that on a quiescent
 * filesystem there will be no dirty buffers when we are done. This
 * routine returns the count of dirty buffers when it is finished.
 */
mntinvalbuf(mountp)
	struct mount *mountp;
{
	register struct vnode *vp;
	int dirty = 0;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntinvalbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (vget(vp))
			goto loop;
		dirty += vinvalbuf(vp, 1);
		vput(vp);
		if (vp->v_mount != mountp)
			goto loop;
	}
	return (dirty);
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
vinvalbuf(vp, save)
	register struct vnode *vp;
	int save;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, dirty = 0;

	for (;;) {
		if (blist = vp->v_dirtyblkhd)
			/* void */;
		else if (blist = vp->v_cleanblkhd)
			/* void */;
		else
			break;
		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_blockf;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO + 1);
				splx(s);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			if (save && (bp->b_flags & B_DELWRI)) {
				dirty++;
				(void) VOP_BWRITE(bp);
				break;
			}
			if (bp->b_vp != vp)
				reassignbuf(bp, bp->b_vp);
			else
				bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
		panic("vinvalbuf: flush failed");
	return (dirty);
}
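
/*
 * Illustrative sketch (not part of the original source): the "save"
 * argument selects between writing dirty buffers out first (save == 1,
 * e.g. on a clean unmount) and simply discarding them (save == 0, e.g.
 * when revoking a device).  example_discard() is hypothetical.
 */
#ifdef notdef
static void
example_discard(vp)
	struct vnode *vp;
{

	/* Caller holds the underlying object locked, per the comment above. */
	(void) vinvalbuf(vp, 0);	/* toss clean and dirty buffers alike */
}
#endif /* notdef */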

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	register struct buf *bq;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	if (bq = vp->v_cleanblkhd)
		bq->b_blockb = &bp->b_blockf;
	bp->b_blockf = bq;
	bp->b_blockb = &vp->v_cleanblkhd;
	vp->v_cleanblkhd = bp;
}

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct buf *bq;
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
		bp->b_blockf = NULL;
		bp->b_blockb = NULL;
	}
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buf *bq, **listheadp;

	if (newvp == NULL)
		panic("reassignbuf: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
	}
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	if (bq = *listheadp)
		bq->b_blockb = &bp->b_blockf;
	bp->b_blockf = bq;
	bp->b_blockb = listheadp;
	*listheadp = bp;
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, &spec_vnodeops, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
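
/*
 * Illustrative sketch (not part of the original source): early system
 * setup typically uses bdevvp() to wrap the root and swap devices in
 * vnodes, along the lines of the machine-dependent code sketched here.
 * The rootdev/rootvp declarations and example_setroot() are assumptions,
 * not taken from this file.
 */
#ifdef notdef
extern dev_t rootdev;		/* assumed declared elsewhere */
struct vnode *rootvp;		/* assumed declared elsewhere */

static void
example_setroot()
{

	if (bdevvp(rootdev, &rootvp))
		panic("example_setroot: cannot get root vnode");
}
#endif /* notdef */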

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if
 * the vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
vget(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
	if (vp->v_usecount == 0) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
	}
	VREF(vp);
	VOP_LOCK(vp);
	return (0);
}
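
/*
 * Illustrative sketch (not part of the original source): because vget()
 * returns 1 after sleeping on a doomed vnode, list walkers restart their
 * scan rather than trust a pointer they slept on, exactly as
 * mntflushbuf() and mntinvalbuf() above do.  example_walk() is
 * hypothetical.
 */
#ifdef notdef
static void
example_walk(mp)
	struct mount *mp;
{
	register struct vnode *vp;

loop:
	for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (vget(vp))		/* slept; the list may have changed */
			goto loop;
		/* ... use vp here; it is referenced and locked ... */
		vput(vp);		/* unlock and release */
		if (vp->v_mount != mp)	/* vp was recycled while unlocked */
			goto loop;
	}
}
#endif /* notdef */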

int bug_refs = 0;

/*
 * Vnode reference, just increment the count
 */
void vref(vp)
	struct vnode *vp;
{

	vp->v_usecount++;
	if (vp->v_type != VBLK && curproc) {	/* NEEDSWORK: debugging */
		curproc->p_spare[2]++;
	}
	if (bug_refs)
		vprint("vref", vp);
}

/*
 * vput(), just unlock and vrele()
 */
void vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void vrele(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;		/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_type != VBLK && curproc) {	/* NEEDSWORK: debugging */
		curproc->p_spare[2]--;
	}
	if (bug_refs)
		vprint("vrele", vp);
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	if (vfreeh == NULLVP) {
		/*
		 * insert into empty list
		 */
		vfreeh = vp;
		vp->v_freeb = &vfreeh;
	} else {
		/*
		 * insert at tail of list
		 */
		*vfreet = vp;
		vp->v_freeb = vfreet;
	}
	vp->v_freef = NULL;
	vfreet = &vp->v_freef;
	VOP_INACTIVE(vp, p);
}
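
/*
 * Illustrative sketch (not part of the original source): the common
 * reference-count life cycle.  vput() drops the lock and the reference
 * together, while vrele() drops just the reference.  example_use() is
 * hypothetical.
 */
#ifdef notdef
static void
example_use(vp)
	struct vnode *vp;
{

	VREF(vp);		/* take an extra reference */
	VOP_LOCK(vp);		/* serialize with other users */
	/* ... operate on vp ... */
	vput(vp);		/* VOP_UNLOCK plus vrele in one call */
}
#endif /* notdef */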

/*
 * Page or buffer structure gets a reference.
 */
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
int busyprt = 0;	/* patch to print out busy vnodes */

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_mounth; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mountf;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over any vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = &spec_vnodeops;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
		if (busyprt)
			vprint("vflush: busy vnode", vp);
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
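
/*
 * Illustrative sketch (not part of the original source): a filesystem's
 * unmount routine typically flushes and releases every vnode on the
 * mount with vflush(), keeping only its own root vnode alive until the
 * end.  example_unmount() and rootvn are hypothetical.
 */
#ifdef notdef
static int
example_unmount(mp, mntflags)
	struct mount *mp;
	int mntflags;
{
	struct vnode *rootvn = NULLVP;	/* ... look up the fs root vnode ... */
	int error, flags = 0;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	if (error = vflush(mp, rootvn, flags))
		return (error);		/* EBUSY: active vnodes remain */
	/* ... now release rootvn and free the per-mount data ... */
	return (0);
}
#endif /* notdef */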

/*
 * Disassociate the underlying file system from a vnode.
 */
void vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	struct vnodeops *origops;
	int active;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the buffer list is being cleaned out.
	 */
	VOP_LOCK(vp);
	if (flags & DOCLOSE)
		vinvalbuf(vp, 1);
	/*
	 * Prevent any further operations on the vnode from
	 * being passed through to the old file system.
	 */
	origops = vp->v_op;
	vp->v_op = &dead_vnodeops;
	vp->v_tag = VT_NON;
	/*
	 * If purging an active vnode, it must be unlocked, closed,
	 * and deactivated before being reclaimed.
	 */
	(*(origops->vop_unlock))(vp);
	if (active) {
		if (flags & DOCLOSE)
			(*(origops->vop_close))(vp, IO_NDELAY, NOCRED, p);
		(*(origops->vop_inactive))(vp, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if ((*(origops->vop_reclaim))(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	/*
	 * Done with purge, notify sleepers in vget of the grim news.
	 */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;
	long count;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			count = 0;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				count++;
				vx = vq;
			}
			if (count == 0)
				panic("missing alias");
			if (count == 1)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist, move it to the head of the list.
	 */
	if (vp->v_freeb) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = vfreeh;
		vp->v_freeb = &vfreeh;
		vfreeh->v_freeb = &vp->v_freef;
		vfreeh = vp;
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (0);
	}
	return (1);
}
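
/*
 * Illustrative sketch (not part of the original source): a driver can
 * ask whether a vnode already exists for a device; note that this
 * version of vfinddev() returns 0 when a vnode is found.
 * example_isopen() is hypothetical.
 */
#ifdef notdef
static int
example_isopen(dev)
	dev_t dev;
{
	struct vnode *vp;

	if (vfinddev(dev, VBLK, &vp) == 0)
		return (vcount(vp) > 0);	/* references across aliases */
	return (0);				/* no vnode, so not open */
}
#endif /* notdef */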

/*
 * Calculate the total number of references to a special device.
 */
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int count;

	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
loop:
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	printf("\n\t");
	VOP_PRINT(vp);
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	mp = rootfs;
	do {
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		mp = mp->mnt_next;
	} while (mp != rootfs);
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via kinfo).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
kinfo_vnode(op, where, acopysize, arg, aneeded)
	int op;
	char *where;
	int *acopysize, arg, *aneeded;
{
	register struct mount *mp = rootfs;
	struct mount *omp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere = where + *acopysize;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*aneeded = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}

	do {
		if (vfs_busy(mp)) {
			mp = mp->mnt_next;
			continue;
		}
		savebp = bp;
again:
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if ((bp + VPTRSZ + VNODESZ <= ewhere) &&
			    ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			     (error = copyout((caddr_t)vp, bp + VPTRSZ,
			      VNODESZ)))) {
				vfs_unbusy(mp);	/* don't leave mp busy on error */
				return (error);
			}
			bp += VPTRSZ + VNODESZ;
		}
		omp = mp;
		mp = mp->mnt_next;
		vfs_unbusy(omp);
	} while (mp != rootfs);

	*aneeded = bp - where;
	if (bp > ewhere)
		*acopysize = ewhere - where;
	else
		*acopysize = bp - where;
	return (0);
}
1180