xref: /netbsd-src/sys/kern/vfs_mount.c (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1 /*	$NetBSD: vfs_mount.c,v 1.83 2020/05/23 23:42:43 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997-2020 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  * (c) UNIX System Laboratories, Inc.
37  * All or some portions of this file are derived from material licensed
38  * to the University of California by American Telephone and Telegraph
39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40  * the permission of UNIX System Laboratories, Inc.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
67  */
68 
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.83 2020/05/23 23:42:43 ad Exp $");
71 
72 #include <sys/param.h>
73 #include <sys/kernel.h>
74 
75 #include <sys/atomic.h>
76 #include <sys/buf.h>
77 #include <sys/conf.h>
78 #include <sys/fcntl.h>
79 #include <sys/filedesc.h>
80 #include <sys/device.h>
81 #include <sys/kauth.h>
82 #include <sys/kmem.h>
83 #include <sys/module.h>
84 #include <sys/mount.h>
85 #include <sys/fstrans.h>
86 #include <sys/namei.h>
87 #include <sys/extattr.h>
88 #include <sys/syscallargs.h>
89 #include <sys/sysctl.h>
90 #include <sys/systm.h>
91 #include <sys/vfs_syscalls.h>
92 #include <sys/vnode_impl.h>
93 
94 #include <miscfs/genfs/genfs.h>
95 #include <miscfs/specfs/specdev.h>
96 
/*
 * Kind of entry found on the mount list: either a real mount or a
 * marker entry.
 */
enum mountlist_type {
	ME_MOUNT,	/* Entry represents an actual mount. */
	ME_MARKER	/* Entry is a marker, not a mount of its own. */
};
/*
 * One element of the global mount list.  The same structure is used for
 * real mounts and for markers; me_type tells the two apart.
 */
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   current mount else. */
	enum mountlist_type me_type;		/* Mount or marker. */
};
/*
 * Mount list iterator: a wrapper around a single mount list entry.
 * NOTE(review): presumably this entry is linked into the list as an
 * ME_MARKER; the iterator implementation is not visible here - confirm.
 */
struct mount_iterator {
	struct mountlist_entry mi_entry;
};
110 
/*
 * Forward declaration: common worker behind vfs_vnode_iterator_next()
 * and vflushnext().  The trailing bool selects whether dying vnodes are
 * waited for.
 */
static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);
113 
/* Root filesystem. */
vnode_t *			rootvnode;

/* Mounted filesystem list. */
static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
static kmutex_t			mountlist_lock __cacheline_aligned;
int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
    = offsetof(vnode_impl_t, vi_lrulist.tqe_next);

/* Held in this file while walking vfs_list. */
kmutex_t			vfs_list_lock __cacheline_aligned;

/* Domain used for per-mount specificdata (see vfs_rele()). */
static specificdata_domain_t	mount_specificdata_domain;
/* Serializes fsid generation in vfs_getnewfsid(). */
static kmutex_t			mntid_lock;

/*
 * Mount generation counter.  vfs_mountalloc() stamps every new mount
 * with the current value and then increments it, under mountgen_lock.
 */
static kmutex_t			mountgen_lock __cacheline_aligned;
static uint64_t			mountgen;
130 
131 void
132 vfs_mount_sysinit(void)
133 {
134 
135 	TAILQ_INIT(&mountlist);
136 	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
137 	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
138 
139 	mount_specificdata_domain = specificdata_domain_create();
140 	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
141 	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
142 	mountgen = 0;
143 }
144 
145 struct mount *
146 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
147 {
148 	struct mount *mp;
149 	int error __diagused;
150 
151 	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
152 	mp->mnt_op = vfsops;
153 	mp->mnt_refcnt = 1;
154 	TAILQ_INIT(&mp->mnt_vnodelist);
155 	mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
156 	mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
157 	mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
158 	mp->mnt_vnodecovered = vp;
159 	mount_initspecific(mp);
160 
161 	error = fstrans_mount(mp);
162 	KASSERT(error == 0);
163 
164 	mutex_enter(&mountgen_lock);
165 	mp->mnt_gen = mountgen++;
166 	mutex_exit(&mountgen_lock);
167 
168 	return mp;
169 }
170 
171 /*
172  * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
173  * initialize a mount structure for it.
174  *
175  * Devname is usually updated by mount(8) after booting.
176  */
177 int
178 vfs_rootmountalloc(const char *fstypename, const char *devname,
179     struct mount **mpp)
180 {
181 	struct vfsops *vfsp = NULL;
182 	struct mount *mp;
183 	int error __diagused;
184 
185 	mutex_enter(&vfs_list_lock);
186 	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
187 		if (!strncmp(vfsp->vfs_name, fstypename,
188 		    sizeof(mp->mnt_stat.f_fstypename)))
189 			break;
190 	if (vfsp == NULL) {
191 		mutex_exit(&vfs_list_lock);
192 		return (ENODEV);
193 	}
194 	vfsp->vfs_refcount++;
195 	mutex_exit(&vfs_list_lock);
196 
197 	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
198 		return ENOMEM;
199 	error = vfs_busy(mp);
200 	KASSERT(error == 0);
201 	mp->mnt_flag = MNT_RDONLY;
202 	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
203 	    sizeof(mp->mnt_stat.f_fstypename));
204 	mp->mnt_stat.f_mntonname[0] = '/';
205 	mp->mnt_stat.f_mntonname[1] = '\0';
206 	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
207 	    '\0';
208 	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
209 	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
210 	*mpp = mp;
211 	return 0;
212 }
213 
214 /*
215  * vfs_getnewfsid: get a new unique fsid.
216  */
217 void
218 vfs_getnewfsid(struct mount *mp)
219 {
220 	static u_short xxxfs_mntid;
221 	fsid_t tfsid;
222 	int mtype;
223 
224 	mutex_enter(&mntid_lock);
225 	mtype = makefstype(mp->mnt_op->vfs_name);
226 	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
227 	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
228 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
229 	if (xxxfs_mntid == 0)
230 		++xxxfs_mntid;
231 	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
232 	tfsid.__fsid_val[1] = mtype;
233 	while (vfs_getvfs(&tfsid)) {
234 		tfsid.__fsid_val[0]++;
235 		xxxfs_mntid++;
236 	}
237 	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
238 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
239 	mutex_exit(&mntid_lock);
240 }
241 
242 /*
243  * Lookup a mount point by filesystem identifier.
244  *
245  * XXX Needs to add a reference to the mount point.
246  */
247 struct mount *
248 vfs_getvfs(fsid_t *fsid)
249 {
250 	mount_iterator_t *iter;
251 	struct mount *mp;
252 
253 	mountlist_iterator_init(&iter);
254 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
255 		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
256 		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
257 			mountlist_iterator_destroy(iter);
258 			return mp;
259 		}
260 	}
261 	mountlist_iterator_destroy(iter);
262 	return NULL;
263 }
264 
/*
 * Take a reference to a mount structure.
 *
 * The caller either already holds a reference (mnt_refcnt > 0) or holds
 * mountlist_lock; this is asserted on DIAGNOSTIC kernels.  The count is
 * bumped atomically.
 */
void
vfs_ref(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));

	atomic_inc_uint(&mp->mnt_refcnt);
}
276 
277 /*
278  * Drop a reference to a mount structure, freeing if the last reference.
279  */
280 void
281 vfs_rele(struct mount *mp)
282 {
283 
284 	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
285 		return;
286 	}
287 
288 	/*
289 	 * Nothing else has visibility of the mount: we can now
290 	 * free the data structures.
291 	 */
292 	KASSERT(mp->mnt_refcnt == 0);
293 	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
294 	mutex_obj_free(mp->mnt_updating);
295 	mutex_obj_free(mp->mnt_renamelock);
296 	mutex_obj_free(mp->mnt_vnodelock);
297 	if (mp->mnt_op != NULL) {
298 		vfs_delref(mp->mnt_op);
299 	}
300 	fstrans_unmount(mp);
301 	/*
302 	 * Final free of mp gets done from fstrans_mount_dtor().
303 	 *
304 	 * Prevents this memory to be reused as a mount before
305 	 * fstrans releases all references to it.
306 	 */
307 }
308 
309 /*
310  * Mark a mount point as busy, and gain a new reference to it.  Used to
311  * prevent the file system from being unmounted during critical sections.
312  *
313  * vfs_busy can be called multiple times and by multiple threads
314  * and must be accompanied by the same number of vfs_unbusy calls.
315  *
316  * => The caller must hold a pre-existing reference to the mount.
317  * => Will fail if the file system is being unmounted, or is unmounted.
318  */
319 static inline int
320 _vfs_busy(struct mount *mp, bool wait)
321 {
322 
323 	KASSERT(mp->mnt_refcnt > 0);
324 
325 	if (wait) {
326 		fstrans_start(mp);
327 	} else {
328 		if (fstrans_start_nowait(mp))
329 			return EBUSY;
330 	}
331 	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
332 		fstrans_done(mp);
333 		return ENOENT;
334 	}
335 	vfs_ref(mp);
336 	return 0;
337 }
338 
339 int
340 vfs_busy(struct mount *mp)
341 {
342 
343 	return _vfs_busy(mp, true);
344 }
345 
346 int
347 vfs_trybusy(struct mount *mp)
348 {
349 
350 	return _vfs_busy(mp, false);
351 }
352 
/*
 * Unbusy a busy filesystem.
 *
 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
 *
 * Undoes both halves of _vfs_busy(): ends the fstrans transaction and
 * then drops the mount reference taken there.
 */
void
vfs_unbusy(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0);

	fstrans_done(mp);
	/* May be the last reference, in which case mp is torn down. */
	vfs_rele(mp);
}
367 
/*
 * Vnode iterator for a mount.  It is a marker vnode
 * (see vfs_vnode_iterator_init()) that is linked into the mount's vnode
 * list to remember the current position.
 */
struct vnode_iterator {
	vnode_impl_t vi_vnode;
};
371 
372 void
373 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
374 {
375 	vnode_t *vp;
376 	vnode_impl_t *vip;
377 
378 	vp = vnalloc_marker(mp);
379 	vip = VNODE_TO_VIMPL(vp);
380 
381 	mutex_enter(mp->mnt_vnodelock);
382 	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
383 	vp->v_usecount = 1;
384 	mutex_exit(mp->mnt_vnodelock);
385 
386 	*vnip = (struct vnode_iterator *)vip;
387 }
388 
389 void
390 vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
391 {
392 	vnode_impl_t *mvip = &vni->vi_vnode;
393 	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
394 	kmutex_t *lock;
395 
396 	KASSERT(vnis_marker(mvp));
397 	if (vrefcnt(mvp) != 0) {
398 		lock = mvp->v_mount->mnt_vnodelock;
399 		mutex_enter(lock);
400 		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
401 		mvp->v_usecount = 0;
402 		mutex_exit(lock);
403 	}
404 	vnfree_marker(mvp);
405 }
406 
/*
 * Advance the iterator and return the next usable vnode of the mount,
 * or NULL when the end of the vnode list has been reached.
 *
 * => f/cl: optional selector; vnodes for which (*f)(cl, vp) returns
 *    false are skipped.  It is called with the vnode's interlock held.
 * => do_wait: if false, VDEAD_NOWAIT is passed to vdead_check().
 * => The returned vnode has been obtained through vcache_vget().
 */
static struct vnode *
vfs_vnode_iterator_next1(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
	vnode_t *vp;
	vnode_impl_t *vip;
	kmutex_t *lock;
	int error;

	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));

	lock = mp->mnt_vnodelock;
	do {
		mutex_enter(lock);
		/*
		 * Remember the successor of the marker, then take the
		 * marker off the list.  A use count of 0 records that
		 * it is currently unlinked.
		 */
		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
again:
		if (vip == NULL) {
			/* End of list; the marker stays unlinked. */
			mutex_exit(lock);
			return NULL;
		}
		vp = VIMPL_TO_VNODE(vip);
		KASSERT(vp != NULL);
		mutex_enter(vp->v_interlock);
		/*
		 * Skip other iterators' markers, vnodes rejected by
		 * vdead_check() and vnodes the selector does not want.
		 */
		if (vnis_marker(vp) ||
		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
		    (f && !(*f)(cl, vp))) {
			mutex_exit(vp->v_interlock);
			vip = TAILQ_NEXT(vip, vi_mntvnodes);
			goto again;
		}

		/* Park the marker right behind the chosen vnode. */
		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
		mutex_exit(lock);
		/*
		 * NOTE(review): the interlock is still held here and is
		 * not released in this function; presumably
		 * vcache_vget() consumes it - confirm.
		 */
		error = vcache_vget(vp);
		KASSERT(error == 0 || error == ENOENT);
		/* On ENOENT, continue the scan from the marker. */
	} while (error != 0);

	return vp;
}
451 
452 struct vnode *
453 vfs_vnode_iterator_next(struct vnode_iterator *vni,
454     bool (*f)(void *, struct vnode *), void *cl)
455 {
456 
457 	return vfs_vnode_iterator_next1(vni, f, cl, false);
458 }
459 
460 /*
461  * Move a vnode from one mount queue to another.
462  */
463 void
464 vfs_insmntque(vnode_t *vp, struct mount *mp)
465 {
466 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
467 	struct mount *omp;
468 	kmutex_t *lock;
469 
470 	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
471 	    vp->v_tag == VT_VFS);
472 
473 	/*
474 	 * Delete from old mount point vnode list, if on one.
475 	 */
476 	if ((omp = vp->v_mount) != NULL) {
477 		lock = omp->mnt_vnodelock;
478 		mutex_enter(lock);
479 		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
480 		mutex_exit(lock);
481 	}
482 
483 	/*
484 	 * Insert into list of vnodes for the new mount point, if
485 	 * available.  The caller must take a reference on the mount
486 	 * structure and donate to the vnode.
487 	 */
488 	if ((vp->v_mount = mp) != NULL) {
489 		lock = mp->mnt_vnodelock;
490 		mutex_enter(lock);
491 		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
492 		mutex_exit(lock);
493 	}
494 
495 	if (omp != NULL) {
496 		/* Release reference to old mount. */
497 		vfs_rele(omp);
498 	}
499 }
500 
501 /*
502  * Remove any vnodes in the vnode table belonging to mount point mp.
503  *
504  * If FORCECLOSE is not specified, there should not be any active ones,
505  * return error if any are found (nb: this is a user error, not a
506  * system error). If FORCECLOSE is specified, detach any active vnodes
507  * that are found.
508  *
509  * If WRITECLOSE is set, only flush out regular file vnodes open for
510  * writing.
511  *
512  * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
513  */
#ifdef DEBUG
/*
 * Debug knob: when non-zero, vflush() prints the vnodes that are still
 * busy on its last retry.  Exported under the name "busyprt" through a
 * ctldebug record.
 */
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
518 
519 static vnode_t *
520 vflushnext(struct vnode_iterator *marker, int *when)
521 {
522 	if (getticks() > *when) {
523 		yield();
524 		*when = getticks() + hz / 10;
525 	}
526 	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
527 }
528 
529 /*
530  * Flush one vnode.  Referenced on entry, unreferenced on return.
531  */
532 static int
533 vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
534 {
535 	int error;
536 	struct vattr vattr;
537 
538 	if (vp == skipvp ||
539 	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
540 		vrele(vp);
541 		return 0;
542 	}
543 	/*
544 	 * If WRITECLOSE is set, only flush out regular file
545 	 * vnodes open for writing or open and unlinked.
546 	 */
547 	if ((flags & WRITECLOSE)) {
548 		if (vp->v_type != VREG) {
549 			vrele(vp);
550 			return 0;
551 		}
552 		error = vn_lock(vp, LK_EXCLUSIVE);
553 		if (error) {
554 			KASSERT(error == ENOENT);
555 			vrele(vp);
556 			return 0;
557 		}
558 		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
559 		if (error == 0)
560 			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
561 		VOP_UNLOCK(vp);
562 		if (error) {
563 			vrele(vp);
564 			return error;
565 		}
566 		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
567 			vrele(vp);
568 			return 0;
569 		}
570 	}
571 	/*
572 	 * First try to recycle the vnode.
573 	 */
574 	if (vrecycle(vp))
575 		return 0;
576 	/*
577 	 * If FORCECLOSE is set, forcibly close the vnode.
578 	 * For block or character devices, revert to an
579 	 * anonymous device.  For all other files, just
580 	 * kill them.
581 	 */
582 	if (flags & FORCECLOSE) {
583 		if (vrefcnt(vp) > 1 &&
584 		    (vp->v_type == VBLK || vp->v_type == VCHR))
585 			vcache_make_anon(vp);
586 		else
587 			vgone(vp);
588 		return 0;
589 	}
590 	vrele(vp);
591 	return EBUSY;
592 }
593 
594 int
595 vflush(struct mount *mp, vnode_t *skipvp, int flags)
596 {
597 	vnode_t *vp;
598 	struct vnode_iterator *marker;
599 	int busy, error, when, retries = 2;
600 
601 	do {
602 		busy = error = when = 0;
603 
604 		/*
605 		 * First, flush out any vnode references from the
606 		 * deferred vrele list.
607 		 */
608 		vrele_flush(mp);
609 
610 		vfs_vnode_iterator_init(mp, &marker);
611 
612 		while ((vp = vflushnext(marker, &when)) != NULL) {
613 			error = vflush_one(vp, skipvp, flags);
614 			if (error == EBUSY) {
615 				error = 0;
616 				busy++;
617 #ifdef DEBUG
618 				if (busyprt && retries == 0)
619 					vprint("vflush: busy vnode", vp);
620 #endif
621 			} else if (error != 0) {
622 				break;
623 			}
624 		}
625 
626 		vfs_vnode_iterator_destroy(marker);
627 	} while (error == 0 && busy > 0 && retries-- > 0);
628 
629 	if (error)
630 		return error;
631 	if (busy)
632 		return EBUSY;
633 	return 0;
634 }
635 
636 /*
637  * Mount a file system.
638  */
639 
/*
 * Scan all active processes to see if any of them have a current or root
 * directory onto which the new filesystem has just been mounted. If so,
 * replace them with the new mount point.
 *
 * => olddp: the directory vnode that has just been covered; the new
 *    file system is reached through olddp->v_mountedhere.
 * => Panics if the root vnode of the new file system cannot be obtained.
 */
static void
mount_checkdirs(vnode_t *olddp)
{
	vnode_t *newdp, *rele1, *rele2;
	struct cwdinfo *cwdi;
	struct proc *p;
	bool retry;

	/* A single reference: nothing else to redirect. */
	if (vrefcnt(olddp) == 1) {
		return;
	}
	if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
		panic("mount: lost mount");

	/*
	 * Fix up one process per pass.  proc_lock has to be dropped to
	 * take the cwdinfo lock, so after each fix the scan of allproc
	 * starts over; the loop ends after a pass that finds no match.
	 */
	do {
		retry = false;
		mutex_enter(&proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Cannot change to the old directory any more,
			 * so even if we see a stale value it is not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			/* Pin the cwdinfo while proc_lock is dropped. */
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(&proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			/* Re-check under the cwdinfo lock before swapping. */
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				vref(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				vref(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			/* Old references are dropped outside both locks. */
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			mutex_enter(&proc_lock);
			break;
		}
		mutex_exit(&proc_lock);
	} while (retry);

	/* The global root vnode is redirected the same way. */
	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
	/* Unlock and release the root vnode obtained from VFS_ROOT(). */
	vput(newdp);
}
708 
709 /*
710  * Start extended attributes
711  */
712 static int
713 start_extattr(struct mount *mp)
714 {
715 	int error;
716 
717 	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
718 	if (error)
719 		printf("%s: failed to start extattr: error = %d\n",
720 		       mp->mnt_stat.f_mntonname, error);
721 
722 	return error;
723 }
724 
/*
 * Mount a new file system of type "vfsops" on the directory *vpp.
 *
 * => l: calling lwp; its credentials are used for the authorization
 *    check and recorded as the owner of the mount.
 * => vpp: vnode to be covered.  *vpp is set to NULL once the file
 *    system has been put on the mount list (also when VFS_START()
 *    fails afterwards); on earlier failures it is left untouched.
 * => vfsops: file system type.  On every failure path before the mount
 *    structure exists the reference is dropped with vfs_delref();
 *    afterwards it belongs to the mount.
 * => path: mount point path, handed to VFS_MOUNT() and looked up again
 *    to verify that it still names *vpp.
 * => flags: MNT_* flags; MNT_EXPORTED is rejected with EINVAL.
 * => data, data_len: file system specific arguments for VFS_MOUNT().
 * => Returns 0 on success or an errno value.
 */
int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
	vnode_t *vp = *vpp;
	struct mount *mp;
	struct pathbuf *pb;
	struct nameidata nd;
	int error;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vfs_delref(vfsops);
		return error;
	}

	/* Cannot make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		vfs_delref(vfsops);
		return ENOTDIR;
	}

	if (flags & MNT_EXPORTED) {
		vfs_delref(vfsops);
		return EINVAL;
	}

	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
		vfs_delref(vfsops);
		return ENOMEM;
	}

	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);

	/* mnt_updating is held until the mount is fully set up. */
	mutex_enter(mp->mnt_updating);
	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	if (error != 0)
		goto err_unmounted;

	/*
	 * Validate and prepare the mount point.
	 */
	error = pathbuf_copyin(path, &pb);
	if (error != 0) {
		goto err_mounted;
	}
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error != 0) {
		goto err_mounted;
	}
	/* The path must still resolve to the vnode we were given. */
	if (nd.ni_vp != vp) {
		vput(nd.ni_vp);
		error = EINVAL;
		goto err_mounted;
	}
	/* Refuse to stack on a directory that is already covered. */
	if (vp->v_mountedhere != NULL) {
		vput(nd.ni_vp);
		error = EBUSY;
		goto err_mounted;
	}
	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
	if (error != 0) {
		vput(nd.ni_vp);
		goto err_mounted;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	mp->mnt_iflag &= ~IMNT_WANTRDWR;

	mountlist_append(mp);
	/* Only writable, non-async mounts go on the syncer worklist. */
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		vfs_syncer_add_to_worklist(mp);
	vp->v_mountedhere = mp;
	vput(nd.ni_vp);

	mount_checkdirs(vp);
	mutex_exit(mp->mnt_updating);

	/* Hold an additional reference to the mount across VFS_START(). */
	vfs_ref(mp);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		/*
		 * NOTE(review): the file system stays on the mount list
		 * here; only a reference to the covered vnode is
		 * dropped - confirm the intended ownership.
		 */
		vrele(vp);
	} else if (flags & MNT_EXTATTR) {
		/* Extattr start failure is not fatal; clear the flag. */
		if (start_extattr(mp) != 0)
			mp->mnt_flag &= ~MNT_EXTATTR;
	}
	/* Drop reference held for VFS_START(). */
	vfs_rele(mp);
	*vpp = NULL;
	return error;

err_mounted:
	/* VFS_MOUNT() succeeded, so the file system must be undone. */
	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
		panic("Unmounting fresh file system failed");

err_unmounted:
	vp->v_mountedhere = NULL;
	mutex_exit(mp->mnt_updating);
	/* Releases the reference from vfs_mountalloc(). */
	vfs_rele(mp);

	return error;
}
845 
/*
 * Do the actual file system unmount.  File system is assumed to have
 * been locked by the caller.
 *
 * => Caller hold reference to the mount, explicitly for dounmount().
 * => flags: MNT_FORCE skips the preliminary VFS_SYNC() and is passed on
 *    to VFS_UNMOUNT().
 * => l: calling lwp; its credentials are used for VFS_SYNC().
 * => Returns 0 on success.  On failure the mount is put back into its
 *    previous state (syncer worklist, MNT_ASYNC, extended attributes)
 *    and the error is returned.
 */
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	vnode_t *coveredvp;
	int error, async, used_syncer, used_extattr;
	/* If we already own the suspension, do not suspend/resume here. */
	const bool was_suspended = fstrans_is_owner(mp);

#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	if (!was_suspended) {
		error = vfs_suspend(mp, 0);
		if (error) {
			return error;
		}
	}

	KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);

	/* Remember state that has to be restored if the unmount fails. */
	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
	used_extattr = mp->mnt_flag & MNT_EXTATTR;

	mp->mnt_iflag |= IMNT_UNMOUNT;
	mutex_enter(mp->mnt_updating);
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (used_syncer)
		vfs_syncer_remove_from_worklist(mp);
	error = 0;
	/* Sync writable file systems first, unless forced. */
	if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	if (error == 0 || (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags);
	}
	if (error) {
		/* Unmount failed: undo everything done above. */
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			vfs_syncer_add_to_worklist(mp);
		mp->mnt_flag |= async;
		mutex_exit(mp->mnt_updating);
		if (!was_suspended)
			vfs_resume(mp);
		if (used_extattr) {
			if (start_extattr(mp) != 0)
				mp->mnt_flag &= ~MNT_EXTATTR;
			else
				mp->mnt_flag |= MNT_EXTATTR;
		}
		return (error);
	}
	mutex_exit(mp->mnt_updating);

	/*
	 * Mark the file system as gone to prevent further unmounts
	 * now that the mnt_updating lock has been dropped; this also
	 * prevents vfs_busy() from succeeding.
	 */
	mp->mnt_iflag |= IMNT_GONE;
	if (!was_suspended)
		vfs_resume(mp);

	/* Uncover the directory the file system was mounted on. */
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY);
		coveredvp->v_mountedhere = NULL;
		VOP_UNLOCK(coveredvp);
	}
	mountlist_remove(mp);
	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	vfs_hooks_unmount(mp);

	vfs_rele(mp);	/* reference from mount() */
	if (coveredvp != NULLVP) {
		vrele(coveredvp);
	}
	return (0);
}
934 
935 /*
936  * Unmount all file systems.
937  * We traverse the list in reverse order under the assumption that doing so
938  * will avoid needing to worry about dependencies.
939  */
940 bool
941 vfs_unmountall(struct lwp *l)
942 {
943 
944 	printf("unmounting file systems...\n");
945 	return vfs_unmountall1(l, true, true);
946 }
947 
948 static void
949 vfs_unmount_print(struct mount *mp, const char *pfx)
950 {
951 
952 	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
953 	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
954 	    mp->mnt_stat.f_fstypename);
955 }
956 
957 /*
958  * Return the mount with the highest generation less than "gen".
959  */
960 static struct mount *
961 vfs_unmount_next(uint64_t gen)
962 {
963 	mount_iterator_t *iter;
964 	struct mount *mp, *nmp;
965 
966 	nmp = NULL;
967 
968 	mountlist_iterator_init(&iter);
969 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
970 		if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) &&
971 		    mp->mnt_gen < gen) {
972 			if (nmp != NULL)
973 				vfs_rele(nmp);
974 			nmp = mp;
975 			vfs_ref(nmp);
976 		}
977 	}
978 	mountlist_iterator_destroy(iter);
979 
980 	return nmp;
981 }
982 
983 bool
984 vfs_unmount_forceone(struct lwp *l)
985 {
986 	struct mount *mp;
987 	int error;
988 
989 	mp = vfs_unmount_next(mountgen);
990 	if (mp == NULL) {
991 		return false;
992 	}
993 
994 #ifdef DEBUG
995 	printf("forcefully unmounting %s (%s)...\n",
996 	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
997 #endif
998 	if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
999 		vfs_unmount_print(mp, "forcefully ");
1000 		return true;
1001 	} else {
1002 		vfs_rele(mp);
1003 	}
1004 
1005 #ifdef DEBUG
1006 	printf("forceful unmount of %s failed with error %d\n",
1007 	    mp->mnt_stat.f_mntonname, error);
1008 #endif
1009 
1010 	return false;
1011 }
1012 
1013 bool
1014 vfs_unmountall1(struct lwp *l, bool force, bool verbose)
1015 {
1016 	struct mount *mp;
1017 	bool any_error = false, progress = false;
1018 	uint64_t gen;
1019 	int error;
1020 
1021 	gen = mountgen;
1022 	for (;;) {
1023 		mp = vfs_unmount_next(gen);
1024 		if (mp == NULL)
1025 			break;
1026 		gen = mp->mnt_gen;
1027 
1028 #ifdef DEBUG
1029 		printf("unmounting %p %s (%s)...\n",
1030 		    (void *)mp, mp->mnt_stat.f_mntonname,
1031 		    mp->mnt_stat.f_mntfromname);
1032 #endif
1033 		if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
1034 			vfs_unmount_print(mp, "");
1035 			progress = true;
1036 		} else {
1037 			vfs_rele(mp);
1038 			if (verbose) {
1039 				printf("unmount of %s failed with error %d\n",
1040 				    mp->mnt_stat.f_mntonname, error);
1041 			}
1042 			any_error = true;
1043 		}
1044 	}
1045 	if (verbose) {
1046 		printf("unmounting done\n");
1047 	}
1048 	if (any_error && verbose) {
1049 		printf("WARNING: some file systems would not unmount\n");
1050 	}
1051 	return progress;
1052 }
1053 
1054 void
1055 vfs_sync_all(struct lwp *l)
1056 {
1057 	printf("syncing disks... ");
1058 
1059 	/* remove user processes from run queue */
1060 	suspendsched();
1061 	(void)spl0();
1062 
1063 	/* avoid coming back this way again if we panic. */
1064 	doing_shutdown = 1;
1065 
1066 	do_sys_sync(l);
1067 
1068 	/* Wait for sync to finish. */
1069 	if (vfs_syncwait() != 0) {
1070 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
1071 		Debugger();
1072 #endif
1073 		printf("giving up\n");
1074 		return;
1075 	} else
1076 		printf("done\n");
1077 }
1078 
1079 /*
1080  * Sync and unmount file systems before shutting down.
1081  */
1082 void
1083 vfs_shutdown(void)
1084 {
1085 	lwp_t *l = curlwp;
1086 
1087 	vfs_sync_all(l);
1088 
1089 	/*
1090 	 * If we have paniced - do not make the situation potentially
1091 	 * worse by unmounting the file systems.
1092 	 */
1093 	if (panicstr != NULL) {
1094 		return;
1095 	}
1096 
1097 	/* Unmount file systems. */
1098 	vfs_unmountall(l);
1099 }
1100 
1101 /*
1102  * Print a list of supported file system types (used by vfs_mountroot)
1103  */
1104 static void
1105 vfs_print_fstypes(void)
1106 {
1107 	struct vfsops *v;
1108 	int cnt = 0;
1109 
1110 	mutex_enter(&vfs_list_lock);
1111 	LIST_FOREACH(v, &vfs_list, vfs_list)
1112 		++cnt;
1113 	mutex_exit(&vfs_list_lock);
1114 
1115 	if (cnt == 0) {
1116 		printf("WARNING: No file system modules have been loaded.\n");
1117 		return;
1118 	}
1119 
1120 	printf("Supported file systems:");
1121 	mutex_enter(&vfs_list_lock);
1122 	LIST_FOREACH(v, &vfs_list, vfs_list) {
1123 		printf(" %s", v->vfs_name);
1124 	}
1125 	mutex_exit(&vfs_list_lock);
1126 	printf("\n");
1127 }
1128 
1129 /*
1130  * Mount the root file system.  If the operator didn't specify a
1131  * file system to use, try all possible file systems until one
1132  * succeeds.
1133  */
1134 int
1135 vfs_mountroot(void)
1136 {
1137 	struct vfsops *v;
1138 	int error = ENODEV;
1139 
1140 	if (root_device == NULL)
1141 		panic("vfs_mountroot: root device unknown");
1142 
1143 	switch (device_class(root_device)) {
1144 	case DV_IFNET:
1145 		if (rootdev != NODEV)
1146 			panic("vfs_mountroot: rootdev set for DV_IFNET "
1147 			    "(0x%llx -> %llu,%llu)",
1148 			    (unsigned long long)rootdev,
1149 			    (unsigned long long)major(rootdev),
1150 			    (unsigned long long)minor(rootdev));
1151 		break;
1152 
1153 	case DV_DISK:
1154 		if (rootdev == NODEV)
1155 			panic("vfs_mountroot: rootdev not set for DV_DISK");
1156 	        if (bdevvp(rootdev, &rootvp))
1157 	                panic("vfs_mountroot: can't get vnode for rootdev");
1158 		error = VOP_OPEN(rootvp, FREAD, FSCRED);
1159 		if (error) {
1160 			printf("vfs_mountroot: can't open root device\n");
1161 			return (error);
1162 		}
1163 		break;
1164 
1165 	case DV_VIRTUAL:
1166 		break;
1167 
1168 	default:
1169 		printf("%s: inappropriate for root file system\n",
1170 		    device_xname(root_device));
1171 		return (ENODEV);
1172 	}
1173 
1174 	/*
1175 	 * If user specified a root fs type, use it.  Make sure the
1176 	 * specified type exists and has a mount_root()
1177 	 */
1178 	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
1179 		v = vfs_getopsbyname(rootfstype);
1180 		error = EFTYPE;
1181 		if (v != NULL) {
1182 			if (v->vfs_mountroot != NULL) {
1183 				error = (v->vfs_mountroot)();
1184 			}
1185 			v->vfs_refcount--;
1186 		}
1187 		goto done;
1188 	}
1189 
1190 	/*
1191 	 * Try each file system currently configured into the kernel.
1192 	 */
1193 	mutex_enter(&vfs_list_lock);
1194 	LIST_FOREACH(v, &vfs_list, vfs_list) {
1195 		if (v->vfs_mountroot == NULL)
1196 			continue;
1197 #ifdef DEBUG
1198 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1199 #endif
1200 		v->vfs_refcount++;
1201 		mutex_exit(&vfs_list_lock);
1202 		error = (*v->vfs_mountroot)();
1203 		mutex_enter(&vfs_list_lock);
1204 		v->vfs_refcount--;
1205 		if (!error) {
1206 			aprint_normal("root file system type: %s\n",
1207 			    v->vfs_name);
1208 			break;
1209 		}
1210 	}
1211 	mutex_exit(&vfs_list_lock);
1212 
1213 	if (v == NULL) {
1214 		vfs_print_fstypes();
1215 		printf("no file system for %s", device_xname(root_device));
1216 		if (device_class(root_device) == DV_DISK)
1217 			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
1218 		printf("\n");
1219 		error = EFTYPE;
1220 	}
1221 
1222 done:
1223 	if (error && device_class(root_device) == DV_DISK) {
1224 		VOP_CLOSE(rootvp, FREAD, FSCRED);
1225 		vrele(rootvp);
1226 	}
1227 	if (error == 0) {
1228 		mount_iterator_t *iter;
1229 		struct mount *mp;
1230 		extern struct cwdinfo cwdi0;
1231 
1232 		mountlist_iterator_init(&iter);
1233 		mp = mountlist_iterator_next(iter);
1234 		KASSERT(mp != NULL);
1235 		mountlist_iterator_destroy(iter);
1236 
1237 		mp->mnt_flag |= MNT_ROOTFS;
1238 		mp->mnt_op->vfs_refcount++;
1239 
1240 		/*
1241 		 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
1242 		 * reference it, and donate it the reference grabbed
1243 		 * with VFS_ROOT().
1244 		 */
1245 		error = VFS_ROOT(mp, LK_NONE, &rootvnode);
1246 		if (error)
1247 			panic("cannot find root vnode, error=%d", error);
1248 		cwdi0.cwdi_cdir = rootvnode;
1249 		cwdi0.cwdi_rdir = NULL;
1250 
1251 		/*
1252 		 * Now that root is mounted, we can fixup initproc's CWD
1253 		 * info.  All other processes are kthreads, which merely
1254 		 * share proc0's CWD info.
1255 		 */
1256 		initproc->p_cwdi->cwdi_cdir = rootvnode;
1257 		vref(initproc->p_cwdi->cwdi_cdir);
1258 		initproc->p_cwdi->cwdi_rdir = NULL;
1259 		/*
1260 		 * Enable loading of modules from the filesystem
1261 		 */
1262 		module_load_vfs_init();
1263 
1264 	}
1265 	return (error);
1266 }
1267 
1268 /*
1269  * mount_specific_key_create --
1270  *	Create a key for subsystem mount-specific data.
1271  */
1272 int
1273 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1274 {
1275 
1276 	return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
1277 }
1278 
1279 /*
1280  * mount_specific_key_delete --
1281  *	Delete a key for subsystem mount-specific data.
1282  */
1283 void
1284 mount_specific_key_delete(specificdata_key_t key)
1285 {
1286 
1287 	specificdata_key_delete(mount_specificdata_domain, key);
1288 }
1289 
1290 /*
1291  * mount_initspecific --
1292  *	Initialize a mount's specificdata container.
1293  */
1294 void
1295 mount_initspecific(struct mount *mp)
1296 {
1297 	int error __diagused;
1298 
1299 	error = specificdata_init(mount_specificdata_domain,
1300 				  &mp->mnt_specdataref);
1301 	KASSERT(error == 0);
1302 }
1303 
1304 /*
1305  * mount_finispecific --
1306  *	Finalize a mount's specificdata container.
1307  */
1308 void
1309 mount_finispecific(struct mount *mp)
1310 {
1311 
1312 	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
1313 }
1314 
1315 /*
1316  * mount_getspecific --
1317  *	Return mount-specific data corresponding to the specified key.
1318  */
1319 void *
1320 mount_getspecific(struct mount *mp, specificdata_key_t key)
1321 {
1322 
1323 	return specificdata_getspecific(mount_specificdata_domain,
1324 					 &mp->mnt_specdataref, key);
1325 }
1326 
1327 /*
1328  * mount_setspecific --
1329  *	Set mount-specific data corresponding to the specified key.
1330  */
1331 void
1332 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
1333 {
1334 
1335 	specificdata_setspecific(mount_specificdata_domain,
1336 				 &mp->mnt_specdataref, key, data);
1337 }
1338 
1339 /*
1340  * Check to see if a filesystem is mounted on a block device.
1341  */
1342 int
1343 vfs_mountedon(vnode_t *vp)
1344 {
1345 	vnode_t *vq;
1346 	int error = 0;
1347 
1348 	if (vp->v_type != VBLK)
1349 		return ENOTBLK;
1350 	if (spec_node_getmountedfs(vp) != NULL)
1351 		return EBUSY;
1352 	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) {
1353 		if (spec_node_getmountedfs(vq) != NULL)
1354 			error = EBUSY;
1355 		vrele(vq);
1356 	}
1357 
1358 	return error;
1359 }
1360 
1361 /*
1362  * Check if a device pointed to by vp is mounted.
1363  *
1364  * Returns:
1365  *   EINVAL	if it's not a disk
1366  *   EBUSY	if it's a disk and mounted
1367  *   0		if it's a disk and not mounted
1368  */
1369 int
1370 rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
1371 {
1372 	vnode_t *bvp;
1373 	dev_t dev;
1374 	int d_type;
1375 
1376 	bvp = NULL;
1377 	d_type = D_OTHER;
1378 
1379 	if (iskmemvp(vp))
1380 		return EINVAL;
1381 
1382 	switch (vp->v_type) {
1383 	case VCHR: {
1384 		const struct cdevsw *cdev;
1385 
1386 		dev = vp->v_rdev;
1387 		cdev = cdevsw_lookup(dev);
1388 		if (cdev != NULL) {
1389 			dev_t blkdev;
1390 
1391 			blkdev = devsw_chr2blk(dev);
1392 			if (blkdev != NODEV) {
1393 				if (vfinddev(blkdev, VBLK, &bvp) != 0) {
1394 					d_type = (cdev->d_flag & D_TYPEMASK);
1395 					/* XXX: what if bvp disappears? */
1396 					vrele(bvp);
1397 				}
1398 			}
1399 		}
1400 
1401 		break;
1402 		}
1403 
1404 	case VBLK: {
1405 		const struct bdevsw *bdev;
1406 
1407 		dev = vp->v_rdev;
1408 		bdev = bdevsw_lookup(dev);
1409 		if (bdev != NULL)
1410 			d_type = (bdev->d_flag & D_TYPEMASK);
1411 
1412 		bvp = vp;
1413 
1414 		break;
1415 		}
1416 
1417 	default:
1418 		break;
1419 	}
1420 
1421 	if (d_type != D_DISK)
1422 		return EINVAL;
1423 
1424 	if (bvpp != NULL)
1425 		*bvpp = bvp;
1426 
1427 	/*
1428 	 * XXX: This is bogus. We should be failing the request
1429 	 * XXX: not only if this specific slice is mounted, but
1430 	 * XXX: if it's on a disk with any other mounted slice.
1431 	 */
1432 	if (vfs_mountedon(bvp))
1433 		return EBUSY;
1434 
1435 	return 0;
1436 }
1437 
/*
 * makefstype --
 *	Make a 'unique' number from a mount type name.
 *
 *	For each character of the NUL-terminated string `type', the
 *	running value is shifted left by two bits and XORed with the
 *	character.  An empty string yields 0.
 *
 *	The value is accumulated in an unsigned long because left-shifting
 *	a signed long is undefined once bits reach the sign bit (long
 *	names) or the value is negative (characters with the high bit
 *	set).  The result has the same bit pattern the signed computation
 *	produced wherever that computation was well defined.
 */
long
makefstype(const char *type)
{
	unsigned long hash = 0;

	for (; *type != '\0'; type++) {
		hash <<= 2;
		/* Widen via long so a negative char sign-extends as before. */
		hash ^= (unsigned long)(long)*type;
	}
	return (long)hash;
}
1452 
1453 static struct mountlist_entry *
1454 mountlist_alloc(enum mountlist_type type, struct mount *mp)
1455 {
1456 	struct mountlist_entry *me;
1457 
1458 	me = kmem_zalloc(sizeof(*me), KM_SLEEP);
1459 	me->me_mount = mp;
1460 	me->me_type = type;
1461 
1462 	return me;
1463 }
1464 
1465 static void
1466 mountlist_free(struct mountlist_entry *me)
1467 {
1468 
1469 	kmem_free(me, sizeof(*me));
1470 }
1471 
1472 void
1473 mountlist_iterator_init(mount_iterator_t **mip)
1474 {
1475 	struct mountlist_entry *me;
1476 
1477 	me = mountlist_alloc(ME_MARKER, NULL);
1478 	mutex_enter(&mountlist_lock);
1479 	TAILQ_INSERT_HEAD(&mountlist, me, me_list);
1480 	mutex_exit(&mountlist_lock);
1481 	*mip = (mount_iterator_t *)me;
1482 }
1483 
1484 void
1485 mountlist_iterator_destroy(mount_iterator_t *mi)
1486 {
1487 	struct mountlist_entry *marker = &mi->mi_entry;
1488 
1489 	if (marker->me_mount != NULL)
1490 		vfs_unbusy(marker->me_mount);
1491 
1492 	mutex_enter(&mountlist_lock);
1493 	TAILQ_REMOVE(&mountlist, marker, me_list);
1494 	mutex_exit(&mountlist_lock);
1495 
1496 	mountlist_free(marker);
1497 
1498 }
1499 
1500 /*
1501  * Return the next mount or NULL for this iterator.
1502  * Mark it busy on success.
1503  */
1504 static inline struct mount *
1505 _mountlist_iterator_next(mount_iterator_t *mi, bool wait)
1506 {
1507 	struct mountlist_entry *me, *marker = &mi->mi_entry;
1508 	struct mount *mp;
1509 	int error;
1510 
1511 	if (marker->me_mount != NULL) {
1512 		vfs_unbusy(marker->me_mount);
1513 		marker->me_mount = NULL;
1514 	}
1515 
1516 	mutex_enter(&mountlist_lock);
1517 	for (;;) {
1518 		KASSERT(marker->me_type == ME_MARKER);
1519 
1520 		me = TAILQ_NEXT(marker, me_list);
1521 		if (me == NULL) {
1522 			/* End of list: keep marker and return. */
1523 			mutex_exit(&mountlist_lock);
1524 			return NULL;
1525 		}
1526 		TAILQ_REMOVE(&mountlist, marker, me_list);
1527 		TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);
1528 
1529 		/* Skip other markers. */
1530 		if (me->me_type != ME_MOUNT)
1531 			continue;
1532 
1533 		/* Take an initial reference for vfs_busy() below. */
1534 		mp = me->me_mount;
1535 		KASSERT(mp != NULL);
1536 		vfs_ref(mp);
1537 		mutex_exit(&mountlist_lock);
1538 
1539 		/* Try to mark this mount busy and return on success. */
1540 		if (wait)
1541 			error = vfs_busy(mp);
1542 		else
1543 			error = vfs_trybusy(mp);
1544 		if (error == 0) {
1545 			vfs_rele(mp);
1546 			marker->me_mount = mp;
1547 			return mp;
1548 		}
1549 		vfs_rele(mp);
1550 		mutex_enter(&mountlist_lock);
1551 	}
1552 }
1553 
1554 struct mount *
1555 mountlist_iterator_next(mount_iterator_t *mi)
1556 {
1557 
1558 	return _mountlist_iterator_next(mi, true);
1559 }
1560 
1561 struct mount *
1562 mountlist_iterator_trynext(mount_iterator_t *mi)
1563 {
1564 
1565 	return _mountlist_iterator_next(mi, false);
1566 }
1567 
1568 /*
1569  * Attach new mount to the end of the mount list.
1570  */
1571 void
1572 mountlist_append(struct mount *mp)
1573 {
1574 	struct mountlist_entry *me;
1575 
1576 	me = mountlist_alloc(ME_MOUNT, mp);
1577 	mutex_enter(&mountlist_lock);
1578 	TAILQ_INSERT_TAIL(&mountlist, me, me_list);
1579 	mutex_exit(&mountlist_lock);
1580 }
1581 
1582 /*
1583  * Remove mount from mount list.
1584  */void
1585 mountlist_remove(struct mount *mp)
1586 {
1587 	struct mountlist_entry *me;
1588 
1589 	mutex_enter(&mountlist_lock);
1590 	TAILQ_FOREACH(me, &mountlist, me_list)
1591 		if (me->me_type == ME_MOUNT && me->me_mount == mp)
1592 			break;
1593 	KASSERT(me != NULL);
1594 	TAILQ_REMOVE(&mountlist, me, me_list);
1595 	mutex_exit(&mountlist_lock);
1596 	mountlist_free(me);
1597 }
1598 
1599 /*
1600  * Unlocked variant to traverse the mountlist.
1601  * To be used from DDB only.
1602  */
1603 struct mount *
1604 _mountlist_next(struct mount *mp)
1605 {
1606 	struct mountlist_entry *me;
1607 
1608 	if (mp == NULL) {
1609 		me = TAILQ_FIRST(&mountlist);
1610 	} else {
1611 		TAILQ_FOREACH(me, &mountlist, me_list)
1612 			if (me->me_type == ME_MOUNT && me->me_mount == mp)
1613 				break;
1614 		if (me != NULL)
1615 			me = TAILQ_NEXT(me, me_list);
1616 	}
1617 
1618 	while (me != NULL && me->me_type != ME_MOUNT)
1619 		me = TAILQ_NEXT(me, me_list);
1620 
1621 	return (me ? me->me_mount : NULL);
1622 }
1623