xref: /netbsd-src/sys/kern/vfs_mount.c (revision a8c74629f602faa0ccf8a463757d7baf858bbf3a)
1 /*	$NetBSD: vfs_mount.c,v 1.84 2020/10/13 13:15:39 hannken Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997-2020 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  * (c) UNIX System Laboratories, Inc.
37  * All or some portions of this file are derived from material licensed
38  * to the University of California by American Telephone and Telegraph
39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40  * the permission of UNIX System Laboratories, Inc.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
67  */
68 
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.84 2020/10/13 13:15:39 hannken Exp $");
71 
72 #include <sys/param.h>
73 #include <sys/kernel.h>
74 
75 #include <sys/atomic.h>
76 #include <sys/buf.h>
77 #include <sys/conf.h>
78 #include <sys/fcntl.h>
79 #include <sys/filedesc.h>
80 #include <sys/device.h>
81 #include <sys/kauth.h>
82 #include <sys/kmem.h>
83 #include <sys/module.h>
84 #include <sys/mount.h>
85 #include <sys/fstrans.h>
86 #include <sys/namei.h>
87 #include <sys/extattr.h>
88 #include <sys/syscallargs.h>
89 #include <sys/sysctl.h>
90 #include <sys/systm.h>
91 #include <sys/vfs_syscalls.h>
92 #include <sys/vnode_impl.h>
93 
94 #include <miscfs/genfs/genfs.h>
95 #include <miscfs/specfs/specdev.h>
96 
/* Kind of entry that can sit on the global mount list. */
enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};
/* One element of the global mount list; me_type tells the kinds apart. */
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   current mount else. */
	enum mountlist_type me_type;		/* Mount or marker. */
};
/*
 * Mount list iterator: wraps a single mount list entry.
 * NOTE(review): presumably the embedded entry is the ME_MARKER that
 * records the iterator's position -- confirm against the
 * mountlist_iterator_*() implementation.
 */
struct mount_iterator {
	struct mountlist_entry mi_entry;
};
110 
/* Common worker behind vfs_vnode_iterator_next() and vflushnext(). */
static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);

/* Root filesystem. */
vnode_t *			rootvnode;

/* Mounted filesystem list. */
static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
static kmutex_t			mountlist_lock __cacheline_aligned;
int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
    = offsetof(vnode_impl_t, vi_lrulist.tqe_next);

/* Held while walking vfs_list (see vfs_rootmountalloc()). */
kmutex_t			vfs_list_lock __cacheline_aligned;

/* Specific-data domain for mounts (torn down per mount in vfs_rele()). */
static specificdata_domain_t	mount_specificdata_domain;
/* Serializes fsid allocation in vfs_getnewfsid(). */
static kmutex_t			mntid_lock;

/* Mount generation counter; each new mount gets the next value. */
static kmutex_t			mountgen_lock __cacheline_aligned;
static uint64_t			mountgen;
130 
131 void
132 vfs_mount_sysinit(void)
133 {
134 
135 	TAILQ_INIT(&mountlist);
136 	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
137 	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
138 
139 	mount_specificdata_domain = specificdata_domain_create();
140 	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
141 	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
142 	mountgen = 0;
143 }
144 
145 struct mount *
146 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
147 {
148 	struct mount *mp;
149 	int error __diagused;
150 
151 	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
152 	mp->mnt_op = vfsops;
153 	mp->mnt_refcnt = 1;
154 	TAILQ_INIT(&mp->mnt_vnodelist);
155 	mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
156 	mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
157 	mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
158 	mp->mnt_vnodecovered = vp;
159 	mount_initspecific(mp);
160 
161 	error = fstrans_mount(mp);
162 	KASSERT(error == 0);
163 
164 	mutex_enter(&mountgen_lock);
165 	mp->mnt_gen = mountgen++;
166 	mutex_exit(&mountgen_lock);
167 
168 	return mp;
169 }
170 
171 /*
172  * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
173  * initialize a mount structure for it.
174  *
175  * Devname is usually updated by mount(8) after booting.
176  */
177 int
178 vfs_rootmountalloc(const char *fstypename, const char *devname,
179     struct mount **mpp)
180 {
181 	struct vfsops *vfsp = NULL;
182 	struct mount *mp;
183 	int error __diagused;
184 
185 	mutex_enter(&vfs_list_lock);
186 	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
187 		if (!strncmp(vfsp->vfs_name, fstypename,
188 		    sizeof(mp->mnt_stat.f_fstypename)))
189 			break;
190 	if (vfsp == NULL) {
191 		mutex_exit(&vfs_list_lock);
192 		return (ENODEV);
193 	}
194 	vfsp->vfs_refcount++;
195 	mutex_exit(&vfs_list_lock);
196 
197 	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
198 		return ENOMEM;
199 	error = vfs_busy(mp);
200 	KASSERT(error == 0);
201 	mp->mnt_flag = MNT_RDONLY;
202 	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
203 	    sizeof(mp->mnt_stat.f_fstypename));
204 	mp->mnt_stat.f_mntonname[0] = '/';
205 	mp->mnt_stat.f_mntonname[1] = '\0';
206 	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
207 	    '\0';
208 	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
209 	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
210 	*mpp = mp;
211 	return 0;
212 }
213 
214 /*
215  * vfs_getnewfsid: get a new unique fsid.
216  */
217 void
218 vfs_getnewfsid(struct mount *mp)
219 {
220 	static u_short xxxfs_mntid;
221 	fsid_t tfsid;
222 	int mtype;
223 
224 	mutex_enter(&mntid_lock);
225 	mtype = makefstype(mp->mnt_op->vfs_name);
226 	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
227 	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
228 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
229 	if (xxxfs_mntid == 0)
230 		++xxxfs_mntid;
231 	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
232 	tfsid.__fsid_val[1] = mtype;
233 	while (vfs_getvfs(&tfsid)) {
234 		tfsid.__fsid_val[0]++;
235 		xxxfs_mntid++;
236 	}
237 	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
238 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
239 	mutex_exit(&mntid_lock);
240 }
241 
242 /*
243  * Lookup a mount point by filesystem identifier.
244  *
245  * XXX Needs to add a reference to the mount point.
246  */
247 struct mount *
248 vfs_getvfs(fsid_t *fsid)
249 {
250 	mount_iterator_t *iter;
251 	struct mount *mp;
252 
253 	mountlist_iterator_init(&iter);
254 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
255 		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
256 		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
257 			mountlist_iterator_destroy(iter);
258 			return mp;
259 		}
260 	}
261 	mountlist_iterator_destroy(iter);
262 	return NULL;
263 }
264 
/*
 * Take a reference to a mount structure.
 *
 * The caller must either already hold a reference (mnt_refcnt > 0) or
 * hold mountlist_lock; this is asserted below.
 */
void
vfs_ref(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));

	atomic_inc_uint(&mp->mnt_refcnt);
}
276 
/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_rele(struct mount *mp)
{

	/* Common case: other references remain, nothing more to do. */
	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
		return;
	}

	/*
	 * Nothing else has visibility of the mount: we can now
	 * free the data structures.
	 */
	KASSERT(mp->mnt_refcnt == 0);
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
	mutex_obj_free(mp->mnt_updating);
	mutex_obj_free(mp->mnt_renamelock);
	mutex_obj_free(mp->mnt_vnodelock);
	if (mp->mnt_op != NULL) {
		/* Give back the mount's reference on its vfsops. */
		vfs_delref(mp->mnt_op);
	}
	fstrans_unmount(mp);
	/*
	 * Final free of mp gets done from fstrans_mount_dtor().
	 *
	 * This prevents the memory from being reused as a mount before
	 * fstrans releases all references to it.
	 */
}
308 
309 /*
310  * Mark a mount point as busy, and gain a new reference to it.  Used to
311  * prevent the file system from being unmounted during critical sections.
312  *
313  * vfs_busy can be called multiple times and by multiple threads
314  * and must be accompanied by the same number of vfs_unbusy calls.
315  *
316  * => The caller must hold a pre-existing reference to the mount.
317  * => Will fail if the file system is being unmounted, or is unmounted.
318  */
319 static inline int
320 _vfs_busy(struct mount *mp, bool wait)
321 {
322 
323 	KASSERT(mp->mnt_refcnt > 0);
324 
325 	if (wait) {
326 		fstrans_start(mp);
327 	} else {
328 		if (fstrans_start_nowait(mp))
329 			return EBUSY;
330 	}
331 	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
332 		fstrans_done(mp);
333 		return ENOENT;
334 	}
335 	vfs_ref(mp);
336 	return 0;
337 }
338 
/*
 * vfs_busy: busy a mount, waiting if necessary (see _vfs_busy()).
 * Returns 0 on success or ENOENT if the mount is gone.
 */
int
vfs_busy(struct mount *mp)
{

	return _vfs_busy(mp, true);
}
345 
/*
 * vfs_trybusy: busy a mount without waiting (see _vfs_busy()).
 * Returns 0 on success, EBUSY if it would have to wait, or ENOENT if
 * the mount is gone.
 */
int
vfs_trybusy(struct mount *mp)
{

	return _vfs_busy(mp, false);
}
352 
/*
 * Unbusy a busy filesystem.
 *
 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
 *
 * Ends the fstrans transaction started by _vfs_busy() and drops the
 * reference it took.
 */
void
vfs_unbusy(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0);

	fstrans_done(mp);
	vfs_rele(mp);
}
367 
/*
 * Iterator over the vnodes of a mount.  It consists of a marker vnode
 * (see vfs_vnode_iterator_init()) that is linked into the mount's
 * vnode list to remember the current position.
 */
struct vnode_iterator {
	vnode_impl_t vi_vnode;
};
371 
372 void
373 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
374 {
375 	vnode_t *vp;
376 	vnode_impl_t *vip;
377 
378 	vp = vnalloc_marker(mp);
379 	vip = VNODE_TO_VIMPL(vp);
380 
381 	mutex_enter(mp->mnt_vnodelock);
382 	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
383 	vp->v_usecount = 1;
384 	mutex_exit(mp->mnt_vnodelock);
385 
386 	*vnip = (struct vnode_iterator *)vip;
387 }
388 
389 void
390 vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
391 {
392 	vnode_impl_t *mvip = &vni->vi_vnode;
393 	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
394 	kmutex_t *lock;
395 
396 	KASSERT(vnis_marker(mvp));
397 	if (vrefcnt(mvp) != 0) {
398 		lock = mvp->v_mount->mnt_vnodelock;
399 		mutex_enter(lock);
400 		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
401 		mvp->v_usecount = 0;
402 		mutex_exit(lock);
403 	}
404 	vnfree_marker(mvp);
405 }
406 
/*
 * vfs_vnode_iterator_next1: advance the iterator and return the next
 * vnode of the mount, or NULL when the end of the list is reached.
 *
 * => vni: iterator from vfs_vnode_iterator_init().
 * => f, cl: optional filter; when f is non-NULL a vnode is only
 *    returned if (*f)(cl, vp) is true.  The filter is called with the
 *    vnode's interlock and the mount's vnode list lock held.
 * => do_wait: if false, VDEAD_NOWAIT is passed to vdead_check().
 *
 * The vnode is obtained with vcache_vget(); if that fails with ENOENT
 * the scan resumes from the marker's new position.
 */
static struct vnode *
vfs_vnode_iterator_next1(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
	vnode_t *vp;
	vnode_impl_t *vip;
	kmutex_t *lock;
	int error;

	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));

	lock = mp->mnt_vnodelock;
	do {
		mutex_enter(lock);
		/* Start after the marker, then take the marker off the list. */
		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
again:
		if (vip == NULL) {
			/* End of list; the marker stays off the list. */
			mutex_exit(lock);
	       		return NULL;
		}
		vp = VIMPL_TO_VNODE(vip);
		KASSERT(vp != NULL);
		mutex_enter(vp->v_interlock);
		/* Skip other markers, dead vnodes and filtered-out vnodes. */
		if (vnis_marker(vp) ||
		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
		    (f && !(*f)(cl, vp))) {
			mutex_exit(vp->v_interlock);
			vip = TAILQ_NEXT(vip, vi_mntvnodes);
			goto again;
		}

		/* Park the marker right behind the vnode we hand out. */
		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
		mutex_exit(lock);
		/*
		 * NOTE(review): v_interlock is still held here and is not
		 * released in this function -- presumably vcache_vget()
		 * consumes it; confirm against its contract.
		 */
		error = vcache_vget(vp);
		KASSERT(error == 0 || error == ENOENT);
	} while (error != 0);

	return vp;
}
451 
/*
 * vfs_vnode_iterator_next: return the next vnode of the iteration that
 * is accepted by the optional filter "f" (called with "cl"), or NULL at
 * the end.  Uses the non-waiting mode of vfs_vnode_iterator_next1().
 */
struct vnode *
vfs_vnode_iterator_next(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl)
{

	return vfs_vnode_iterator_next1(vni, f, cl, false);
}
459 
460 /*
461  * Move a vnode from one mount queue to another.
462  */
463 void
464 vfs_insmntque(vnode_t *vp, struct mount *mp)
465 {
466 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
467 	struct mount *omp;
468 	kmutex_t *lock;
469 
470 	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
471 	    vp->v_tag == VT_VFS);
472 
473 	/*
474 	 * Delete from old mount point vnode list, if on one.
475 	 */
476 	if ((omp = vp->v_mount) != NULL) {
477 		lock = omp->mnt_vnodelock;
478 		mutex_enter(lock);
479 		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
480 		mutex_exit(lock);
481 	}
482 
483 	/*
484 	 * Insert into list of vnodes for the new mount point, if
485 	 * available.  The caller must take a reference on the mount
486 	 * structure and donate to the vnode.
487 	 */
488 	if ((vp->v_mount = mp) != NULL) {
489 		lock = mp->mnt_vnodelock;
490 		mutex_enter(lock);
491 		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
492 		mutex_exit(lock);
493 	}
494 
495 	if (omp != NULL) {
496 		/* Release reference to old mount. */
497 		vfs_rele(omp);
498 	}
499 }
500 
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 * (This describes vflush() below.)
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
 */
#ifdef DEBUG
/* When non-zero, vflush() prints vnodes still busy on its last pass. */
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
518 
/*
 * vflushnext: fetch the next vnode for vflush(), yielding the CPU
 * once the tick count has passed *when.
 *
 * => marker: iterator over the mount's vnodes.
 * => when: tick count after which to yield next; updated to hz / 10
 *    ticks past the time of the yield.
 */
static vnode_t *
vflushnext(struct vnode_iterator *marker, int *when)
{
	if (getticks() > *when) {
		yield();
		/* Re-read the clock: time may have passed while yielding. */
		*when = getticks() + hz / 10;
	}
	/* No filter; use the waiting mode of the iterator. */
	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
}
528 
/*
 * Flush one vnode.  Referenced on entry, unreferenced on return.
 *
 * => vp: vnode to flush.
 * => skipvp: vnode to leave alone (only its reference is dropped).
 * => flags: SKIPSYSTEM, WRITECLOSE and FORCECLOSE as described for
 *    vflush().
 *
 * Returns 0 if the vnode was skipped, recycled or forcibly closed,
 * EBUSY if it is still in use and FORCECLOSE was not given, or the
 * error from VOP_FSYNC()/VOP_GETATTR() in the WRITECLOSE case.
 */
static int
vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
{
	int error;
	struct vattr vattr;

	/* Vnodes we were asked to leave alone. */
	if (vp == skipvp ||
	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
		vrele(vp);
		return 0;
	}
	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing or open and unlinked.
	 */
	if ((flags & WRITECLOSE)) {
		if (vp->v_type != VREG) {
			vrele(vp);
			return 0;
		}
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error) {
			/* Locking failed with ENOENT: nothing to flush. */
			KASSERT(error == ENOENT);
			vrele(vp);
			return 0;
		}
		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
		if (error == 0)
			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
		VOP_UNLOCK(vp);
		if (error) {
			vrele(vp);
			return error;
		}
		/* Neither open for writing nor unlinked: leave it alone. */
		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
			vrele(vp);
			return 0;
		}
	}
	/*
	 * First try to recycle the vnode.
	 * NOTE(review): no vrele() follows on success, so vrecycle()
	 * presumably consumes the reference -- confirm.
	 */
	if (vrecycle(vp))
		return 0;
	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device.  For all other files, just
	 * kill them.
	 */
	if (flags & FORCECLOSE) {
		if (vrefcnt(vp) > 1 &&
		    (vp->v_type == VBLK || vp->v_type == VCHR))
			vcache_make_anon(vp);
		else
			vgone(vp);
		return 0;
	}
	/* Still in use and we may not force it. */
	vrele(vp);
	return EBUSY;
}
593 
594 int
595 vflush(struct mount *mp, vnode_t *skipvp, int flags)
596 {
597 	vnode_t *vp;
598 	struct vnode_iterator *marker;
599 	int busy, error, when, retries = 2;
600 
601 	do {
602 		busy = error = when = 0;
603 
604 		/*
605 		 * First, flush out any vnode references from the
606 		 * deferred vrele list.
607 		 */
608 		vrele_flush(mp);
609 
610 		vfs_vnode_iterator_init(mp, &marker);
611 
612 		while ((vp = vflushnext(marker, &when)) != NULL) {
613 			error = vflush_one(vp, skipvp, flags);
614 			if (error == EBUSY) {
615 				error = 0;
616 				busy++;
617 #ifdef DEBUG
618 				if (busyprt && retries == 0)
619 					vprint("vflush: busy vnode", vp);
620 #endif
621 			} else if (error != 0) {
622 				break;
623 			}
624 		}
625 
626 		vfs_vnode_iterator_destroy(marker);
627 	} while (error == 0 && busy > 0 && retries-- > 0);
628 
629 	if (error)
630 		return error;
631 	if (busy)
632 		return EBUSY;
633 	return 0;
634 }
635 
636 /*
637  * Mount a file system.
638  */
639 
/*
 * Scan all active processes to see if any of them have a current or root
 * directory onto which the new filesystem has just been mounted. If so,
 * replace them with the new mount point.
 *
 * => olddp: the covered directory vnode; olddp->v_mountedhere must
 *    already point at the new mount.
 */
static void
mount_checkdirs(vnode_t *olddp)
{
	vnode_t *newdp, *rele1, *rele2;
	struct cwdinfo *cwdi;
	struct proc *p;
	bool retry;

	/*
	 * NOTE(review): a single reference presumably means only the
	 * mount holds the vnode, so no process can be using it -- confirm.
	 */
	if (vrefcnt(olddp) == 1) {
		return;
	}
	/* Get the (locked) root vnode of the file system mounted here. */
	if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
		panic("mount: lost mount");

	do {
		retry = false;
		mutex_enter(&proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Cannot change to the old directory any more,
			 * so even if we see a stale value it is not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			/* Pin the cwdinfo so proc_lock can be dropped. */
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(&proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			/* Re-check under the lock and swap in the new root. */
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				vref(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				vref(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			/* Drop the references the cwdinfo held on olddp. */
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			/*
			 * proc_lock was dropped, so stop this walk and
			 * rescan the process list from the start.
			 */
			mutex_enter(&proc_lock);
			break;
		}
		mutex_exit(&proc_lock);
	} while (retry);

	/* The system root vnode may be the covered directory as well. */
	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
	/* Unlock and release the reference obtained from VFS_ROOT(). */
	vput(newdp);
}
708 
709 /*
710  * Start extended attributes
711  */
712 static int
713 start_extattr(struct mount *mp)
714 {
715 	int error;
716 
717 	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
718 	if (error)
719 		printf("%s: failed to start extattr: error = %d\n",
720 		       mp->mnt_stat.f_mntonname, error);
721 
722 	return error;
723 }
724 
/*
 * mount_domount: mount a new file system of type "vfsops" on the
 * directory vnode *vpp.
 *
 * => l: calling lwp; its credentials are used for authorization and
 *    recorded as the owner of the mount.
 * => vpp: vnode to be covered.  *vpp is set to NULL once the file
 *    system has been put on the mount list, even if VFS_START() then
 *    fails.
 * => vfsops: file system type.  The caller's reference is consumed:
 *    it is dropped with vfs_delref() on early failure and otherwise
 *    belongs to the mount (released in vfs_rele()).
 * => path, data, data_len: handed to VFS_MOUNT(); path is also looked
 *    up again to validate the mount point.
 * => flags: mount flags; MNT_EXPORTED is rejected with EINVAL.
 *
 * Returns 0 on success or an errno value.
 */
int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
	vnode_t *vp = *vpp;
	struct mount *mp;
	struct pathbuf *pb;
	struct nameidata nd;
	int error;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vfs_delref(vfsops);
		return error;
	}

	/* Cannot make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		vfs_delref(vfsops);
		return ENOTDIR;
	}

	if (flags & MNT_EXPORTED) {
		vfs_delref(vfsops);
		return EINVAL;
	}

	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
		vfs_delref(vfsops);
		return ENOMEM;
	}

	/* From here on the vfsops reference is owned by mp. */
	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);

	mutex_enter(mp->mnt_updating);
	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	if (error != 0)
		goto err_unmounted;

	/*
	 * Validate and prepare the mount point.
	 */
	error = pathbuf_copyin(path, &pb);
	if (error != 0) {
		goto err_mounted;
	}
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error != 0) {
		goto err_mounted;
	}
	/* The path must still resolve to the vnode we were given. */
	if (nd.ni_vp != vp) {
		vput(nd.ni_vp);
		error = EINVAL;
		goto err_mounted;
	}
	/* Something else is already mounted here. */
	if (vp->v_mountedhere != NULL) {
		vput(nd.ni_vp);
		error = EBUSY;
		goto err_mounted;
	}
	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
	if (error != 0) {
		vput(nd.ni_vp);
		goto err_mounted;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	mp->mnt_iflag &= ~IMNT_WANTRDWR;

	mountlist_append(mp);
	/* Mounts that are neither read-only nor async go on the syncer worklist. */
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		vfs_syncer_add_to_worklist(mp);
	vp->v_mountedhere = mp;
	vput(nd.ni_vp);

	/* Redirect processes whose cwd/root is the covered directory. */
	mount_checkdirs(vp);
	mutex_exit(mp->mnt_updating);

	/* Hold an additional reference to the mount across VFS_START(). */
	vfs_ref(mp);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		vrele(vp);
	} else if (flags & MNT_EXTATTR) {
		/* Clear the flag again if extattrs could not be started. */
		if (start_extattr(mp) != 0)
			mp->mnt_flag &= ~MNT_EXTATTR;
	}
	/* Drop reference held for VFS_START(). */
	vfs_rele(mp);
	*vpp = NULL;
	return error;

err_mounted:
	/* VFS_MOUNT() succeeded: undo it with a forced unmount. */
	if (vfs_suspend(mp, 0))
		panic("Suspending fresh file system failed");
	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
		panic("Unmounting fresh file system failed");
	vfs_resume(mp);

err_unmounted:
	vp->v_mountedhere = NULL;
	mutex_exit(mp->mnt_updating);
	/* Drops the initial reference from vfs_mountalloc(). */
	vfs_rele(mp);

	return error;
}
848 
/*
 * Do the actual file system unmount.  File system is assumed to have
 * been locked by the caller.
 *
 * => Caller hold reference to the mount, explicitly for dounmount().
 * => mp: mount to unmount.
 * => flags: unmount flags; with MNT_FORCE the sync step is skipped and
 *    VFS_UNMOUNT() is called unconditionally.
 * => l: calling lwp; its credentials are used for VFS_SYNC().
 *
 * Returns 0 on success.  On failure the mount is put back into its
 * previous state (syncer worklist, MNT_ASYNC, extended attributes)
 * and the error is returned.
 */
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	vnode_t *coveredvp;
	int error, async, used_syncer, used_extattr;
	/* If the caller already suspended the mount, leave that to it. */
	const bool was_suspended = fstrans_is_owner(mp);

#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	if (!was_suspended) {
		error = vfs_suspend(mp, 0);
		if (error) {
			return error;
		}
	}

	KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);

	/* Remember state that must be restored if the unmount fails. */
	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
	used_extattr = mp->mnt_flag & MNT_EXTATTR;

	mp->mnt_iflag |= IMNT_UNMOUNT;
	mutex_enter(mp->mnt_updating);
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (used_syncer)
		vfs_syncer_remove_from_worklist(mp);
	error = 0;
	/* Sync writable file systems first, unless forced. */
	if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	if (error == 0 || (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags);
	}
	if (error) {
		/* Unmount failed: put everything back the way it was. */
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			vfs_syncer_add_to_worklist(mp);
		mp->mnt_flag |= async;
		mutex_exit(mp->mnt_updating);
		if (!was_suspended)
			vfs_resume(mp);
		if (used_extattr) {
			if (start_extattr(mp) != 0)
				mp->mnt_flag &= ~MNT_EXTATTR;
			else
				mp->mnt_flag |= MNT_EXTATTR;
		}
		return (error);
	}
	mutex_exit(mp->mnt_updating);

	/*
	 * Mark the file system as gone to prevent further unmounts
	 * now that the mnt_updating lock has been released; this also
	 * prevents vfs_busy() from succeeding.
	 */
	mp->mnt_iflag |= IMNT_GONE;
	if (!was_suspended)
		vfs_resume(mp);

	/* Detach from the covered vnode, if any. */
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY);
		coveredvp->v_mountedhere = NULL;
		VOP_UNLOCK(coveredvp);
	}
	mountlist_remove(mp);
	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	vfs_hooks_unmount(mp);

	vfs_rele(mp);	/* reference from mount() */
	if (coveredvp != NULLVP) {
		vrele(coveredvp);
	}
	return (0);
}
937 
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 *
 * Announces the operation on the console, then runs vfs_unmountall1()
 * in forced, verbose mode and returns its result.
 */
bool
vfs_unmountall(struct lwp *l)
{

	printf("unmounting file systems...\n");
	return vfs_unmountall1(l, true, true);
}
950 
/*
 * vfs_unmount_print: report (at verbose level) that "mp" was
 * unmounted; "pfx" is printed in front of the message.
 */
static void
vfs_unmount_print(struct mount *mp, const char *pfx)
{

	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
	    mp->mnt_stat.f_fstypename);
}
959 
960 /*
961  * Return the mount with the highest generation less than "gen".
962  */
963 static struct mount *
964 vfs_unmount_next(uint64_t gen)
965 {
966 	mount_iterator_t *iter;
967 	struct mount *mp, *nmp;
968 
969 	nmp = NULL;
970 
971 	mountlist_iterator_init(&iter);
972 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
973 		if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) &&
974 		    mp->mnt_gen < gen) {
975 			if (nmp != NULL)
976 				vfs_rele(nmp);
977 			nmp = mp;
978 			vfs_ref(nmp);
979 		}
980 	}
981 	mountlist_iterator_destroy(iter);
982 
983 	return nmp;
984 }
985 
986 bool
987 vfs_unmount_forceone(struct lwp *l)
988 {
989 	struct mount *mp;
990 	int error;
991 
992 	mp = vfs_unmount_next(mountgen);
993 	if (mp == NULL) {
994 		return false;
995 	}
996 
997 #ifdef DEBUG
998 	printf("forcefully unmounting %s (%s)...\n",
999 	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1000 #endif
1001 	if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
1002 		vfs_unmount_print(mp, "forcefully ");
1003 		return true;
1004 	} else {
1005 		vfs_rele(mp);
1006 	}
1007 
1008 #ifdef DEBUG
1009 	printf("forceful unmount of %s failed with error %d\n",
1010 	    mp->mnt_stat.f_mntonname, error);
1011 #endif
1012 
1013 	return false;
1014 }
1015 
1016 bool
1017 vfs_unmountall1(struct lwp *l, bool force, bool verbose)
1018 {
1019 	struct mount *mp;
1020 	bool any_error = false, progress = false;
1021 	uint64_t gen;
1022 	int error;
1023 
1024 	gen = mountgen;
1025 	for (;;) {
1026 		mp = vfs_unmount_next(gen);
1027 		if (mp == NULL)
1028 			break;
1029 		gen = mp->mnt_gen;
1030 
1031 #ifdef DEBUG
1032 		printf("unmounting %p %s (%s)...\n",
1033 		    (void *)mp, mp->mnt_stat.f_mntonname,
1034 		    mp->mnt_stat.f_mntfromname);
1035 #endif
1036 		if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
1037 			vfs_unmount_print(mp, "");
1038 			progress = true;
1039 		} else {
1040 			vfs_rele(mp);
1041 			if (verbose) {
1042 				printf("unmount of %s failed with error %d\n",
1043 				    mp->mnt_stat.f_mntonname, error);
1044 			}
1045 			any_error = true;
1046 		}
1047 	}
1048 	if (verbose) {
1049 		printf("unmounting done\n");
1050 	}
1051 	if (any_error && verbose) {
1052 		printf("WARNING: some file systems would not unmount\n");
1053 	}
1054 	return progress;
1055 }
1056 
1057 void
1058 vfs_sync_all(struct lwp *l)
1059 {
1060 	printf("syncing disks... ");
1061 
1062 	/* remove user processes from run queue */
1063 	suspendsched();
1064 	(void)spl0();
1065 
1066 	/* avoid coming back this way again if we panic. */
1067 	doing_shutdown = 1;
1068 
1069 	do_sys_sync(l);
1070 
1071 	/* Wait for sync to finish. */
1072 	if (vfs_syncwait() != 0) {
1073 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
1074 		Debugger();
1075 #endif
1076 		printf("giving up\n");
1077 		return;
1078 	} else
1079 		printf("done\n");
1080 }
1081 
1082 /*
1083  * Sync and unmount file systems before shutting down.
1084  */
1085 void
1086 vfs_shutdown(void)
1087 {
1088 	lwp_t *l = curlwp;
1089 
1090 	vfs_sync_all(l);
1091 
1092 	/*
1093 	 * If we have paniced - do not make the situation potentially
1094 	 * worse by unmounting the file systems.
1095 	 */
1096 	if (panicstr != NULL) {
1097 		return;
1098 	}
1099 
1100 	/* Unmount file systems. */
1101 	vfs_unmountall(l);
1102 }
1103 
1104 /*
1105  * Print a list of supported file system types (used by vfs_mountroot)
1106  */
1107 static void
1108 vfs_print_fstypes(void)
1109 {
1110 	struct vfsops *v;
1111 	int cnt = 0;
1112 
1113 	mutex_enter(&vfs_list_lock);
1114 	LIST_FOREACH(v, &vfs_list, vfs_list)
1115 		++cnt;
1116 	mutex_exit(&vfs_list_lock);
1117 
1118 	if (cnt == 0) {
1119 		printf("WARNING: No file system modules have been loaded.\n");
1120 		return;
1121 	}
1122 
1123 	printf("Supported file systems:");
1124 	mutex_enter(&vfs_list_lock);
1125 	LIST_FOREACH(v, &vfs_list, vfs_list) {
1126 		printf(" %s", v->vfs_name);
1127 	}
1128 	mutex_exit(&vfs_list_lock);
1129 	printf("\n");
1130 }
1131 
1132 /*
1133  * Mount the root file system.  If the operator didn't specify a
1134  * file system to use, try all possible file systems until one
1135  * succeeds.
1136  */
1137 int
1138 vfs_mountroot(void)
1139 {
1140 	struct vfsops *v;
1141 	int error = ENODEV;
1142 
1143 	if (root_device == NULL)
1144 		panic("vfs_mountroot: root device unknown");
1145 
1146 	switch (device_class(root_device)) {
1147 	case DV_IFNET:
1148 		if (rootdev != NODEV)
1149 			panic("vfs_mountroot: rootdev set for DV_IFNET "
1150 			    "(0x%llx -> %llu,%llu)",
1151 			    (unsigned long long)rootdev,
1152 			    (unsigned long long)major(rootdev),
1153 			    (unsigned long long)minor(rootdev));
1154 		break;
1155 
1156 	case DV_DISK:
1157 		if (rootdev == NODEV)
1158 			panic("vfs_mountroot: rootdev not set for DV_DISK");
1159 	        if (bdevvp(rootdev, &rootvp))
1160 	                panic("vfs_mountroot: can't get vnode for rootdev");
1161 		error = VOP_OPEN(rootvp, FREAD, FSCRED);
1162 		if (error) {
1163 			printf("vfs_mountroot: can't open root device\n");
1164 			return (error);
1165 		}
1166 		break;
1167 
1168 	case DV_VIRTUAL:
1169 		break;
1170 
1171 	default:
1172 		printf("%s: inappropriate for root file system\n",
1173 		    device_xname(root_device));
1174 		return (ENODEV);
1175 	}
1176 
1177 	/*
1178 	 * If user specified a root fs type, use it.  Make sure the
1179 	 * specified type exists and has a mount_root()
1180 	 */
1181 	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
1182 		v = vfs_getopsbyname(rootfstype);
1183 		error = EFTYPE;
1184 		if (v != NULL) {
1185 			if (v->vfs_mountroot != NULL) {
1186 				error = (v->vfs_mountroot)();
1187 			}
1188 			v->vfs_refcount--;
1189 		}
1190 		goto done;
1191 	}
1192 
1193 	/*
1194 	 * Try each file system currently configured into the kernel.
1195 	 */
1196 	mutex_enter(&vfs_list_lock);
1197 	LIST_FOREACH(v, &vfs_list, vfs_list) {
1198 		if (v->vfs_mountroot == NULL)
1199 			continue;
1200 #ifdef DEBUG
1201 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1202 #endif
1203 		v->vfs_refcount++;
1204 		mutex_exit(&vfs_list_lock);
1205 		error = (*v->vfs_mountroot)();
1206 		mutex_enter(&vfs_list_lock);
1207 		v->vfs_refcount--;
1208 		if (!error) {
1209 			aprint_normal("root file system type: %s\n",
1210 			    v->vfs_name);
1211 			break;
1212 		}
1213 	}
1214 	mutex_exit(&vfs_list_lock);
1215 
1216 	if (v == NULL) {
1217 		vfs_print_fstypes();
1218 		printf("no file system for %s", device_xname(root_device));
1219 		if (device_class(root_device) == DV_DISK)
1220 			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
1221 		printf("\n");
1222 		error = EFTYPE;
1223 	}
1224 
1225 done:
1226 	if (error && device_class(root_device) == DV_DISK) {
1227 		VOP_CLOSE(rootvp, FREAD, FSCRED);
1228 		vrele(rootvp);
1229 	}
1230 	if (error == 0) {
1231 		mount_iterator_t *iter;
1232 		struct mount *mp;
1233 		extern struct cwdinfo cwdi0;
1234 
1235 		mountlist_iterator_init(&iter);
1236 		mp = mountlist_iterator_next(iter);
1237 		KASSERT(mp != NULL);
1238 		mountlist_iterator_destroy(iter);
1239 
1240 		mp->mnt_flag |= MNT_ROOTFS;
1241 		mp->mnt_op->vfs_refcount++;
1242 
1243 		/*
1244 		 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
1245 		 * reference it, and donate it the reference grabbed
1246 		 * with VFS_ROOT().
1247 		 */
1248 		error = VFS_ROOT(mp, LK_NONE, &rootvnode);
1249 		if (error)
1250 			panic("cannot find root vnode, error=%d", error);
1251 		cwdi0.cwdi_cdir = rootvnode;
1252 		cwdi0.cwdi_rdir = NULL;
1253 
1254 		/*
1255 		 * Now that root is mounted, we can fixup initproc's CWD
1256 		 * info.  All other processes are kthreads, which merely
1257 		 * share proc0's CWD info.
1258 		 */
1259 		initproc->p_cwdi->cwdi_cdir = rootvnode;
1260 		vref(initproc->p_cwdi->cwdi_cdir);
1261 		initproc->p_cwdi->cwdi_rdir = NULL;
1262 		/*
1263 		 * Enable loading of modules from the filesystem
1264 		 */
1265 		module_load_vfs_init();
1266 
1267 	}
1268 	return (error);
1269 }
1270 
1271 /*
1272  * mount_specific_key_create --
1273  *	Create a key for subsystem mount-specific data.
1274  */
1275 int
1276 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1277 {
1278 
1279 	return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
1280 }
1281 
1282 /*
1283  * mount_specific_key_delete --
1284  *	Delete a key for subsystem mount-specific data.
1285  */
1286 void
1287 mount_specific_key_delete(specificdata_key_t key)
1288 {
1289 
1290 	specificdata_key_delete(mount_specificdata_domain, key);
1291 }
1292 
1293 /*
1294  * mount_initspecific --
1295  *	Initialize a mount's specificdata container.
1296  */
1297 void
1298 mount_initspecific(struct mount *mp)
1299 {
1300 	int error __diagused;
1301 
1302 	error = specificdata_init(mount_specificdata_domain,
1303 				  &mp->mnt_specdataref);
1304 	KASSERT(error == 0);
1305 }
1306 
1307 /*
1308  * mount_finispecific --
1309  *	Finalize a mount's specificdata container.
1310  */
1311 void
1312 mount_finispecific(struct mount *mp)
1313 {
1314 
1315 	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
1316 }
1317 
1318 /*
1319  * mount_getspecific --
1320  *	Return mount-specific data corresponding to the specified key.
1321  */
1322 void *
1323 mount_getspecific(struct mount *mp, specificdata_key_t key)
1324 {
1325 
1326 	return specificdata_getspecific(mount_specificdata_domain,
1327 					 &mp->mnt_specdataref, key);
1328 }
1329 
1330 /*
1331  * mount_setspecific --
1332  *	Set mount-specific data corresponding to the specified key.
1333  */
1334 void
1335 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
1336 {
1337 
1338 	specificdata_setspecific(mount_specificdata_domain,
1339 				 &mp->mnt_specdataref, key, data);
1340 }
1341 
1342 /*
1343  * Check to see if a filesystem is mounted on a block device.
1344  */
1345 int
1346 vfs_mountedon(vnode_t *vp)
1347 {
1348 	vnode_t *vq;
1349 	int error = 0;
1350 
1351 	if (vp->v_type != VBLK)
1352 		return ENOTBLK;
1353 	if (spec_node_getmountedfs(vp) != NULL)
1354 		return EBUSY;
1355 	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) {
1356 		if (spec_node_getmountedfs(vq) != NULL)
1357 			error = EBUSY;
1358 		vrele(vq);
1359 	}
1360 
1361 	return error;
1362 }
1363 
1364 /*
1365  * Check if a device pointed to by vp is mounted.
1366  *
1367  * Returns:
1368  *   EINVAL	if it's not a disk
1369  *   EBUSY	if it's a disk and mounted
1370  *   0		if it's a disk and not mounted
1371  */
1372 int
1373 rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
1374 {
1375 	vnode_t *bvp;
1376 	dev_t dev;
1377 	int d_type;
1378 
1379 	bvp = NULL;
1380 	d_type = D_OTHER;
1381 
1382 	if (iskmemvp(vp))
1383 		return EINVAL;
1384 
1385 	switch (vp->v_type) {
1386 	case VCHR: {
1387 		const struct cdevsw *cdev;
1388 
1389 		dev = vp->v_rdev;
1390 		cdev = cdevsw_lookup(dev);
1391 		if (cdev != NULL) {
1392 			dev_t blkdev;
1393 
1394 			blkdev = devsw_chr2blk(dev);
1395 			if (blkdev != NODEV) {
1396 				if (vfinddev(blkdev, VBLK, &bvp) != 0) {
1397 					d_type = (cdev->d_flag & D_TYPEMASK);
1398 					/* XXX: what if bvp disappears? */
1399 					vrele(bvp);
1400 				}
1401 			}
1402 		}
1403 
1404 		break;
1405 		}
1406 
1407 	case VBLK: {
1408 		const struct bdevsw *bdev;
1409 
1410 		dev = vp->v_rdev;
1411 		bdev = bdevsw_lookup(dev);
1412 		if (bdev != NULL)
1413 			d_type = (bdev->d_flag & D_TYPEMASK);
1414 
1415 		bvp = vp;
1416 
1417 		break;
1418 		}
1419 
1420 	default:
1421 		break;
1422 	}
1423 
1424 	if (d_type != D_DISK)
1425 		return EINVAL;
1426 
1427 	if (bvpp != NULL)
1428 		*bvpp = bvp;
1429 
1430 	/*
1431 	 * XXX: This is bogus. We should be failing the request
1432 	 * XXX: not only if this specific slice is mounted, but
1433 	 * XXX: if it's on a disk with any other mounted slice.
1434 	 */
1435 	if (vfs_mountedon(bvp))
1436 		return EBUSY;
1437 
1438 	return 0;
1439 }
1440 
/*
 * Make a 'unique' number from a mount type name.
 *
 * Each character is folded in by shifting the accumulated value left
 * two bits and XORing in the character.  The arithmetic is done on an
 * unsigned long so that long names (or characters with the high bit
 * set) cannot trigger undefined behaviour from shifting a signed
 * value; the bit pattern produced is the same as with signed
 * arithmetic wherever that was well defined.
 *
 * type must be a NUL-terminated string; an empty string yields 0.
 */
long
makefstype(const char *type)
{
	unsigned long acc = 0;

	for (; *type != '\0'; type++) {
		acc <<= 2;
		/* Same bits as XORing the (possibly signed) char into a long. */
		acc ^= (unsigned long)*type;
	}
	return (long)acc;
}
1455 
1456 static struct mountlist_entry *
1457 mountlist_alloc(enum mountlist_type type, struct mount *mp)
1458 {
1459 	struct mountlist_entry *me;
1460 
1461 	me = kmem_zalloc(sizeof(*me), KM_SLEEP);
1462 	me->me_mount = mp;
1463 	me->me_type = type;
1464 
1465 	return me;
1466 }
1467 
1468 static void
1469 mountlist_free(struct mountlist_entry *me)
1470 {
1471 
1472 	kmem_free(me, sizeof(*me));
1473 }
1474 
1475 void
1476 mountlist_iterator_init(mount_iterator_t **mip)
1477 {
1478 	struct mountlist_entry *me;
1479 
1480 	me = mountlist_alloc(ME_MARKER, NULL);
1481 	mutex_enter(&mountlist_lock);
1482 	TAILQ_INSERT_HEAD(&mountlist, me, me_list);
1483 	mutex_exit(&mountlist_lock);
1484 	*mip = (mount_iterator_t *)me;
1485 }
1486 
1487 void
1488 mountlist_iterator_destroy(mount_iterator_t *mi)
1489 {
1490 	struct mountlist_entry *marker = &mi->mi_entry;
1491 
1492 	if (marker->me_mount != NULL)
1493 		vfs_unbusy(marker->me_mount);
1494 
1495 	mutex_enter(&mountlist_lock);
1496 	TAILQ_REMOVE(&mountlist, marker, me_list);
1497 	mutex_exit(&mountlist_lock);
1498 
1499 	mountlist_free(marker);
1500 
1501 }
1502 
1503 /*
1504  * Return the next mount or NULL for this iterator.
1505  * Mark it busy on success.
1506  */
1507 static inline struct mount *
1508 _mountlist_iterator_next(mount_iterator_t *mi, bool wait)
1509 {
1510 	struct mountlist_entry *me, *marker = &mi->mi_entry;
1511 	struct mount *mp;
1512 	int error;
1513 
1514 	if (marker->me_mount != NULL) {
1515 		vfs_unbusy(marker->me_mount);
1516 		marker->me_mount = NULL;
1517 	}
1518 
1519 	mutex_enter(&mountlist_lock);
1520 	for (;;) {
1521 		KASSERT(marker->me_type == ME_MARKER);
1522 
1523 		me = TAILQ_NEXT(marker, me_list);
1524 		if (me == NULL) {
1525 			/* End of list: keep marker and return. */
1526 			mutex_exit(&mountlist_lock);
1527 			return NULL;
1528 		}
1529 		TAILQ_REMOVE(&mountlist, marker, me_list);
1530 		TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);
1531 
1532 		/* Skip other markers. */
1533 		if (me->me_type != ME_MOUNT)
1534 			continue;
1535 
1536 		/* Take an initial reference for vfs_busy() below. */
1537 		mp = me->me_mount;
1538 		KASSERT(mp != NULL);
1539 		vfs_ref(mp);
1540 		mutex_exit(&mountlist_lock);
1541 
1542 		/* Try to mark this mount busy and return on success. */
1543 		if (wait)
1544 			error = vfs_busy(mp);
1545 		else
1546 			error = vfs_trybusy(mp);
1547 		if (error == 0) {
1548 			vfs_rele(mp);
1549 			marker->me_mount = mp;
1550 			return mp;
1551 		}
1552 		vfs_rele(mp);
1553 		mutex_enter(&mountlist_lock);
1554 	}
1555 }
1556 
1557 struct mount *
1558 mountlist_iterator_next(mount_iterator_t *mi)
1559 {
1560 
1561 	return _mountlist_iterator_next(mi, true);
1562 }
1563 
1564 struct mount *
1565 mountlist_iterator_trynext(mount_iterator_t *mi)
1566 {
1567 
1568 	return _mountlist_iterator_next(mi, false);
1569 }
1570 
1571 /*
1572  * Attach new mount to the end of the mount list.
1573  */
1574 void
1575 mountlist_append(struct mount *mp)
1576 {
1577 	struct mountlist_entry *me;
1578 
1579 	me = mountlist_alloc(ME_MOUNT, mp);
1580 	mutex_enter(&mountlist_lock);
1581 	TAILQ_INSERT_TAIL(&mountlist, me, me_list);
1582 	mutex_exit(&mountlist_lock);
1583 }
1584 
1585 /*
1586  * Remove mount from mount list.
1587  */void
1588 mountlist_remove(struct mount *mp)
1589 {
1590 	struct mountlist_entry *me;
1591 
1592 	mutex_enter(&mountlist_lock);
1593 	TAILQ_FOREACH(me, &mountlist, me_list)
1594 		if (me->me_type == ME_MOUNT && me->me_mount == mp)
1595 			break;
1596 	KASSERT(me != NULL);
1597 	TAILQ_REMOVE(&mountlist, me, me_list);
1598 	mutex_exit(&mountlist_lock);
1599 	mountlist_free(me);
1600 }
1601 
1602 /*
1603  * Unlocked variant to traverse the mountlist.
1604  * To be used from DDB only.
1605  */
1606 struct mount *
1607 _mountlist_next(struct mount *mp)
1608 {
1609 	struct mountlist_entry *me;
1610 
1611 	if (mp == NULL) {
1612 		me = TAILQ_FIRST(&mountlist);
1613 	} else {
1614 		TAILQ_FOREACH(me, &mountlist, me_list)
1615 			if (me->me_type == ME_MOUNT && me->me_mount == mp)
1616 				break;
1617 		if (me != NULL)
1618 			me = TAILQ_NEXT(me, me_list);
1619 	}
1620 
1621 	while (me != NULL && me->me_type != ME_MOUNT)
1622 		me = TAILQ_NEXT(me, me_list);
1623 
1624 	return (me ? me->me_mount : NULL);
1625 }
1626