xref: /onnv-gate/usr/src/uts/common/fs/lofs/lofs_vfsops.c (revision 1748:40fcbc933441)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51488Srsb  * Common Development and Distribution License (the "License").
61488Srsb  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
221488Srsb  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include <sys/param.h>
290Sstevel@tonic-gate #include <sys/errno.h>
300Sstevel@tonic-gate #include <sys/vfs.h>
310Sstevel@tonic-gate #include <sys/vnode.h>
320Sstevel@tonic-gate #include <sys/uio.h>
330Sstevel@tonic-gate #include <sys/pathname.h>
340Sstevel@tonic-gate #include <sys/kmem.h>
350Sstevel@tonic-gate #include <sys/cred.h>
360Sstevel@tonic-gate #include <sys/statvfs.h>
370Sstevel@tonic-gate #include <sys/fs/lofs_info.h>
380Sstevel@tonic-gate #include <sys/fs/lofs_node.h>
390Sstevel@tonic-gate #include <sys/mount.h>
400Sstevel@tonic-gate #include <sys/mntent.h>
410Sstevel@tonic-gate #include <sys/mkdev.h>
421676Sjpk #include <sys/priv.h>
430Sstevel@tonic-gate #include <sys/sysmacros.h>
440Sstevel@tonic-gate #include <sys/systm.h>
450Sstevel@tonic-gate #include <sys/cmn_err.h>
460Sstevel@tonic-gate #include <sys/policy.h>
471676Sjpk #include <sys/tsol/label.h>
480Sstevel@tonic-gate #include "fs/fs_subr.h"
490Sstevel@tonic-gate 
500Sstevel@tonic-gate /*
510Sstevel@tonic-gate  * This is the loadable module wrapper.
520Sstevel@tonic-gate  */
530Sstevel@tonic-gate #include <sys/modctl.h>
540Sstevel@tonic-gate 
550Sstevel@tonic-gate static mntopts_t lofs_mntopts;
560Sstevel@tonic-gate 
570Sstevel@tonic-gate static int lofsinit(int, char *);
580Sstevel@tonic-gate 
590Sstevel@tonic-gate static vfsdef_t vfw = {
600Sstevel@tonic-gate 	VFSDEF_VERSION,
610Sstevel@tonic-gate 	"lofs",
620Sstevel@tonic-gate 	lofsinit,
631488Srsb 	VSW_HASPROTO|VSW_STATS,
640Sstevel@tonic-gate 	&lofs_mntopts
650Sstevel@tonic-gate };
660Sstevel@tonic-gate 
670Sstevel@tonic-gate /*
680Sstevel@tonic-gate  * Stuff needed to support "zonedevfs" mode.
690Sstevel@tonic-gate  */
700Sstevel@tonic-gate static major_t lofs_major;
710Sstevel@tonic-gate static minor_t lofs_minor;
720Sstevel@tonic-gate static kmutex_t lofs_minor_lock;
730Sstevel@tonic-gate 
740Sstevel@tonic-gate /*
750Sstevel@tonic-gate  * LOFS mount options table
760Sstevel@tonic-gate  */
770Sstevel@tonic-gate static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
780Sstevel@tonic-gate static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
790Sstevel@tonic-gate static char *zonedevfs_cancel[] = { MNTOPT_LOFS_NOZONEDEVFS, NULL };
800Sstevel@tonic-gate static char *nozonedevfs_cancel[] = { MNTOPT_LOFS_ZONEDEVFS, NULL };
810Sstevel@tonic-gate static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL };
820Sstevel@tonic-gate static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL };
830Sstevel@tonic-gate 
840Sstevel@tonic-gate static mntopt_t mntopts[] = {
850Sstevel@tonic-gate /*
860Sstevel@tonic-gate  *	option name		cancel option	default arg	flags
870Sstevel@tonic-gate  *		private data
880Sstevel@tonic-gate  */
890Sstevel@tonic-gate 	{ MNTOPT_XATTR,		xattr_cancel,	NULL,		0,
900Sstevel@tonic-gate 		(void *)0 },
910Sstevel@tonic-gate 	{ MNTOPT_NOXATTR,	noxattr_cancel,	NULL,		0,
920Sstevel@tonic-gate 		(void *)0 },
930Sstevel@tonic-gate 	{ MNTOPT_LOFS_ZONEDEVFS,	zonedevfs_cancel,	NULL,	0,
940Sstevel@tonic-gate 		(void *)0 },
950Sstevel@tonic-gate 	{ MNTOPT_LOFS_NOZONEDEVFS,	nozonedevfs_cancel,	NULL,	0,
960Sstevel@tonic-gate 		(void *)0 },
970Sstevel@tonic-gate 	{ MNTOPT_LOFS_SUB,	sub_cancel,	NULL,		0,
980Sstevel@tonic-gate 		(void *)0 },
990Sstevel@tonic-gate 	{ MNTOPT_LOFS_NOSUB,	nosub_cancel,	NULL,		0,
1000Sstevel@tonic-gate 		(void *)0 },
1010Sstevel@tonic-gate };
1020Sstevel@tonic-gate 
1030Sstevel@tonic-gate static mntopts_t lofs_mntopts = {
1040Sstevel@tonic-gate 	sizeof (mntopts) / sizeof (mntopt_t),
1050Sstevel@tonic-gate 	mntopts
1060Sstevel@tonic-gate };
1070Sstevel@tonic-gate 
1080Sstevel@tonic-gate /*
1090Sstevel@tonic-gate  * Module linkage information for the kernel.
1100Sstevel@tonic-gate  */
1110Sstevel@tonic-gate 
1120Sstevel@tonic-gate static struct modlfs modlfs = {
1130Sstevel@tonic-gate 	&mod_fsops, "filesystem for lofs", &vfw
1140Sstevel@tonic-gate };
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate static struct modlinkage modlinkage = {
1170Sstevel@tonic-gate 	MODREV_1, (void *)&modlfs, NULL
1180Sstevel@tonic-gate };
1190Sstevel@tonic-gate 
1200Sstevel@tonic-gate /*
1210Sstevel@tonic-gate  * This is the module initialization routine.
1220Sstevel@tonic-gate  */
1231676Sjpk 
1240Sstevel@tonic-gate int
1251676Sjpk _init(void)
1260Sstevel@tonic-gate {
1270Sstevel@tonic-gate 	int status;
1280Sstevel@tonic-gate 
1290Sstevel@tonic-gate 	lofs_subrinit();
1300Sstevel@tonic-gate 	status = mod_install(&modlinkage);
1310Sstevel@tonic-gate 	if (status != 0) {
1320Sstevel@tonic-gate 		/*
1330Sstevel@tonic-gate 		 * Cleanup previously initialized work.
1340Sstevel@tonic-gate 		 */
1350Sstevel@tonic-gate 		lofs_subrfini();
1360Sstevel@tonic-gate 	}
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate 	return (status);
1390Sstevel@tonic-gate }
1400Sstevel@tonic-gate 
1410Sstevel@tonic-gate /*
1420Sstevel@tonic-gate  * Don't allow the lofs module to be unloaded for now.
1430Sstevel@tonic-gate  * There is a memory leak if it gets unloaded.
1440Sstevel@tonic-gate  */
1451676Sjpk 
1460Sstevel@tonic-gate int
1471676Sjpk _fini(void)
1480Sstevel@tonic-gate {
1490Sstevel@tonic-gate 	return (EBUSY);
1500Sstevel@tonic-gate }
1510Sstevel@tonic-gate 
1520Sstevel@tonic-gate int
1530Sstevel@tonic-gate _info(struct modinfo *modinfop)
1540Sstevel@tonic-gate {
1550Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
1560Sstevel@tonic-gate }
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate static int lofsfstype;
1600Sstevel@tonic-gate vfsops_t *lo_vfsops;
1610Sstevel@tonic-gate 
1620Sstevel@tonic-gate /*
1630Sstevel@tonic-gate  * lo mount vfsop
1640Sstevel@tonic-gate  * Set up mount info record and attach it to vfs struct.
1650Sstevel@tonic-gate  */
1660Sstevel@tonic-gate /*ARGSUSED*/
1670Sstevel@tonic-gate static int
1680Sstevel@tonic-gate lo_mount(struct vfs *vfsp,
1690Sstevel@tonic-gate 	struct vnode *vp,
1700Sstevel@tonic-gate 	struct mounta *uap,
1710Sstevel@tonic-gate 	struct cred *cr)
1720Sstevel@tonic-gate {
1730Sstevel@tonic-gate 	int error;
1740Sstevel@tonic-gate 	struct vnode *srootvp = NULL;	/* the server's root */
1750Sstevel@tonic-gate 	struct vnode *realrootvp;
1760Sstevel@tonic-gate 	struct loinfo *li;
1770Sstevel@tonic-gate 	int is_zonedevfs = 0;
1780Sstevel@tonic-gate 	int nodev;
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate 	nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL);
1810Sstevel@tonic-gate 
1820Sstevel@tonic-gate 	if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0)
1830Sstevel@tonic-gate 		return (EPERM);
1840Sstevel@tonic-gate 
1850Sstevel@tonic-gate 	/*
1860Sstevel@tonic-gate 	 * Loopback devices which get "nodevices" added can be done without
1870Sstevel@tonic-gate 	 * "nodevices" set because we cannot import devices into a zone
1880Sstevel@tonic-gate 	 * with loopback.  Note that we have all zone privileges when
1890Sstevel@tonic-gate 	 * this happens; if not, we'd have gotten "nosuid".
1900Sstevel@tonic-gate 	 */
1910Sstevel@tonic-gate 	if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
1920Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY);
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate 	/*
1950Sstevel@tonic-gate 	 * We must ensure that only the global zone applies the 'zonedevfs'
1960Sstevel@tonic-gate 	 * option; we don't want non-global zones to be able to establish
1970Sstevel@tonic-gate 	 * lofs mounts using the special dev_t we use to ensure that the
1980Sstevel@tonic-gate 	 * contents of a zone's /dev cannot be victim to link(2) or rename(2).
1990Sstevel@tonic-gate 	 * See below, where we set all of this up.
2000Sstevel@tonic-gate 	 *
2010Sstevel@tonic-gate 	 * Since this is more like a privilege check, we use crgetzoneid(cr)
2020Sstevel@tonic-gate 	 * instead of getzoneid().
2030Sstevel@tonic-gate 	 */
2040Sstevel@tonic-gate 	is_zonedevfs = vfs_optionisset(vfsp, MNTOPT_LOFS_ZONEDEVFS, NULL);
2050Sstevel@tonic-gate 	if (crgetzoneid(cr) != GLOBAL_ZONEID && is_zonedevfs)
2060Sstevel@tonic-gate 		return (EPERM);
2070Sstevel@tonic-gate 
2080Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
2090Sstevel@tonic-gate 	if (!(uap->flags & MS_OVERLAY) &&
2101676Sjpk 	    (vp->v_count != 1 || (vp->v_flag & VROOT))) {
2110Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
2120Sstevel@tonic-gate 		return (EBUSY);
2130Sstevel@tonic-gate 	}
2140Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 	/*
2170Sstevel@tonic-gate 	 * Find real root, and make vfs point to real vfs
2180Sstevel@tonic-gate 	 */
2190Sstevel@tonic-gate 	if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ?
2200Sstevel@tonic-gate 		UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP,
2210Sstevel@tonic-gate 	    &realrootvp))
2220Sstevel@tonic-gate 		return (error);
2230Sstevel@tonic-gate 
2240Sstevel@tonic-gate 	/*
2251676Sjpk 	 * Enforce MAC policy if needed.
2261676Sjpk 	 *
2271676Sjpk 	 * Loopback mounts must not allow writing up. The dominance test
2281676Sjpk 	 * is intended to prevent a global zone caller from accidentally
2291676Sjpk 	 * creating write-up conditions between two labeled zones.
2301676Sjpk 	 * Local zones can't violate MAC on their own without help from
2311676Sjpk 	 * the global zone because they can't name a pathname that
2321676Sjpk 	 * they don't already have.
2331676Sjpk 	 *
2341676Sjpk 	 * The special case check for the NET_MAC_AWARE process flag is
2351676Sjpk 	 * to support the case of the automounter in the global zone. We
2361676Sjpk 	 * permit automounting of local zone directories such as home
2371676Sjpk 	 * directories, into the global zone as required by setlabel,
2381676Sjpk 	 * zonecopy, and saving of desktop sessions. Such mounts are
2391676Sjpk 	 * trusted not to expose the contents of one zone's directories
2401676Sjpk 	 * to another by leaking them through the global zone.
2411676Sjpk 	 */
2421676Sjpk 	if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) {
243*1748Srica 		char	specname[MAXPATHLEN];
244*1748Srica 		zone_t	*from_zptr;
245*1748Srica 		zone_t	*to_zptr;
2461676Sjpk 
247*1748Srica 		if (vnodetopath(NULL, realrootvp, specname,
248*1748Srica 		    sizeof (specname), CRED()) != 0)
249*1748Srica 			return (EACCES);
250*1748Srica 
2511676Sjpk 		from_zptr = zone_find_by_path(specname);
2521676Sjpk 		to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
2531676Sjpk 
2541676Sjpk 		/*
2551676Sjpk 		 * Special case for zone devfs: the zone for /dev will
2561676Sjpk 		 * incorrectly appear as the global zone since it's not
2571676Sjpk 		 * under the zone rootpath.  So for zone devfs check allow
2581676Sjpk 		 * read-write mounts.
2591676Sjpk 		 */
2601676Sjpk 
2611676Sjpk 		if (from_zptr != to_zptr && !is_zonedevfs) {
2621676Sjpk 			/*
2631676Sjpk 			 * We know at this point that the labels aren't equal
2641676Sjpk 			 * because the zone pointers aren't equal, and zones
2651676Sjpk 			 * can't share a label.
2661676Sjpk 			 *
2671676Sjpk 			 * If the source is the global zone then making
2681676Sjpk 			 * it available to a local zone must be done in
2691676Sjpk 			 * read-only mode as the label will become admin_low.
2701676Sjpk 			 *
2711676Sjpk 			 * If it is a mount between local zones then if
2721676Sjpk 			 * the current process is in the global zone and has
2731676Sjpk 			 * the NET_MAC_AWARE flag, then regular read-write
2741676Sjpk 			 * access is allowed.  If it's in some other zone, but
2751676Sjpk 			 * the label on the mount point dominates the original
2761676Sjpk 			 * source, then allow the mount as read-only
2771676Sjpk 			 * ("read-down").
2781676Sjpk 			 */
2791676Sjpk 			if (from_zptr->zone_id == GLOBAL_ZONEID) {
2801676Sjpk 				/* make the mount read-only */
2811676Sjpk 				vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
2821676Sjpk 			} else { /* cross-zone mount */
2831676Sjpk 				if (to_zptr->zone_id == GLOBAL_ZONEID &&
2841676Sjpk 				    /* LINTED: no consequent */
2851676Sjpk 				    getpflags(NET_MAC_AWARE, cr) != 0) {
2861676Sjpk 					/* Allow the mount as read-write */
2871676Sjpk 				} else if (bldominates(
2881676Sjpk 				    label2bslabel(to_zptr->zone_slabel),
2891676Sjpk 				    label2bslabel(from_zptr->zone_slabel))) {
2901676Sjpk 					/* make the mount read-only */
2911676Sjpk 					vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
2921676Sjpk 				} else {
2931676Sjpk 					zone_rele(to_zptr);
2941676Sjpk 					zone_rele(from_zptr);
2951676Sjpk 					return (EACCES);
2961676Sjpk 				}
2971676Sjpk 			}
2981676Sjpk 		}
2991676Sjpk 		zone_rele(to_zptr);
3001676Sjpk 		zone_rele(from_zptr);
3011676Sjpk 	}
3021676Sjpk 
3031676Sjpk 	/*
3040Sstevel@tonic-gate 	 * realrootvp may be an AUTOFS node, in which case we
3050Sstevel@tonic-gate 	 * perform a VOP_ACCESS() to trigger the mount of the
3060Sstevel@tonic-gate 	 * intended filesystem, so we loopback mount the intended
3070Sstevel@tonic-gate 	 * filesystem instead of the AUTOFS filesystem.
3080Sstevel@tonic-gate 	 */
3090Sstevel@tonic-gate 	(void) VOP_ACCESS(realrootvp, 0, 0, cr);
3100Sstevel@tonic-gate 
3110Sstevel@tonic-gate 	/*
3120Sstevel@tonic-gate 	 * We're interested in the top most filesystem.
3130Sstevel@tonic-gate 	 * This is specially important when uap->spec is a trigger
3140Sstevel@tonic-gate 	 * AUTOFS node, since we're really interested in mounting the
3150Sstevel@tonic-gate 	 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
3160Sstevel@tonic-gate 	 * call not the AUTOFS node itself.
3170Sstevel@tonic-gate 	 */
3180Sstevel@tonic-gate 	if (vn_mountedvfs(realrootvp) != NULL) {
3190Sstevel@tonic-gate 		if (error = traverse(&realrootvp)) {
3200Sstevel@tonic-gate 			VN_RELE(realrootvp);
3210Sstevel@tonic-gate 			return (error);
3220Sstevel@tonic-gate 		}
3230Sstevel@tonic-gate 	}
3240Sstevel@tonic-gate 
3250Sstevel@tonic-gate 	/*
3260Sstevel@tonic-gate 	 * Allocate a vfs info struct and attach it
3270Sstevel@tonic-gate 	 */
3280Sstevel@tonic-gate 	li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP);
3290Sstevel@tonic-gate 	li->li_realvfs = realrootvp->v_vfsp;
3300Sstevel@tonic-gate 	li->li_mountvfs = vfsp;
3310Sstevel@tonic-gate 
3320Sstevel@tonic-gate 	/*
3330Sstevel@tonic-gate 	 * Set mount flags to be inherited by loopback vfs's
3340Sstevel@tonic-gate 	 */
3350Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
3360Sstevel@tonic-gate 		li->li_mflag |= VFS_RDONLY;
3370Sstevel@tonic-gate 	}
3380Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
3390Sstevel@tonic-gate 		li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES);
3400Sstevel@tonic-gate 	}
3410Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
3420Sstevel@tonic-gate 		li->li_mflag |= VFS_NODEVICES;
3430Sstevel@tonic-gate 	}
3440Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
3450Sstevel@tonic-gate 		li->li_mflag |= VFS_NOSETUID;
3460Sstevel@tonic-gate 	}
3470Sstevel@tonic-gate 	/*
3480Sstevel@tonic-gate 	 * Permissive flags are added to the "deny" bitmap.
3490Sstevel@tonic-gate 	 */
3500Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
3510Sstevel@tonic-gate 		li->li_dflag |= VFS_XATTR;
3520Sstevel@tonic-gate 	}
3530Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
3540Sstevel@tonic-gate 		li->li_dflag |= VFS_NBMAND;
3550Sstevel@tonic-gate 	}
3560Sstevel@tonic-gate 
3570Sstevel@tonic-gate 	/*
3580Sstevel@tonic-gate 	 * Propagate inheritable mount flags from the real vfs.
3590Sstevel@tonic-gate 	 */
3600Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_RDONLY) &&
3610Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_RO, NULL))
3620Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_RO, NULL,
3630Sstevel@tonic-gate 		    VFS_NODISPLAY);
3640Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) &&
3650Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
3660Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL,
3670Sstevel@tonic-gate 		    VFS_NODISPLAY);
3680Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) &&
3690Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
3700Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL,
3710Sstevel@tonic-gate 		    VFS_NODISPLAY);
3720Sstevel@tonic-gate 	/*
3730Sstevel@tonic-gate 	 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags
3740Sstevel@tonic-gate 	 * such as VFS_RDONLY, are handled differently.  An explicit
3750Sstevel@tonic-gate 	 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR.
3760Sstevel@tonic-gate 	 */
3770Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_XATTR) &&
3780Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) &&
3790Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
3800Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL,
3810Sstevel@tonic-gate 		    VFS_NODISPLAY);
3820Sstevel@tonic-gate 	if ((li->li_realvfs->vfs_flag & VFS_NBMAND) &&
3830Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) &&
3840Sstevel@tonic-gate 	    !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
3850Sstevel@tonic-gate 		vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL,
3860Sstevel@tonic-gate 		    VFS_NODISPLAY);
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate 	li->li_refct = 0;
3890Sstevel@tonic-gate 	vfsp->vfs_data = (caddr_t)li;
3900Sstevel@tonic-gate 	vfsp->vfs_bcount = 0;
3910Sstevel@tonic-gate 	vfsp->vfs_fstype = lofsfstype;
3920Sstevel@tonic-gate 	vfsp->vfs_bsize = li->li_realvfs->vfs_bsize;
3930Sstevel@tonic-gate 
3940Sstevel@tonic-gate 	/*
3950Sstevel@tonic-gate 	 * Test to see if we need to be in "zone /dev" mode.  In zonedevfs
3960Sstevel@tonic-gate 	 * mode, we pull a nasty trick; we make sure that the lofs dev_t does
3970Sstevel@tonic-gate 	 * *not* reflect the underlying device, so that no renames or links
3980Sstevel@tonic-gate 	 * can occur to or from the /dev hierarchy.
3990Sstevel@tonic-gate 	 */
4000Sstevel@tonic-gate 	if (is_zonedevfs) {
4010Sstevel@tonic-gate 		dev_t dev;
4020Sstevel@tonic-gate 
4030Sstevel@tonic-gate 		mutex_enter(&lofs_minor_lock);
4040Sstevel@tonic-gate 		do {
4050Sstevel@tonic-gate 			lofs_minor = (lofs_minor + 1) & MAXMIN32;
4060Sstevel@tonic-gate 			dev = makedevice(lofs_major, lofs_minor);
4070Sstevel@tonic-gate 		} while (vfs_devismounted(dev));
4080Sstevel@tonic-gate 		mutex_exit(&lofs_minor_lock);
4090Sstevel@tonic-gate 
4100Sstevel@tonic-gate 		vfsp->vfs_dev = dev;
4110Sstevel@tonic-gate 		vfs_make_fsid(&vfsp->vfs_fsid, dev, lofsfstype);
4120Sstevel@tonic-gate 
4130Sstevel@tonic-gate 		li->li_flag |= LO_ZONEDEVFS;
4140Sstevel@tonic-gate 	} else {
4150Sstevel@tonic-gate 		vfsp->vfs_dev = li->li_realvfs->vfs_dev;
4160Sstevel@tonic-gate 		vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0];
4170Sstevel@tonic-gate 		vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1];
4180Sstevel@tonic-gate 	}
4190Sstevel@tonic-gate 
4200Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) {
4210Sstevel@tonic-gate 		li->li_flag |= LO_NOSUB;
4220Sstevel@tonic-gate 	}
4230Sstevel@tonic-gate 
4240Sstevel@tonic-gate 	/*
4250Sstevel@tonic-gate 	 * Setup the hashtable. If the root of this mount isn't a directory,
4260Sstevel@tonic-gate 	 * there's no point in allocating a large hashtable. A table with one
4270Sstevel@tonic-gate 	 * bucket is sufficient.
4280Sstevel@tonic-gate 	 */
4290Sstevel@tonic-gate 	if (realrootvp->v_type != VDIR)
4300Sstevel@tonic-gate 		lsetup(li, 1);
4310Sstevel@tonic-gate 	else
4320Sstevel@tonic-gate 		lsetup(li, 0);
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 	/*
4350Sstevel@tonic-gate 	 * Make the root vnode
4360Sstevel@tonic-gate 	 */
437324Sowenr 	srootvp = makelonode(realrootvp, li, 0);
4380Sstevel@tonic-gate 	srootvp->v_flag |= VROOT;
4390Sstevel@tonic-gate 	li->li_rootvp = srootvp;
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate #ifdef LODEBUG
4420Sstevel@tonic-gate 	lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n",
4430Sstevel@tonic-gate 	    vfsp, li->li_realvfs, srootvp, realrootvp, li);
4440Sstevel@tonic-gate #endif
4450Sstevel@tonic-gate 	return (0);
4460Sstevel@tonic-gate }
4470Sstevel@tonic-gate 
4480Sstevel@tonic-gate /*
4490Sstevel@tonic-gate  * Undo loopback mount
4500Sstevel@tonic-gate  */
4510Sstevel@tonic-gate static int
4520Sstevel@tonic-gate lo_unmount(struct vfs *vfsp, int flag, struct cred *cr)
4530Sstevel@tonic-gate {
4540Sstevel@tonic-gate 	struct loinfo *li;
4550Sstevel@tonic-gate 
4560Sstevel@tonic-gate 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
4570Sstevel@tonic-gate 		return (EPERM);
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate 	/*
4600Sstevel@tonic-gate 	 * Forced unmount is not supported by this file system
4610Sstevel@tonic-gate 	 * and thus, ENOTSUP, is being returned.
4620Sstevel@tonic-gate 	 */
4630Sstevel@tonic-gate 	if (flag & MS_FORCE)
4640Sstevel@tonic-gate 		return (ENOTSUP);
4650Sstevel@tonic-gate 
4660Sstevel@tonic-gate 	li = vtoli(vfsp);
4670Sstevel@tonic-gate #ifdef LODEBUG
4680Sstevel@tonic-gate 	lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li);
4690Sstevel@tonic-gate #endif
4700Sstevel@tonic-gate 	if (li->li_refct != 1 || li->li_rootvp->v_count != 1) {
4710Sstevel@tonic-gate #ifdef LODEBUG
4720Sstevel@tonic-gate 		lo_dprint(4, "refct %d v_ct %d\n", li->li_refct,
4730Sstevel@tonic-gate 		    li->li_rootvp->v_count);
4740Sstevel@tonic-gate #endif
4750Sstevel@tonic-gate 		return (EBUSY);
4760Sstevel@tonic-gate 	}
4770Sstevel@tonic-gate 	VN_RELE(li->li_rootvp);
4780Sstevel@tonic-gate 	return (0);
4790Sstevel@tonic-gate }
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate /*
4820Sstevel@tonic-gate  * Find root of lofs mount.
4830Sstevel@tonic-gate  */
4840Sstevel@tonic-gate static int
4850Sstevel@tonic-gate lo_root(struct vfs *vfsp, struct vnode **vpp)
4860Sstevel@tonic-gate {
4870Sstevel@tonic-gate 	*vpp = vtoli(vfsp)->li_rootvp;
4880Sstevel@tonic-gate #ifdef LODEBUG
4890Sstevel@tonic-gate 	lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp);
4900Sstevel@tonic-gate #endif
4910Sstevel@tonic-gate 	/*
4920Sstevel@tonic-gate 	 * If the root of the filesystem is a special file, return the specvp
4930Sstevel@tonic-gate 	 * version of the vnode. We don't save the specvp vnode in our
4940Sstevel@tonic-gate 	 * hashtable since that's exclusively for lnodes.
4950Sstevel@tonic-gate 	 */
4960Sstevel@tonic-gate 	if (IS_DEVVP(*vpp)) {
4970Sstevel@tonic-gate 		struct vnode *svp;
4980Sstevel@tonic-gate 
4990Sstevel@tonic-gate 		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred);
5000Sstevel@tonic-gate 		if (svp == NULL)
5010Sstevel@tonic-gate 			return (ENOSYS);
5020Sstevel@tonic-gate 		*vpp = svp;
5030Sstevel@tonic-gate 	} else {
5040Sstevel@tonic-gate 		VN_HOLD(*vpp);
5050Sstevel@tonic-gate 	}
5060Sstevel@tonic-gate 
5070Sstevel@tonic-gate 	return (0);
5080Sstevel@tonic-gate }
5090Sstevel@tonic-gate 
5100Sstevel@tonic-gate /*
5110Sstevel@tonic-gate  * Get file system statistics.
5120Sstevel@tonic-gate  */
5130Sstevel@tonic-gate static int
5140Sstevel@tonic-gate lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp)
5150Sstevel@tonic-gate {
5160Sstevel@tonic-gate 	vnode_t *realrootvp;
5170Sstevel@tonic-gate 
5180Sstevel@tonic-gate #ifdef LODEBUG
5190Sstevel@tonic-gate 	lo_dprint(4, "lostatvfs %p\n", vfsp);
5200Sstevel@tonic-gate #endif
5210Sstevel@tonic-gate 	/*
5220Sstevel@tonic-gate 	 * Using realrootvp->v_vfsp (instead of the realvfsp that was
5230Sstevel@tonic-gate 	 * cached) is necessary to make lofs work woth forced UFS unmounts.
5240Sstevel@tonic-gate 	 * In the case of a forced unmount, UFS stores a set of dummy vfsops
5250Sstevel@tonic-gate 	 * in all the (i)vnodes in the filesystem. The dummy ops simply
5260Sstevel@tonic-gate 	 * returns back EIO.
5270Sstevel@tonic-gate 	 */
5280Sstevel@tonic-gate 	(void) lo_realvfs(vfsp, &realrootvp);
5290Sstevel@tonic-gate 	if (realrootvp != NULL)
5300Sstevel@tonic-gate 		return (VFS_STATVFS(realrootvp->v_vfsp, sbp));
5310Sstevel@tonic-gate 	else
5320Sstevel@tonic-gate 		return (EIO);
5330Sstevel@tonic-gate }
5340Sstevel@tonic-gate 
/*
 * Sync vfsop.
 *
 * LOFS has no data or metadata of its own to flush; pending I/O on the
 * underlying filesystem is flushed when that filesystem is synched.
 * Always returns 0.
 */
/* ARGSUSED */
static int
lo_sync(struct vfs *vfsp, short flag, struct cred *cr)
{
#ifdef LODEBUG
	lo_dprint(4, "lo_sync: %p\n", vfsp);
#endif
	return (0);
}
5500Sstevel@tonic-gate 
5510Sstevel@tonic-gate /*
5520Sstevel@tonic-gate  * Obtain the vnode from the underlying filesystem.
5530Sstevel@tonic-gate  */
5540Sstevel@tonic-gate static int
5550Sstevel@tonic-gate lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
5560Sstevel@tonic-gate {
5570Sstevel@tonic-gate 	vnode_t *realrootvp;
5580Sstevel@tonic-gate 
5590Sstevel@tonic-gate #ifdef LODEBUG
5600Sstevel@tonic-gate 	lo_dprint(4, "lo_vget: %p\n", vfsp);
5610Sstevel@tonic-gate #endif
5620Sstevel@tonic-gate 	(void) lo_realvfs(vfsp, &realrootvp);
5630Sstevel@tonic-gate 	if (realrootvp != NULL)
5640Sstevel@tonic-gate 		return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp));
5650Sstevel@tonic-gate 	else
5660Sstevel@tonic-gate 		return (EIO);
5670Sstevel@tonic-gate }
5680Sstevel@tonic-gate 
5690Sstevel@tonic-gate /*
5700Sstevel@tonic-gate  * Free mount-specific data.
5710Sstevel@tonic-gate  */
5720Sstevel@tonic-gate static void
5730Sstevel@tonic-gate lo_freevfs(struct vfs *vfsp)
5740Sstevel@tonic-gate {
5750Sstevel@tonic-gate 	struct loinfo *li = vtoli(vfsp);
5760Sstevel@tonic-gate 
5770Sstevel@tonic-gate 	ldestroy(li);
5780Sstevel@tonic-gate 	kmem_free(li, sizeof (struct loinfo));
5790Sstevel@tonic-gate }
5800Sstevel@tonic-gate 
5810Sstevel@tonic-gate static int
5820Sstevel@tonic-gate lofsinit(int fstyp, char *name)
5830Sstevel@tonic-gate {
5840Sstevel@tonic-gate 	static const fs_operation_def_t lo_vfsops_template[] = {
5850Sstevel@tonic-gate 		VFSNAME_MOUNT, lo_mount,
5860Sstevel@tonic-gate 		VFSNAME_UNMOUNT, lo_unmount,
5870Sstevel@tonic-gate 		VFSNAME_ROOT, lo_root,
5880Sstevel@tonic-gate 		VFSNAME_STATVFS, lo_statvfs,
5890Sstevel@tonic-gate 		VFSNAME_SYNC, (fs_generic_func_p) lo_sync,
5900Sstevel@tonic-gate 		VFSNAME_VGET, lo_vget,
5910Sstevel@tonic-gate 		VFSNAME_FREEVFS, (fs_generic_func_p) lo_freevfs,
5920Sstevel@tonic-gate 		NULL, NULL
5930Sstevel@tonic-gate 	};
5940Sstevel@tonic-gate 	int error;
5950Sstevel@tonic-gate 
5960Sstevel@tonic-gate 	error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops);
5970Sstevel@tonic-gate 	if (error != 0) {
5980Sstevel@tonic-gate 		cmn_err(CE_WARN, "lofsinit: bad vfs ops template");
5990Sstevel@tonic-gate 		return (error);
6000Sstevel@tonic-gate 	}
6010Sstevel@tonic-gate 
6020Sstevel@tonic-gate 	error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops);
6030Sstevel@tonic-gate 	if (error != 0) {
6040Sstevel@tonic-gate 		(void) vfs_freevfsops_by_type(fstyp);
6050Sstevel@tonic-gate 		cmn_err(CE_WARN, "lofsinit: bad vnode ops template");
6060Sstevel@tonic-gate 		return (error);
6070Sstevel@tonic-gate 	}
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate 	lofsfstype = fstyp;
6100Sstevel@tonic-gate 
6110Sstevel@tonic-gate 	if ((lofs_major = getudev()) == (major_t)-1) {
6120Sstevel@tonic-gate 		(void) vfs_freevfsops_by_type(fstyp);
6130Sstevel@tonic-gate 		cmn_err(CE_WARN, "lofsinit: Can't get unique device number.");
6140Sstevel@tonic-gate 		return (ENXIO);
6150Sstevel@tonic-gate 	}
6160Sstevel@tonic-gate 
6170Sstevel@tonic-gate 	lofs_minor = 0;
6180Sstevel@tonic-gate 	mutex_init(&lofs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
6190Sstevel@tonic-gate 
6200Sstevel@tonic-gate 	return (0);
6210Sstevel@tonic-gate }
622