10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51488Srsb * Common Development and Distribution License (the "License"). 61488Srsb * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 221488Srsb * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 280Sstevel@tonic-gate #include <sys/param.h> 290Sstevel@tonic-gate #include <sys/errno.h> 300Sstevel@tonic-gate #include <sys/vfs.h> 310Sstevel@tonic-gate #include <sys/vnode.h> 320Sstevel@tonic-gate #include <sys/uio.h> 330Sstevel@tonic-gate #include <sys/pathname.h> 340Sstevel@tonic-gate #include <sys/kmem.h> 350Sstevel@tonic-gate #include <sys/cred.h> 360Sstevel@tonic-gate #include <sys/statvfs.h> 370Sstevel@tonic-gate #include <sys/fs/lofs_info.h> 380Sstevel@tonic-gate #include <sys/fs/lofs_node.h> 390Sstevel@tonic-gate #include <sys/mount.h> 400Sstevel@tonic-gate #include <sys/mntent.h> 410Sstevel@tonic-gate #include <sys/mkdev.h> 421676Sjpk #include <sys/priv.h> 430Sstevel@tonic-gate #include <sys/sysmacros.h> 440Sstevel@tonic-gate #include <sys/systm.h> 450Sstevel@tonic-gate #include <sys/cmn_err.h> 460Sstevel@tonic-gate #include <sys/policy.h> 471676Sjpk #include <sys/tsol/label.h> 480Sstevel@tonic-gate #include "fs/fs_subr.h" 490Sstevel@tonic-gate 500Sstevel@tonic-gate /* 510Sstevel@tonic-gate * This is the loadable module wrapper. 520Sstevel@tonic-gate */ 530Sstevel@tonic-gate #include <sys/modctl.h> 540Sstevel@tonic-gate 550Sstevel@tonic-gate static mntopts_t lofs_mntopts; 560Sstevel@tonic-gate 570Sstevel@tonic-gate static int lofsinit(int, char *); 580Sstevel@tonic-gate 590Sstevel@tonic-gate static vfsdef_t vfw = { 600Sstevel@tonic-gate VFSDEF_VERSION, 610Sstevel@tonic-gate "lofs", 620Sstevel@tonic-gate lofsinit, 631488Srsb VSW_HASPROTO|VSW_STATS, 640Sstevel@tonic-gate &lofs_mntopts 650Sstevel@tonic-gate }; 660Sstevel@tonic-gate 670Sstevel@tonic-gate /* 680Sstevel@tonic-gate * Stuff needed to support "zonedevfs" mode. 690Sstevel@tonic-gate */ 700Sstevel@tonic-gate static major_t lofs_major; 710Sstevel@tonic-gate static minor_t lofs_minor; 720Sstevel@tonic-gate static kmutex_t lofs_minor_lock; 730Sstevel@tonic-gate 740Sstevel@tonic-gate /* 750Sstevel@tonic-gate * LOFS mount options table 760Sstevel@tonic-gate */ 770Sstevel@tonic-gate static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 780Sstevel@tonic-gate static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 790Sstevel@tonic-gate static char *zonedevfs_cancel[] = { MNTOPT_LOFS_NOZONEDEVFS, NULL }; 800Sstevel@tonic-gate static char *nozonedevfs_cancel[] = { MNTOPT_LOFS_ZONEDEVFS, NULL }; 810Sstevel@tonic-gate static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; 820Sstevel@tonic-gate static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; 830Sstevel@tonic-gate 840Sstevel@tonic-gate static mntopt_t mntopts[] = { 850Sstevel@tonic-gate /* 860Sstevel@tonic-gate * option name cancel option default arg flags 870Sstevel@tonic-gate * private data 880Sstevel@tonic-gate */ 890Sstevel@tonic-gate { MNTOPT_XATTR, xattr_cancel, NULL, 0, 900Sstevel@tonic-gate (void *)0 }, 910Sstevel@tonic-gate { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 920Sstevel@tonic-gate (void *)0 }, 930Sstevel@tonic-gate { MNTOPT_LOFS_ZONEDEVFS, zonedevfs_cancel, NULL, 0, 940Sstevel@tonic-gate (void *)0 }, 950Sstevel@tonic-gate { MNTOPT_LOFS_NOZONEDEVFS, nozonedevfs_cancel, NULL, 0, 960Sstevel@tonic-gate (void *)0 }, 970Sstevel@tonic-gate { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, 980Sstevel@tonic-gate (void *)0 }, 990Sstevel@tonic-gate { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, 1000Sstevel@tonic-gate (void *)0 }, 1010Sstevel@tonic-gate }; 1020Sstevel@tonic-gate 1030Sstevel@tonic-gate static mntopts_t lofs_mntopts = { 1040Sstevel@tonic-gate sizeof (mntopts) / sizeof (mntopt_t), 1050Sstevel@tonic-gate mntopts 1060Sstevel@tonic-gate }; 1070Sstevel@tonic-gate 1080Sstevel@tonic-gate /* 1090Sstevel@tonic-gate * Module linkage information for the kernel. 1100Sstevel@tonic-gate */ 1110Sstevel@tonic-gate 1120Sstevel@tonic-gate static struct modlfs modlfs = { 1130Sstevel@tonic-gate &mod_fsops, "filesystem for lofs", &vfw 1140Sstevel@tonic-gate }; 1150Sstevel@tonic-gate 1160Sstevel@tonic-gate static struct modlinkage modlinkage = { 1170Sstevel@tonic-gate MODREV_1, (void *)&modlfs, NULL 1180Sstevel@tonic-gate }; 1190Sstevel@tonic-gate 1200Sstevel@tonic-gate /* 1210Sstevel@tonic-gate * This is the module initialization routine. 1220Sstevel@tonic-gate */ 1231676Sjpk 1240Sstevel@tonic-gate int 1251676Sjpk _init(void) 1260Sstevel@tonic-gate { 1270Sstevel@tonic-gate int status; 1280Sstevel@tonic-gate 1290Sstevel@tonic-gate lofs_subrinit(); 1300Sstevel@tonic-gate status = mod_install(&modlinkage); 1310Sstevel@tonic-gate if (status != 0) { 1320Sstevel@tonic-gate /* 1330Sstevel@tonic-gate * Cleanup previously initialized work. 1340Sstevel@tonic-gate */ 1350Sstevel@tonic-gate lofs_subrfini(); 1360Sstevel@tonic-gate } 1370Sstevel@tonic-gate 1380Sstevel@tonic-gate return (status); 1390Sstevel@tonic-gate } 1400Sstevel@tonic-gate 1410Sstevel@tonic-gate /* 1420Sstevel@tonic-gate * Don't allow the lofs module to be unloaded for now. 1430Sstevel@tonic-gate * There is a memory leak if it gets unloaded. 1440Sstevel@tonic-gate */ 1451676Sjpk 1460Sstevel@tonic-gate int 1471676Sjpk _fini(void) 1480Sstevel@tonic-gate { 1490Sstevel@tonic-gate return (EBUSY); 1500Sstevel@tonic-gate } 1510Sstevel@tonic-gate 1520Sstevel@tonic-gate int 1530Sstevel@tonic-gate _info(struct modinfo *modinfop) 1540Sstevel@tonic-gate { 1550Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop)); 1560Sstevel@tonic-gate } 1570Sstevel@tonic-gate 1580Sstevel@tonic-gate 1590Sstevel@tonic-gate static int lofsfstype; 1600Sstevel@tonic-gate vfsops_t *lo_vfsops; 1610Sstevel@tonic-gate 1620Sstevel@tonic-gate /* 1630Sstevel@tonic-gate * lo mount vfsop 1640Sstevel@tonic-gate * Set up mount info record and attach it to vfs struct. 1650Sstevel@tonic-gate */ 1660Sstevel@tonic-gate /*ARGSUSED*/ 1670Sstevel@tonic-gate static int 1680Sstevel@tonic-gate lo_mount(struct vfs *vfsp, 1690Sstevel@tonic-gate struct vnode *vp, 1700Sstevel@tonic-gate struct mounta *uap, 1710Sstevel@tonic-gate struct cred *cr) 1720Sstevel@tonic-gate { 1730Sstevel@tonic-gate int error; 1740Sstevel@tonic-gate struct vnode *srootvp = NULL; /* the server's root */ 1750Sstevel@tonic-gate struct vnode *realrootvp; 1760Sstevel@tonic-gate struct loinfo *li; 1770Sstevel@tonic-gate int is_zonedevfs = 0; 1780Sstevel@tonic-gate int nodev; 1790Sstevel@tonic-gate 1800Sstevel@tonic-gate nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); 1810Sstevel@tonic-gate 1820Sstevel@tonic-gate if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) 1830Sstevel@tonic-gate return (EPERM); 1840Sstevel@tonic-gate 1850Sstevel@tonic-gate /* 1860Sstevel@tonic-gate * Loopback devices which get "nodevices" added can be done without 1870Sstevel@tonic-gate * "nodevices" set because we cannot import devices into a zone 1880Sstevel@tonic-gate * with loopback. Note that we have all zone privileges when 1890Sstevel@tonic-gate * this happens; if not, we'd have gotten "nosuid". 1900Sstevel@tonic-gate */ 1910Sstevel@tonic-gate if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 1920Sstevel@tonic-gate vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); 1930Sstevel@tonic-gate 1940Sstevel@tonic-gate /* 1950Sstevel@tonic-gate * We must ensure that only the global zone applies the 'zonedevfs' 1960Sstevel@tonic-gate * option; we don't want non-global zones to be able to establish 1970Sstevel@tonic-gate * lofs mounts using the special dev_t we use to ensure that the 1980Sstevel@tonic-gate * contents of a zone's /dev cannot be victim to link(2) or rename(2). 1990Sstevel@tonic-gate * See below, where we set all of this up. 2000Sstevel@tonic-gate * 2010Sstevel@tonic-gate * Since this is more like a privilege check, we use crgetzoneid(cr) 2020Sstevel@tonic-gate * instead of getzoneid(). 2030Sstevel@tonic-gate */ 2040Sstevel@tonic-gate is_zonedevfs = vfs_optionisset(vfsp, MNTOPT_LOFS_ZONEDEVFS, NULL); 2050Sstevel@tonic-gate if (crgetzoneid(cr) != GLOBAL_ZONEID && is_zonedevfs) 2060Sstevel@tonic-gate return (EPERM); 2070Sstevel@tonic-gate 2080Sstevel@tonic-gate mutex_enter(&vp->v_lock); 2090Sstevel@tonic-gate if (!(uap->flags & MS_OVERLAY) && 2101676Sjpk (vp->v_count != 1 || (vp->v_flag & VROOT))) { 2110Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2120Sstevel@tonic-gate return (EBUSY); 2130Sstevel@tonic-gate } 2140Sstevel@tonic-gate mutex_exit(&vp->v_lock); 2150Sstevel@tonic-gate 2160Sstevel@tonic-gate /* 2170Sstevel@tonic-gate * Find real root, and make vfs point to real vfs 2180Sstevel@tonic-gate */ 2190Sstevel@tonic-gate if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? 2200Sstevel@tonic-gate UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, 2210Sstevel@tonic-gate &realrootvp)) 2220Sstevel@tonic-gate return (error); 2230Sstevel@tonic-gate 2240Sstevel@tonic-gate /* 2251676Sjpk * Enforce MAC policy if needed. 2261676Sjpk * 2271676Sjpk * Loopback mounts must not allow writing up. The dominance test 2281676Sjpk * is intended to prevent a global zone caller from accidentally 2291676Sjpk * creating write-up conditions between two labeled zones. 2301676Sjpk * Local zones can't violate MAC on their own without help from 2311676Sjpk * the global zone because they can't name a pathname that 2321676Sjpk * they don't already have. 2331676Sjpk * 2341676Sjpk * The special case check for the NET_MAC_AWARE process flag is 2351676Sjpk * to support the case of the automounter in the global zone. We 2361676Sjpk * permit automounting of local zone directories such as home 2371676Sjpk * directories, into the global zone as required by setlabel, 2381676Sjpk * zonecopy, and saving of desktop sessions. Such mounts are 2391676Sjpk * trusted not to expose the contents of one zone's directories 2401676Sjpk * to another by leaking them through the global zone. 2411676Sjpk */ 2421676Sjpk if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) { 243*1748Srica char specname[MAXPATHLEN]; 244*1748Srica zone_t *from_zptr; 245*1748Srica zone_t *to_zptr; 2461676Sjpk 247*1748Srica if (vnodetopath(NULL, realrootvp, specname, 248*1748Srica sizeof (specname), CRED()) != 0) 249*1748Srica return (EACCES); 250*1748Srica 2511676Sjpk from_zptr = zone_find_by_path(specname); 2521676Sjpk to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 2531676Sjpk 2541676Sjpk /* 2551676Sjpk * Special case for zone devfs: the zone for /dev will 2561676Sjpk * incorrectly appear as the global zone since it's not 2571676Sjpk * under the zone rootpath. So for zone devfs check allow 2581676Sjpk * read-write mounts. 2591676Sjpk */ 2601676Sjpk 2611676Sjpk if (from_zptr != to_zptr && !is_zonedevfs) { 2621676Sjpk /* 2631676Sjpk * We know at this point that the labels aren't equal 2641676Sjpk * because the zone pointers aren't equal, and zones 2651676Sjpk * can't share a label. 2661676Sjpk * 2671676Sjpk * If the source is the global zone then making 2681676Sjpk * it available to a local zone must be done in 2691676Sjpk * read-only mode as the label will become admin_low. 2701676Sjpk * 2711676Sjpk * If it is a mount between local zones then if 2721676Sjpk * the current process is in the global zone and has 2731676Sjpk * the NET_MAC_AWARE flag, then regular read-write 2741676Sjpk * access is allowed. If it's in some other zone, but 2751676Sjpk * the label on the mount point dominates the original 2761676Sjpk * source, then allow the mount as read-only 2771676Sjpk * ("read-down"). 2781676Sjpk */ 2791676Sjpk if (from_zptr->zone_id == GLOBAL_ZONEID) { 2801676Sjpk /* make the mount read-only */ 2811676Sjpk vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 2821676Sjpk } else { /* cross-zone mount */ 2831676Sjpk if (to_zptr->zone_id == GLOBAL_ZONEID && 2841676Sjpk /* LINTED: no consequent */ 2851676Sjpk getpflags(NET_MAC_AWARE, cr) != 0) { 2861676Sjpk /* Allow the mount as read-write */ 2871676Sjpk } else if (bldominates( 2881676Sjpk label2bslabel(to_zptr->zone_slabel), 2891676Sjpk label2bslabel(from_zptr->zone_slabel))) { 2901676Sjpk /* make the mount read-only */ 2911676Sjpk vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 2921676Sjpk } else { 2931676Sjpk zone_rele(to_zptr); 2941676Sjpk zone_rele(from_zptr); 2951676Sjpk return (EACCES); 2961676Sjpk } 2971676Sjpk } 2981676Sjpk } 2991676Sjpk zone_rele(to_zptr); 3001676Sjpk zone_rele(from_zptr); 3011676Sjpk } 3021676Sjpk 3031676Sjpk /* 3040Sstevel@tonic-gate * realrootvp may be an AUTOFS node, in which case we 3050Sstevel@tonic-gate * perform a VOP_ACCESS() to trigger the mount of the 3060Sstevel@tonic-gate * intended filesystem, so we loopback mount the intended 3070Sstevel@tonic-gate * filesystem instead of the AUTOFS filesystem. 3080Sstevel@tonic-gate */ 3090Sstevel@tonic-gate (void) VOP_ACCESS(realrootvp, 0, 0, cr); 3100Sstevel@tonic-gate 3110Sstevel@tonic-gate /* 3120Sstevel@tonic-gate * We're interested in the top most filesystem. 3130Sstevel@tonic-gate * This is specially important when uap->spec is a trigger 3140Sstevel@tonic-gate * AUTOFS node, since we're really interested in mounting the 3150Sstevel@tonic-gate * filesystem AUTOFS mounted as result of the VOP_ACCESS() 3160Sstevel@tonic-gate * call not the AUTOFS node itself. 3170Sstevel@tonic-gate */ 3180Sstevel@tonic-gate if (vn_mountedvfs(realrootvp) != NULL) { 3190Sstevel@tonic-gate if (error = traverse(&realrootvp)) { 3200Sstevel@tonic-gate VN_RELE(realrootvp); 3210Sstevel@tonic-gate return (error); 3220Sstevel@tonic-gate } 3230Sstevel@tonic-gate } 3240Sstevel@tonic-gate 3250Sstevel@tonic-gate /* 3260Sstevel@tonic-gate * Allocate a vfs info struct and attach it 3270Sstevel@tonic-gate */ 3280Sstevel@tonic-gate li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); 3290Sstevel@tonic-gate li->li_realvfs = realrootvp->v_vfsp; 3300Sstevel@tonic-gate li->li_mountvfs = vfsp; 3310Sstevel@tonic-gate 3320Sstevel@tonic-gate /* 3330Sstevel@tonic-gate * Set mount flags to be inherited by loopback vfs's 3340Sstevel@tonic-gate */ 3350Sstevel@tonic-gate if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 3360Sstevel@tonic-gate li->li_mflag |= VFS_RDONLY; 3370Sstevel@tonic-gate } 3380Sstevel@tonic-gate if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 3390Sstevel@tonic-gate li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); 3400Sstevel@tonic-gate } 3410Sstevel@tonic-gate if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 3420Sstevel@tonic-gate li->li_mflag |= VFS_NODEVICES; 3430Sstevel@tonic-gate } 3440Sstevel@tonic-gate if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 3450Sstevel@tonic-gate li->li_mflag |= VFS_NOSETUID; 3460Sstevel@tonic-gate } 3470Sstevel@tonic-gate /* 3480Sstevel@tonic-gate * Permissive flags are added to the "deny" bitmap. 3490Sstevel@tonic-gate */ 3500Sstevel@tonic-gate if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 3510Sstevel@tonic-gate li->li_dflag |= VFS_XATTR; 3520Sstevel@tonic-gate } 3530Sstevel@tonic-gate if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 3540Sstevel@tonic-gate li->li_dflag |= VFS_NBMAND; 3550Sstevel@tonic-gate } 3560Sstevel@tonic-gate 3570Sstevel@tonic-gate /* 3580Sstevel@tonic-gate * Propagate inheritable mount flags from the real vfs. 3590Sstevel@tonic-gate */ 3600Sstevel@tonic-gate if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && 3610Sstevel@tonic-gate !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 3620Sstevel@tonic-gate vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 3630Sstevel@tonic-gate VFS_NODISPLAY); 3640Sstevel@tonic-gate if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && 3650Sstevel@tonic-gate !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 3660Sstevel@tonic-gate vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 3670Sstevel@tonic-gate VFS_NODISPLAY); 3680Sstevel@tonic-gate if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && 3690Sstevel@tonic-gate !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 3700Sstevel@tonic-gate vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, 3710Sstevel@tonic-gate VFS_NODISPLAY); 3720Sstevel@tonic-gate /* 3730Sstevel@tonic-gate * Permissive flags such as VFS_XATTR, as opposed to restrictive flags 3740Sstevel@tonic-gate * such as VFS_RDONLY, are handled differently. An explicit 3750Sstevel@tonic-gate * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. 3760Sstevel@tonic-gate */ 3770Sstevel@tonic-gate if ((li->li_realvfs->vfs_flag & VFS_XATTR) && 3780Sstevel@tonic-gate !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && 3790Sstevel@tonic-gate !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 3800Sstevel@tonic-gate vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 3810Sstevel@tonic-gate VFS_NODISPLAY); 3820Sstevel@tonic-gate if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && 3830Sstevel@tonic-gate !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && 3840Sstevel@tonic-gate !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 3850Sstevel@tonic-gate vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 3860Sstevel@tonic-gate VFS_NODISPLAY); 3870Sstevel@tonic-gate 3880Sstevel@tonic-gate li->li_refct = 0; 3890Sstevel@tonic-gate vfsp->vfs_data = (caddr_t)li; 3900Sstevel@tonic-gate vfsp->vfs_bcount = 0; 3910Sstevel@tonic-gate vfsp->vfs_fstype = lofsfstype; 3920Sstevel@tonic-gate vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; 3930Sstevel@tonic-gate 3940Sstevel@tonic-gate /* 3950Sstevel@tonic-gate * Test to see if we need to be in "zone /dev" mode. In zonedevfs 3960Sstevel@tonic-gate * mode, we pull a nasty trick; we make sure that the lofs dev_t does 3970Sstevel@tonic-gate * *not* reflect the underlying device, so that no renames or links 3980Sstevel@tonic-gate * can occur to or from the /dev hierarchy. 3990Sstevel@tonic-gate */ 4000Sstevel@tonic-gate if (is_zonedevfs) { 4010Sstevel@tonic-gate dev_t dev; 4020Sstevel@tonic-gate 4030Sstevel@tonic-gate mutex_enter(&lofs_minor_lock); 4040Sstevel@tonic-gate do { 4050Sstevel@tonic-gate lofs_minor = (lofs_minor + 1) & MAXMIN32; 4060Sstevel@tonic-gate dev = makedevice(lofs_major, lofs_minor); 4070Sstevel@tonic-gate } while (vfs_devismounted(dev)); 4080Sstevel@tonic-gate mutex_exit(&lofs_minor_lock); 4090Sstevel@tonic-gate 4100Sstevel@tonic-gate vfsp->vfs_dev = dev; 4110Sstevel@tonic-gate vfs_make_fsid(&vfsp->vfs_fsid, dev, lofsfstype); 4120Sstevel@tonic-gate 4130Sstevel@tonic-gate li->li_flag |= LO_ZONEDEVFS; 4140Sstevel@tonic-gate } else { 4150Sstevel@tonic-gate vfsp->vfs_dev = li->li_realvfs->vfs_dev; 4160Sstevel@tonic-gate vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; 4170Sstevel@tonic-gate vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; 4180Sstevel@tonic-gate } 4190Sstevel@tonic-gate 4200Sstevel@tonic-gate if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { 4210Sstevel@tonic-gate li->li_flag |= LO_NOSUB; 4220Sstevel@tonic-gate } 4230Sstevel@tonic-gate 4240Sstevel@tonic-gate /* 4250Sstevel@tonic-gate * Setup the hashtable. If the root of this mount isn't a directory, 4260Sstevel@tonic-gate * there's no point in allocating a large hashtable. A table with one 4270Sstevel@tonic-gate * bucket is sufficient. 4280Sstevel@tonic-gate */ 4290Sstevel@tonic-gate if (realrootvp->v_type != VDIR) 4300Sstevel@tonic-gate lsetup(li, 1); 4310Sstevel@tonic-gate else 4320Sstevel@tonic-gate lsetup(li, 0); 4330Sstevel@tonic-gate 4340Sstevel@tonic-gate /* 4350Sstevel@tonic-gate * Make the root vnode 4360Sstevel@tonic-gate */ 437324Sowenr srootvp = makelonode(realrootvp, li, 0); 4380Sstevel@tonic-gate srootvp->v_flag |= VROOT; 4390Sstevel@tonic-gate li->li_rootvp = srootvp; 4400Sstevel@tonic-gate 4410Sstevel@tonic-gate #ifdef LODEBUG 4420Sstevel@tonic-gate lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", 4430Sstevel@tonic-gate vfsp, li->li_realvfs, srootvp, realrootvp, li); 4440Sstevel@tonic-gate #endif 4450Sstevel@tonic-gate return (0); 4460Sstevel@tonic-gate } 4470Sstevel@tonic-gate 4480Sstevel@tonic-gate /* 4490Sstevel@tonic-gate * Undo loopback mount 4500Sstevel@tonic-gate */ 4510Sstevel@tonic-gate static int 4520Sstevel@tonic-gate lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) 4530Sstevel@tonic-gate { 4540Sstevel@tonic-gate struct loinfo *li; 4550Sstevel@tonic-gate 4560Sstevel@tonic-gate if (secpolicy_fs_unmount(cr, vfsp) != 0) 4570Sstevel@tonic-gate return (EPERM); 4580Sstevel@tonic-gate 4590Sstevel@tonic-gate /* 4600Sstevel@tonic-gate * Forced unmount is not supported by this file system 4610Sstevel@tonic-gate * and thus, ENOTSUP, is being returned. 4620Sstevel@tonic-gate */ 4630Sstevel@tonic-gate if (flag & MS_FORCE) 4640Sstevel@tonic-gate return (ENOTSUP); 4650Sstevel@tonic-gate 4660Sstevel@tonic-gate li = vtoli(vfsp); 4670Sstevel@tonic-gate #ifdef LODEBUG 4680Sstevel@tonic-gate lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); 4690Sstevel@tonic-gate #endif 4700Sstevel@tonic-gate if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { 4710Sstevel@tonic-gate #ifdef LODEBUG 4720Sstevel@tonic-gate lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, 4730Sstevel@tonic-gate li->li_rootvp->v_count); 4740Sstevel@tonic-gate #endif 4750Sstevel@tonic-gate return (EBUSY); 4760Sstevel@tonic-gate } 4770Sstevel@tonic-gate VN_RELE(li->li_rootvp); 4780Sstevel@tonic-gate return (0); 4790Sstevel@tonic-gate } 4800Sstevel@tonic-gate 4810Sstevel@tonic-gate /* 4820Sstevel@tonic-gate * Find root of lofs mount. 4830Sstevel@tonic-gate */ 4840Sstevel@tonic-gate static int 4850Sstevel@tonic-gate lo_root(struct vfs *vfsp, struct vnode **vpp) 4860Sstevel@tonic-gate { 4870Sstevel@tonic-gate *vpp = vtoli(vfsp)->li_rootvp; 4880Sstevel@tonic-gate #ifdef LODEBUG 4890Sstevel@tonic-gate lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); 4900Sstevel@tonic-gate #endif 4910Sstevel@tonic-gate /* 4920Sstevel@tonic-gate * If the root of the filesystem is a special file, return the specvp 4930Sstevel@tonic-gate * version of the vnode. We don't save the specvp vnode in our 4940Sstevel@tonic-gate * hashtable since that's exclusively for lnodes. 4950Sstevel@tonic-gate */ 4960Sstevel@tonic-gate if (IS_DEVVP(*vpp)) { 4970Sstevel@tonic-gate struct vnode *svp; 4980Sstevel@tonic-gate 4990Sstevel@tonic-gate svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); 5000Sstevel@tonic-gate if (svp == NULL) 5010Sstevel@tonic-gate return (ENOSYS); 5020Sstevel@tonic-gate *vpp = svp; 5030Sstevel@tonic-gate } else { 5040Sstevel@tonic-gate VN_HOLD(*vpp); 5050Sstevel@tonic-gate } 5060Sstevel@tonic-gate 5070Sstevel@tonic-gate return (0); 5080Sstevel@tonic-gate } 5090Sstevel@tonic-gate 5100Sstevel@tonic-gate /* 5110Sstevel@tonic-gate * Get file system statistics. 5120Sstevel@tonic-gate */ 5130Sstevel@tonic-gate static int 5140Sstevel@tonic-gate lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) 5150Sstevel@tonic-gate { 5160Sstevel@tonic-gate vnode_t *realrootvp; 5170Sstevel@tonic-gate 5180Sstevel@tonic-gate #ifdef LODEBUG 5190Sstevel@tonic-gate lo_dprint(4, "lostatvfs %p\n", vfsp); 5200Sstevel@tonic-gate #endif 5210Sstevel@tonic-gate /* 5220Sstevel@tonic-gate * Using realrootvp->v_vfsp (instead of the realvfsp that was 5230Sstevel@tonic-gate * cached) is necessary to make lofs work woth forced UFS unmounts. 5240Sstevel@tonic-gate * In the case of a forced unmount, UFS stores a set of dummy vfsops 5250Sstevel@tonic-gate * in all the (i)vnodes in the filesystem. The dummy ops simply 5260Sstevel@tonic-gate * returns back EIO. 5270Sstevel@tonic-gate */ 5280Sstevel@tonic-gate (void) lo_realvfs(vfsp, &realrootvp); 5290Sstevel@tonic-gate if (realrootvp != NULL) 5300Sstevel@tonic-gate return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); 5310Sstevel@tonic-gate else 5320Sstevel@tonic-gate return (EIO); 5330Sstevel@tonic-gate } 5340Sstevel@tonic-gate 5350Sstevel@tonic-gate /* 5360Sstevel@tonic-gate * LOFS doesn't have any data or metadata to flush, pending I/O on the 5370Sstevel@tonic-gate * underlying filesystem will be flushed when such filesystem is synched. 5380Sstevel@tonic-gate */ 5390Sstevel@tonic-gate /* ARGSUSED */ 5400Sstevel@tonic-gate static int 5410Sstevel@tonic-gate lo_sync(struct vfs *vfsp, 5420Sstevel@tonic-gate short flag, 5430Sstevel@tonic-gate struct cred *cr) 5440Sstevel@tonic-gate { 5450Sstevel@tonic-gate #ifdef LODEBUG 5460Sstevel@tonic-gate lo_dprint(4, "lo_sync: %p\n", vfsp); 5470Sstevel@tonic-gate #endif 5480Sstevel@tonic-gate return (0); 5490Sstevel@tonic-gate } 5500Sstevel@tonic-gate 5510Sstevel@tonic-gate /* 5520Sstevel@tonic-gate * Obtain the vnode from the underlying filesystem. 5530Sstevel@tonic-gate */ 5540Sstevel@tonic-gate static int 5550Sstevel@tonic-gate lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 5560Sstevel@tonic-gate { 5570Sstevel@tonic-gate vnode_t *realrootvp; 5580Sstevel@tonic-gate 5590Sstevel@tonic-gate #ifdef LODEBUG 5600Sstevel@tonic-gate lo_dprint(4, "lo_vget: %p\n", vfsp); 5610Sstevel@tonic-gate #endif 5620Sstevel@tonic-gate (void) lo_realvfs(vfsp, &realrootvp); 5630Sstevel@tonic-gate if (realrootvp != NULL) 5640Sstevel@tonic-gate return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); 5650Sstevel@tonic-gate else 5660Sstevel@tonic-gate return (EIO); 5670Sstevel@tonic-gate } 5680Sstevel@tonic-gate 5690Sstevel@tonic-gate /* 5700Sstevel@tonic-gate * Free mount-specific data. 5710Sstevel@tonic-gate */ 5720Sstevel@tonic-gate static void 5730Sstevel@tonic-gate lo_freevfs(struct vfs *vfsp) 5740Sstevel@tonic-gate { 5750Sstevel@tonic-gate struct loinfo *li = vtoli(vfsp); 5760Sstevel@tonic-gate 5770Sstevel@tonic-gate ldestroy(li); 5780Sstevel@tonic-gate kmem_free(li, sizeof (struct loinfo)); 5790Sstevel@tonic-gate } 5800Sstevel@tonic-gate 5810Sstevel@tonic-gate static int 5820Sstevel@tonic-gate lofsinit(int fstyp, char *name) 5830Sstevel@tonic-gate { 5840Sstevel@tonic-gate static const fs_operation_def_t lo_vfsops_template[] = { 5850Sstevel@tonic-gate VFSNAME_MOUNT, lo_mount, 5860Sstevel@tonic-gate VFSNAME_UNMOUNT, lo_unmount, 5870Sstevel@tonic-gate VFSNAME_ROOT, lo_root, 5880Sstevel@tonic-gate VFSNAME_STATVFS, lo_statvfs, 5890Sstevel@tonic-gate VFSNAME_SYNC, (fs_generic_func_p) lo_sync, 5900Sstevel@tonic-gate VFSNAME_VGET, lo_vget, 5910Sstevel@tonic-gate VFSNAME_FREEVFS, (fs_generic_func_p) lo_freevfs, 5920Sstevel@tonic-gate NULL, NULL 5930Sstevel@tonic-gate }; 5940Sstevel@tonic-gate int error; 5950Sstevel@tonic-gate 5960Sstevel@tonic-gate error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); 5970Sstevel@tonic-gate if (error != 0) { 5980Sstevel@tonic-gate cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); 5990Sstevel@tonic-gate return (error); 6000Sstevel@tonic-gate } 6010Sstevel@tonic-gate 6020Sstevel@tonic-gate error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); 6030Sstevel@tonic-gate if (error != 0) { 6040Sstevel@tonic-gate (void) vfs_freevfsops_by_type(fstyp); 6050Sstevel@tonic-gate cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); 6060Sstevel@tonic-gate return (error); 6070Sstevel@tonic-gate } 6080Sstevel@tonic-gate 6090Sstevel@tonic-gate lofsfstype = fstyp; 6100Sstevel@tonic-gate 6110Sstevel@tonic-gate if ((lofs_major = getudev()) == (major_t)-1) { 6120Sstevel@tonic-gate (void) vfs_freevfsops_by_type(fstyp); 6130Sstevel@tonic-gate cmn_err(CE_WARN, "lofsinit: Can't get unique device number."); 6140Sstevel@tonic-gate return (ENXIO); 6150Sstevel@tonic-gate } 6160Sstevel@tonic-gate 6170Sstevel@tonic-gate lofs_minor = 0; 6180Sstevel@tonic-gate mutex_init(&lofs_minor_lock, NULL, MUTEX_DEFAULT, NULL); 6190Sstevel@tonic-gate 6200Sstevel@tonic-gate return (0); 6210Sstevel@tonic-gate } 622