xref: /onnv-gate/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c (revision 12894:3ca11e9f39b7)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
 */
240Sstevel@tonic-gate 
250Sstevel@tonic-gate #include <sys/types.h>
260Sstevel@tonic-gate #include <sys/param.h>
270Sstevel@tonic-gate #include <sys/sysmacros.h>
280Sstevel@tonic-gate #include <sys/kmem.h>
290Sstevel@tonic-gate #include <sys/time.h>
300Sstevel@tonic-gate #include <sys/pathname.h>
310Sstevel@tonic-gate #include <sys/vfs.h>
323898Srsb #include <sys/vfs_opreg.h>
330Sstevel@tonic-gate #include <sys/vnode.h>
340Sstevel@tonic-gate #include <sys/stat.h>
350Sstevel@tonic-gate #include <sys/uio.h>
360Sstevel@tonic-gate #include <sys/stat.h>
370Sstevel@tonic-gate #include <sys/errno.h>
380Sstevel@tonic-gate #include <sys/cmn_err.h>
390Sstevel@tonic-gate #include <sys/cred.h>
400Sstevel@tonic-gate #include <sys/statvfs.h>
410Sstevel@tonic-gate #include <sys/mount.h>
420Sstevel@tonic-gate #include <sys/debug.h>
430Sstevel@tonic-gate #include <sys/systm.h>
440Sstevel@tonic-gate #include <sys/mntent.h>
450Sstevel@tonic-gate #include <fs/fs_subr.h>
460Sstevel@tonic-gate #include <vm/page.h>
470Sstevel@tonic-gate #include <vm/anon.h>
480Sstevel@tonic-gate #include <sys/model.h>
490Sstevel@tonic-gate #include <sys/policy.h>
500Sstevel@tonic-gate 
510Sstevel@tonic-gate #include <sys/fs/swapnode.h>
520Sstevel@tonic-gate #include <sys/fs/tmp.h>
530Sstevel@tonic-gate #include <sys/fs/tmpnode.h>
540Sstevel@tonic-gate 
/* Filesystem type index for tmpfs; recorded by tmpfsinit(). */
static int tmpfsfstype;

/*
 * tmpfs vfs operations.
 */
static int tmpfsinit(int fstype, char *name);
static int tmp_mount(struct vfs *vfsp, struct vnode *mvp,
	struct mounta *uap, struct cred *cr);
static int tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr);
static int tmp_root(struct vfs *vfsp, struct vnode **vpp);
static int tmp_statvfs(struct vfs *vfsp, struct statvfs64 *sbp);
static int tmp_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
670Sstevel@tonic-gate 
680Sstevel@tonic-gate /*
690Sstevel@tonic-gate  * Loadable module wrapper
700Sstevel@tonic-gate  */
710Sstevel@tonic-gate #include <sys/modctl.h>
720Sstevel@tonic-gate 
730Sstevel@tonic-gate static mntopts_t tmpfs_proto_opttbl;
740Sstevel@tonic-gate 
750Sstevel@tonic-gate static vfsdef_t vfw = {
760Sstevel@tonic-gate 	VFSDEF_VERSION,
770Sstevel@tonic-gate 	"tmpfs",
780Sstevel@tonic-gate 	tmpfsinit,
7912633Sjohn.levon@sun.com 	VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT,
800Sstevel@tonic-gate 	&tmpfs_proto_opttbl
810Sstevel@tonic-gate };
820Sstevel@tonic-gate 
830Sstevel@tonic-gate /*
840Sstevel@tonic-gate  * in-kernel mnttab options
850Sstevel@tonic-gate  */
860Sstevel@tonic-gate static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
870Sstevel@tonic-gate static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
880Sstevel@tonic-gate 
890Sstevel@tonic-gate static mntopt_t tmpfs_options[] = {
900Sstevel@tonic-gate 	/* Option name		Cancel Opt	Arg	Flags		Data */
910Sstevel@tonic-gate 	{ MNTOPT_XATTR,		xattr_cancel,	NULL,	MO_DEFAULT,	NULL},
920Sstevel@tonic-gate 	{ MNTOPT_NOXATTR,	noxattr_cancel,	NULL,	NULL,		NULL},
930Sstevel@tonic-gate 	{ "size",		NULL,		"0",	MO_HASVALUE,	NULL}
940Sstevel@tonic-gate };
950Sstevel@tonic-gate 
960Sstevel@tonic-gate 
970Sstevel@tonic-gate static mntopts_t tmpfs_proto_opttbl = {
980Sstevel@tonic-gate 	sizeof (tmpfs_options) / sizeof (mntopt_t),
990Sstevel@tonic-gate 	tmpfs_options
1000Sstevel@tonic-gate };
1010Sstevel@tonic-gate 
1020Sstevel@tonic-gate /*
1030Sstevel@tonic-gate  * Module linkage information
1040Sstevel@tonic-gate  */
1050Sstevel@tonic-gate static struct modlfs modlfs = {
1060Sstevel@tonic-gate 	&mod_fsops, "filesystem for tmpfs", &vfw
1070Sstevel@tonic-gate };
1080Sstevel@tonic-gate 
1090Sstevel@tonic-gate static struct modlinkage modlinkage = {
1100Sstevel@tonic-gate 	MODREV_1, &modlfs, NULL
1110Sstevel@tonic-gate };
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate int
_init()1140Sstevel@tonic-gate _init()
1150Sstevel@tonic-gate {
1160Sstevel@tonic-gate 	return (mod_install(&modlinkage));
1170Sstevel@tonic-gate }
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate int
_fini()1200Sstevel@tonic-gate _fini()
1210Sstevel@tonic-gate {
1220Sstevel@tonic-gate 	int error;
1230Sstevel@tonic-gate 
1240Sstevel@tonic-gate 	error = mod_remove(&modlinkage);
1250Sstevel@tonic-gate 	if (error)
1260Sstevel@tonic-gate 		return (error);
1270Sstevel@tonic-gate 	/*
1280Sstevel@tonic-gate 	 * Tear down the operations vectors
1290Sstevel@tonic-gate 	 */
1300Sstevel@tonic-gate 	(void) vfs_freevfsops_by_type(tmpfsfstype);
1310Sstevel@tonic-gate 	vn_freevnodeops(tmp_vnodeops);
1320Sstevel@tonic-gate 	return (0);
1330Sstevel@tonic-gate }
1340Sstevel@tonic-gate 
1350Sstevel@tonic-gate int
_info(struct modinfo * modinfop)1360Sstevel@tonic-gate _info(struct modinfo *modinfop)
1370Sstevel@tonic-gate {
1380Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
1390Sstevel@tonic-gate }
1400Sstevel@tonic-gate 
1410Sstevel@tonic-gate /*
1420Sstevel@tonic-gate  * The following are patchable variables limiting the amount of system
1430Sstevel@tonic-gate  * resources tmpfs can use.
1440Sstevel@tonic-gate  *
1450Sstevel@tonic-gate  * tmpfs_maxkmem limits the amount of kernel kmem_alloc memory
1460Sstevel@tonic-gate  * tmpfs can use for it's data structures (e.g. tmpnodes, directory entries)
1470Sstevel@tonic-gate  * It is not determined by setting a hard limit but rather as a percentage of
1480Sstevel@tonic-gate  * physical memory which is determined when tmpfs is first used in the system.
1490Sstevel@tonic-gate  *
1500Sstevel@tonic-gate  * tmpfs_minfree is the minimum amount of swap space that tmpfs leaves for
1510Sstevel@tonic-gate  * the rest of the system.  In other words, if the amount of free swap space
1520Sstevel@tonic-gate  * in the system (i.e. anoninfo.ani_free) drops below tmpfs_minfree, tmpfs
1530Sstevel@tonic-gate  * anon allocations will fail.
1540Sstevel@tonic-gate  *
1550Sstevel@tonic-gate  * There is also a per mount limit on the amount of swap space
1560Sstevel@tonic-gate  * (tmount.tm_anonmax) settable via a mount option.
1570Sstevel@tonic-gate  */
1580Sstevel@tonic-gate size_t tmpfs_maxkmem = 0;
1590Sstevel@tonic-gate size_t tmpfs_minfree = 0;
1600Sstevel@tonic-gate size_t tmp_kmemspace;		/* bytes of kernel heap used by all tmpfs */
1610Sstevel@tonic-gate 
1620Sstevel@tonic-gate static major_t tmpfs_major;
1630Sstevel@tonic-gate static minor_t tmpfs_minor;
1640Sstevel@tonic-gate static kmutex_t	tmpfs_minor_lock;
1650Sstevel@tonic-gate 
1660Sstevel@tonic-gate /*
1670Sstevel@tonic-gate  * initialize global tmpfs locks and such
1680Sstevel@tonic-gate  * called when loading tmpfs module
1690Sstevel@tonic-gate  */
1700Sstevel@tonic-gate static int
tmpfsinit(int fstype,char * name)1710Sstevel@tonic-gate tmpfsinit(int fstype, char *name)
1720Sstevel@tonic-gate {
1730Sstevel@tonic-gate 	static const fs_operation_def_t tmp_vfsops_template[] = {
1743898Srsb 		VFSNAME_MOUNT,		{ .vfs_mount = tmp_mount },
1753898Srsb 		VFSNAME_UNMOUNT,	{ .vfs_unmount = tmp_unmount },
1763898Srsb 		VFSNAME_ROOT,		{ .vfs_root = tmp_root },
1773898Srsb 		VFSNAME_STATVFS,	{ .vfs_statvfs = tmp_statvfs },
1783898Srsb 		VFSNAME_VGET,		{ .vfs_vget = tmp_vget },
1793898Srsb 		NULL,			NULL
1800Sstevel@tonic-gate 	};
1810Sstevel@tonic-gate 	int error;
1820Sstevel@tonic-gate 	extern  void    tmpfs_hash_init();
1830Sstevel@tonic-gate 
1840Sstevel@tonic-gate 	tmpfs_hash_init();
1850Sstevel@tonic-gate 	tmpfsfstype = fstype;
1860Sstevel@tonic-gate 	ASSERT(tmpfsfstype != 0);
1870Sstevel@tonic-gate 
1880Sstevel@tonic-gate 	error = vfs_setfsops(fstype, tmp_vfsops_template, NULL);
1890Sstevel@tonic-gate 	if (error != 0) {
1900Sstevel@tonic-gate 		cmn_err(CE_WARN, "tmpfsinit: bad vfs ops template");
1910Sstevel@tonic-gate 		return (error);
1920Sstevel@tonic-gate 	}
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate 	error = vn_make_ops(name, tmp_vnodeops_template, &tmp_vnodeops);
1950Sstevel@tonic-gate 	if (error != 0) {
1960Sstevel@tonic-gate 		(void) vfs_freevfsops_by_type(fstype);
1970Sstevel@tonic-gate 		cmn_err(CE_WARN, "tmpfsinit: bad vnode ops template");
1980Sstevel@tonic-gate 		return (error);
1990Sstevel@tonic-gate 	}
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate 	/*
2020Sstevel@tonic-gate 	 * tmpfs_minfree doesn't need to be some function of configured
2030Sstevel@tonic-gate 	 * swap space since it really is an absolute limit of swap space
2040Sstevel@tonic-gate 	 * which still allows other processes to execute.
2050Sstevel@tonic-gate 	 */
2060Sstevel@tonic-gate 	if (tmpfs_minfree == 0) {
2070Sstevel@tonic-gate 		/*
2080Sstevel@tonic-gate 		 * Set if not patched
2090Sstevel@tonic-gate 		 */
2100Sstevel@tonic-gate 		tmpfs_minfree = btopr(TMPMINFREE);
2110Sstevel@tonic-gate 	}
2120Sstevel@tonic-gate 
2130Sstevel@tonic-gate 	/*
2140Sstevel@tonic-gate 	 * The maximum amount of space tmpfs can allocate is
2150Sstevel@tonic-gate 	 * TMPMAXPROCKMEM percent of kernel memory
2160Sstevel@tonic-gate 	 */
2170Sstevel@tonic-gate 	if (tmpfs_maxkmem == 0)
2180Sstevel@tonic-gate 		tmpfs_maxkmem = MAX(PAGESIZE, kmem_maxavail() / TMPMAXFRACKMEM);
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate 	if ((tmpfs_major = getudev()) == (major_t)-1) {
2210Sstevel@tonic-gate 		cmn_err(CE_WARN, "tmpfsinit: Can't get unique device number.");
2220Sstevel@tonic-gate 		tmpfs_major = 0;
2230Sstevel@tonic-gate 	}
2240Sstevel@tonic-gate 	mutex_init(&tmpfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
2250Sstevel@tonic-gate 	return (0);
2260Sstevel@tonic-gate }
2270Sstevel@tonic-gate 
2280Sstevel@tonic-gate static int
tmp_mount(struct vfs * vfsp,struct vnode * mvp,struct mounta * uap,struct cred * cr)2290Sstevel@tonic-gate tmp_mount(
2300Sstevel@tonic-gate 	struct vfs *vfsp,
2310Sstevel@tonic-gate 	struct vnode *mvp,
2320Sstevel@tonic-gate 	struct mounta *uap,
2330Sstevel@tonic-gate 	struct cred *cr)
2340Sstevel@tonic-gate {
2350Sstevel@tonic-gate 	struct tmount *tm = NULL;
2360Sstevel@tonic-gate 	struct tmpnode *tp;
2370Sstevel@tonic-gate 	struct pathname dpn;
2380Sstevel@tonic-gate 	int error;
2390Sstevel@tonic-gate 	pgcnt_t anonmax;
2400Sstevel@tonic-gate 	struct vattr rattr;
2410Sstevel@tonic-gate 	int got_attrs;
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate 	char *sizestr;
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate 	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
2460Sstevel@tonic-gate 		return (error);
2470Sstevel@tonic-gate 
2480Sstevel@tonic-gate 	if (mvp->v_type != VDIR)
2490Sstevel@tonic-gate 		return (ENOTDIR);
2500Sstevel@tonic-gate 
2510Sstevel@tonic-gate 	mutex_enter(&mvp->v_lock);
2520Sstevel@tonic-gate 	if ((uap->flags & MS_OVERLAY) == 0 &&
2530Sstevel@tonic-gate 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
2540Sstevel@tonic-gate 		mutex_exit(&mvp->v_lock);
2550Sstevel@tonic-gate 		return (EBUSY);
2560Sstevel@tonic-gate 	}
2570Sstevel@tonic-gate 	mutex_exit(&mvp->v_lock);
2580Sstevel@tonic-gate 
2590Sstevel@tonic-gate 	/*
2600Sstevel@tonic-gate 	 * Having the resource be anything but "swap" doesn't make sense.
2610Sstevel@tonic-gate 	 */
262*12894SRobert.Harris@Sun.COM 	vfs_setresource(vfsp, "swap", 0);
2630Sstevel@tonic-gate 
2640Sstevel@tonic-gate 	/*
2650Sstevel@tonic-gate 	 * now look for options we understand...
2660Sstevel@tonic-gate 	 */
2670Sstevel@tonic-gate 
2680Sstevel@tonic-gate 	/* tmpfs doesn't support read-only mounts */
2690Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
2700Sstevel@tonic-gate 		error = EINVAL;
2710Sstevel@tonic-gate 		goto out;
2720Sstevel@tonic-gate 	}
2730Sstevel@tonic-gate 
2740Sstevel@tonic-gate 	/*
2750Sstevel@tonic-gate 	 * tm_anonmax is set according to the mount arguments
2760Sstevel@tonic-gate 	 * if any.  Otherwise, it is set to a maximum value.
2770Sstevel@tonic-gate 	 */
2780Sstevel@tonic-gate 	if (vfs_optionisset(vfsp, "size", &sizestr)) {
2790Sstevel@tonic-gate 		if ((error = tmp_convnum(sizestr, &anonmax)) != 0)
2800Sstevel@tonic-gate 			goto out;
2810Sstevel@tonic-gate 	} else {
2820Sstevel@tonic-gate 		anonmax = ULONG_MAX;
2830Sstevel@tonic-gate 	}
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 	if (error = pn_get(uap->dir,
2860Sstevel@tonic-gate 	    (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn))
2870Sstevel@tonic-gate 		goto out;
2880Sstevel@tonic-gate 
2890Sstevel@tonic-gate 	if ((tm = tmp_memalloc(sizeof (struct tmount), 0)) == NULL) {
2900Sstevel@tonic-gate 		pn_free(&dpn);
2910Sstevel@tonic-gate 		error = ENOMEM;
2920Sstevel@tonic-gate 		goto out;
2930Sstevel@tonic-gate 	}
2940Sstevel@tonic-gate 
2950Sstevel@tonic-gate 	/*
2960Sstevel@tonic-gate 	 * find an available minor device number for this mount
2970Sstevel@tonic-gate 	 */
2980Sstevel@tonic-gate 	mutex_enter(&tmpfs_minor_lock);
2990Sstevel@tonic-gate 	do {
3000Sstevel@tonic-gate 		tmpfs_minor = (tmpfs_minor + 1) & L_MAXMIN32;
3010Sstevel@tonic-gate 		tm->tm_dev = makedevice(tmpfs_major, tmpfs_minor);
3020Sstevel@tonic-gate 	} while (vfs_devismounted(tm->tm_dev));
3030Sstevel@tonic-gate 	mutex_exit(&tmpfs_minor_lock);
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate 	/*
3060Sstevel@tonic-gate 	 * Set but don't bother entering the mutex
3070Sstevel@tonic-gate 	 * (tmount not on mount list yet)
3080Sstevel@tonic-gate 	 */
3090Sstevel@tonic-gate 	mutex_init(&tm->tm_contents, NULL, MUTEX_DEFAULT, NULL);
3100Sstevel@tonic-gate 	mutex_init(&tm->tm_renamelck, NULL, MUTEX_DEFAULT, NULL);
3110Sstevel@tonic-gate 
3120Sstevel@tonic-gate 	tm->tm_vfsp = vfsp;
3130Sstevel@tonic-gate 	tm->tm_anonmax = anonmax;
3140Sstevel@tonic-gate 
3150Sstevel@tonic-gate 	vfsp->vfs_data = (caddr_t)tm;
3160Sstevel@tonic-gate 	vfsp->vfs_fstype = tmpfsfstype;
3170Sstevel@tonic-gate 	vfsp->vfs_dev = tm->tm_dev;
3180Sstevel@tonic-gate 	vfsp->vfs_bsize = PAGESIZE;
3190Sstevel@tonic-gate 	vfsp->vfs_flag |= VFS_NOTRUNC;
3200Sstevel@tonic-gate 	vfs_make_fsid(&vfsp->vfs_fsid, tm->tm_dev, tmpfsfstype);
3210Sstevel@tonic-gate 	tm->tm_mntpath = tmp_memalloc(dpn.pn_pathlen + 1, TMP_MUSTHAVE);
3220Sstevel@tonic-gate 	(void) strcpy(tm->tm_mntpath, dpn.pn_path);
3230Sstevel@tonic-gate 
3240Sstevel@tonic-gate 	/*
3250Sstevel@tonic-gate 	 * allocate and initialize root tmpnode structure
3260Sstevel@tonic-gate 	 */
3270Sstevel@tonic-gate 	bzero(&rattr, sizeof (struct vattr));
3280Sstevel@tonic-gate 	rattr.va_mode = (mode_t)(S_IFDIR | 0777);	/* XXX modes */
3290Sstevel@tonic-gate 	rattr.va_type = VDIR;
3300Sstevel@tonic-gate 	rattr.va_rdev = 0;
3310Sstevel@tonic-gate 	tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
3320Sstevel@tonic-gate 	tmpnode_init(tm, tp, &rattr, cr);
3330Sstevel@tonic-gate 
3340Sstevel@tonic-gate 	/*
3350Sstevel@tonic-gate 	 * Get the mode, uid, and gid from the underlying mount point.
3360Sstevel@tonic-gate 	 */
3370Sstevel@tonic-gate 	rattr.va_mask = AT_MODE|AT_UID|AT_GID;	/* Hint to getattr */
3385331Samw 	got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr, NULL);
3390Sstevel@tonic-gate 
3400Sstevel@tonic-gate 	rw_enter(&tp->tn_rwlock, RW_WRITER);
3410Sstevel@tonic-gate 	TNTOV(tp)->v_flag |= VROOT;
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate 	/*
3440Sstevel@tonic-gate 	 * If the getattr succeeded, use its results.  Otherwise allow
3450Sstevel@tonic-gate 	 * the previously set hardwired defaults to prevail.
3460Sstevel@tonic-gate 	 */
3470Sstevel@tonic-gate 	if (got_attrs == 0) {
3480Sstevel@tonic-gate 		tp->tn_mode = rattr.va_mode;
3490Sstevel@tonic-gate 		tp->tn_uid = rattr.va_uid;
3500Sstevel@tonic-gate 		tp->tn_gid = rattr.va_gid;
3510Sstevel@tonic-gate 	}
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate 	/*
3540Sstevel@tonic-gate 	 * initialize linked list of tmpnodes so that the back pointer of
3550Sstevel@tonic-gate 	 * the root tmpnode always points to the last one on the list
3560Sstevel@tonic-gate 	 * and the forward pointer of the last node is null
3570Sstevel@tonic-gate 	 */
3580Sstevel@tonic-gate 	tp->tn_back = tp;
3590Sstevel@tonic-gate 	tp->tn_forw = NULL;
3600Sstevel@tonic-gate 	tp->tn_nlink = 0;
3610Sstevel@tonic-gate 	tm->tm_rootnode = tp;
3620Sstevel@tonic-gate 
3630Sstevel@tonic-gate 	tdirinit(tp, tp);
3640Sstevel@tonic-gate 
3650Sstevel@tonic-gate 	rw_exit(&tp->tn_rwlock);
3660Sstevel@tonic-gate 
3670Sstevel@tonic-gate 	pn_free(&dpn);
3680Sstevel@tonic-gate 	error = 0;
3690Sstevel@tonic-gate 
3700Sstevel@tonic-gate out:
3715331Samw 	if (error == 0)
3727757SJanice.Chang@Sun.COM 		vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
3735331Samw 
3740Sstevel@tonic-gate 	return (error);
3750Sstevel@tonic-gate }
3760Sstevel@tonic-gate 
3770Sstevel@tonic-gate static int
tmp_unmount(struct vfs * vfsp,int flag,struct cred * cr)3780Sstevel@tonic-gate tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
3790Sstevel@tonic-gate {
3800Sstevel@tonic-gate 	struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
3810Sstevel@tonic-gate 	struct tmpnode *tnp, *cancel;
3820Sstevel@tonic-gate 	struct vnode	*vp;
3830Sstevel@tonic-gate 	int error;
3840Sstevel@tonic-gate 
3850Sstevel@tonic-gate 	if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
3860Sstevel@tonic-gate 		return (error);
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate 	/*
3890Sstevel@tonic-gate 	 * forced unmount is not supported by this file system
3900Sstevel@tonic-gate 	 * and thus, ENOTSUP, is being returned.
3910Sstevel@tonic-gate 	 */
3920Sstevel@tonic-gate 	if (flag & MS_FORCE)
3930Sstevel@tonic-gate 		return (ENOTSUP);
3940Sstevel@tonic-gate 
3950Sstevel@tonic-gate 	mutex_enter(&tm->tm_contents);
3960Sstevel@tonic-gate 
3970Sstevel@tonic-gate 	/*
3980Sstevel@tonic-gate 	 * If there are no open files, only the root node should have
3990Sstevel@tonic-gate 	 * a reference count.
4000Sstevel@tonic-gate 	 * With tm_contents held, nothing can be added or removed.
4010Sstevel@tonic-gate 	 * There may be some dirty pages.  To prevent fsflush from
4020Sstevel@tonic-gate 	 * disrupting the unmount, put a hold on each node while scanning.
4030Sstevel@tonic-gate 	 * If we find a previously referenced node, undo the holds we have
4040Sstevel@tonic-gate 	 * placed and fail EBUSY.
4050Sstevel@tonic-gate 	 */
4060Sstevel@tonic-gate 	tnp = tm->tm_rootnode;
4070Sstevel@tonic-gate 	if (TNTOV(tnp)->v_count > 1) {
4080Sstevel@tonic-gate 		mutex_exit(&tm->tm_contents);
4090Sstevel@tonic-gate 		return (EBUSY);
4100Sstevel@tonic-gate 	}
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 	for (tnp = tnp->tn_forw; tnp; tnp = tnp->tn_forw) {
4130Sstevel@tonic-gate 		if ((vp = TNTOV(tnp))->v_count > 0) {
4140Sstevel@tonic-gate 			cancel = tm->tm_rootnode->tn_forw;
4150Sstevel@tonic-gate 			while (cancel != tnp) {
4160Sstevel@tonic-gate 				vp = TNTOV(cancel);
4170Sstevel@tonic-gate 				ASSERT(vp->v_count > 0);
4180Sstevel@tonic-gate 				VN_RELE(vp);
4190Sstevel@tonic-gate 				cancel = cancel->tn_forw;
4200Sstevel@tonic-gate 			}
4210Sstevel@tonic-gate 			mutex_exit(&tm->tm_contents);
4220Sstevel@tonic-gate 			return (EBUSY);
4230Sstevel@tonic-gate 		}
4240Sstevel@tonic-gate 		VN_HOLD(vp);
4250Sstevel@tonic-gate 	}
4260Sstevel@tonic-gate 
4270Sstevel@tonic-gate 	/*
4280Sstevel@tonic-gate 	 * We can drop the mutex now because no one can find this mount
4290Sstevel@tonic-gate 	 */
4300Sstevel@tonic-gate 	mutex_exit(&tm->tm_contents);
4310Sstevel@tonic-gate 
4320Sstevel@tonic-gate 	/*
4330Sstevel@tonic-gate 	 * Free all kmemalloc'd and anonalloc'd memory associated with
4340Sstevel@tonic-gate 	 * this filesystem.  To do this, we go through the file list twice,
4350Sstevel@tonic-gate 	 * once to remove all the directory entries, and then to remove
4360Sstevel@tonic-gate 	 * all the files.  We do this because there is useful code in
4370Sstevel@tonic-gate 	 * tmpnode_free which assumes that the directory entry has been
4380Sstevel@tonic-gate 	 * removed before the file.
4390Sstevel@tonic-gate 	 */
4400Sstevel@tonic-gate 	/*
4410Sstevel@tonic-gate 	 * Remove all directory entries
4420Sstevel@tonic-gate 	 */
4430Sstevel@tonic-gate 	for (tnp = tm->tm_rootnode; tnp; tnp = tnp->tn_forw) {
4440Sstevel@tonic-gate 		rw_enter(&tnp->tn_rwlock, RW_WRITER);
4450Sstevel@tonic-gate 		if (tnp->tn_type == VDIR)
4460Sstevel@tonic-gate 			tdirtrunc(tnp);
4470Sstevel@tonic-gate 		if (tnp->tn_vnode->v_flag & V_XATTRDIR) {
4480Sstevel@tonic-gate 			/*
4490Sstevel@tonic-gate 			 * Account for implicit attrdir reference.
4500Sstevel@tonic-gate 			 */
4510Sstevel@tonic-gate 			ASSERT(tnp->tn_nlink > 0);
4520Sstevel@tonic-gate 			DECR_COUNT(&tnp->tn_nlink, &tnp->tn_tlock);
4530Sstevel@tonic-gate 		}
4540Sstevel@tonic-gate 		rw_exit(&tnp->tn_rwlock);
4550Sstevel@tonic-gate 	}
4560Sstevel@tonic-gate 
4570Sstevel@tonic-gate 	ASSERT(tm->tm_rootnode);
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate 	/*
4600Sstevel@tonic-gate 	 * All links are gone, v_count is keeping nodes in place.
4610Sstevel@tonic-gate 	 * VN_RELE should make the node disappear, unless somebody
4620Sstevel@tonic-gate 	 * is holding pages against it.  Nap and retry until it disappears.
4630Sstevel@tonic-gate 	 *
4640Sstevel@tonic-gate 	 * We re-acquire the lock to prevent others who have a HOLD on
4650Sstevel@tonic-gate 	 * a tmpnode via its pages or anon slots from blowing it away
4660Sstevel@tonic-gate 	 * (in tmp_inactive) while we're trying to get to it here. Once
4670Sstevel@tonic-gate 	 * we have a HOLD on it we know it'll stick around.
4680Sstevel@tonic-gate 	 *
4690Sstevel@tonic-gate 	 */
4700Sstevel@tonic-gate 	mutex_enter(&tm->tm_contents);
4710Sstevel@tonic-gate 	/*
4720Sstevel@tonic-gate 	 * Remove all the files (except the rootnode) backwards.
4730Sstevel@tonic-gate 	 */
4740Sstevel@tonic-gate 	while ((tnp = tm->tm_rootnode->tn_back) != tm->tm_rootnode) {
4750Sstevel@tonic-gate 		mutex_exit(&tm->tm_contents);
4760Sstevel@tonic-gate 		/*
4770Sstevel@tonic-gate 		 * Inhibit tmp_inactive from touching attribute directory
4780Sstevel@tonic-gate 		 * as all nodes will be released here.
4790Sstevel@tonic-gate 		 * Note we handled the link count in pass 2 above.
4800Sstevel@tonic-gate 		 */
4810Sstevel@tonic-gate 		rw_enter(&tnp->tn_rwlock, RW_WRITER);
4820Sstevel@tonic-gate 		tnp->tn_xattrdp = NULL;
4830Sstevel@tonic-gate 		rw_exit(&tnp->tn_rwlock);
4840Sstevel@tonic-gate 		vp = TNTOV(tnp);
4850Sstevel@tonic-gate 		VN_RELE(vp);
4860Sstevel@tonic-gate 		mutex_enter(&tm->tm_contents);
4870Sstevel@tonic-gate 		/*
4880Sstevel@tonic-gate 		 * It's still there after the RELE. Someone else like pageout
4890Sstevel@tonic-gate 		 * has a hold on it so wait a bit and then try again - we know
4900Sstevel@tonic-gate 		 * they'll give it up soon.
4910Sstevel@tonic-gate 		 */
4920Sstevel@tonic-gate 		if (tnp == tm->tm_rootnode->tn_back) {
4930Sstevel@tonic-gate 			VN_HOLD(vp);
4940Sstevel@tonic-gate 			mutex_exit(&tm->tm_contents);
4950Sstevel@tonic-gate 			delay(hz / 4);
4960Sstevel@tonic-gate 			mutex_enter(&tm->tm_contents);
4970Sstevel@tonic-gate 		}
4980Sstevel@tonic-gate 	}
4990Sstevel@tonic-gate 	mutex_exit(&tm->tm_contents);
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate 	tm->tm_rootnode->tn_xattrdp = NULL;
5020Sstevel@tonic-gate 	VN_RELE(TNTOV(tm->tm_rootnode));
5030Sstevel@tonic-gate 
5040Sstevel@tonic-gate 	ASSERT(tm->tm_mntpath);
5050Sstevel@tonic-gate 
5060Sstevel@tonic-gate 	tmp_memfree(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate 	ASSERT(tm->tm_anonmem == 0);
5090Sstevel@tonic-gate 
5100Sstevel@tonic-gate 	mutex_destroy(&tm->tm_contents);
5110Sstevel@tonic-gate 	mutex_destroy(&tm->tm_renamelck);
5120Sstevel@tonic-gate 	tmp_memfree(tm, sizeof (struct tmount));
5130Sstevel@tonic-gate 
5140Sstevel@tonic-gate 	return (0);
5150Sstevel@tonic-gate }
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate /*
5180Sstevel@tonic-gate  * return root tmpnode for given vnode
5190Sstevel@tonic-gate  */
5200Sstevel@tonic-gate static int
tmp_root(struct vfs * vfsp,struct vnode ** vpp)5210Sstevel@tonic-gate tmp_root(struct vfs *vfsp, struct vnode **vpp)
5220Sstevel@tonic-gate {
5230Sstevel@tonic-gate 	struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
5240Sstevel@tonic-gate 	struct tmpnode *tp = tm->tm_rootnode;
5250Sstevel@tonic-gate 	struct vnode *vp;
5260Sstevel@tonic-gate 
5270Sstevel@tonic-gate 	ASSERT(tp);
5280Sstevel@tonic-gate 
5290Sstevel@tonic-gate 	vp = TNTOV(tp);
5300Sstevel@tonic-gate 	VN_HOLD(vp);
5310Sstevel@tonic-gate 	*vpp = vp;
5320Sstevel@tonic-gate 	return (0);
5330Sstevel@tonic-gate }
5340Sstevel@tonic-gate 
5350Sstevel@tonic-gate static int
tmp_statvfs(struct vfs * vfsp,struct statvfs64 * sbp)5360Sstevel@tonic-gate tmp_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
5370Sstevel@tonic-gate {
5380Sstevel@tonic-gate 	struct tmount	*tm = (struct tmount *)VFSTOTM(vfsp);
5390Sstevel@tonic-gate 	ulong_t	blocks;
5400Sstevel@tonic-gate 	dev32_t d32;
5417920Sgerald.jelinek@sun.com 	zoneid_t eff_zid;
5427920Sgerald.jelinek@sun.com 	struct zone *zp;
5437920Sgerald.jelinek@sun.com 
5447920Sgerald.jelinek@sun.com 	/*
5457920Sgerald.jelinek@sun.com 	 * The file system may have been mounted by the global zone on
5467920Sgerald.jelinek@sun.com 	 * behalf of the non-global zone.  In that case, the tmount zone_id
5477920Sgerald.jelinek@sun.com 	 * will be the global zone.  We still want to show the swap cap inside
5487920Sgerald.jelinek@sun.com 	 * the zone in this case, even though the file system was mounted by
5497920Sgerald.jelinek@sun.com 	 * the global zone.
5507920Sgerald.jelinek@sun.com 	 */
5517920Sgerald.jelinek@sun.com 	if (curproc->p_zone->zone_id != GLOBAL_ZONEUNIQID)
5527920Sgerald.jelinek@sun.com 		zp = curproc->p_zone;
5537920Sgerald.jelinek@sun.com 	else
5547920Sgerald.jelinek@sun.com 		zp = tm->tm_vfsp->vfs_zone;
5557920Sgerald.jelinek@sun.com 
5567920Sgerald.jelinek@sun.com 	if (zp == NULL)
5577920Sgerald.jelinek@sun.com 		eff_zid = GLOBAL_ZONEUNIQID;
5587920Sgerald.jelinek@sun.com 	else
5597920Sgerald.jelinek@sun.com 		eff_zid = zp->zone_id;
5600Sstevel@tonic-gate 
5610Sstevel@tonic-gate 	sbp->f_bsize = PAGESIZE;
5620Sstevel@tonic-gate 	sbp->f_frsize = PAGESIZE;
5630Sstevel@tonic-gate 
5640Sstevel@tonic-gate 	/*
5650Sstevel@tonic-gate 	 * Find the amount of available physical and memory swap
5660Sstevel@tonic-gate 	 */
5670Sstevel@tonic-gate 	mutex_enter(&anoninfo_lock);
5680Sstevel@tonic-gate 	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
5690Sstevel@tonic-gate 	blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
5700Sstevel@tonic-gate 	mutex_exit(&anoninfo_lock);
5710Sstevel@tonic-gate 
5720Sstevel@tonic-gate 	/*
5730Sstevel@tonic-gate 	 * If tm_anonmax for this mount is less than the available swap space
5740Sstevel@tonic-gate 	 * (minus the amount tmpfs can't use), use that instead
5750Sstevel@tonic-gate 	 */
5760Sstevel@tonic-gate 	if (blocks > tmpfs_minfree)
5770Sstevel@tonic-gate 		sbp->f_bfree = MIN(blocks - tmpfs_minfree,
5780Sstevel@tonic-gate 		    tm->tm_anonmax - tm->tm_anonmem);
5790Sstevel@tonic-gate 	else
5800Sstevel@tonic-gate 		sbp->f_bfree = 0;
5810Sstevel@tonic-gate 
5820Sstevel@tonic-gate 	sbp->f_bavail = sbp->f_bfree;
5830Sstevel@tonic-gate 
5840Sstevel@tonic-gate 	/*
5850Sstevel@tonic-gate 	 * Total number of blocks is what's available plus what's been used
5860Sstevel@tonic-gate 	 */
5870Sstevel@tonic-gate 	sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree + tm->tm_anonmem);
5880Sstevel@tonic-gate 
5897920Sgerald.jelinek@sun.com 	if (eff_zid != GLOBAL_ZONEUNIQID &&
5907920Sgerald.jelinek@sun.com 	    zp->zone_max_swap_ctl != UINT64_MAX) {
5917920Sgerald.jelinek@sun.com 		/*
5927920Sgerald.jelinek@sun.com 		 * If the fs is used by a non-global zone with a swap cap,
5937920Sgerald.jelinek@sun.com 		 * then report the capped size.
5947920Sgerald.jelinek@sun.com 		 */
5957920Sgerald.jelinek@sun.com 		rctl_qty_t cap, used;
5967920Sgerald.jelinek@sun.com 		pgcnt_t pgcap, pgused;
5977920Sgerald.jelinek@sun.com 
5987920Sgerald.jelinek@sun.com 		mutex_enter(&zp->zone_mem_lock);
5997920Sgerald.jelinek@sun.com 		cap = zp->zone_max_swap_ctl;
6007920Sgerald.jelinek@sun.com 		used = zp->zone_max_swap;
6017920Sgerald.jelinek@sun.com 		mutex_exit(&zp->zone_mem_lock);
6027920Sgerald.jelinek@sun.com 
6037920Sgerald.jelinek@sun.com 		pgcap = btop(cap);
6047920Sgerald.jelinek@sun.com 		pgused = btop(used);
6057920Sgerald.jelinek@sun.com 
6067920Sgerald.jelinek@sun.com 		sbp->f_bfree = MIN(pgcap - pgused, sbp->f_bfree);
6077920Sgerald.jelinek@sun.com 		sbp->f_bavail = sbp->f_bfree;
6087920Sgerald.jelinek@sun.com 		sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
6097920Sgerald.jelinek@sun.com 	}
6107920Sgerald.jelinek@sun.com 
6110Sstevel@tonic-gate 	/*
6120Sstevel@tonic-gate 	 * The maximum number of files available is approximately the number
6130Sstevel@tonic-gate 	 * of tmpnodes we can allocate from the remaining kernel memory
6140Sstevel@tonic-gate 	 * available to tmpfs.  This is fairly inaccurate since it doesn't
6150Sstevel@tonic-gate 	 * take into account the names stored in the directory entries.
6160Sstevel@tonic-gate 	 */
6170Sstevel@tonic-gate 	if (tmpfs_maxkmem > tmp_kmemspace)
6180Sstevel@tonic-gate 		sbp->f_ffree = (tmpfs_maxkmem - tmp_kmemspace) /
6190Sstevel@tonic-gate 		    (sizeof (struct tmpnode) + sizeof (struct tdirent));
6200Sstevel@tonic-gate 	else
6210Sstevel@tonic-gate 		sbp->f_ffree = 0;
6220Sstevel@tonic-gate 
6230Sstevel@tonic-gate 	sbp->f_files = tmpfs_maxkmem /
6240Sstevel@tonic-gate 	    (sizeof (struct tmpnode) + sizeof (struct tdirent));
6250Sstevel@tonic-gate 	sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
6260Sstevel@tonic-gate 	(void) cmpldev(&d32, vfsp->vfs_dev);
6270Sstevel@tonic-gate 	sbp->f_fsid = d32;
6280Sstevel@tonic-gate 	(void) strcpy(sbp->f_basetype, vfssw[tmpfsfstype].vsw_name);
6290Sstevel@tonic-gate 	(void) strncpy(sbp->f_fstr, tm->tm_mntpath, sizeof (sbp->f_fstr));
6300Sstevel@tonic-gate 	/*
6310Sstevel@tonic-gate 	 * ensure null termination
6320Sstevel@tonic-gate 	 */
6330Sstevel@tonic-gate 	sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
6340Sstevel@tonic-gate 	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
6350Sstevel@tonic-gate 	sbp->f_namemax = MAXNAMELEN - 1;
6360Sstevel@tonic-gate 	return (0);
6370Sstevel@tonic-gate }
6380Sstevel@tonic-gate 
6390Sstevel@tonic-gate static int
tmp_vget(struct vfs * vfsp,struct vnode ** vpp,struct fid * fidp)6400Sstevel@tonic-gate tmp_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
6410Sstevel@tonic-gate {
6420Sstevel@tonic-gate 	struct tfid *tfid;
6430Sstevel@tonic-gate 	struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
6440Sstevel@tonic-gate 	struct tmpnode *tp = NULL;
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 	tfid = (struct tfid *)fidp;
6470Sstevel@tonic-gate 	*vpp = NULL;
6480Sstevel@tonic-gate 
6490Sstevel@tonic-gate 	mutex_enter(&tm->tm_contents);
6500Sstevel@tonic-gate 	for (tp = tm->tm_rootnode; tp; tp = tp->tn_forw) {
6510Sstevel@tonic-gate 		mutex_enter(&tp->tn_tlock);
6520Sstevel@tonic-gate 		if (tp->tn_nodeid == tfid->tfid_ino) {
6530Sstevel@tonic-gate 			/*
6540Sstevel@tonic-gate 			 * If the gen numbers don't match we know the
6550Sstevel@tonic-gate 			 * file won't be found since only one tmpnode
6560Sstevel@tonic-gate 			 * can have this number at a time.
6570Sstevel@tonic-gate 			 */
6580Sstevel@tonic-gate 			if (tp->tn_gen != tfid->tfid_gen || tp->tn_nlink == 0) {
6590Sstevel@tonic-gate 				mutex_exit(&tp->tn_tlock);
6600Sstevel@tonic-gate 				mutex_exit(&tm->tm_contents);
6610Sstevel@tonic-gate 				return (0);
6620Sstevel@tonic-gate 			}
6630Sstevel@tonic-gate 			*vpp = (struct vnode *)TNTOV(tp);
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate 			VN_HOLD(*vpp);
6660Sstevel@tonic-gate 
6670Sstevel@tonic-gate 			if ((tp->tn_mode & S_ISVTX) &&
6680Sstevel@tonic-gate 			    !(tp->tn_mode & (S_IXUSR | S_IFDIR))) {
6690Sstevel@tonic-gate 				mutex_enter(&(*vpp)->v_lock);
6700Sstevel@tonic-gate 				(*vpp)->v_flag |= VISSWAP;
6710Sstevel@tonic-gate 				mutex_exit(&(*vpp)->v_lock);
6720Sstevel@tonic-gate 			}
6730Sstevel@tonic-gate 			mutex_exit(&tp->tn_tlock);
6740Sstevel@tonic-gate 			mutex_exit(&tm->tm_contents);
6750Sstevel@tonic-gate 			return (0);
6760Sstevel@tonic-gate 		}
6770Sstevel@tonic-gate 		mutex_exit(&tp->tn_tlock);
6780Sstevel@tonic-gate 	}
6790Sstevel@tonic-gate 	mutex_exit(&tm->tm_contents);
6800Sstevel@tonic-gate 	return (0);
6810Sstevel@tonic-gate }
682