xref: /onnv-gate/usr/src/uts/common/fs/mntfs/mntvnops.c (revision 13096:b02331b7b26d)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53898Srsb  * Common Development and Distribution License (the "License").
63898Srsb  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
2212999Slori.alt@oracle.com  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate  */
240Sstevel@tonic-gate 
250Sstevel@tonic-gate #include <sys/file.h>
260Sstevel@tonic-gate #include <sys/stat.h>
270Sstevel@tonic-gate #include <sys/atomic.h>
280Sstevel@tonic-gate #include <sys/mntio.h>
290Sstevel@tonic-gate #include <sys/mnttab.h>
300Sstevel@tonic-gate #include <sys/mount.h>
310Sstevel@tonic-gate #include <sys/sunddi.h>
320Sstevel@tonic-gate #include <sys/sysmacros.h>
330Sstevel@tonic-gate #include <sys/systm.h>
340Sstevel@tonic-gate #include <sys/vfs.h>
353898Srsb #include <sys/vfs_opreg.h>
360Sstevel@tonic-gate #include <sys/fs/mntdata.h>
370Sstevel@tonic-gate #include <fs/fs_subr.h>
380Sstevel@tonic-gate #include <sys/vmsystm.h>
390Sstevel@tonic-gate #include <vm/seg_vn.h>
4010910SRobert.Harris@Sun.COM #include <sys/time.h>
4110910SRobert.Harris@Sun.COM #include <sys/ksynch.h>
4210910SRobert.Harris@Sun.COM #include <sys/sdt.h>
430Sstevel@tonic-gate 
440Sstevel@tonic-gate #define	MNTROOTINO	2
450Sstevel@tonic-gate 
460Sstevel@tonic-gate static mntnode_t *mntgetnode(vnode_t *);
470Sstevel@tonic-gate 
480Sstevel@tonic-gate vnodeops_t *mntvnodeops;
494863Spraks extern void vfs_mnttab_readop(void);
500Sstevel@tonic-gate 
510Sstevel@tonic-gate /*
520Sstevel@tonic-gate  * Design of kernel mnttab accounting.
530Sstevel@tonic-gate  *
5410910SRobert.Harris@Sun.COM  * mntfs provides two methods of reading the in-kernel mnttab, i.e. the state of
5510910SRobert.Harris@Sun.COM  * the mounted resources: the read-only file /etc/mnttab, and a collection of
5610910SRobert.Harris@Sun.COM  * ioctl() commands. Most of these interfaces are public and are described in
5710910SRobert.Harris@Sun.COM  * mnttab(4). Three private ioctl() commands, MNTIOC_GETMNTENT,
5810910SRobert.Harris@Sun.COM  * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY, provide for the getmntent(3C)
5910910SRobert.Harris@Sun.COM  * family of functions, allowing them to support white space in mount names.
600Sstevel@tonic-gate  *
6110910SRobert.Harris@Sun.COM  * A significant feature of mntfs is that it provides a file descriptor with a
6210910SRobert.Harris@Sun.COM  * snapshot once it begins to consume mnttab data. Thus, as the process
6310910SRobert.Harris@Sun.COM  * continues to consume data, its view of the in-kernel mnttab does not change
6410910SRobert.Harris@Sun.COM  * even if resources are mounted or unmounted. The intent is to ensure that
6510910SRobert.Harris@Sun.COM  * processes are guaranteed to read self-consistent data even as the system
6610910SRobert.Harris@Sun.COM  * changes.
6710910SRobert.Harris@Sun.COM  *
6810910SRobert.Harris@Sun.COM  * The snapshot is implemented by a "database", unique to each zone, that
6910910SRobert.Harris@Sun.COM  * comprises a linked list of mntelem_ts. The database is identified by
7010910SRobert.Harris@Sun.COM  * zone_mntfs_db and is protected by zone_mntfs_db_lock. Each element contains
7110910SRobert.Harris@Sun.COM  * the text entry in /etc/mnttab for a mounted resource, i.e. a vfs_t, and is
7210910SRobert.Harris@Sun.COM  * marked with its time of "birth", i.e. creation. An element is "killed", and
7310910SRobert.Harris@Sun.COM  * marked with its time of death, when it is found to be out of date, e.g. when
7410910SRobert.Harris@Sun.COM  * the corresponding resource has been unmounted.
750Sstevel@tonic-gate  *
7610910SRobert.Harris@Sun.COM  * When a process performs the first read() or ioctl() for a file descriptor for
7710910SRobert.Harris@Sun.COM  * /etc/mnttab, the database is updated by a call to mntfs_snapshot() to ensure
7810910SRobert.Harris@Sun.COM  * that an element exists for each currently mounted resource. Following this,
7910910SRobert.Harris@Sun.COM  * the current time is written into a snapshot structure, a mntsnap_t, embedded
8010910SRobert.Harris@Sun.COM  * in the descriptor's mntnode_t.
8110910SRobert.Harris@Sun.COM  *
8210910SRobert.Harris@Sun.COM  * mntfs is able to enumerate the /etc/mnttab entries corresponding to a
8310910SRobert.Harris@Sun.COM  * particular file descriptor by searching the database for entries that were
8410910SRobert.Harris@Sun.COM  * born before the appropriate snapshot and that either are still alive or died
8510910SRobert.Harris@Sun.COM  * after the snapshot was created. Consumers use the iterator function
8610910SRobert.Harris@Sun.COM  * mntfs_get_next_elem() to identify the next suitable element in the database.
8710910SRobert.Harris@Sun.COM  *
8810910SRobert.Harris@Sun.COM  * Each snapshot has a hold on its corresponding database elements, effected by
8910910SRobert.Harris@Sun.COM  * a per-element reference count. At last close(), a snapshot is destroyed in
9010910SRobert.Harris@Sun.COM  * mntfs_freesnap() by releasing all of its holds; an element is destroyed if
9110910SRobert.Harris@Sun.COM  * its reference count becomes zero. Therefore the database never exists unless
9210910SRobert.Harris@Sun.COM  * there is at least one active consumer of /etc/mnttab.
9310910SRobert.Harris@Sun.COM  *
9410910SRobert.Harris@Sun.COM  * getmntent(3C) et al. "do not open, close or rewind the file." This implies
9510910SRobert.Harris@Sun.COM  * that getmntent() and read() must be able to operate without interaction on
9610910SRobert.Harris@Sun.COM  * the same file descriptor; this is accomplished by the use of separate
9710910SRobert.Harris@Sun.COM  * mntsnap_ts for both read() and ioctl().
9810910SRobert.Harris@Sun.COM  *
9911757SRobert.Harris@Sun.COM  * mntfs observes the following lock-ordering:
10011757SRobert.Harris@Sun.COM  *
10111757SRobert.Harris@Sun.COM  *	mnp->mnt_contents -> vfslist -> zonep->zone_mntfs_db_lock
10211757SRobert.Harris@Sun.COM  *
1030Sstevel@tonic-gate  * NOTE: The following variable enables the generation of the "dev=xxx"
1040Sstevel@tonic-gate  * in the option string for a mounted file system.  Really this should
1050Sstevel@tonic-gate  * be gotten rid of altogether, but for the sake of backwards compatibility
1060Sstevel@tonic-gate  * we had to leave it in.  It is defined as a 32-bit device number.  This
1070Sstevel@tonic-gate  * means that when 64-bit device numbers are in use, if either the major or
1080Sstevel@tonic-gate  * minor part of the device number will not fit in a 16 bit quantity, the
1090Sstevel@tonic-gate  * "dev=" will be set to NODEV (0x7fffffff).  See PSARC 1999/566 and
1100Sstevel@tonic-gate  * 1999/131 for details.  The cmpldev() function used to generate the 32-bit
1110Sstevel@tonic-gate  * device number handles this check and assigns the proper value.
1120Sstevel@tonic-gate  */
1130Sstevel@tonic-gate int mntfs_enabledev = 1;	/* enable old "dev=xxx" option */
1140Sstevel@tonic-gate 
11510910SRobert.Harris@Sun.COM extern void vfs_mono_time(timespec_t *);
11610910SRobert.Harris@Sun.COM enum { MNTFS_FIRST, MNTFS_SECOND, MNTFS_NEITHER };
11710910SRobert.Harris@Sun.COM 
/*
 * Determine whether a field within a line from /etc/mnttab contains actual
 * content or simply the marker string "-". This never applies to the time,
 * therefore the delimiter must be a tab.
 *
 * Evaluates to zero only when x points at the two characters "-\t", and to
 * non-zero otherwise. The argument is expanded twice, so it must not have
 * side effects.
 */
#define	MNTFS_REAL_FIELD(x)	(*(x) != '-' || *((x) + 1) != '\t')
12410910SRobert.Harris@Sun.COM 
1250Sstevel@tonic-gate static int
mntfs_devsize(struct vfs * vfsp)1260Sstevel@tonic-gate mntfs_devsize(struct vfs *vfsp)
1270Sstevel@tonic-gate {
1280Sstevel@tonic-gate 	dev32_t odev;
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate 	(void) cmpldev(&odev, vfsp->vfs_dev);
1310Sstevel@tonic-gate 	return (snprintf(NULL, 0, "dev=%x", odev));
1320Sstevel@tonic-gate }
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate static int
mntfs_devprint(struct vfs * vfsp,char * buf)1350Sstevel@tonic-gate mntfs_devprint(struct vfs *vfsp, char *buf)
1360Sstevel@tonic-gate {
1370Sstevel@tonic-gate 	dev32_t odev;
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 	(void) cmpldev(&odev, vfsp->vfs_dev);
1400Sstevel@tonic-gate 	return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev));
1410Sstevel@tonic-gate }
1420Sstevel@tonic-gate 
14310910SRobert.Harris@Sun.COM /* Identify which, if either, of two supplied timespec structs is newer. */
14410910SRobert.Harris@Sun.COM static int
mntfs_newest(timespec_t * a,timespec_t * b)14510910SRobert.Harris@Sun.COM mntfs_newest(timespec_t *a, timespec_t *b)
14610910SRobert.Harris@Sun.COM {
14710910SRobert.Harris@Sun.COM 	if (a->tv_sec == b->tv_sec &&
14810910SRobert.Harris@Sun.COM 	    a->tv_nsec == b->tv_nsec) {
14910910SRobert.Harris@Sun.COM 		return (MNTFS_NEITHER);
15010910SRobert.Harris@Sun.COM 	} else if (b->tv_sec > a->tv_sec ||
15110910SRobert.Harris@Sun.COM 	    (b->tv_sec == a->tv_sec &&
15210910SRobert.Harris@Sun.COM 	    b->tv_nsec > a->tv_nsec)) {
15310910SRobert.Harris@Sun.COM 		return (MNTFS_SECOND);
15410910SRobert.Harris@Sun.COM 	} else {
15510910SRobert.Harris@Sun.COM 		return (MNTFS_FIRST);
15610910SRobert.Harris@Sun.COM 	}
15710910SRobert.Harris@Sun.COM }
15810910SRobert.Harris@Sun.COM 
1590Sstevel@tonic-gate static int
mntfs_optsize(struct vfs * vfsp)1600Sstevel@tonic-gate mntfs_optsize(struct vfs *vfsp)
1610Sstevel@tonic-gate {
1620Sstevel@tonic-gate 	int i, size = 0;
1630Sstevel@tonic-gate 	mntopt_t *mop;
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate 	for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) {
1660Sstevel@tonic-gate 		mop = &vfsp->vfs_mntopts.mo_list[i];
1670Sstevel@tonic-gate 		if (mop->mo_flags & MO_NODISPLAY)
1680Sstevel@tonic-gate 			continue;
1690Sstevel@tonic-gate 		if (mop->mo_flags & MO_SET) {
1700Sstevel@tonic-gate 			if (size)
1710Sstevel@tonic-gate 				size++; /* space for comma */
1720Sstevel@tonic-gate 			size += strlen(mop->mo_name);
1730Sstevel@tonic-gate 			/*
1740Sstevel@tonic-gate 			 * count option value if there is one
1750Sstevel@tonic-gate 			 */
1760Sstevel@tonic-gate 			if (mop->mo_arg != NULL) {
1770Sstevel@tonic-gate 				size += strlen(mop->mo_arg) + 1;
1780Sstevel@tonic-gate 			}
1790Sstevel@tonic-gate 		}
1800Sstevel@tonic-gate 	}
1810Sstevel@tonic-gate 	if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
1820Sstevel@tonic-gate 		/*
1830Sstevel@tonic-gate 		 * Add space for "zone=<zone_name>" if required.
1840Sstevel@tonic-gate 		 */
1850Sstevel@tonic-gate 		if (size)
1860Sstevel@tonic-gate 			size++;	/* space for comma */
1870Sstevel@tonic-gate 		size += sizeof ("zone=") - 1;
1880Sstevel@tonic-gate 		size += strlen(vfsp->vfs_zone->zone_name);
1890Sstevel@tonic-gate 	}
1900Sstevel@tonic-gate 	if (mntfs_enabledev) {
1910Sstevel@tonic-gate 		if (size != 0)
1920Sstevel@tonic-gate 			size++; /* space for comma */
1930Sstevel@tonic-gate 		size += mntfs_devsize(vfsp);
1940Sstevel@tonic-gate 	}
1950Sstevel@tonic-gate 	if (size == 0)
1960Sstevel@tonic-gate 		size = strlen("-");
1970Sstevel@tonic-gate 	return (size);
1980Sstevel@tonic-gate }
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate static int
mntfs_optprint(struct vfs * vfsp,char * buf)2010Sstevel@tonic-gate mntfs_optprint(struct vfs *vfsp, char *buf)
2020Sstevel@tonic-gate {
2030Sstevel@tonic-gate 	int i, optinbuf = 0;
2040Sstevel@tonic-gate 	mntopt_t *mop;
2050Sstevel@tonic-gate 	char *origbuf = buf;
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate 	for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) {
2080Sstevel@tonic-gate 		mop = &vfsp->vfs_mntopts.mo_list[i];
2090Sstevel@tonic-gate 		if (mop->mo_flags & MO_NODISPLAY)
2100Sstevel@tonic-gate 			continue;
2110Sstevel@tonic-gate 		if (mop->mo_flags & MO_SET) {
2120Sstevel@tonic-gate 			if (optinbuf)
2130Sstevel@tonic-gate 				*buf++ = ',';
2140Sstevel@tonic-gate 			else
2150Sstevel@tonic-gate 				optinbuf = 1;
2160Sstevel@tonic-gate 			buf += snprintf(buf, MAX_MNTOPT_STR,
2176036Smec 			    "%s", mop->mo_name);
2180Sstevel@tonic-gate 			/*
2190Sstevel@tonic-gate 			 * print option value if there is one
2200Sstevel@tonic-gate 			 */
2210Sstevel@tonic-gate 			if (mop->mo_arg != NULL) {
2220Sstevel@tonic-gate 				buf += snprintf(buf, MAX_MNTOPT_STR, "=%s",
2236036Smec 				    mop->mo_arg);
2240Sstevel@tonic-gate 			}
2250Sstevel@tonic-gate 		}
2260Sstevel@tonic-gate 	}
2270Sstevel@tonic-gate 	if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
2280Sstevel@tonic-gate 		if (optinbuf)
2290Sstevel@tonic-gate 			*buf++ = ',';
2300Sstevel@tonic-gate 		else
2310Sstevel@tonic-gate 			optinbuf = 1;
2320Sstevel@tonic-gate 		buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s",
2330Sstevel@tonic-gate 		    vfsp->vfs_zone->zone_name);
2340Sstevel@tonic-gate 	}
2350Sstevel@tonic-gate 	if (mntfs_enabledev) {
2360Sstevel@tonic-gate 		if (optinbuf++)
2370Sstevel@tonic-gate 			*buf++ = ',';
2380Sstevel@tonic-gate 		buf += mntfs_devprint(vfsp, buf);
2390Sstevel@tonic-gate 	}
2400Sstevel@tonic-gate 	if (!optinbuf) {
2410Sstevel@tonic-gate 		buf += snprintf(buf, MAX_MNTOPT_STR, "-");
2420Sstevel@tonic-gate 	}
2430Sstevel@tonic-gate 	return (buf - origbuf);
2440Sstevel@tonic-gate }
2450Sstevel@tonic-gate 
24610910SRobert.Harris@Sun.COM void
mntfs_populate_text(vfs_t * vfsp,zone_t * zonep,mntelem_t * elemp)24710910SRobert.Harris@Sun.COM mntfs_populate_text(vfs_t *vfsp, zone_t *zonep, mntelem_t *elemp)
24810910SRobert.Harris@Sun.COM {
24910910SRobert.Harris@Sun.COM 	struct extmnttab *tabp = &elemp->mnte_tab;
25010910SRobert.Harris@Sun.COM 	const char *resource, *mntpt;
25110910SRobert.Harris@Sun.COM 	char *cp = elemp->mnte_text;
25210910SRobert.Harris@Sun.COM 	mntpt = refstr_value(vfsp->vfs_mntpt);
25310910SRobert.Harris@Sun.COM 	resource = refstr_value(vfsp->vfs_resource);
25410910SRobert.Harris@Sun.COM 
25510910SRobert.Harris@Sun.COM 	tabp->mnt_special = 0;
25610910SRobert.Harris@Sun.COM 	if (resource != NULL && resource[0] != '\0') {
25710910SRobert.Harris@Sun.COM 		if (resource[0] != '/') {
25810910SRobert.Harris@Sun.COM 			cp += snprintf(cp, MAXPATHLEN, "%s\t", resource);
25910910SRobert.Harris@Sun.COM 		} else if (!ZONE_PATH_VISIBLE(resource, zonep)) {
26010910SRobert.Harris@Sun.COM 			/*
26110910SRobert.Harris@Sun.COM 			 * Use the mount point as the resource.
26210910SRobert.Harris@Sun.COM 			 */
26310910SRobert.Harris@Sun.COM 			cp += snprintf(cp, MAXPATHLEN, "%s\t",
26410910SRobert.Harris@Sun.COM 			    ZONE_PATH_TRANSLATE(mntpt, zonep));
26510910SRobert.Harris@Sun.COM 		} else {
26610910SRobert.Harris@Sun.COM 			cp += snprintf(cp, MAXPATHLEN, "%s\t",
26710910SRobert.Harris@Sun.COM 			    ZONE_PATH_TRANSLATE(resource, zonep));
26810910SRobert.Harris@Sun.COM 		}
26910910SRobert.Harris@Sun.COM 	} else {
27010910SRobert.Harris@Sun.COM 		cp += snprintf(cp, MAXPATHLEN, "-\t");
27110910SRobert.Harris@Sun.COM 	}
27210910SRobert.Harris@Sun.COM 
27310910SRobert.Harris@Sun.COM 	tabp->mnt_mountp = (char *)(cp - elemp->mnte_text);
27410910SRobert.Harris@Sun.COM 	if (mntpt != NULL && mntpt[0] != '\0') {
27510910SRobert.Harris@Sun.COM 		/*
27610910SRobert.Harris@Sun.COM 		 * We know the mount point is visible from within the zone,
27710910SRobert.Harris@Sun.COM 		 * otherwise it wouldn't be on the zone's vfs list.
27810910SRobert.Harris@Sun.COM 		 */
27910910SRobert.Harris@Sun.COM 		cp += snprintf(cp, MAXPATHLEN, "%s\t",
28010910SRobert.Harris@Sun.COM 		    ZONE_PATH_TRANSLATE(mntpt, zonep));
28110910SRobert.Harris@Sun.COM 	} else {
28210910SRobert.Harris@Sun.COM 		cp += snprintf(cp, MAXPATHLEN, "-\t");
28310910SRobert.Harris@Sun.COM 	}
28410910SRobert.Harris@Sun.COM 
28510910SRobert.Harris@Sun.COM 	tabp->mnt_fstype = (char *)(cp - elemp->mnte_text);
28610910SRobert.Harris@Sun.COM 	cp += snprintf(cp, MAXPATHLEN, "%s\t",
28710910SRobert.Harris@Sun.COM 	    vfssw[vfsp->vfs_fstype].vsw_name);
28810910SRobert.Harris@Sun.COM 
28910910SRobert.Harris@Sun.COM 	tabp->mnt_mntopts = (char *)(cp - elemp->mnte_text);
29010910SRobert.Harris@Sun.COM 	cp += mntfs_optprint(vfsp, cp);
29110910SRobert.Harris@Sun.COM 	*cp++ = '\t';
29210910SRobert.Harris@Sun.COM 
29310910SRobert.Harris@Sun.COM 	tabp->mnt_time = (char *)(cp - elemp->mnte_text);
29410910SRobert.Harris@Sun.COM 	cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime);
29510910SRobert.Harris@Sun.COM 	*cp++ = '\n'; /* over-write snprintf's trailing null-byte */
29610910SRobert.Harris@Sun.COM 
29710910SRobert.Harris@Sun.COM 	tabp->mnt_major = getmajor(vfsp->vfs_dev);
29810910SRobert.Harris@Sun.COM 	tabp->mnt_minor = getminor(vfsp->vfs_dev);
29910910SRobert.Harris@Sun.COM 
30010910SRobert.Harris@Sun.COM 	elemp->mnte_text_size = cp - elemp->mnte_text;
30110910SRobert.Harris@Sun.COM 	elemp->mnte_vfs_ctime = vfsp->vfs_hrctime;
30210910SRobert.Harris@Sun.COM 	elemp->mnte_hidden = vfsp->vfs_flag & VFS_NOMNTTAB;
30310910SRobert.Harris@Sun.COM }
30410910SRobert.Harris@Sun.COM 
30510910SRobert.Harris@Sun.COM /* Determine the length of the /etc/mnttab entry for this vfs_t. */
3060Sstevel@tonic-gate static size_t
mntfs_text_len(vfs_t * vfsp,zone_t * zone)30710910SRobert.Harris@Sun.COM mntfs_text_len(vfs_t *vfsp, zone_t *zone)
3080Sstevel@tonic-gate {
3090Sstevel@tonic-gate 	size_t size = 0;
3100Sstevel@tonic-gate 	const char *resource, *mntpt;
31110910SRobert.Harris@Sun.COM 	size_t mntsize;
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	mntpt = refstr_value(vfsp->vfs_mntpt);
3140Sstevel@tonic-gate 	if (mntpt != NULL && mntpt[0] != '\0') {
31510910SRobert.Harris@Sun.COM 		mntsize = strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1;
3160Sstevel@tonic-gate 	} else {
31710910SRobert.Harris@Sun.COM 		mntsize = 2;	/* "-\t" */
3180Sstevel@tonic-gate 	}
31910910SRobert.Harris@Sun.COM 	size += mntsize;
3200Sstevel@tonic-gate 
3210Sstevel@tonic-gate 	resource = refstr_value(vfsp->vfs_resource);
3220Sstevel@tonic-gate 	if (resource != NULL && resource[0] != '\0') {
3230Sstevel@tonic-gate 		if (resource[0] != '/') {
3240Sstevel@tonic-gate 			size += strlen(resource) + 1;
3250Sstevel@tonic-gate 		} else if (!ZONE_PATH_VISIBLE(resource, zone)) {
3260Sstevel@tonic-gate 			/*
3270Sstevel@tonic-gate 			 * Same as the zone's view of the mount point.
3280Sstevel@tonic-gate 			 */
32910910SRobert.Harris@Sun.COM 			size += mntsize;
3300Sstevel@tonic-gate 		} else {
3310Sstevel@tonic-gate 			size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1;
3320Sstevel@tonic-gate 		}
3330Sstevel@tonic-gate 	} else {
33410910SRobert.Harris@Sun.COM 		size += 2;	/* "-\t" */
3350Sstevel@tonic-gate 	}
3360Sstevel@tonic-gate 	size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1;
3370Sstevel@tonic-gate 	size += mntfs_optsize(vfsp);
3380Sstevel@tonic-gate 	size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime);
3390Sstevel@tonic-gate 	return (size);
3400Sstevel@tonic-gate }
3410Sstevel@tonic-gate 
34210910SRobert.Harris@Sun.COM /* Destroy the resources associated with a snapshot element. */
3430Sstevel@tonic-gate static void
mntfs_destroy_elem(mntelem_t * elemp)34410910SRobert.Harris@Sun.COM mntfs_destroy_elem(mntelem_t *elemp)
3450Sstevel@tonic-gate {
34610910SRobert.Harris@Sun.COM 	kmem_free(elemp->mnte_text, elemp->mnte_text_size);
34710910SRobert.Harris@Sun.COM 	kmem_free(elemp, sizeof (mntelem_t));
3480Sstevel@tonic-gate }
3490Sstevel@tonic-gate 
35010910SRobert.Harris@Sun.COM /*
35110910SRobert.Harris@Sun.COM  * Return 1 if the given snapshot is in the range of the given element; return
35210910SRobert.Harris@Sun.COM  * 0 otherwise.
35310910SRobert.Harris@Sun.COM  */
35410910SRobert.Harris@Sun.COM static int
mntfs_elem_in_range(mntsnap_t * snapp,mntelem_t * elemp)35510910SRobert.Harris@Sun.COM mntfs_elem_in_range(mntsnap_t *snapp, mntelem_t *elemp)
3560Sstevel@tonic-gate {
35710910SRobert.Harris@Sun.COM 	timespec_t	*stimep = &snapp->mnts_time;
35810910SRobert.Harris@Sun.COM 	timespec_t	*btimep = &elemp->mnte_birth;
35910910SRobert.Harris@Sun.COM 	timespec_t	*dtimep = &elemp->mnte_death;
3600Sstevel@tonic-gate 
3610Sstevel@tonic-gate 	/*
36210910SRobert.Harris@Sun.COM 	 * If a snapshot is in range of an element then the snapshot must have
36310910SRobert.Harris@Sun.COM 	 * been created after the birth of the element, and either the element
36410910SRobert.Harris@Sun.COM 	 * is still alive or it died after the snapshot was created.
3650Sstevel@tonic-gate 	 */
36610910SRobert.Harris@Sun.COM 	if (mntfs_newest(btimep, stimep) == MNTFS_SECOND &&
36710910SRobert.Harris@Sun.COM 	    (MNTFS_ELEM_IS_ALIVE(elemp) ||
36810910SRobert.Harris@Sun.COM 	    mntfs_newest(stimep, dtimep) == MNTFS_SECOND))
36910910SRobert.Harris@Sun.COM 		return (1);
37010910SRobert.Harris@Sun.COM 	else
37110910SRobert.Harris@Sun.COM 		return (0);
3720Sstevel@tonic-gate }
3730Sstevel@tonic-gate 
37410910SRobert.Harris@Sun.COM /*
37510910SRobert.Harris@Sun.COM  * Return the next valid database element, after the one provided, for a given
37610910SRobert.Harris@Sun.COM  * snapshot; return NULL if none exists. The caller must hold the zone's
37710910SRobert.Harris@Sun.COM  * database lock as a reader before calling this function.
37810910SRobert.Harris@Sun.COM  */
37910910SRobert.Harris@Sun.COM static mntelem_t *
mntfs_get_next_elem(mntsnap_t * snapp,mntelem_t * elemp)38010910SRobert.Harris@Sun.COM mntfs_get_next_elem(mntsnap_t *snapp, mntelem_t *elemp)
3810Sstevel@tonic-gate {
38210910SRobert.Harris@Sun.COM 	int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN;
3830Sstevel@tonic-gate 
3840Sstevel@tonic-gate 	do {
38510910SRobert.Harris@Sun.COM 		elemp = elemp->mnte_next;
38610910SRobert.Harris@Sun.COM 	} while (elemp &&
38710910SRobert.Harris@Sun.COM 	    (!mntfs_elem_in_range(snapp, elemp) ||
38810910SRobert.Harris@Sun.COM 	    (!show_hidden && elemp->mnte_hidden)));
38910910SRobert.Harris@Sun.COM 	return (elemp);
3900Sstevel@tonic-gate }
3910Sstevel@tonic-gate 
39210910SRobert.Harris@Sun.COM /*
39310910SRobert.Harris@Sun.COM  * This function frees the resources associated with a mntsnap_t. It walks
39410910SRobert.Harris@Sun.COM  * through the database, decrementing the reference count of any element that
39510910SRobert.Harris@Sun.COM  * satisfies the snapshot. If the reference count of an element becomes zero
39610910SRobert.Harris@Sun.COM  * then it is removed from the database.
39710910SRobert.Harris@Sun.COM  */
3980Sstevel@tonic-gate static void
mntfs_freesnap(mntnode_t * mnp,mntsnap_t * snapp)39910910SRobert.Harris@Sun.COM mntfs_freesnap(mntnode_t *mnp, mntsnap_t *snapp)
4000Sstevel@tonic-gate {
401*13096SJordan.Vaughan@Sun.com 	zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
40210910SRobert.Harris@Sun.COM 	krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
40310910SRobert.Harris@Sun.COM 	mntelem_t **elempp = &zonep->zone_mntfs_db;
40410910SRobert.Harris@Sun.COM 	mntelem_t *elemp;
40510910SRobert.Harris@Sun.COM 	int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN;
40610910SRobert.Harris@Sun.COM 	size_t number_decremented = 0;
4070Sstevel@tonic-gate 
40810910SRobert.Harris@Sun.COM 	ASSERT(RW_WRITE_HELD(&mnp->mnt_contents));
4090Sstevel@tonic-gate 
41010910SRobert.Harris@Sun.COM 	/* Ignore an uninitialised snapshot. */
41110910SRobert.Harris@Sun.COM 	if (snapp->mnts_nmnts == 0)
41210910SRobert.Harris@Sun.COM 		return;
4130Sstevel@tonic-gate 
41410910SRobert.Harris@Sun.COM 	/* Drop the holds on any matching database elements. */
41510910SRobert.Harris@Sun.COM 	rw_enter(dblockp, RW_WRITER);
41610910SRobert.Harris@Sun.COM 	while ((elemp = *elempp) != NULL) {
41710910SRobert.Harris@Sun.COM 		if (mntfs_elem_in_range(snapp, elemp) &&
41810910SRobert.Harris@Sun.COM 		    (!elemp->mnte_hidden || show_hidden) &&
41910910SRobert.Harris@Sun.COM 		    ++number_decremented && --elemp->mnte_refcnt == 0) {
42010910SRobert.Harris@Sun.COM 			if ((*elempp = elemp->mnte_next) != NULL)
42110910SRobert.Harris@Sun.COM 				(*elempp)->mnte_prev = elemp->mnte_prev;
42210910SRobert.Harris@Sun.COM 			mntfs_destroy_elem(elemp);
42310910SRobert.Harris@Sun.COM 		} else {
42410910SRobert.Harris@Sun.COM 			elempp = &elemp->mnte_next;
42510910SRobert.Harris@Sun.COM 		}
42610910SRobert.Harris@Sun.COM 	}
42710910SRobert.Harris@Sun.COM 	rw_exit(dblockp);
42810910SRobert.Harris@Sun.COM 	ASSERT(number_decremented == snapp->mnts_nmnts);
4290Sstevel@tonic-gate 
43010910SRobert.Harris@Sun.COM 	/* Clear the snapshot data. */
43110910SRobert.Harris@Sun.COM 	bzero(snapp, sizeof (mntsnap_t));
4320Sstevel@tonic-gate }
4330Sstevel@tonic-gate 
43410910SRobert.Harris@Sun.COM /* Insert the new database element newp after the existing element prevp. */
4350Sstevel@tonic-gate static void
mntfs_insert_after(mntelem_t * newp,mntelem_t * prevp)43610910SRobert.Harris@Sun.COM mntfs_insert_after(mntelem_t *newp, mntelem_t *prevp)
4370Sstevel@tonic-gate {
43810910SRobert.Harris@Sun.COM 	newp->mnte_prev = prevp;
43910910SRobert.Harris@Sun.COM 	newp->mnte_next = prevp->mnte_next;
44010910SRobert.Harris@Sun.COM 	prevp->mnte_next = newp;
44110910SRobert.Harris@Sun.COM 	if (newp->mnte_next != NULL)
44210910SRobert.Harris@Sun.COM 		newp->mnte_next->mnte_prev = newp;
44310910SRobert.Harris@Sun.COM }
44410910SRobert.Harris@Sun.COM 
44510910SRobert.Harris@Sun.COM /* Create and return a copy of a given database element. */
44610910SRobert.Harris@Sun.COM static mntelem_t *
mntfs_copy(mntelem_t * origp)44710910SRobert.Harris@Sun.COM mntfs_copy(mntelem_t *origp)
44810910SRobert.Harris@Sun.COM {
44910910SRobert.Harris@Sun.COM 	mntelem_t *copyp;
4500Sstevel@tonic-gate 
45110910SRobert.Harris@Sun.COM 	copyp = kmem_zalloc(sizeof (mntelem_t), KM_SLEEP);
45210910SRobert.Harris@Sun.COM 	copyp->mnte_vfs_ctime = origp->mnte_vfs_ctime;
45310910SRobert.Harris@Sun.COM 	copyp->mnte_text_size = origp->mnte_text_size;
45410910SRobert.Harris@Sun.COM 	copyp->mnte_text = kmem_alloc(copyp->mnte_text_size, KM_SLEEP);
45510910SRobert.Harris@Sun.COM 	bcopy(origp->mnte_text, copyp->mnte_text, copyp->mnte_text_size);
45610910SRobert.Harris@Sun.COM 	copyp->mnte_tab = origp->mnte_tab;
45710910SRobert.Harris@Sun.COM 	copyp->mnte_hidden = origp->mnte_hidden;
45810910SRobert.Harris@Sun.COM 
45910910SRobert.Harris@Sun.COM 	return (copyp);
46010910SRobert.Harris@Sun.COM }
46110910SRobert.Harris@Sun.COM 
46210910SRobert.Harris@Sun.COM /*
46310910SRobert.Harris@Sun.COM  * Compare two database elements and determine whether or not the vfs_t payload
46410910SRobert.Harris@Sun.COM  * data of each are the same. Return 1 if so and 0 otherwise.
46510910SRobert.Harris@Sun.COM  */
46610910SRobert.Harris@Sun.COM static int
mntfs_is_same_element(mntelem_t * a,mntelem_t * b)46710910SRobert.Harris@Sun.COM mntfs_is_same_element(mntelem_t *a, mntelem_t *b)
46810910SRobert.Harris@Sun.COM {
46910910SRobert.Harris@Sun.COM 	if (a->mnte_hidden == b->mnte_hidden &&
47010910SRobert.Harris@Sun.COM 	    a->mnte_text_size == b->mnte_text_size &&
47110910SRobert.Harris@Sun.COM 	    bcmp(a->mnte_text, b->mnte_text, a->mnte_text_size) == 0 &&
47210910SRobert.Harris@Sun.COM 	    bcmp(&a->mnte_tab, &b->mnte_tab, sizeof (struct extmnttab)) == 0)
47310910SRobert.Harris@Sun.COM 		return (1);
47410910SRobert.Harris@Sun.COM 	else
47510910SRobert.Harris@Sun.COM 		return (0);
4760Sstevel@tonic-gate }
4770Sstevel@tonic-gate 
47810910SRobert.Harris@Sun.COM /*
47910910SRobert.Harris@Sun.COM  * mntfs_snapshot() updates the database, creating it if necessary, so that it
48010910SRobert.Harris@Sun.COM  * accurately reflects the state of the in-kernel mnttab. It also increments
48110910SRobert.Harris@Sun.COM  * the reference count on all database elements that correspond to currently-
48210910SRobert.Harris@Sun.COM  * mounted resources. Finally, it initialises the appropriate snapshot
48310910SRobert.Harris@Sun.COM  * structure.
48410910SRobert.Harris@Sun.COM  *
48510910SRobert.Harris@Sun.COM  * Each vfs_t is given a high-resolution time stamp, for the benefit of mntfs,
48610910SRobert.Harris@Sun.COM  * when it is inserted into the in-kernel mnttab. This time stamp is copied into
48710910SRobert.Harris@Sun.COM  * the corresponding database element when it is created, allowing the element
48810910SRobert.Harris@Sun.COM  * and the vfs_t to be identified as a pair. It is possible that some file
48910910SRobert.Harris@Sun.COM  * systems may make unadvertised changes to, for example, a resource's mount
49010910SRobert.Harris@Sun.COM  * options. Therefore, in order to determine whether a database element is an
49110910SRobert.Harris@Sun.COM  * up-to-date representation of a given vfs_t, it is compared with a temporary
49210910SRobert.Harris@Sun.COM  * element generated for this purpose. Although less efficient, this is safer
49310910SRobert.Harris@Sun.COM  * than implementing an mtime for a vfs_t.
49410910SRobert.Harris@Sun.COM  *
49510910SRobert.Harris@Sun.COM  * Some mounted resources are marked as "hidden" with a VFS_NOMNTTAB flag. These
49610910SRobert.Harris@Sun.COM  * are considered invisible unless the user has already set the MNT_SHOWHIDDEN
49710910SRobert.Harris@Sun.COM  * flag in the vnode using the MNTIOC_SHOWHIDDEN ioctl.
49810910SRobert.Harris@Sun.COM  */
4990Sstevel@tonic-gate static void
mntfs_snapshot(mntnode_t * mnp,mntsnap_t * snapp)50010910SRobert.Harris@Sun.COM mntfs_snapshot(mntnode_t *mnp, mntsnap_t *snapp)
5010Sstevel@tonic-gate {
50211757SRobert.Harris@Sun.COM 	mntdata_t	*mnd = MTOD(mnp);
503*13096SJordan.Vaughan@Sun.com 	zone_t		*zonep = mnd->mnt_zone_ref.zref_zone;
50410910SRobert.Harris@Sun.COM 	int		is_global_zone = (zonep == global_zone);
50510910SRobert.Harris@Sun.COM 	int		show_hidden = mnp->mnt_flags & MNT_SHOWHIDDEN;
50610910SRobert.Harris@Sun.COM 	vfs_t		*vfsp, *firstvfsp, *lastvfsp;
50710910SRobert.Harris@Sun.COM 	vfs_t		dummyvfs;
50810910SRobert.Harris@Sun.COM 	vfs_t		*dummyvfsp = NULL;
50910910SRobert.Harris@Sun.COM 	krwlock_t	*dblockp = &zonep->zone_mntfs_db_lock;
51010910SRobert.Harris@Sun.COM 	mntelem_t	**headpp = &zonep->zone_mntfs_db;
51110910SRobert.Harris@Sun.COM 	mntelem_t	*elemp;
51210910SRobert.Harris@Sun.COM 	mntelem_t	*prevp = NULL;
51310910SRobert.Harris@Sun.COM 	int		order;
51410910SRobert.Harris@Sun.COM 	mntelem_t	*tempelemp;
51510910SRobert.Harris@Sun.COM 	mntelem_t	*newp;
51610910SRobert.Harris@Sun.COM 	mntelem_t	*firstp = NULL;
51710910SRobert.Harris@Sun.COM 	size_t		nmnts = 0;
51811757SRobert.Harris@Sun.COM 	size_t		total_text_size = 0;
51911757SRobert.Harris@Sun.COM 	size_t		normal_text_size = 0;
52010910SRobert.Harris@Sun.COM 	int		insert_before;
52110910SRobert.Harris@Sun.COM 	timespec_t	last_mtime;
52210910SRobert.Harris@Sun.COM 	size_t		entry_length, new_entry_length;
5230Sstevel@tonic-gate 
52410910SRobert.Harris@Sun.COM 
52510910SRobert.Harris@Sun.COM 	ASSERT(RW_WRITE_HELD(&mnp->mnt_contents));
52610910SRobert.Harris@Sun.COM 	vfs_list_read_lock();
52710910SRobert.Harris@Sun.COM 	vfs_mnttab_modtime(&last_mtime);
5280Sstevel@tonic-gate 
52910910SRobert.Harris@Sun.COM 	/*
53010910SRobert.Harris@Sun.COM 	 * If this snapshot already exists then we must have been asked to
53110910SRobert.Harris@Sun.COM 	 * rewind the file, i.e. discard the snapshot and create a new one in
53210910SRobert.Harris@Sun.COM 	 * its place. In this case we first see if the in-kernel mnttab has
53310910SRobert.Harris@Sun.COM 	 * advertised a change; if not then we simply reinitialise the metadata.
53410910SRobert.Harris@Sun.COM 	 */
53510910SRobert.Harris@Sun.COM 	if (snapp->mnts_nmnts) {
53610910SRobert.Harris@Sun.COM 		if (mntfs_newest(&last_mtime, &snapp->mnts_last_mtime) ==
53710910SRobert.Harris@Sun.COM 		    MNTFS_NEITHER) {
53810910SRobert.Harris@Sun.COM 			/*
53910910SRobert.Harris@Sun.COM 			 * An unchanged mtime is no guarantee that the
54010910SRobert.Harris@Sun.COM 			 * in-kernel mnttab is unchanged; for example, a
54110910SRobert.Harris@Sun.COM 			 * concurrent remount may be between calls to
54210910SRobert.Harris@Sun.COM 			 * vfs_setmntopt_nolock() and vfs_mnttab_modtimeupd().
54310910SRobert.Harris@Sun.COM 			 * It follows that the database may have changed, and
54410910SRobert.Harris@Sun.COM 			 * in particular that some elements in this snapshot
54510910SRobert.Harris@Sun.COM 			 * may have been killed by another call to
54610910SRobert.Harris@Sun.COM 			 * mntfs_snapshot(). It is therefore not merely
54710910SRobert.Harris@Sun.COM 			 * unnecessary to update the snapshot's time but in
54810910SRobert.Harris@Sun.COM 			 * fact dangerous; it needs to be left alone.
54910910SRobert.Harris@Sun.COM 			 */
55010910SRobert.Harris@Sun.COM 			snapp->mnts_next = snapp->mnts_first;
55110910SRobert.Harris@Sun.COM 			snapp->mnts_flags &= ~MNTS_REWIND;
55210910SRobert.Harris@Sun.COM 			snapp->mnts_foffset = snapp->mnts_ieoffset = 0;
55310910SRobert.Harris@Sun.COM 			vfs_list_unlock();
55410910SRobert.Harris@Sun.COM 			return;
55510910SRobert.Harris@Sun.COM 		} else {
55610910SRobert.Harris@Sun.COM 			mntfs_freesnap(mnp, snapp);
55710910SRobert.Harris@Sun.COM 		}
55810910SRobert.Harris@Sun.COM 	}
5590Sstevel@tonic-gate 
56010910SRobert.Harris@Sun.COM 	/*
56110910SRobert.Harris@Sun.COM 	 * Create a temporary database element. For each vfs_t, the temporary
56210910SRobert.Harris@Sun.COM 	 * element will be populated with the corresponding text. If the vfs_t
56310910SRobert.Harris@Sun.COM 	 * does not have a corresponding element within the database, or if
56410910SRobert.Harris@Sun.COM 	 * there is such an element but it is stale, a copy of the temporary
56510910SRobert.Harris@Sun.COM 	 * element is inserted into the database at the appropriate location.
56610910SRobert.Harris@Sun.COM 	 */
56710910SRobert.Harris@Sun.COM 	tempelemp = kmem_alloc(sizeof (mntelem_t), KM_SLEEP);
56810910SRobert.Harris@Sun.COM 	entry_length = MNT_LINE_MAX;
56910910SRobert.Harris@Sun.COM 	tempelemp->mnte_text = kmem_alloc(entry_length, KM_SLEEP);
5700Sstevel@tonic-gate 
57110910SRobert.Harris@Sun.COM 	/* Find the first and last vfs_t for the given zone. */
57210910SRobert.Harris@Sun.COM 	if (is_global_zone) {
57310910SRobert.Harris@Sun.COM 		firstvfsp = rootvfs;
57410910SRobert.Harris@Sun.COM 		lastvfsp = firstvfsp->vfs_prev;
57510910SRobert.Harris@Sun.COM 	} else {
57610910SRobert.Harris@Sun.COM 		firstvfsp = zonep->zone_vfslist;
57710910SRobert.Harris@Sun.COM 		/*
57810910SRobert.Harris@Sun.COM 		 * If there isn't already a vfs_t for root then we create a
57910910SRobert.Harris@Sun.COM 		 * dummy which will be used as the head of the list (which will
58010910SRobert.Harris@Sun.COM 		 * therefore no longer be circular).
58110910SRobert.Harris@Sun.COM 		 */
58210910SRobert.Harris@Sun.COM 		if (firstvfsp == NULL ||
58310910SRobert.Harris@Sun.COM 		    strcmp(refstr_value(firstvfsp->vfs_mntpt),
58410910SRobert.Harris@Sun.COM 		    zonep->zone_rootpath) != 0) {
58510910SRobert.Harris@Sun.COM 			/*
58610910SRobert.Harris@Sun.COM 			 * The zone's vfs_ts will have mount points relative to
58710910SRobert.Harris@Sun.COM 			 * the zone's root path. The vfs_t for the zone's
58810910SRobert.Harris@Sun.COM 			 * root file system would therefore have a mount point
58910910SRobert.Harris@Sun.COM 			 * equal to the zone's root path. Since the zone's root
59010910SRobert.Harris@Sun.COM 			 * path isn't a mount point, we copy the vfs_t of the
59110910SRobert.Harris@Sun.COM 			 * zone's root vnode, and provide it with a fake mount
59212999Slori.alt@oracle.com 			 * and resource. However, if the zone's root is a
59312999Slori.alt@oracle.com 			 * zfs dataset, use the dataset name as the resource.
59410910SRobert.Harris@Sun.COM 			 *
59510910SRobert.Harris@Sun.COM 			 * Note that by cloning another vfs_t we also acquire
59610910SRobert.Harris@Sun.COM 			 * its high-resolution ctime. This might appear to
59710910SRobert.Harris@Sun.COM 			 * violate the requirement that the ctimes in the list
59810910SRobert.Harris@Sun.COM 			 * of vfs_ts are unique and monotonically increasing;
59910910SRobert.Harris@Sun.COM 			 * this is not the case. The dummy vfs_t appears in only
60010910SRobert.Harris@Sun.COM 			 * a non-global zone's vfs_t list, where the cloned
60110910SRobert.Harris@Sun.COM 			 * vfs_t would not ordinarily be visible; the ctimes are
60210910SRobert.Harris@Sun.COM 			 * therefore unique. The zone's root path must be
60310910SRobert.Harris@Sun.COM 			 * available before the zone boots, and so its root
60410910SRobert.Harris@Sun.COM 			 * vnode's vfs_t's ctime must be lower than those of any
60510910SRobert.Harris@Sun.COM 			 * resources subsequently mounted by the zone. The
60610910SRobert.Harris@Sun.COM 			 * ctimes are therefore monotonically increasing.
60710910SRobert.Harris@Sun.COM 			 */
60810910SRobert.Harris@Sun.COM 			dummyvfs = *zonep->zone_rootvp->v_vfsp;
60910910SRobert.Harris@Sun.COM 			dummyvfs.vfs_mntpt = refstr_alloc(zonep->zone_rootpath);
61012999Slori.alt@oracle.com 			if (strcmp(vfssw[dummyvfs.vfs_fstype].vsw_name, "zfs")
61112999Slori.alt@oracle.com 			    != 0)
61212999Slori.alt@oracle.com 				dummyvfs.vfs_resource = dummyvfs.vfs_mntpt;
61310910SRobert.Harris@Sun.COM 			dummyvfsp = &dummyvfs;
61410910SRobert.Harris@Sun.COM 			if (firstvfsp == NULL) {
61510910SRobert.Harris@Sun.COM 				lastvfsp = dummyvfsp;
61610910SRobert.Harris@Sun.COM 			} else {
61710910SRobert.Harris@Sun.COM 				lastvfsp = firstvfsp->vfs_zone_prev;
61810910SRobert.Harris@Sun.COM 				dummyvfsp->vfs_zone_next = firstvfsp;
61910910SRobert.Harris@Sun.COM 			}
62010910SRobert.Harris@Sun.COM 			firstvfsp = dummyvfsp;
62110910SRobert.Harris@Sun.COM 		} else {
62210910SRobert.Harris@Sun.COM 			lastvfsp = firstvfsp->vfs_zone_prev;
62310910SRobert.Harris@Sun.COM 		}
6240Sstevel@tonic-gate 	}
6250Sstevel@tonic-gate 
62610910SRobert.Harris@Sun.COM 	/*
62710910SRobert.Harris@Sun.COM 	 * Now walk through all the vfs_ts for this zone. For each one, find the
62810910SRobert.Harris@Sun.COM 	 * corresponding database element, creating it first if necessary, and
62910910SRobert.Harris@Sun.COM 	 * increment its reference count.
63010910SRobert.Harris@Sun.COM 	 */
63110910SRobert.Harris@Sun.COM 	rw_enter(dblockp, RW_WRITER);
63210910SRobert.Harris@Sun.COM 	elemp = zonep->zone_mntfs_db;
63310910SRobert.Harris@Sun.COM 	/* CSTYLED */
63410910SRobert.Harris@Sun.COM 	for (vfsp = firstvfsp;;
63510910SRobert.Harris@Sun.COM 	    vfsp = is_global_zone ? vfsp->vfs_next : vfsp->vfs_zone_next) {
63610910SRobert.Harris@Sun.COM 		DTRACE_PROBE1(new__vfs, vfs_t *, vfsp);
63710910SRobert.Harris@Sun.COM 		/* Consider only visible entries. */
63810910SRobert.Harris@Sun.COM 		if ((vfsp->vfs_flag & VFS_NOMNTTAB) == 0 || show_hidden) {
63910910SRobert.Harris@Sun.COM 			/*
64010910SRobert.Harris@Sun.COM 			 * Walk through the existing database looking for either
64110910SRobert.Harris@Sun.COM 			 * an element that matches the current vfs_t, or for the
64210910SRobert.Harris@Sun.COM 			 * correct place in which to insert a new element.
64310910SRobert.Harris@Sun.COM 			 */
64410910SRobert.Harris@Sun.COM 			insert_before = 0;
64510910SRobert.Harris@Sun.COM 			for (; elemp; prevp = elemp, elemp = elemp->mnte_next) {
64610910SRobert.Harris@Sun.COM 				DTRACE_PROBE1(considering__elem, mntelem_t *,
64710910SRobert.Harris@Sun.COM 				    elemp);
6480Sstevel@tonic-gate 
64910910SRobert.Harris@Sun.COM 				/* Compare the vfs_t with the element. */
65010910SRobert.Harris@Sun.COM 				order = mntfs_newest(&elemp->mnte_vfs_ctime,
65110910SRobert.Harris@Sun.COM 				    &vfsp->vfs_hrctime);
65210910SRobert.Harris@Sun.COM 
65310910SRobert.Harris@Sun.COM 				/*
65410910SRobert.Harris@Sun.COM 				 * If we encounter a database element newer than
65510910SRobert.Harris@Sun.COM 				 * this vfs_t then we've stepped over a gap
65610910SRobert.Harris@Sun.COM 				 * where the element for this vfs_t must be
65710910SRobert.Harris@Sun.COM 				 * inserted.
65810910SRobert.Harris@Sun.COM 				 */
65910910SRobert.Harris@Sun.COM 				if (order == MNTFS_FIRST) {
66010910SRobert.Harris@Sun.COM 					insert_before = 1;
66110910SRobert.Harris@Sun.COM 					break;
66210910SRobert.Harris@Sun.COM 				}
66310910SRobert.Harris@Sun.COM 
66410910SRobert.Harris@Sun.COM 				/* Dead elements no longer interest us. */
66510910SRobert.Harris@Sun.COM 				if (MNTFS_ELEM_IS_DEAD(elemp))
66610910SRobert.Harris@Sun.COM 					continue;
6670Sstevel@tonic-gate 
66810910SRobert.Harris@Sun.COM 				/*
66910910SRobert.Harris@Sun.COM 				 * If the time stamps are the same then the
67010910SRobert.Harris@Sun.COM 				 * element is potential match for the vfs_t,
67110910SRobert.Harris@Sun.COM 				 * although it may later prove to be stale.
67210910SRobert.Harris@Sun.COM 				 */
67310910SRobert.Harris@Sun.COM 				if (order == MNTFS_NEITHER)
67410910SRobert.Harris@Sun.COM 					break;
67510910SRobert.Harris@Sun.COM 
67610910SRobert.Harris@Sun.COM 				/*
67710910SRobert.Harris@Sun.COM 				 * This element must be older than the vfs_t.
67810910SRobert.Harris@Sun.COM 				 * It must, therefore, correspond to a vfs_t
67910910SRobert.Harris@Sun.COM 				 * that has been unmounted. Since the element is
68010910SRobert.Harris@Sun.COM 				 * still alive, we kill it if it is visible.
68110910SRobert.Harris@Sun.COM 				 */
68210910SRobert.Harris@Sun.COM 				if (!elemp->mnte_hidden || show_hidden)
68310910SRobert.Harris@Sun.COM 					vfs_mono_time(&elemp->mnte_death);
68410910SRobert.Harris@Sun.COM 			}
68510910SRobert.Harris@Sun.COM 			DTRACE_PROBE2(possible__match, vfs_t *, vfsp,
68610910SRobert.Harris@Sun.COM 			    mntelem_t *, elemp);
6870Sstevel@tonic-gate 
68810910SRobert.Harris@Sun.COM 			/* Create a new database element if required. */
68910910SRobert.Harris@Sun.COM 			new_entry_length = mntfs_text_len(vfsp, zonep);
69010910SRobert.Harris@Sun.COM 			if (new_entry_length > entry_length) {
69110910SRobert.Harris@Sun.COM 				kmem_free(tempelemp->mnte_text, entry_length);
69210910SRobert.Harris@Sun.COM 				tempelemp->mnte_text =
69310910SRobert.Harris@Sun.COM 				    kmem_alloc(new_entry_length, KM_SLEEP);
69410910SRobert.Harris@Sun.COM 				entry_length = new_entry_length;
69510910SRobert.Harris@Sun.COM 			}
69610910SRobert.Harris@Sun.COM 			mntfs_populate_text(vfsp, zonep, tempelemp);
69710910SRobert.Harris@Sun.COM 			ASSERT(tempelemp->mnte_text_size == new_entry_length);
69810910SRobert.Harris@Sun.COM 			if (elemp == NULL) {
69910910SRobert.Harris@Sun.COM 				/*
70010910SRobert.Harris@Sun.COM 				 * We ran off the end of the database. Insert a
70110910SRobert.Harris@Sun.COM 				 * new element at the end.
70210910SRobert.Harris@Sun.COM 				 */
70310910SRobert.Harris@Sun.COM 				newp = mntfs_copy(tempelemp);
70410910SRobert.Harris@Sun.COM 				vfs_mono_time(&newp->mnte_birth);
70510910SRobert.Harris@Sun.COM 				if (prevp) {
70610910SRobert.Harris@Sun.COM 					mntfs_insert_after(newp, prevp);
70710910SRobert.Harris@Sun.COM 				} else {
70810910SRobert.Harris@Sun.COM 					newp->mnte_next = NULL;
70910910SRobert.Harris@Sun.COM 					newp->mnte_prev = NULL;
71010910SRobert.Harris@Sun.COM 					ASSERT(*headpp == NULL);
71110910SRobert.Harris@Sun.COM 					*headpp = newp;
71210910SRobert.Harris@Sun.COM 				}
71310910SRobert.Harris@Sun.COM 				elemp = newp;
71410910SRobert.Harris@Sun.COM 			} else if (insert_before) {
71510910SRobert.Harris@Sun.COM 				/*
71610910SRobert.Harris@Sun.COM 				 * Insert a new element before the current one.
71710910SRobert.Harris@Sun.COM 				 */
71810910SRobert.Harris@Sun.COM 				newp = mntfs_copy(tempelemp);
71910910SRobert.Harris@Sun.COM 				vfs_mono_time(&newp->mnte_birth);
72010910SRobert.Harris@Sun.COM 				if (prevp) {
72110910SRobert.Harris@Sun.COM 					mntfs_insert_after(newp, prevp);
72210910SRobert.Harris@Sun.COM 				} else {
72310910SRobert.Harris@Sun.COM 					newp->mnte_next = elemp;
72410910SRobert.Harris@Sun.COM 					newp->mnte_prev = NULL;
72510910SRobert.Harris@Sun.COM 					elemp->mnte_prev = newp;
72610910SRobert.Harris@Sun.COM 					ASSERT(*headpp == elemp);
72710910SRobert.Harris@Sun.COM 					*headpp = newp;
72810910SRobert.Harris@Sun.COM 				}
72910910SRobert.Harris@Sun.COM 				elemp = newp;
73010910SRobert.Harris@Sun.COM 			} else if (!mntfs_is_same_element(elemp, tempelemp)) {
73110910SRobert.Harris@Sun.COM 				/*
73210910SRobert.Harris@Sun.COM 				 * The element corresponds to the vfs_t, but the
73310910SRobert.Harris@Sun.COM 				 * vfs_t has changed; it must have been
73410910SRobert.Harris@Sun.COM 				 * remounted. Kill the old element and insert a
73510910SRobert.Harris@Sun.COM 				 * new one after it.
73610910SRobert.Harris@Sun.COM 				 */
73710910SRobert.Harris@Sun.COM 				vfs_mono_time(&elemp->mnte_death);
73810910SRobert.Harris@Sun.COM 				newp = mntfs_copy(tempelemp);
73910910SRobert.Harris@Sun.COM 				vfs_mono_time(&newp->mnte_birth);
74010910SRobert.Harris@Sun.COM 				mntfs_insert_after(newp, elemp);
74110910SRobert.Harris@Sun.COM 				elemp = newp;
74210910SRobert.Harris@Sun.COM 			}
7430Sstevel@tonic-gate 
74410910SRobert.Harris@Sun.COM 			/* We've found the corresponding element. Hold it. */
74510910SRobert.Harris@Sun.COM 			DTRACE_PROBE1(incrementing, mntelem_t *, elemp);
74610910SRobert.Harris@Sun.COM 			elemp->mnte_refcnt++;
7470Sstevel@tonic-gate 
74810910SRobert.Harris@Sun.COM 			/*
74910910SRobert.Harris@Sun.COM 			 * Update the parameters used to initialise the
75010910SRobert.Harris@Sun.COM 			 * snapshot.
75110910SRobert.Harris@Sun.COM 			 */
75210910SRobert.Harris@Sun.COM 			nmnts++;
75311757SRobert.Harris@Sun.COM 			total_text_size += elemp->mnte_text_size;
75411757SRobert.Harris@Sun.COM 			if (!elemp->mnte_hidden)
75511757SRobert.Harris@Sun.COM 				normal_text_size += elemp->mnte_text_size;
75610910SRobert.Harris@Sun.COM 			if (!firstp)
75710910SRobert.Harris@Sun.COM 				firstp = elemp;
7580Sstevel@tonic-gate 
75910910SRobert.Harris@Sun.COM 			prevp = elemp;
76010910SRobert.Harris@Sun.COM 			elemp = elemp->mnte_next;
76110910SRobert.Harris@Sun.COM 		}
76210910SRobert.Harris@Sun.COM 
76310910SRobert.Harris@Sun.COM 		if (vfsp == lastvfsp)
76410910SRobert.Harris@Sun.COM 			break;
76510910SRobert.Harris@Sun.COM 	}
7660Sstevel@tonic-gate 
7670Sstevel@tonic-gate 	/*
76810910SRobert.Harris@Sun.COM 	 * Any remaining visible database elements that are still alive must be
76910910SRobert.Harris@Sun.COM 	 * killed now, because their corresponding vfs_ts must have been
77010910SRobert.Harris@Sun.COM 	 * unmounted.
7710Sstevel@tonic-gate 	 */
77210910SRobert.Harris@Sun.COM 	for (; elemp; elemp = elemp->mnte_next) {
77310910SRobert.Harris@Sun.COM 		if (MNTFS_ELEM_IS_ALIVE(elemp) &&
77410910SRobert.Harris@Sun.COM 		    (!elemp->mnte_hidden || show_hidden))
77510910SRobert.Harris@Sun.COM 			vfs_mono_time(&elemp->mnte_death);
7760Sstevel@tonic-gate 	}
7770Sstevel@tonic-gate 
77810910SRobert.Harris@Sun.COM 	/* Initialise the snapshot. */
77910910SRobert.Harris@Sun.COM 	vfs_mono_time(&snapp->mnts_time);
78010910SRobert.Harris@Sun.COM 	snapp->mnts_last_mtime = last_mtime;
78110910SRobert.Harris@Sun.COM 	snapp->mnts_first = snapp->mnts_next = firstp;
78210910SRobert.Harris@Sun.COM 	snapp->mnts_flags = show_hidden ? MNTS_SHOWHIDDEN : 0;
78310910SRobert.Harris@Sun.COM 	snapp->mnts_nmnts = nmnts;
78411757SRobert.Harris@Sun.COM 	snapp->mnts_text_size = total_text_size;
78510910SRobert.Harris@Sun.COM 	snapp->mnts_foffset = snapp->mnts_ieoffset = 0;
7860Sstevel@tonic-gate 
78711757SRobert.Harris@Sun.COM 	/*
78811757SRobert.Harris@Sun.COM 	 * Record /etc/mnttab's current size and mtime for possible future use
78911757SRobert.Harris@Sun.COM 	 * by mntgetattr().
79011757SRobert.Harris@Sun.COM 	 */
79111757SRobert.Harris@Sun.COM 	mnd->mnt_size = normal_text_size;
79211757SRobert.Harris@Sun.COM 	mnd->mnt_mtime = last_mtime;
79311757SRobert.Harris@Sun.COM 	if (show_hidden) {
79411757SRobert.Harris@Sun.COM 		mnd->mnt_hidden_size = total_text_size;
79511757SRobert.Harris@Sun.COM 		mnd->mnt_hidden_mtime = last_mtime;
79611757SRobert.Harris@Sun.COM 	}
79711757SRobert.Harris@Sun.COM 
79810910SRobert.Harris@Sun.COM 	/* Clean up. */
79910910SRobert.Harris@Sun.COM 	rw_exit(dblockp);
80010910SRobert.Harris@Sun.COM 	vfs_list_unlock();
80110910SRobert.Harris@Sun.COM 	if (dummyvfsp != NULL)
80210910SRobert.Harris@Sun.COM 		refstr_rele(dummyvfsp->vfs_mntpt);
80310910SRobert.Harris@Sun.COM 	kmem_free(tempelemp->mnte_text, entry_length);
80410910SRobert.Harris@Sun.COM 	kmem_free(tempelemp, sizeof (mntelem_t));
8050Sstevel@tonic-gate }
8060Sstevel@tonic-gate 
8070Sstevel@tonic-gate /*
8080Sstevel@tonic-gate  * Public function to convert vfs_mntopts into a string.
8090Sstevel@tonic-gate  * A buffer of sufficient size is allocated, which is returned via bufp,
8100Sstevel@tonic-gate  * and whose length is returned via lenp.
8110Sstevel@tonic-gate  */
8120Sstevel@tonic-gate void
mntfs_getmntopts(struct vfs * vfsp,char ** bufp,size_t * lenp)8130Sstevel@tonic-gate mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp)
8140Sstevel@tonic-gate {
8150Sstevel@tonic-gate 	size_t len;
8160Sstevel@tonic-gate 	char *buf;
8170Sstevel@tonic-gate 
8180Sstevel@tonic-gate 	vfs_list_read_lock();
8190Sstevel@tonic-gate 
8200Sstevel@tonic-gate 	len = mntfs_optsize(vfsp) + 1;
8210Sstevel@tonic-gate 	buf = kmem_alloc(len, KM_NOSLEEP);
8220Sstevel@tonic-gate 	if (buf == NULL) {
8230Sstevel@tonic-gate 		*bufp = NULL;
8240Sstevel@tonic-gate 		vfs_list_unlock();
8250Sstevel@tonic-gate 		return;
8260Sstevel@tonic-gate 	}
8270Sstevel@tonic-gate 	buf[len - 1] = '\0';
8280Sstevel@tonic-gate 	(void) mntfs_optprint(vfsp, buf);
8290Sstevel@tonic-gate 	ASSERT(buf[len - 1] == '\0');
8300Sstevel@tonic-gate 
8310Sstevel@tonic-gate 	vfs_list_unlock();
8320Sstevel@tonic-gate 	*bufp = buf;
8330Sstevel@tonic-gate 	*lenp = len;
8340Sstevel@tonic-gate }
8350Sstevel@tonic-gate 
8360Sstevel@tonic-gate /* ARGSUSED */
8370Sstevel@tonic-gate static int
mntopen(vnode_t ** vpp,int flag,cred_t * cr,caller_context_t * ct)8385331Samw mntopen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
8390Sstevel@tonic-gate {
8400Sstevel@tonic-gate 	vnode_t *vp = *vpp;
8410Sstevel@tonic-gate 	mntnode_t *nmnp;
8420Sstevel@tonic-gate 
8430Sstevel@tonic-gate 	/*
8440Sstevel@tonic-gate 	 * Not allowed to open for writing, return error.
8450Sstevel@tonic-gate 	 */
8460Sstevel@tonic-gate 	if (flag & FWRITE)
8470Sstevel@tonic-gate 		return (EPERM);
8480Sstevel@tonic-gate 	/*
8490Sstevel@tonic-gate 	 * Create a new mnt/vnode for each open, this will give us a handle to
8500Sstevel@tonic-gate 	 * hang the snapshot on.
8510Sstevel@tonic-gate 	 */
8520Sstevel@tonic-gate 	nmnp = mntgetnode(vp);
8530Sstevel@tonic-gate 
8540Sstevel@tonic-gate 	*vpp = MTOV(nmnp);
8550Sstevel@tonic-gate 	atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1);
8560Sstevel@tonic-gate 	VN_RELE(vp);
8570Sstevel@tonic-gate 	return (0);
8580Sstevel@tonic-gate }
8590Sstevel@tonic-gate 
8600Sstevel@tonic-gate /* ARGSUSED */
8610Sstevel@tonic-gate static int
mntclose(vnode_t * vp,int flag,int count,offset_t offset,cred_t * cr,caller_context_t * ct)8625331Samw mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
8635331Samw 	caller_context_t *ct)
8640Sstevel@tonic-gate {
8650Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
8660Sstevel@tonic-gate 
8670Sstevel@tonic-gate 	/* Clean up any locks or shares held by the current process */
8680Sstevel@tonic-gate 	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
8690Sstevel@tonic-gate 	cleanshares(vp, ttoproc(curthread)->p_pid);
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate 	if (count > 1)
8720Sstevel@tonic-gate 		return (0);
8730Sstevel@tonic-gate 	if (vp->v_count == 1) {
87410910SRobert.Harris@Sun.COM 		rw_enter(&mnp->mnt_contents, RW_WRITER);
87510910SRobert.Harris@Sun.COM 		mntfs_freesnap(mnp, &mnp->mnt_read);
87610910SRobert.Harris@Sun.COM 		mntfs_freesnap(mnp, &mnp->mnt_ioctl);
87710910SRobert.Harris@Sun.COM 		rw_exit(&mnp->mnt_contents);
8780Sstevel@tonic-gate 		atomic_add_32(&MTOD(mnp)->mnt_nopen, -1);
8790Sstevel@tonic-gate 	}
8800Sstevel@tonic-gate 	return (0);
8810Sstevel@tonic-gate }
8820Sstevel@tonic-gate 
8830Sstevel@tonic-gate /* ARGSUSED */
8840Sstevel@tonic-gate static int
mntread(vnode_t * vp,uio_t * uio,int ioflag,cred_t * cred,caller_context_t * ct)8850Sstevel@tonic-gate mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct)
8860Sstevel@tonic-gate {
88710910SRobert.Harris@Sun.COM 	mntnode_t *mnp = VTOM(vp);
888*13096SJordan.Vaughan@Sun.com 	zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
88910910SRobert.Harris@Sun.COM 	mntsnap_t *snapp = &mnp->mnt_read;
8900Sstevel@tonic-gate 	off_t off = uio->uio_offset;
8910Sstevel@tonic-gate 	size_t len = uio->uio_resid;
89210910SRobert.Harris@Sun.COM 	char *bufferp;
89310910SRobert.Harris@Sun.COM 	size_t available, copylen;
89410910SRobert.Harris@Sun.COM 	size_t written = 0;
89510910SRobert.Harris@Sun.COM 	mntelem_t *elemp;
89610910SRobert.Harris@Sun.COM 	krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
89710910SRobert.Harris@Sun.COM 	int error = 0;
89810910SRobert.Harris@Sun.COM 	off_t	ieoffset;
8990Sstevel@tonic-gate 
90010910SRobert.Harris@Sun.COM 	rw_enter(&mnp->mnt_contents, RW_WRITER);
90110910SRobert.Harris@Sun.COM 	if (snapp->mnts_nmnts == 0 || (off == (off_t)0))
90210910SRobert.Harris@Sun.COM 		mntfs_snapshot(mnp, snapp);
9030Sstevel@tonic-gate 
90410910SRobert.Harris@Sun.COM 	if ((size_t)(off + len) > snapp->mnts_text_size)
90510910SRobert.Harris@Sun.COM 		len = snapp->mnts_text_size - off;
90610910SRobert.Harris@Sun.COM 
90710910SRobert.Harris@Sun.COM 	if (off < 0 || len > snapp->mnts_text_size) {
9088004SViswanathan.Kannappan@Sun.COM 		rw_exit(&mnp->mnt_contents);
9090Sstevel@tonic-gate 		return (EFAULT);
9108004SViswanathan.Kannappan@Sun.COM 	}
9110Sstevel@tonic-gate 
9128004SViswanathan.Kannappan@Sun.COM 	if (len == 0) {
9138004SViswanathan.Kannappan@Sun.COM 		rw_exit(&mnp->mnt_contents);
9140Sstevel@tonic-gate 		return (0);
9158004SViswanathan.Kannappan@Sun.COM 	}
9160Sstevel@tonic-gate 
9170Sstevel@tonic-gate 	/*
91810910SRobert.Harris@Sun.COM 	 * For the file offset provided, locate the corresponding database
91910910SRobert.Harris@Sun.COM 	 * element and calculate the corresponding offset within its text. If
92010910SRobert.Harris@Sun.COM 	 * the file offset is the same as that reached during the last read(2)
92110910SRobert.Harris@Sun.COM 	 * then use the saved element and intra-element offset.
9220Sstevel@tonic-gate 	 */
92310910SRobert.Harris@Sun.COM 	rw_enter(dblockp, RW_READER);
92410910SRobert.Harris@Sun.COM 	if (off == 0 || (off == snapp->mnts_foffset)) {
92510910SRobert.Harris@Sun.COM 		elemp = snapp->mnts_next;
92610910SRobert.Harris@Sun.COM 		ieoffset = snapp->mnts_ieoffset;
92710910SRobert.Harris@Sun.COM 	} else {
92810910SRobert.Harris@Sun.COM 		off_t total_off;
92910910SRobert.Harris@Sun.COM 		/*
93010910SRobert.Harris@Sun.COM 		 * Find the element corresponding to the requested file offset
93110910SRobert.Harris@Sun.COM 		 * by walking through the database and summing the text sizes
93210910SRobert.Harris@Sun.COM 		 * of the individual elements. If the requested file offset is
93310910SRobert.Harris@Sun.COM 		 * greater than that reached on the last visit then we can start
93410910SRobert.Harris@Sun.COM 		 * at the last seen element; otherwise, we have to start at the
93510910SRobert.Harris@Sun.COM 		 * beginning.
93610910SRobert.Harris@Sun.COM 		 */
93710910SRobert.Harris@Sun.COM 		if (off > snapp->mnts_foffset) {
93810910SRobert.Harris@Sun.COM 			elemp = snapp->mnts_next;
93910910SRobert.Harris@Sun.COM 			total_off = snapp->mnts_foffset - snapp->mnts_ieoffset;
94010910SRobert.Harris@Sun.COM 		} else {
94110910SRobert.Harris@Sun.COM 			elemp = snapp->mnts_first;
94210910SRobert.Harris@Sun.COM 			total_off = 0;
94310910SRobert.Harris@Sun.COM 		}
94410910SRobert.Harris@Sun.COM 		while (off > total_off + elemp->mnte_text_size) {
94510910SRobert.Harris@Sun.COM 			total_off += elemp->mnte_text_size;
94610910SRobert.Harris@Sun.COM 			elemp = mntfs_get_next_elem(snapp, elemp);
94710910SRobert.Harris@Sun.COM 			ASSERT(elemp != NULL);
94810910SRobert.Harris@Sun.COM 		}
94910910SRobert.Harris@Sun.COM 		/* Calculate the intra-element offset. */
95010910SRobert.Harris@Sun.COM 		if (off > total_off)
95110910SRobert.Harris@Sun.COM 			ieoffset = off - total_off;
95210910SRobert.Harris@Sun.COM 		else
95310910SRobert.Harris@Sun.COM 			ieoffset = 0;
9540Sstevel@tonic-gate 	}
95510910SRobert.Harris@Sun.COM 
95610910SRobert.Harris@Sun.COM 	/*
95710910SRobert.Harris@Sun.COM 	 * Create a buffer and populate it with the text from successive
95810910SRobert.Harris@Sun.COM 	 * database elements until it is full.
95910910SRobert.Harris@Sun.COM 	 */
96010910SRobert.Harris@Sun.COM 	bufferp = kmem_alloc(len, KM_SLEEP);
96110910SRobert.Harris@Sun.COM 	while (written < len) {
96210910SRobert.Harris@Sun.COM 		available = elemp->mnte_text_size - ieoffset;
96310910SRobert.Harris@Sun.COM 		copylen = MIN(len - written, available);
96410910SRobert.Harris@Sun.COM 		bcopy(elemp->mnte_text + ieoffset, bufferp + written, copylen);
96510910SRobert.Harris@Sun.COM 		written += copylen;
96610910SRobert.Harris@Sun.COM 		if (copylen == available) {
96710910SRobert.Harris@Sun.COM 			elemp = mntfs_get_next_elem(snapp, elemp);
96810910SRobert.Harris@Sun.COM 			ASSERT(elemp != NULL || written == len);
96910910SRobert.Harris@Sun.COM 			ieoffset = 0;
97010910SRobert.Harris@Sun.COM 		} else {
97110910SRobert.Harris@Sun.COM 			ieoffset += copylen;
97210910SRobert.Harris@Sun.COM 		}
97310910SRobert.Harris@Sun.COM 	}
97410910SRobert.Harris@Sun.COM 	rw_exit(dblockp);
97510910SRobert.Harris@Sun.COM 
97610910SRobert.Harris@Sun.COM 	/*
97710910SRobert.Harris@Sun.COM 	 * Write the populated buffer, update the snapshot's state if
97810910SRobert.Harris@Sun.COM 	 * successful and then advertise our read.
97910910SRobert.Harris@Sun.COM 	 */
98010910SRobert.Harris@Sun.COM 	error = uiomove(bufferp, len, UIO_READ, uio);
98110910SRobert.Harris@Sun.COM 	if (error == 0) {
98210910SRobert.Harris@Sun.COM 		snapp->mnts_next = elemp;
98310910SRobert.Harris@Sun.COM 		snapp->mnts_foffset = off + len;
98410910SRobert.Harris@Sun.COM 		snapp->mnts_ieoffset = ieoffset;
98510910SRobert.Harris@Sun.COM 	}
9864863Spraks 	vfs_mnttab_readop();
9878004SViswanathan.Kannappan@Sun.COM 	rw_exit(&mnp->mnt_contents);
98810910SRobert.Harris@Sun.COM 
98910910SRobert.Harris@Sun.COM 	/* Clean up. */
99010910SRobert.Harris@Sun.COM 	kmem_free(bufferp, len);
9910Sstevel@tonic-gate 	return (error);
9920Sstevel@tonic-gate }
9930Sstevel@tonic-gate 
9940Sstevel@tonic-gate static int
mntgetattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * cr,caller_context_t * ct)9955331Samw mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
9965331Samw 	caller_context_t *ct)
9970Sstevel@tonic-gate {
99811757SRobert.Harris@Sun.COM 	int mask = vap->va_mask;
9990Sstevel@tonic-gate 	int error;
100011757SRobert.Harris@Sun.COM 	mntnode_t *mnp = VTOM(vp);
100111757SRobert.Harris@Sun.COM 	timespec_t mtime, old_mtime;
100211757SRobert.Harris@Sun.COM 	size_t size, old_size;
100311757SRobert.Harris@Sun.COM 	mntdata_t *mntdata = MTOD(VTOM(vp));
100411757SRobert.Harris@Sun.COM 	mntsnap_t *rsnapp, *isnapp;
10050Sstevel@tonic-gate 	extern timespec_t vfs_mnttab_ctime;
10060Sstevel@tonic-gate 
100711757SRobert.Harris@Sun.COM 
100811757SRobert.Harris@Sun.COM 	/* AT_MODE, AT_UID and AT_GID are derived from the underlying file. */
100911757SRobert.Harris@Sun.COM 	if (mask & AT_MODE|AT_UID|AT_GID) {
101011757SRobert.Harris@Sun.COM 		if (error = VOP_GETATTR(mnp->mnt_mountvp, vap, flags, cr, ct))
101111757SRobert.Harris@Sun.COM 			return (error);
101210910SRobert.Harris@Sun.COM 	}
10130Sstevel@tonic-gate 
10140Sstevel@tonic-gate 	/*
101511757SRobert.Harris@Sun.COM 	 * There are some minor subtleties in the determination of
101611757SRobert.Harris@Sun.COM 	 * /etc/mnttab's size and mtime. We wish to avoid any condition in
101711757SRobert.Harris@Sun.COM 	 * which, in the vicinity of a change to the in-kernel mnttab, we
101811757SRobert.Harris@Sun.COM 	 * return an old value for one but a new value for the other. We cannot
101911757SRobert.Harris@Sun.COM 	 * simply hold vfslist for the entire calculation because we might need
102011757SRobert.Harris@Sun.COM 	 * to call mntfs_snapshot(), which calls vfs_list_read_lock().
10210Sstevel@tonic-gate 	 */
102211757SRobert.Harris@Sun.COM 	if (mask & AT_SIZE|AT_NBLOCKS) {
102311757SRobert.Harris@Sun.COM 		rw_enter(&mnp->mnt_contents, RW_WRITER);
102411757SRobert.Harris@Sun.COM 
102511757SRobert.Harris@Sun.COM 		vfs_list_read_lock();
102611757SRobert.Harris@Sun.COM 		vfs_mnttab_modtime(&mtime);
102711757SRobert.Harris@Sun.COM 		if (mnp->mnt_flags & MNT_SHOWHIDDEN) {
102811757SRobert.Harris@Sun.COM 			old_mtime = mntdata->mnt_hidden_mtime;
102911757SRobert.Harris@Sun.COM 			old_size = mntdata->mnt_hidden_size;
103011757SRobert.Harris@Sun.COM 		} else {
103111757SRobert.Harris@Sun.COM 			old_mtime = mntdata->mnt_mtime;
103211757SRobert.Harris@Sun.COM 			old_size = mntdata->mnt_size;
103311757SRobert.Harris@Sun.COM 		}
103411757SRobert.Harris@Sun.COM 		vfs_list_unlock();
103511757SRobert.Harris@Sun.COM 
103611757SRobert.Harris@Sun.COM 		rsnapp = &mnp->mnt_read;
103711757SRobert.Harris@Sun.COM 		isnapp = &mnp->mnt_ioctl;
103811757SRobert.Harris@Sun.COM 		if (rsnapp->mnts_nmnts || isnapp->mnts_nmnts) {
103911757SRobert.Harris@Sun.COM 			/*
104011757SRobert.Harris@Sun.COM 			 * The mntnode already has at least one snapshot from
104111757SRobert.Harris@Sun.COM 			 * which to take the size; the user will understand from
104211757SRobert.Harris@Sun.COM 			 * mnttab(4) that the current size of the in-kernel
104311757SRobert.Harris@Sun.COM 			 * mnttab is irrelevant.
104411757SRobert.Harris@Sun.COM 			 */
104511757SRobert.Harris@Sun.COM 			size = rsnapp->mnts_nmnts ? rsnapp->mnts_text_size :
104611757SRobert.Harris@Sun.COM 			    isnapp->mnts_text_size;
104711757SRobert.Harris@Sun.COM 		} else if (mntfs_newest(&mtime, &old_mtime) == MNTFS_NEITHER) {
104811757SRobert.Harris@Sun.COM 			/*
104911757SRobert.Harris@Sun.COM 			 * There is no existing valid snapshot but the in-kernel
105011757SRobert.Harris@Sun.COM 			 * mnttab has not changed since the time that the last
105111757SRobert.Harris@Sun.COM 			 * one was generated. Use the old file size; note that
105211757SRobert.Harris@Sun.COM 			 * it is guaranteed to be consistent with mtime, which
105311757SRobert.Harris@Sun.COM 			 * may be returned to the user later.
105411757SRobert.Harris@Sun.COM 			 */
105511757SRobert.Harris@Sun.COM 			size = old_size;
105611757SRobert.Harris@Sun.COM 		} else {
105711757SRobert.Harris@Sun.COM 			/*
105811757SRobert.Harris@Sun.COM 			 * There is no snapshot and the in-kernel mnttab has
105911757SRobert.Harris@Sun.COM 			 * changed since the last one was created. We generate a
106011757SRobert.Harris@Sun.COM 			 * new snapshot which we use for not only the size but
106111757SRobert.Harris@Sun.COM 			 * also the mtime, thereby ensuring that the two are
106211757SRobert.Harris@Sun.COM 			 * consistent.
106311757SRobert.Harris@Sun.COM 			 */
106411757SRobert.Harris@Sun.COM 			mntfs_snapshot(mnp, rsnapp);
106511757SRobert.Harris@Sun.COM 			size = rsnapp->mnts_text_size;
106611757SRobert.Harris@Sun.COM 			mtime = rsnapp->mnts_last_mtime;
106711757SRobert.Harris@Sun.COM 			mntfs_freesnap(mnp, rsnapp);
106811757SRobert.Harris@Sun.COM 		}
106911757SRobert.Harris@Sun.COM 
107011757SRobert.Harris@Sun.COM 		rw_exit(&mnp->mnt_contents);
107111757SRobert.Harris@Sun.COM 	} else if (mask & AT_ATIME|AT_MTIME) {
107211757SRobert.Harris@Sun.COM 		vfs_list_read_lock();
107311757SRobert.Harris@Sun.COM 		vfs_mnttab_modtime(&mtime);
107411757SRobert.Harris@Sun.COM 		vfs_list_unlock();
107511757SRobert.Harris@Sun.COM 	}
107611757SRobert.Harris@Sun.COM 
107711757SRobert.Harris@Sun.COM 	/* Always look like a regular file. */
107811757SRobert.Harris@Sun.COM 	if (mask & AT_TYPE)
107911757SRobert.Harris@Sun.COM 		vap->va_type = VREG;
108011757SRobert.Harris@Sun.COM 	/* Mode should basically be read only. */
108111757SRobert.Harris@Sun.COM 	if (mask & AT_MODE)
108211757SRobert.Harris@Sun.COM 		vap->va_mode &= 07444;
108311757SRobert.Harris@Sun.COM 	if (mask & AT_FSID)
108411757SRobert.Harris@Sun.COM 		vap->va_fsid = vp->v_vfsp->vfs_dev;
108511757SRobert.Harris@Sun.COM 	/* Nodeid is always ROOTINO. */
108611757SRobert.Harris@Sun.COM 	if (mask & AT_NODEID)
108711757SRobert.Harris@Sun.COM 		vap->va_nodeid = (ino64_t)MNTROOTINO;
10880Sstevel@tonic-gate 	/*
10890Sstevel@tonic-gate 	 * Set nlink to the number of open vnodes for mnttab info
10900Sstevel@tonic-gate 	 * plus one for existing.
10910Sstevel@tonic-gate 	 */
109211757SRobert.Harris@Sun.COM 	if (mask & AT_NLINK)
109311757SRobert.Harris@Sun.COM 		vap->va_nlink = mntdata->mnt_nopen + 1;
109411757SRobert.Harris@Sun.COM 	if (mask & AT_SIZE)
109511757SRobert.Harris@Sun.COM 		vap->va_size = size;
109611757SRobert.Harris@Sun.COM 	if (mask & AT_ATIME)
109711757SRobert.Harris@Sun.COM 		vap->va_atime = mtime;
109811757SRobert.Harris@Sun.COM 	if (mask & AT_MTIME)
109911757SRobert.Harris@Sun.COM 		vap->va_mtime = mtime;
110011757SRobert.Harris@Sun.COM 	if (mask & AT_CTIME)
110111757SRobert.Harris@Sun.COM 		vap->va_ctime = vfs_mnttab_ctime;
110211757SRobert.Harris@Sun.COM 	if (mask & AT_RDEV)
110311757SRobert.Harris@Sun.COM 		vap->va_rdev = 0;
110411757SRobert.Harris@Sun.COM 	if (mask & AT_BLKSIZE)
110511757SRobert.Harris@Sun.COM 		vap->va_blksize = DEV_BSIZE;
110611757SRobert.Harris@Sun.COM 	if (mask & AT_NBLOCKS)
110711757SRobert.Harris@Sun.COM 		vap->va_nblocks = btod(size);
110811757SRobert.Harris@Sun.COM 	if (mask & AT_SEQ)
110911757SRobert.Harris@Sun.COM 		vap->va_seq = 0;
111011757SRobert.Harris@Sun.COM 
11110Sstevel@tonic-gate 	return (0);
11120Sstevel@tonic-gate }
11130Sstevel@tonic-gate 
11140Sstevel@tonic-gate static int
mntaccess(vnode_t * vp,int mode,int flags,cred_t * cr,caller_context_t * ct)11155331Samw mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr,
11165331Samw 	caller_context_t *ct)
11170Sstevel@tonic-gate {
11180Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
11190Sstevel@tonic-gate 
11200Sstevel@tonic-gate 	if (mode & (VWRITE|VEXEC))
11210Sstevel@tonic-gate 		return (EROFS);
11220Sstevel@tonic-gate 
11230Sstevel@tonic-gate 	/*
11240Sstevel@tonic-gate 	 * Do access check on the underlying directory vnode.
11250Sstevel@tonic-gate 	 */
11265331Samw 	return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr, ct));
11270Sstevel@tonic-gate }
11280Sstevel@tonic-gate 
11290Sstevel@tonic-gate 
11300Sstevel@tonic-gate /*
11310Sstevel@tonic-gate  * New /mntfs vnode required; allocate it and fill in most of the fields.
11320Sstevel@tonic-gate  */
11330Sstevel@tonic-gate static mntnode_t *
mntgetnode(vnode_t * dp)11340Sstevel@tonic-gate mntgetnode(vnode_t *dp)
11350Sstevel@tonic-gate {
11360Sstevel@tonic-gate 	mntnode_t *mnp;
11370Sstevel@tonic-gate 	vnode_t *vp;
11380Sstevel@tonic-gate 
11390Sstevel@tonic-gate 	mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP);
11400Sstevel@tonic-gate 	mnp->mnt_vnode = vn_alloc(KM_SLEEP);
11410Sstevel@tonic-gate 	mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp;
11428004SViswanathan.Kannappan@Sun.COM 	rw_init(&mnp->mnt_contents, NULL, RW_DEFAULT, NULL);
11430Sstevel@tonic-gate 	vp = MTOV(mnp);
11440Sstevel@tonic-gate 	vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
11450Sstevel@tonic-gate 	vn_setops(vp, mntvnodeops);
11460Sstevel@tonic-gate 	vp->v_vfsp = dp->v_vfsp;
11470Sstevel@tonic-gate 	vp->v_type = VREG;
11480Sstevel@tonic-gate 	vp->v_data = (caddr_t)mnp;
11490Sstevel@tonic-gate 
11500Sstevel@tonic-gate 	return (mnp);
11510Sstevel@tonic-gate }
11520Sstevel@tonic-gate 
11530Sstevel@tonic-gate /*
11540Sstevel@tonic-gate  * Free the storage obtained from mntgetnode().
11550Sstevel@tonic-gate  */
11560Sstevel@tonic-gate static void
mntfreenode(mntnode_t * mnp)11570Sstevel@tonic-gate mntfreenode(mntnode_t *mnp)
11580Sstevel@tonic-gate {
11590Sstevel@tonic-gate 	vnode_t *vp = MTOV(mnp);
11600Sstevel@tonic-gate 
11618004SViswanathan.Kannappan@Sun.COM 	rw_destroy(&mnp->mnt_contents);
11620Sstevel@tonic-gate 	vn_invalid(vp);
11630Sstevel@tonic-gate 	vn_free(vp);
11640Sstevel@tonic-gate 	kmem_free(mnp, sizeof (*mnp));
11650Sstevel@tonic-gate }
11660Sstevel@tonic-gate 
11670Sstevel@tonic-gate 
11680Sstevel@tonic-gate /* ARGSUSED */
11690Sstevel@tonic-gate static int
mntfsync(vnode_t * vp,int syncflag,cred_t * cr,caller_context_t * ct)11705331Samw mntfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
11710Sstevel@tonic-gate {
11720Sstevel@tonic-gate 	return (0);
11730Sstevel@tonic-gate }
11740Sstevel@tonic-gate 
11750Sstevel@tonic-gate /* ARGSUSED */
11760Sstevel@tonic-gate static void
mntinactive(vnode_t * vp,cred_t * cr,caller_context_t * ct)11775331Samw mntinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
11780Sstevel@tonic-gate {
11790Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
11800Sstevel@tonic-gate 
11810Sstevel@tonic-gate 	mntfreenode(mnp);
11820Sstevel@tonic-gate }
11830Sstevel@tonic-gate 
118410910SRobert.Harris@Sun.COM /*
118511757SRobert.Harris@Sun.COM  * lseek(2) is supported only to rewind the file by resetmnttab(3C). Rewinding
118611757SRobert.Harris@Sun.COM  * has a special meaning for /etc/mnttab: it forces mntfs to refresh the
118711757SRobert.Harris@Sun.COM  * snapshot at the next ioctl().
118810910SRobert.Harris@Sun.COM  *
118911757SRobert.Harris@Sun.COM  * mnttab(4) explains that "the snapshot...is taken any time a read(2) is
119011757SRobert.Harris@Sun.COM  * performed at offset 0". We therefore ignore the read snapshot here.
119110910SRobert.Harris@Sun.COM  */
11920Sstevel@tonic-gate /* ARGSUSED */
11930Sstevel@tonic-gate static int
mntseek(vnode_t * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)119410910SRobert.Harris@Sun.COM mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
11950Sstevel@tonic-gate {
11968004SViswanathan.Kannappan@Sun.COM 	mntnode_t *mnp = VTOM(vp);
11978004SViswanathan.Kannappan@Sun.COM 
11988004SViswanathan.Kannappan@Sun.COM 	if (*noffp == 0) {
11998004SViswanathan.Kannappan@Sun.COM 		rw_enter(&mnp->mnt_contents, RW_WRITER);
120010910SRobert.Harris@Sun.COM 		mnp->mnt_ioctl.mnts_flags |= MNTS_REWIND;
12018004SViswanathan.Kannappan@Sun.COM 		rw_exit(&mnp->mnt_contents);
12028004SViswanathan.Kannappan@Sun.COM 	}
12030Sstevel@tonic-gate 
12040Sstevel@tonic-gate 	return (0);
12050Sstevel@tonic-gate }
12060Sstevel@tonic-gate 
12070Sstevel@tonic-gate /*
12080Sstevel@tonic-gate  * Return the answer requested to poll().
12090Sstevel@tonic-gate  * POLLRDBAND will return when the mtime of the mnttab
12100Sstevel@tonic-gate  * information is newer than the latest one read for this open.
12110Sstevel@tonic-gate  */
12120Sstevel@tonic-gate /* ARGSUSED */
12130Sstevel@tonic-gate static int
mntpoll(vnode_t * vp,short ev,int any,short * revp,pollhead_t ** phpp,caller_context_t * ct)12145331Samw mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp,
12155331Samw 	caller_context_t *ct)
12160Sstevel@tonic-gate {
12170Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
121810910SRobert.Harris@Sun.COM 	mntsnap_t *snapp;
12190Sstevel@tonic-gate 
12208004SViswanathan.Kannappan@Sun.COM 	rw_enter(&mnp->mnt_contents, RW_READER);
122110910SRobert.Harris@Sun.COM 	if (mntfs_newest(&mnp->mnt_ioctl.mnts_last_mtime,
122210910SRobert.Harris@Sun.COM 	    &mnp->mnt_read.mnts_last_mtime) == MNTFS_FIRST)
122310910SRobert.Harris@Sun.COM 		snapp = &mnp->mnt_ioctl;
122410910SRobert.Harris@Sun.COM 	else
122510910SRobert.Harris@Sun.COM 		snapp = &mnp->mnt_read;
12260Sstevel@tonic-gate 
12270Sstevel@tonic-gate 	*revp = 0;
12280Sstevel@tonic-gate 	*phpp = (pollhead_t *)NULL;
12290Sstevel@tonic-gate 	if (ev & POLLIN)
12300Sstevel@tonic-gate 		*revp |= POLLIN;
12310Sstevel@tonic-gate 
12320Sstevel@tonic-gate 	if (ev & POLLRDNORM)
12330Sstevel@tonic-gate 		*revp |= POLLRDNORM;
12340Sstevel@tonic-gate 
12350Sstevel@tonic-gate 	if (ev & POLLRDBAND) {
123610910SRobert.Harris@Sun.COM 		vfs_mnttab_poll(&snapp->mnts_last_mtime, phpp);
12370Sstevel@tonic-gate 		if (*phpp == (pollhead_t *)NULL)
12380Sstevel@tonic-gate 			*revp |= POLLRDBAND;
12390Sstevel@tonic-gate 	}
12408004SViswanathan.Kannappan@Sun.COM 	rw_exit(&mnp->mnt_contents);
12418004SViswanathan.Kannappan@Sun.COM 
12420Sstevel@tonic-gate 	if (*revp || *phpp != NULL || any) {
12430Sstevel@tonic-gate 		return (0);
12440Sstevel@tonic-gate 	}
12450Sstevel@tonic-gate 	/*
12460Sstevel@tonic-gate 	 * If someone is polling an unsupported poll events (e.g.
12470Sstevel@tonic-gate 	 * POLLOUT, POLLPRI, etc.), just return POLLERR revents.
12480Sstevel@tonic-gate 	 * That way we will ensure that we don't return a 0
12490Sstevel@tonic-gate 	 * revents with a NULL pollhead pointer.
12500Sstevel@tonic-gate 	 */
12510Sstevel@tonic-gate 	*revp = POLLERR;
12520Sstevel@tonic-gate 	return (0);
12530Sstevel@tonic-gate }
125410910SRobert.Harris@Sun.COM 
125510910SRobert.Harris@Sun.COM /*
125610910SRobert.Harris@Sun.COM  * mntfs_same_word() returns 1 if two words are the same in the context of
125710910SRobert.Harris@Sun.COM  * MNTIOC_GETMNTANY and 0 otherwise.
125810910SRobert.Harris@Sun.COM  *
125910910SRobert.Harris@Sun.COM  * worda is a memory address that lies somewhere in the buffer bufa; it cannot
126010910SRobert.Harris@Sun.COM  * be NULL since this is used to indicate to getmntany(3C) that the user does
126110910SRobert.Harris@Sun.COM  * not wish to match a particular field. The text to which worda points is
126210910SRobert.Harris@Sun.COM  * supplied by the user; if it is not null-terminated then it cannot match.
126310910SRobert.Harris@Sun.COM  *
126410910SRobert.Harris@Sun.COM  * Buffer bufb contains a line from /etc/mnttab, in which the fields are
126510910SRobert.Harris@Sun.COM  * delimited by tab or new-line characters. offb is the offset of the second
126610910SRobert.Harris@Sun.COM  * word within this buffer.
126710910SRobert.Harris@Sun.COM  *
126810910SRobert.Harris@Sun.COM  * mntfs_same_word() returns 1 if the words are the same and 0 otherwise.
126910910SRobert.Harris@Sun.COM  */
127010910SRobert.Harris@Sun.COM int
mntfs_same_word(char * worda,char * bufa,size_t sizea,off_t offb,char * bufb,size_t sizeb)127110910SRobert.Harris@Sun.COM mntfs_same_word(char *worda, char *bufa, size_t sizea, off_t offb, char *bufb,
127210910SRobert.Harris@Sun.COM     size_t sizeb)
127310910SRobert.Harris@Sun.COM {
127410910SRobert.Harris@Sun.COM 	char *wordb = bufb + offb;
127510910SRobert.Harris@Sun.COM 	int bytes_remaining;
127610910SRobert.Harris@Sun.COM 
127710910SRobert.Harris@Sun.COM 	ASSERT(worda != NULL);
127810910SRobert.Harris@Sun.COM 
127910910SRobert.Harris@Sun.COM 	bytes_remaining = MIN(((bufa + sizea) - worda),
128010910SRobert.Harris@Sun.COM 	    ((bufb + sizeb) - wordb));
128110910SRobert.Harris@Sun.COM 	while (bytes_remaining && *worda == *wordb) {
128210910SRobert.Harris@Sun.COM 		worda++;
128310910SRobert.Harris@Sun.COM 		wordb++;
128410910SRobert.Harris@Sun.COM 		bytes_remaining--;
128510910SRobert.Harris@Sun.COM 	}
128610910SRobert.Harris@Sun.COM 	if (bytes_remaining &&
128710910SRobert.Harris@Sun.COM 	    *worda == '\0' && (*wordb == '\t' || *wordb == '\n'))
128810910SRobert.Harris@Sun.COM 		return (1);
128910910SRobert.Harris@Sun.COM 	else
129010910SRobert.Harris@Sun.COM 		return (0);
129110910SRobert.Harris@Sun.COM }
129210910SRobert.Harris@Sun.COM 
129310910SRobert.Harris@Sun.COM /*
129410910SRobert.Harris@Sun.COM  * mntfs_special_info_string() returns which, if either, of VBLK or VCHR
129510910SRobert.Harris@Sun.COM  * corresponds to a supplied path. If the path is a special device then the
129610910SRobert.Harris@Sun.COM  * function optionally sets the major and minor numbers.
129710910SRobert.Harris@Sun.COM  */
129810910SRobert.Harris@Sun.COM vtype_t
mntfs_special_info_string(char * path,uint_t * major,uint_t * minor,cred_t * cr)129910910SRobert.Harris@Sun.COM mntfs_special_info_string(char *path, uint_t *major, uint_t *minor, cred_t *cr)
130010910SRobert.Harris@Sun.COM {
130110910SRobert.Harris@Sun.COM 	vattr_t vattr;
130210910SRobert.Harris@Sun.COM 	vnode_t *vp;
130310910SRobert.Harris@Sun.COM 	vtype_t type;
130410910SRobert.Harris@Sun.COM 	int error;
130510910SRobert.Harris@Sun.COM 
130610910SRobert.Harris@Sun.COM 	if (path == NULL || *path != '/' ||
130710910SRobert.Harris@Sun.COM 	    lookupnameat(path + 1, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir))
130810910SRobert.Harris@Sun.COM 		return (0);
130910910SRobert.Harris@Sun.COM 
131010910SRobert.Harris@Sun.COM 	vattr.va_mask = AT_TYPE | AT_RDEV;
131110910SRobert.Harris@Sun.COM 	error = VOP_GETATTR(vp, &vattr, ATTR_REAL, cr, NULL);
131210910SRobert.Harris@Sun.COM 	VN_RELE(vp);
131310910SRobert.Harris@Sun.COM 
131410910SRobert.Harris@Sun.COM 	if (error == 0 && ((type = vattr.va_type) == VBLK || type == VCHR)) {
131510910SRobert.Harris@Sun.COM 		if (major && minor) {
131610910SRobert.Harris@Sun.COM 			*major = getmajor(vattr.va_rdev);
131710910SRobert.Harris@Sun.COM 			*minor = getminor(vattr.va_rdev);
131810910SRobert.Harris@Sun.COM 		}
131910910SRobert.Harris@Sun.COM 		return (type);
132010910SRobert.Harris@Sun.COM 	} else {
132110910SRobert.Harris@Sun.COM 		return (0);
132210910SRobert.Harris@Sun.COM 	}
132310910SRobert.Harris@Sun.COM }
132410910SRobert.Harris@Sun.COM 
132510910SRobert.Harris@Sun.COM /*
132610910SRobert.Harris@Sun.COM  * mntfs_special_info_element() extracts the name of the mounted resource
132710910SRobert.Harris@Sun.COM  * for a given element and copies it into a null-terminated string, which it
132810910SRobert.Harris@Sun.COM  * then passes to mntfs_special_info_string().
132910910SRobert.Harris@Sun.COM  */
133010910SRobert.Harris@Sun.COM vtype_t
mntfs_special_info_element(mntelem_t * elemp,cred_t * cr)133110910SRobert.Harris@Sun.COM mntfs_special_info_element(mntelem_t *elemp, cred_t *cr)
133210910SRobert.Harris@Sun.COM {
133310910SRobert.Harris@Sun.COM 	char *newpath;
133410910SRobert.Harris@Sun.COM 	vtype_t type;
133510910SRobert.Harris@Sun.COM 
133610910SRobert.Harris@Sun.COM 	newpath = kmem_alloc(elemp->mnte_text_size, KM_SLEEP);
133710910SRobert.Harris@Sun.COM 	bcopy(elemp->mnte_text, newpath, (off_t)(elemp->mnte_tab.mnt_mountp));
133810910SRobert.Harris@Sun.COM 	*(newpath + (off_t)elemp->mnte_tab.mnt_mountp - 1) = '\0';
133910910SRobert.Harris@Sun.COM 	type = mntfs_special_info_string(newpath, NULL, NULL, cr);
134010910SRobert.Harris@Sun.COM 	kmem_free(newpath, elemp->mnte_text_size);
134110910SRobert.Harris@Sun.COM 
134210910SRobert.Harris@Sun.COM 	return (type);
134310910SRobert.Harris@Sun.COM }
134410910SRobert.Harris@Sun.COM 
/*
 * Translate a user address that points into the user buffer
 * [ubufp, ubufp + bufsize) into the address at the same offset within the
 * kernel buffer that starts at kbufp. Any address that does not lie within
 * the user buffer yields NULL.
 */
char *
mntfs_import_addr(char *uaddr, char *ubufp, char *kbufp, size_t bufsize)
{
	if (uaddr >= ubufp && uaddr < ubufp + bufsize)
		return (kbufp + (uaddr - ubufp));

	return (NULL);
}
135910910SRobert.Harris@Sun.COM 
/*
 * 32-bit counterparts of struct extmnttab, struct mnttab and struct
 * mntentbuf. They exist so that STRUCT_DECL(9F) and friends can be used in
 * mntfs_copyout_elem() and mntioctl() on behalf of 32-bit callers. Fields
 * that those functions access as pointers are declared as uint32_t here.
 */
#ifdef _SYSCALL32_IMPL
typedef struct extmnttab32 {
	uint32_t	mnt_special;	/* 32-bit user address */
	uint32_t	mnt_mountp;	/* 32-bit user address */
	uint32_t	mnt_fstype;	/* 32-bit user address */
	uint32_t	mnt_mntopts;	/* 32-bit user address */
	uint32_t	mnt_time;	/* 32-bit user address */
	uint_t		mnt_major;
	uint_t		mnt_minor;
} extmnttab32_t;

/* The same as extmnttab32 but without the device numbers. */
typedef struct mnttab32 {
	uint32_t	mnt_special;
	uint32_t	mnt_mountp;
	uint32_t	mnt_fstype;
	uint32_t	mnt_mntopts;
	uint32_t	mnt_time;
} mnttab32_t;

struct mntentbuf32 {
	uint32_t	mbuf_emp;	/* 32-bit user address */
	uint_t		mbuf_bufsize;
	uint32_t	mbuf_buf;	/* 32-bit user address */
};
#endif
138910910SRobert.Harris@Sun.COM 
139010910SRobert.Harris@Sun.COM /*
139110910SRobert.Harris@Sun.COM  * mntfs_copyout_element() is common code for the MNTIOC_GETMNTENT,
139210910SRobert.Harris@Sun.COM  * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY ioctls. Having identifed the
139310910SRobert.Harris@Sun.COM  * database element desired by the user, this function copies out the text and
139410910SRobert.Harris@Sun.COM  * the pointers to the relevant userland addresses. It returns 0 on success
139510910SRobert.Harris@Sun.COM  * and non-zero otherwise.
139610910SRobert.Harris@Sun.COM  */
139710910SRobert.Harris@Sun.COM int
mntfs_copyout_elem(mntelem_t * elemp,struct extmnttab * uemp,char * ubufp,int cmd,int datamodel)139810910SRobert.Harris@Sun.COM mntfs_copyout_elem(mntelem_t *elemp, struct extmnttab *uemp,
139910910SRobert.Harris@Sun.COM     char *ubufp, int cmd, int datamodel)
140010910SRobert.Harris@Sun.COM {
140110910SRobert.Harris@Sun.COM 		STRUCT_DECL(extmnttab, ktab);
140210910SRobert.Harris@Sun.COM 		char *dbbufp = elemp->mnte_text;
140310910SRobert.Harris@Sun.COM 		size_t dbbufsize = elemp->mnte_text_size;
140410910SRobert.Harris@Sun.COM 		struct extmnttab *dbtabp = &elemp->mnte_tab;
140510910SRobert.Harris@Sun.COM 		size_t ssize;
140610910SRobert.Harris@Sun.COM 		char *kbufp;
140710910SRobert.Harris@Sun.COM 		int error = 0;
140810910SRobert.Harris@Sun.COM 
140910910SRobert.Harris@Sun.COM 
141010910SRobert.Harris@Sun.COM 		/*
141110910SRobert.Harris@Sun.COM 		 * We create a struct extmnttab within the kernel of the size
141210910SRobert.Harris@Sun.COM 		 * determined by the user's data model. We then populate its
141310910SRobert.Harris@Sun.COM 		 * fields by combining the start address of the text buffer
141410910SRobert.Harris@Sun.COM 		 * supplied by the user, ubufp, with the offsets stored for
141510910SRobert.Harris@Sun.COM 		 * this database element within dbtabp, a pointer to a struct
141610910SRobert.Harris@Sun.COM 		 * extmnttab.
141710910SRobert.Harris@Sun.COM 		 *
141810910SRobert.Harris@Sun.COM 		 * Note that if the corresponding field is "-" this signifies
141910910SRobert.Harris@Sun.COM 		 * no real content, and we set the address to NULL. This does
142010910SRobert.Harris@Sun.COM 		 * not apply to mnt_time.
142110910SRobert.Harris@Sun.COM 		 */
142210910SRobert.Harris@Sun.COM 		STRUCT_INIT(ktab, datamodel);
142310910SRobert.Harris@Sun.COM 		STRUCT_FSETP(ktab, mnt_special,
142410910SRobert.Harris@Sun.COM 		    MNTFS_REAL_FIELD(dbbufp) ? ubufp : NULL);
142510910SRobert.Harris@Sun.COM 		STRUCT_FSETP(ktab, mnt_mountp,
142610910SRobert.Harris@Sun.COM 		    MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mountp) ?
142710910SRobert.Harris@Sun.COM 		    ubufp + (off_t)dbtabp->mnt_mountp : NULL);
142810910SRobert.Harris@Sun.COM 		STRUCT_FSETP(ktab, mnt_fstype,
142910910SRobert.Harris@Sun.COM 		    MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_fstype) ?
143010910SRobert.Harris@Sun.COM 		    ubufp + (off_t)dbtabp->mnt_fstype : NULL);
143110910SRobert.Harris@Sun.COM 		STRUCT_FSETP(ktab, mnt_mntopts,
143210910SRobert.Harris@Sun.COM 		    MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mntopts) ?
143310910SRobert.Harris@Sun.COM 		    ubufp + (off_t)dbtabp->mnt_mntopts : NULL);
143410910SRobert.Harris@Sun.COM 		STRUCT_FSETP(ktab, mnt_time,
143510910SRobert.Harris@Sun.COM 		    ubufp + (off_t)dbtabp->mnt_time);
143610910SRobert.Harris@Sun.COM 		if (cmd == MNTIOC_GETEXTMNTENT) {
143710910SRobert.Harris@Sun.COM 			STRUCT_FSETP(ktab, mnt_major, dbtabp->mnt_major);
143810910SRobert.Harris@Sun.COM 			STRUCT_FSETP(ktab, mnt_minor, dbtabp->mnt_minor);
143910910SRobert.Harris@Sun.COM 			ssize = SIZEOF_STRUCT(extmnttab, datamodel);
144010910SRobert.Harris@Sun.COM 		} else {
144110910SRobert.Harris@Sun.COM 			ssize = SIZEOF_STRUCT(mnttab, datamodel);
144210910SRobert.Harris@Sun.COM 		}
144310910SRobert.Harris@Sun.COM 		if (copyout(STRUCT_BUF(ktab), uemp, ssize))
144410910SRobert.Harris@Sun.COM 			return (EFAULT);
144510910SRobert.Harris@Sun.COM 
144610910SRobert.Harris@Sun.COM 		/*
144710910SRobert.Harris@Sun.COM 		 * We create a text buffer in the kernel into which we copy the
144810910SRobert.Harris@Sun.COM 		 * /etc/mnttab entry for this element. We change the tab and
144910910SRobert.Harris@Sun.COM 		 * new-line delimiters to null bytes before copying out the
145010910SRobert.Harris@Sun.COM 		 * buffer.
145110910SRobert.Harris@Sun.COM 		 */
145210910SRobert.Harris@Sun.COM 		kbufp = kmem_alloc(dbbufsize, KM_SLEEP);
145310910SRobert.Harris@Sun.COM 		bcopy(elemp->mnte_text, kbufp, dbbufsize);
145410910SRobert.Harris@Sun.COM 		*(kbufp + (off_t)dbtabp->mnt_mountp - 1) =
145510910SRobert.Harris@Sun.COM 		    *(kbufp + (off_t)dbtabp->mnt_fstype - 1) =
145610910SRobert.Harris@Sun.COM 		    *(kbufp + (off_t)dbtabp->mnt_mntopts - 1) =
145710910SRobert.Harris@Sun.COM 		    *(kbufp + (off_t)dbtabp->mnt_time - 1) =
145810910SRobert.Harris@Sun.COM 		    *(kbufp + dbbufsize - 1) = '\0';
145910910SRobert.Harris@Sun.COM 		if (copyout(kbufp, ubufp, dbbufsize))
146010910SRobert.Harris@Sun.COM 			error = EFAULT;
146110910SRobert.Harris@Sun.COM 
146210910SRobert.Harris@Sun.COM 		kmem_free(kbufp, dbbufsize);
146310910SRobert.Harris@Sun.COM 		return (error);
146410910SRobert.Harris@Sun.COM }
146510910SRobert.Harris@Sun.COM 
14660Sstevel@tonic-gate /* ARGSUSED */
14670Sstevel@tonic-gate static int
mntioctl(struct vnode * vp,int cmd,intptr_t arg,int flag,cred_t * cr,int * rvalp,caller_context_t * ct)146810910SRobert.Harris@Sun.COM mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
146910910SRobert.Harris@Sun.COM     int *rvalp, caller_context_t *ct)
14700Sstevel@tonic-gate {
14710Sstevel@tonic-gate 	uint_t *up = (uint_t *)arg;
14720Sstevel@tonic-gate 	mntnode_t *mnp = VTOM(vp);
147310910SRobert.Harris@Sun.COM 	mntsnap_t *snapp = &mnp->mnt_ioctl;
147410910SRobert.Harris@Sun.COM 	int error = 0;
1475*13096SJordan.Vaughan@Sun.com 	zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone;
147610910SRobert.Harris@Sun.COM 	krwlock_t *dblockp = &zonep->zone_mntfs_db_lock;
147710910SRobert.Harris@Sun.COM 	model_t datamodel = flag & DATAMODEL_MASK;
14780Sstevel@tonic-gate 
14790Sstevel@tonic-gate 	switch (cmd) {
14800Sstevel@tonic-gate 
148110910SRobert.Harris@Sun.COM 	case MNTIOC_NMNTS:  		/* get no. of mounted resources */
148210910SRobert.Harris@Sun.COM 	{
148310910SRobert.Harris@Sun.COM 		rw_enter(&mnp->mnt_contents, RW_READER);
148410910SRobert.Harris@Sun.COM 		if (snapp->mnts_nmnts == 0 ||
148510910SRobert.Harris@Sun.COM 		    (snapp->mnts_flags & MNTS_REWIND)) {
14868004SViswanathan.Kannappan@Sun.COM 			if (!rw_tryupgrade(&mnp->mnt_contents)) {
14878004SViswanathan.Kannappan@Sun.COM 				rw_exit(&mnp->mnt_contents);
14888004SViswanathan.Kannappan@Sun.COM 				rw_enter(&mnp->mnt_contents, RW_WRITER);
14898004SViswanathan.Kannappan@Sun.COM 			}
149010910SRobert.Harris@Sun.COM 			if (snapp->mnts_nmnts == 0 ||
149110910SRobert.Harris@Sun.COM 			    (snapp->mnts_flags & MNTS_REWIND))
149210910SRobert.Harris@Sun.COM 				mntfs_snapshot(mnp, snapp);
14930Sstevel@tonic-gate 		}
149410910SRobert.Harris@Sun.COM 		rw_exit(&mnp->mnt_contents);
149510910SRobert.Harris@Sun.COM 
149610910SRobert.Harris@Sun.COM 		if (suword32(up, snapp->mnts_nmnts) != 0)
14970Sstevel@tonic-gate 			error = EFAULT;
14980Sstevel@tonic-gate 		break;
14990Sstevel@tonic-gate 	}
15000Sstevel@tonic-gate 
150110910SRobert.Harris@Sun.COM 	case MNTIOC_GETDEVLIST:  	/* get mounted device major/minor nos */
150210910SRobert.Harris@Sun.COM 	{
150310910SRobert.Harris@Sun.COM 		size_t len;
15040Sstevel@tonic-gate 		uint_t *devlist;
150510910SRobert.Harris@Sun.COM 		mntelem_t *elemp;
150610910SRobert.Harris@Sun.COM 		int i = 0;
15070Sstevel@tonic-gate 
150810910SRobert.Harris@Sun.COM 		rw_enter(&mnp->mnt_contents, RW_READER);
150910910SRobert.Harris@Sun.COM 		if (snapp->mnts_nmnts == 0 ||
151010910SRobert.Harris@Sun.COM 		    (snapp->mnts_flags & MNTS_REWIND)) {
15118004SViswanathan.Kannappan@Sun.COM 			if (!rw_tryupgrade(&mnp->mnt_contents)) {
15128004SViswanathan.Kannappan@Sun.COM 				rw_exit(&mnp->mnt_contents);
15138004SViswanathan.Kannappan@Sun.COM 				rw_enter(&mnp->mnt_contents, RW_WRITER);
15148004SViswanathan.Kannappan@Sun.COM 			}
151510910SRobert.Harris@Sun.COM 			if (snapp->mnts_nmnts == 0 ||
151610910SRobert.Harris@Sun.COM 			    (snapp->mnts_flags & MNTS_REWIND))
151710910SRobert.Harris@Sun.COM 				mntfs_snapshot(mnp, snapp);
15188004SViswanathan.Kannappan@Sun.COM 			rw_downgrade(&mnp->mnt_contents);
15190Sstevel@tonic-gate 		}
15200Sstevel@tonic-gate 
152110910SRobert.Harris@Sun.COM 		/* Create a local buffer to hold the device numbers. */
152210910SRobert.Harris@Sun.COM 		len = 2 * snapp->mnts_nmnts * sizeof (uint_t);
15230Sstevel@tonic-gate 		devlist = kmem_alloc(len, KM_SLEEP);
15240Sstevel@tonic-gate 
152510910SRobert.Harris@Sun.COM 		/*
152610910SRobert.Harris@Sun.COM 		 * Walk the database elements for this snapshot and add their
152710910SRobert.Harris@Sun.COM 		 * major and minor numbers.
152810910SRobert.Harris@Sun.COM 		 */
152910910SRobert.Harris@Sun.COM 		rw_enter(dblockp, RW_READER);
153010910SRobert.Harris@Sun.COM 		for (elemp = snapp->mnts_first; elemp;
153110910SRobert.Harris@Sun.COM 		    elemp = mntfs_get_next_elem(snapp, elemp)) {
153210910SRobert.Harris@Sun.COM 				devlist[2 * i] = elemp->mnte_tab.mnt_major;
153310910SRobert.Harris@Sun.COM 				devlist[2 * i + 1] = elemp->mnte_tab.mnt_minor;
153410910SRobert.Harris@Sun.COM 				i++;
153510910SRobert.Harris@Sun.COM 		}
153610910SRobert.Harris@Sun.COM 		rw_exit(dblockp);
153710910SRobert.Harris@Sun.COM 		ASSERT(i == snapp->mnts_nmnts);
153810910SRobert.Harris@Sun.COM 		rw_exit(&mnp->mnt_contents);
15390Sstevel@tonic-gate 
154010910SRobert.Harris@Sun.COM 		error = xcopyout(devlist, up, len);
15410Sstevel@tonic-gate 		kmem_free(devlist, len);
15420Sstevel@tonic-gate 		break;
15430Sstevel@tonic-gate 	}
15440Sstevel@tonic-gate 
15450Sstevel@tonic-gate 	case MNTIOC_SETTAG:		/* set tag on mounted file system */
15460Sstevel@tonic-gate 	case MNTIOC_CLRTAG:		/* clear tag on mounted file system */
15470Sstevel@tonic-gate 	{
15480Sstevel@tonic-gate 		struct mnttagdesc *dp = (struct mnttagdesc *)arg;
15490Sstevel@tonic-gate 		STRUCT_DECL(mnttagdesc, tagdesc);
15500Sstevel@tonic-gate 		char *cptr;
15510Sstevel@tonic-gate 		uint32_t major, minor;
15520Sstevel@tonic-gate 		char tagbuf[MAX_MNTOPT_TAG];
15530Sstevel@tonic-gate 		char *pbuf;
15540Sstevel@tonic-gate 		size_t len;
15550Sstevel@tonic-gate 		uint_t start = 0;
15560Sstevel@tonic-gate 		mntdata_t *mntdata = MTOD(mnp);
1557*13096SJordan.Vaughan@Sun.com 		zone_t *zone = mntdata->mnt_zone_ref.zref_zone;
15580Sstevel@tonic-gate 
15590Sstevel@tonic-gate 		STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK);
15600Sstevel@tonic-gate 		if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) {
15610Sstevel@tonic-gate 			error = EFAULT;
15620Sstevel@tonic-gate 			break;
15630Sstevel@tonic-gate 		}
15640Sstevel@tonic-gate 		pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
15650Sstevel@tonic-gate 		if (zone != global_zone) {
15660Sstevel@tonic-gate 			(void) strcpy(pbuf, zone->zone_rootpath);
15670Sstevel@tonic-gate 			/* truncate "/" and nul */
15680Sstevel@tonic-gate 			start = zone->zone_rootpathlen - 2;
15690Sstevel@tonic-gate 			ASSERT(pbuf[start] == '/');
15700Sstevel@tonic-gate 		}
15710Sstevel@tonic-gate 		cptr = STRUCT_FGETP(tagdesc, mtd_mntpt);
15720Sstevel@tonic-gate 		error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len);
15730Sstevel@tonic-gate 		if (error) {
15740Sstevel@tonic-gate 			kmem_free(pbuf, MAXPATHLEN);
15750Sstevel@tonic-gate 			break;
15760Sstevel@tonic-gate 		}
15770Sstevel@tonic-gate 		if (start != 0 && pbuf[start] != '/') {
15780Sstevel@tonic-gate 			kmem_free(pbuf, MAXPATHLEN);
15790Sstevel@tonic-gate 			error = EINVAL;
15800Sstevel@tonic-gate 			break;
15810Sstevel@tonic-gate 		}
15820Sstevel@tonic-gate 		cptr = STRUCT_FGETP(tagdesc, mtd_tag);
15830Sstevel@tonic-gate 		if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) {
15840Sstevel@tonic-gate 			kmem_free(pbuf, MAXPATHLEN);
15850Sstevel@tonic-gate 			break;
15860Sstevel@tonic-gate 		}
15870Sstevel@tonic-gate 		major = STRUCT_FGET(tagdesc, mtd_major);
15880Sstevel@tonic-gate 		minor = STRUCT_FGET(tagdesc, mtd_minor);
15890Sstevel@tonic-gate 		if (cmd == MNTIOC_SETTAG)
15900Sstevel@tonic-gate 			error = vfs_settag(major, minor, pbuf, tagbuf, cr);
15910Sstevel@tonic-gate 		else
15920Sstevel@tonic-gate 			error = vfs_clrtag(major, minor, pbuf, tagbuf, cr);
15930Sstevel@tonic-gate 		kmem_free(pbuf, MAXPATHLEN);
15940Sstevel@tonic-gate 		break;
15950Sstevel@tonic-gate 	}
15960Sstevel@tonic-gate 
15970Sstevel@tonic-gate 	case MNTIOC_SHOWHIDDEN:
15980Sstevel@tonic-gate 	{
159911757SRobert.Harris@Sun.COM 		rw_enter(&mnp->mnt_contents, RW_WRITER);
16000Sstevel@tonic-gate 		mnp->mnt_flags |= MNT_SHOWHIDDEN;
160111757SRobert.Harris@Sun.COM 		rw_exit(&mnp->mnt_contents);
16020Sstevel@tonic-gate 		break;
16030Sstevel@tonic-gate 	}
16040Sstevel@tonic-gate 
160510910SRobert.Harris@Sun.COM 	case MNTIOC_GETMNTANY:
16060Sstevel@tonic-gate 	{
160710910SRobert.Harris@Sun.COM 		STRUCT_DECL(mntentbuf, embuf);	/* Our copy of user's embuf */
160810910SRobert.Harris@Sun.COM 		STRUCT_DECL(extmnttab, ktab);	/* Out copy of user's emp */
160910910SRobert.Harris@Sun.COM 		struct extmnttab *uemp;		/* uaddr of user's emp */
161010910SRobert.Harris@Sun.COM 		char *ubufp;			/* uaddr of user's text buf */
161110910SRobert.Harris@Sun.COM 		size_t ubufsize;		/* size of the above */
161210910SRobert.Harris@Sun.COM 		struct extmnttab preftab;	/* our version of user's emp */
161310910SRobert.Harris@Sun.COM 		char *prefbuf;			/* our copy of user's text */
161410910SRobert.Harris@Sun.COM 		mntelem_t *elemp;		/* a database element */
161510910SRobert.Harris@Sun.COM 		struct extmnttab *dbtabp;	/* element's extmnttab */
161610910SRobert.Harris@Sun.COM 		char *dbbufp;			/* element's text buf */
161710910SRobert.Harris@Sun.COM 		size_t dbbufsize;		/* size of the above */
161810910SRobert.Harris@Sun.COM 		vtype_t type;			/* type, if any, of special */
16190Sstevel@tonic-gate 
162010910SRobert.Harris@Sun.COM 
162110910SRobert.Harris@Sun.COM 		/*
162210910SRobert.Harris@Sun.COM 		 * embuf is a struct mntentbuf within the kernel. We copy into
162310910SRobert.Harris@Sun.COM 		 * it the struct mntentbuf supplied by the user.
162410910SRobert.Harris@Sun.COM 		 */
162510910SRobert.Harris@Sun.COM 		STRUCT_INIT(embuf, datamodel);
162610910SRobert.Harris@Sun.COM 		if (copyin((void *) arg, STRUCT_BUF(embuf),
162710910SRobert.Harris@Sun.COM 		    STRUCT_SIZE(embuf))) {
162810910SRobert.Harris@Sun.COM 			error = EFAULT;
162910910SRobert.Harris@Sun.COM 			break;
16308004SViswanathan.Kannappan@Sun.COM 		}
163110910SRobert.Harris@Sun.COM 		uemp = STRUCT_FGETP(embuf, mbuf_emp);
163210910SRobert.Harris@Sun.COM 		ubufp = STRUCT_FGETP(embuf, mbuf_buf);
163310910SRobert.Harris@Sun.COM 		ubufsize = STRUCT_FGET(embuf, mbuf_bufsize);
163410910SRobert.Harris@Sun.COM 
163510910SRobert.Harris@Sun.COM 		/*
163610910SRobert.Harris@Sun.COM 		 * Check that the text buffer offered by the user is the
163710910SRobert.Harris@Sun.COM 		 * agreed size.
163810910SRobert.Harris@Sun.COM 		 */
163910910SRobert.Harris@Sun.COM 		if (ubufsize != MNT_LINE_MAX) {
164010910SRobert.Harris@Sun.COM 			error = EINVAL;
164110910SRobert.Harris@Sun.COM 			break;
16420Sstevel@tonic-gate 		}
164310910SRobert.Harris@Sun.COM 
164410910SRobert.Harris@Sun.COM 		/* Copy the user-supplied entry into a local buffer. */
164510910SRobert.Harris@Sun.COM 		prefbuf = kmem_alloc(MNT_LINE_MAX, KM_SLEEP);
164610910SRobert.Harris@Sun.COM 		if (copyin(ubufp, prefbuf, MNT_LINE_MAX)) {
164710910SRobert.Harris@Sun.COM 			kmem_free(prefbuf, MNT_LINE_MAX);
164810910SRobert.Harris@Sun.COM 			error = EFAULT;
164910910SRobert.Harris@Sun.COM 			break;
165010910SRobert.Harris@Sun.COM 		}
165110910SRobert.Harris@Sun.COM 
165210910SRobert.Harris@Sun.COM 		/* Ensure that any string within it is null-terminated. */
165310910SRobert.Harris@Sun.COM 		*(prefbuf + MNT_LINE_MAX - 1) = 0;
165410910SRobert.Harris@Sun.COM 
165510910SRobert.Harris@Sun.COM 		/* Copy in the user-supplied mpref */
165610910SRobert.Harris@Sun.COM 		STRUCT_INIT(ktab, datamodel);
165710910SRobert.Harris@Sun.COM 		if (copyin(uemp, STRUCT_BUF(ktab),
165810910SRobert.Harris@Sun.COM 		    SIZEOF_STRUCT(mnttab, datamodel))) {
165910910SRobert.Harris@Sun.COM 			kmem_free(prefbuf, MNT_LINE_MAX);
166010910SRobert.Harris@Sun.COM 			error = EFAULT;
166110910SRobert.Harris@Sun.COM 			break;
16620Sstevel@tonic-gate 		}
16630Sstevel@tonic-gate 
166410910SRobert.Harris@Sun.COM 		/*
166510910SRobert.Harris@Sun.COM 		 * Copy the members of the user's pref struct into a local
166610910SRobert.Harris@Sun.COM 		 * struct. The pointers need to be offset and verified to
166710910SRobert.Harris@Sun.COM 		 * ensure that they lie within the bounds of the buffer.
166810910SRobert.Harris@Sun.COM 		 */
166910910SRobert.Harris@Sun.COM 		preftab.mnt_special = mntfs_import_addr(STRUCT_FGETP(ktab,
167010910SRobert.Harris@Sun.COM 		    mnt_special), ubufp, prefbuf, MNT_LINE_MAX);
167110910SRobert.Harris@Sun.COM 		preftab.mnt_mountp = mntfs_import_addr(STRUCT_FGETP(ktab,
167210910SRobert.Harris@Sun.COM 		    mnt_mountp), ubufp, prefbuf, MNT_LINE_MAX);
167310910SRobert.Harris@Sun.COM 		preftab.mnt_fstype = mntfs_import_addr(STRUCT_FGETP(ktab,
167410910SRobert.Harris@Sun.COM 		    mnt_fstype), ubufp, prefbuf, MNT_LINE_MAX);
167510910SRobert.Harris@Sun.COM 		preftab.mnt_mntopts = mntfs_import_addr(STRUCT_FGETP(ktab,
167610910SRobert.Harris@Sun.COM 		    mnt_mntopts), ubufp, prefbuf, MNT_LINE_MAX);
167710910SRobert.Harris@Sun.COM 		preftab.mnt_time = mntfs_import_addr(STRUCT_FGETP(ktab,
167810910SRobert.Harris@Sun.COM 		    mnt_time), ubufp, prefbuf, MNT_LINE_MAX);
167910910SRobert.Harris@Sun.COM 
168010910SRobert.Harris@Sun.COM 		/*
168110910SRobert.Harris@Sun.COM 		 * If the user specifies a mounted resource that is a special
168210910SRobert.Harris@Sun.COM 		 * device then we capture its mode and major and minor numbers;
168311757SRobert.Harris@Sun.COM 		 * cf. the block comment below.
168410910SRobert.Harris@Sun.COM 		 */
168510910SRobert.Harris@Sun.COM 		type = mntfs_special_info_string(preftab.mnt_special,
168610910SRobert.Harris@Sun.COM 		    &preftab.mnt_major, &preftab.mnt_minor, cr);
168710910SRobert.Harris@Sun.COM 
168810910SRobert.Harris@Sun.COM 		rw_enter(&mnp->mnt_contents, RW_WRITER);
168910910SRobert.Harris@Sun.COM 		if (snapp->mnts_nmnts == 0 ||
169010910SRobert.Harris@Sun.COM 		    (snapp->mnts_flags & MNTS_REWIND))
169110910SRobert.Harris@Sun.COM 			mntfs_snapshot(mnp, snapp);
16920Sstevel@tonic-gate 
169310910SRobert.Harris@Sun.COM 		/*
169410910SRobert.Harris@Sun.COM 		 * This is the core functionality that implements getmntany().
169510910SRobert.Harris@Sun.COM 		 * We walk through the mntfs database until we find an element
169610910SRobert.Harris@Sun.COM 		 * matching the user's preferences that are contained in
169710910SRobert.Harris@Sun.COM 		 * preftab. Typically, this means checking that the text
169810910SRobert.Harris@Sun.COM 		 * matches. However, the mounted resource is special: if the
169910910SRobert.Harris@Sun.COM 		 * user is looking for a special device then we must find a
170010910SRobert.Harris@Sun.COM 		 * database element with the same major and minor numbers and
170110910SRobert.Harris@Sun.COM 		 * the same type, i.e. VBLK or VCHR. The type is not recorded
170210910SRobert.Harris@Sun.COM 		 * in the element because it cannot be inferred from the vfs_t.
170310910SRobert.Harris@Sun.COM 		 * We therefore check the type of suitable candidates via
170410910SRobert.Harris@Sun.COM 		 * mntfs_special_info_element(); since this calls into the
170510910SRobert.Harris@Sun.COM 		 * underlying file system we make sure to drop the database lock
170610910SRobert.Harris@Sun.COM 		 * first.
170710910SRobert.Harris@Sun.COM 		 */
170810910SRobert.Harris@Sun.COM 		elemp = snapp->mnts_next;
170910910SRobert.Harris@Sun.COM 		rw_enter(dblockp, RW_READER);
171010910SRobert.Harris@Sun.COM 		for (;;) {
171110910SRobert.Harris@Sun.COM 			for (; elemp; elemp = mntfs_get_next_elem(snapp,
171210910SRobert.Harris@Sun.COM 			    elemp)) {
171310910SRobert.Harris@Sun.COM 				dbtabp = &elemp->mnte_tab;
171410910SRobert.Harris@Sun.COM 				dbbufp = elemp->mnte_text;
171510910SRobert.Harris@Sun.COM 				dbbufsize = elemp->mnte_text_size;
171610910SRobert.Harris@Sun.COM 
171710910SRobert.Harris@Sun.COM 				if (((type &&
171810910SRobert.Harris@Sun.COM 				    dbtabp->mnt_major == preftab.mnt_major &&
171910910SRobert.Harris@Sun.COM 				    dbtabp->mnt_minor == preftab.mnt_minor &&
172010910SRobert.Harris@Sun.COM 				    MNTFS_REAL_FIELD(dbbufp)) ||
172110910SRobert.Harris@Sun.COM 				    (!type && (!preftab.mnt_special ||
172210910SRobert.Harris@Sun.COM 				    mntfs_same_word(preftab.mnt_special,
172310910SRobert.Harris@Sun.COM 				    prefbuf, MNT_LINE_MAX, (off_t)0, dbbufp,
172410910SRobert.Harris@Sun.COM 				    dbbufsize)))) &&
172510910SRobert.Harris@Sun.COM 
172610910SRobert.Harris@Sun.COM 				    (!preftab.mnt_mountp || mntfs_same_word(
172710910SRobert.Harris@Sun.COM 				    preftab.mnt_mountp, prefbuf, MNT_LINE_MAX,
172810910SRobert.Harris@Sun.COM 				    (off_t)dbtabp->mnt_mountp, dbbufp,
172910910SRobert.Harris@Sun.COM 				    dbbufsize)) &&
173010910SRobert.Harris@Sun.COM 
173110910SRobert.Harris@Sun.COM 				    (!preftab.mnt_fstype || mntfs_same_word(
173210910SRobert.Harris@Sun.COM 				    preftab.mnt_fstype, prefbuf, MNT_LINE_MAX,
173310910SRobert.Harris@Sun.COM 				    (off_t)dbtabp->mnt_fstype, dbbufp,
173410910SRobert.Harris@Sun.COM 				    dbbufsize)) &&
173510910SRobert.Harris@Sun.COM 
173610910SRobert.Harris@Sun.COM 				    (!preftab.mnt_mntopts || mntfs_same_word(
173710910SRobert.Harris@Sun.COM 				    preftab.mnt_mntopts, prefbuf, MNT_LINE_MAX,
173810910SRobert.Harris@Sun.COM 				    (off_t)dbtabp->mnt_mntopts, dbbufp,
173910910SRobert.Harris@Sun.COM 				    dbbufsize)) &&
174010910SRobert.Harris@Sun.COM 
174110910SRobert.Harris@Sun.COM 				    (!preftab.mnt_time || mntfs_same_word(
174210910SRobert.Harris@Sun.COM 				    preftab.mnt_time, prefbuf, MNT_LINE_MAX,
174310910SRobert.Harris@Sun.COM 				    (off_t)dbtabp->mnt_time, dbbufp,
174410910SRobert.Harris@Sun.COM 				    dbbufsize)))
174510910SRobert.Harris@Sun.COM 					break;
174610910SRobert.Harris@Sun.COM 			}
174710910SRobert.Harris@Sun.COM 			rw_exit(dblockp);
174810910SRobert.Harris@Sun.COM 
174910910SRobert.Harris@Sun.COM 			if (elemp == NULL || type == 0 ||
175010910SRobert.Harris@Sun.COM 			    type == mntfs_special_info_element(elemp, cr))
175110910SRobert.Harris@Sun.COM 				break;
175210910SRobert.Harris@Sun.COM 
175310910SRobert.Harris@Sun.COM 			rw_enter(dblockp, RW_READER);
175410910SRobert.Harris@Sun.COM 			elemp = mntfs_get_next_elem(snapp, elemp);
17558004SViswanathan.Kannappan@Sun.COM 		}
17560Sstevel@tonic-gate 
175710910SRobert.Harris@Sun.COM 		kmem_free(prefbuf, MNT_LINE_MAX);
175810910SRobert.Harris@Sun.COM 
175910910SRobert.Harris@Sun.COM 		/* If we failed to find a match then return EOF. */
176010910SRobert.Harris@Sun.COM 		if (elemp == NULL) {
176110910SRobert.Harris@Sun.COM 			rw_exit(&mnp->mnt_contents);
176210910SRobert.Harris@Sun.COM 			*rvalp = MNTFS_EOF;
176310910SRobert.Harris@Sun.COM 			break;
176410910SRobert.Harris@Sun.COM 		}
176510910SRobert.Harris@Sun.COM 
176610910SRobert.Harris@Sun.COM 		/*
176710910SRobert.Harris@Sun.COM 		 * Check that the text buffer offered by the user will be large
176810910SRobert.Harris@Sun.COM 		 * enough to accommodate the text for this entry.
176910910SRobert.Harris@Sun.COM 		 */
177010910SRobert.Harris@Sun.COM 		if (elemp->mnte_text_size > MNT_LINE_MAX) {
177110910SRobert.Harris@Sun.COM 			rw_exit(&mnp->mnt_contents);
177210910SRobert.Harris@Sun.COM 			*rvalp = MNTFS_TOOLONG;
177310910SRobert.Harris@Sun.COM 			break;
177410910SRobert.Harris@Sun.COM 		}
177510910SRobert.Harris@Sun.COM 
177610910SRobert.Harris@Sun.COM 		/*
177710910SRobert.Harris@Sun.COM 		 * Populate the user's struct mnttab and text buffer using the
177810910SRobert.Harris@Sun.COM 		 * element's contents.
177910910SRobert.Harris@Sun.COM 		 */
178010910SRobert.Harris@Sun.COM 		if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) {
178110910SRobert.Harris@Sun.COM 			error = EFAULT;
178210910SRobert.Harris@Sun.COM 		} else {
178310910SRobert.Harris@Sun.COM 			rw_enter(dblockp, RW_READER);
178410910SRobert.Harris@Sun.COM 			elemp = mntfs_get_next_elem(snapp, elemp);
178510910SRobert.Harris@Sun.COM 			rw_exit(dblockp);
178610910SRobert.Harris@Sun.COM 			snapp->mnts_next = elemp;
178710910SRobert.Harris@Sun.COM 		}
178810910SRobert.Harris@Sun.COM 		rw_exit(&mnp->mnt_contents);
178910910SRobert.Harris@Sun.COM 		break;
179010910SRobert.Harris@Sun.COM 	}
179110910SRobert.Harris@Sun.COM 
179210910SRobert.Harris@Sun.COM 	case MNTIOC_GETMNTENT:
179310910SRobert.Harris@Sun.COM 	case MNTIOC_GETEXTMNTENT:
179410910SRobert.Harris@Sun.COM 	{
179510910SRobert.Harris@Sun.COM 		STRUCT_DECL(mntentbuf, embuf);	/* Our copy of user's embuf */
179610910SRobert.Harris@Sun.COM 		struct extmnttab *uemp;		/* uaddr of user's emp */
179710910SRobert.Harris@Sun.COM 		char *ubufp;			/* uaddr of user's text buf */
179810910SRobert.Harris@Sun.COM 		size_t ubufsize;		/* size of the above */
179910910SRobert.Harris@Sun.COM 		mntelem_t *elemp;		/* a database element */
180010910SRobert.Harris@Sun.COM 
180110910SRobert.Harris@Sun.COM 
180210910SRobert.Harris@Sun.COM 		rw_enter(&mnp->mnt_contents, RW_WRITER);
180310910SRobert.Harris@Sun.COM 		if (snapp->mnts_nmnts == 0 ||
180410910SRobert.Harris@Sun.COM 		    (snapp->mnts_flags & MNTS_REWIND))
180510910SRobert.Harris@Sun.COM 			mntfs_snapshot(mnp, snapp);
180610910SRobert.Harris@Sun.COM 		if ((elemp = snapp->mnts_next) == NULL) {
180710910SRobert.Harris@Sun.COM 			rw_exit(&mnp->mnt_contents);
180810910SRobert.Harris@Sun.COM 			*rvalp = MNTFS_EOF;
180910910SRobert.Harris@Sun.COM 			break;
181010910SRobert.Harris@Sun.COM 		}
181110910SRobert.Harris@Sun.COM 
181210910SRobert.Harris@Sun.COM 		/*
181310910SRobert.Harris@Sun.COM 		 * embuf is a struct mntentbuf within the kernel. We copy into
181410910SRobert.Harris@Sun.COM 		 * it the struct mntentbuf supplied by the user.
181510910SRobert.Harris@Sun.COM 		 */
181610910SRobert.Harris@Sun.COM 		STRUCT_INIT(embuf, datamodel);
181710910SRobert.Harris@Sun.COM 		if (copyin((void *) arg, STRUCT_BUF(embuf),
181810910SRobert.Harris@Sun.COM 		    STRUCT_SIZE(embuf))) {
181910910SRobert.Harris@Sun.COM 			rw_exit(&mnp->mnt_contents);
182010910SRobert.Harris@Sun.COM 			error = EFAULT;
182110910SRobert.Harris@Sun.COM 			break;
182210910SRobert.Harris@Sun.COM 		}
182310910SRobert.Harris@Sun.COM 		uemp = STRUCT_FGETP(embuf, mbuf_emp);
182410910SRobert.Harris@Sun.COM 		ubufp = STRUCT_FGETP(embuf, mbuf_buf);
182510910SRobert.Harris@Sun.COM 		ubufsize = STRUCT_FGET(embuf, mbuf_bufsize);
182610910SRobert.Harris@Sun.COM 
182710910SRobert.Harris@Sun.COM 		/*
182810910SRobert.Harris@Sun.COM 		 * Check that the text buffer offered by the user will be large
182910910SRobert.Harris@Sun.COM 		 * enough to accommodate the text for this entry.
183010910SRobert.Harris@Sun.COM 		 */
183110910SRobert.Harris@Sun.COM 		if (elemp->mnte_text_size > ubufsize) {
183210910SRobert.Harris@Sun.COM 			rw_exit(&mnp->mnt_contents);
183310910SRobert.Harris@Sun.COM 			*rvalp = MNTFS_TOOLONG;
183410910SRobert.Harris@Sun.COM 			break;
183510910SRobert.Harris@Sun.COM 		}
183610910SRobert.Harris@Sun.COM 
183710910SRobert.Harris@Sun.COM 		/*
183810910SRobert.Harris@Sun.COM 		 * Populate the user's struct mnttab and text buffer using the
183910910SRobert.Harris@Sun.COM 		 * element's contents.
184010910SRobert.Harris@Sun.COM 		 */
184110910SRobert.Harris@Sun.COM 		if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) {
184210910SRobert.Harris@Sun.COM 			error = EFAULT;
184310910SRobert.Harris@Sun.COM 		} else {
184410910SRobert.Harris@Sun.COM 			rw_enter(dblockp, RW_READER);
184510910SRobert.Harris@Sun.COM 			elemp = mntfs_get_next_elem(snapp, elemp);
184610910SRobert.Harris@Sun.COM 			rw_exit(dblockp);
184710910SRobert.Harris@Sun.COM 			snapp->mnts_next = elemp;
184810910SRobert.Harris@Sun.COM 		}
184910910SRobert.Harris@Sun.COM 		rw_exit(&mnp->mnt_contents);
18500Sstevel@tonic-gate 		break;
18510Sstevel@tonic-gate 	}
18520Sstevel@tonic-gate 
18530Sstevel@tonic-gate 	default:
18540Sstevel@tonic-gate 		error = EINVAL;
18550Sstevel@tonic-gate 		break;
18560Sstevel@tonic-gate 	}
18570Sstevel@tonic-gate 
18580Sstevel@tonic-gate 	return (error);
18590Sstevel@tonic-gate }
18600Sstevel@tonic-gate 
18610Sstevel@tonic-gate /*
186211757SRobert.Harris@Sun.COM  * mntfs provides a new vnode for each open(2). Two vnodes will represent the
186311757SRobert.Harris@Sun.COM  * same instance of /etc/mnttab if they share the same (zone-specific) vfs.
186411757SRobert.Harris@Sun.COM  */
186511757SRobert.Harris@Sun.COM /* ARGSUSED */
186611757SRobert.Harris@Sun.COM int
mntcmp(vnode_t * vp1,vnode_t * vp2,caller_context_t * ct)186711757SRobert.Harris@Sun.COM mntcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
186811757SRobert.Harris@Sun.COM {
186911757SRobert.Harris@Sun.COM 	return (vp1 != NULL && vp2 != NULL && vp1->v_vfsp == vp2->v_vfsp);
187011757SRobert.Harris@Sun.COM }
187111757SRobert.Harris@Sun.COM 
187211757SRobert.Harris@Sun.COM /*
18730Sstevel@tonic-gate  * /mntfs vnode operations vector
18740Sstevel@tonic-gate  */
18750Sstevel@tonic-gate const fs_operation_def_t mnt_vnodeops_template[] = {
18763898Srsb 	VOPNAME_OPEN,		{ .vop_open = mntopen },
18773898Srsb 	VOPNAME_CLOSE,		{ .vop_close = mntclose },
18783898Srsb 	VOPNAME_READ,		{ .vop_read = mntread },
18793898Srsb 	VOPNAME_IOCTL,		{ .vop_ioctl = mntioctl },
18803898Srsb 	VOPNAME_GETATTR,	{ .vop_getattr = mntgetattr },
18813898Srsb 	VOPNAME_ACCESS,		{ .vop_access = mntaccess },
18823898Srsb 	VOPNAME_FSYNC,		{ .vop_fsync = mntfsync },
18833898Srsb 	VOPNAME_INACTIVE,	{ .vop_inactive = mntinactive },
18843898Srsb 	VOPNAME_SEEK,		{ .vop_seek = mntseek },
18853898Srsb 	VOPNAME_POLL,		{ .vop_poll = mntpoll },
188611757SRobert.Harris@Sun.COM 	VOPNAME_CMP,		{ .vop_cmp = mntcmp },
18873898Srsb 	VOPNAME_DISPOSE,	{ .error = fs_error },
18883898Srsb 	VOPNAME_SHRLOCK,	{ .error = fs_error },
18893898Srsb 	NULL,			NULL
18900Sstevel@tonic-gate };
1891