10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 53898Srsb * Common Development and Distribution License (the "License"). 63898Srsb * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*12999Slori.alt@oracle.com * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 230Sstevel@tonic-gate */ 240Sstevel@tonic-gate 250Sstevel@tonic-gate #include <sys/file.h> 260Sstevel@tonic-gate #include <sys/stat.h> 270Sstevel@tonic-gate #include <sys/atomic.h> 280Sstevel@tonic-gate #include <sys/mntio.h> 290Sstevel@tonic-gate #include <sys/mnttab.h> 300Sstevel@tonic-gate #include <sys/mount.h> 310Sstevel@tonic-gate #include <sys/sunddi.h> 320Sstevel@tonic-gate #include <sys/sysmacros.h> 330Sstevel@tonic-gate #include <sys/systm.h> 340Sstevel@tonic-gate #include <sys/vfs.h> 353898Srsb #include <sys/vfs_opreg.h> 360Sstevel@tonic-gate #include <sys/fs/mntdata.h> 370Sstevel@tonic-gate #include <fs/fs_subr.h> 380Sstevel@tonic-gate #include <sys/vmsystm.h> 390Sstevel@tonic-gate #include <vm/seg_vn.h> 4010910SRobert.Harris@Sun.COM #include <sys/time.h> 4110910SRobert.Harris@Sun.COM #include <sys/ksynch.h> 4210910SRobert.Harris@Sun.COM #include <sys/sdt.h> 430Sstevel@tonic-gate 440Sstevel@tonic-gate #define MNTROOTINO 2 450Sstevel@tonic-gate 460Sstevel@tonic-gate static mntnode_t *mntgetnode(vnode_t *); 470Sstevel@tonic-gate 480Sstevel@tonic-gate vnodeops_t *mntvnodeops; 494863Spraks extern void vfs_mnttab_readop(void); 500Sstevel@tonic-gate 510Sstevel@tonic-gate /* 520Sstevel@tonic-gate * Design of kernel mnttab accounting. 530Sstevel@tonic-gate * 5410910SRobert.Harris@Sun.COM * mntfs provides two methods of reading the in-kernel mnttab, i.e. the state of 5510910SRobert.Harris@Sun.COM * the mounted resources: the read-only file /etc/mnttab, and a collection of 5610910SRobert.Harris@Sun.COM * ioctl() commands. Most of these interfaces are public and are described in 5710910SRobert.Harris@Sun.COM * mnttab(4). Three private ioctl() commands, MNTIOC_GETMNTENT, 5810910SRobert.Harris@Sun.COM * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY, provide for the getmntent(3C) 5910910SRobert.Harris@Sun.COM * family of functions, allowing them to support white space in mount names. 600Sstevel@tonic-gate * 6110910SRobert.Harris@Sun.COM * A significant feature of mntfs is that it provides a file descriptor with a 6210910SRobert.Harris@Sun.COM * snapshot once it begins to consume mnttab data. Thus, as the process 6310910SRobert.Harris@Sun.COM * continues to consume data, its view of the in-kernel mnttab does not change 6410910SRobert.Harris@Sun.COM * even if resources are mounted or unmounted. The intent is to ensure that 6510910SRobert.Harris@Sun.COM * processes are guaranteed to read self-consistent data even as the system 6610910SRobert.Harris@Sun.COM * changes. 6710910SRobert.Harris@Sun.COM * 6810910SRobert.Harris@Sun.COM * The snapshot is implemented by a "database", unique to each zone, that 6910910SRobert.Harris@Sun.COM * comprises a linked list of mntelem_ts. The database is identified by 7010910SRobert.Harris@Sun.COM * zone_mntfs_db and is protected by zone_mntfs_db_lock. Each element contains 7110910SRobert.Harris@Sun.COM * the text entry in /etc/mnttab for a mounted resource, i.e. a vfs_t, and is 7210910SRobert.Harris@Sun.COM * marked with its time of "birth", i.e. creation. An element is "killed", and 7310910SRobert.Harris@Sun.COM * marked with its time of death, when it is found to be out of date, e.g. when 7410910SRobert.Harris@Sun.COM * the corresponding resource has been unmounted. 750Sstevel@tonic-gate * 7610910SRobert.Harris@Sun.COM * When a process performs the first read() or ioctl() for a file descriptor for 7710910SRobert.Harris@Sun.COM * /etc/mnttab, the database is updated by a call to mntfs_snapshot() to ensure 7810910SRobert.Harris@Sun.COM * that an element exists for each currently mounted resource. Following this, 7910910SRobert.Harris@Sun.COM * the current time is written into a snapshot structure, a mntsnap_t, embedded 8010910SRobert.Harris@Sun.COM * in the descriptor's mntnode_t. 8110910SRobert.Harris@Sun.COM * 8210910SRobert.Harris@Sun.COM * mntfs is able to enumerate the /etc/mnttab entries corresponding to a 8310910SRobert.Harris@Sun.COM * particular file descriptor by searching the database for entries that were 8410910SRobert.Harris@Sun.COM * born before the appropriate snapshot and that either are still alive or died 8510910SRobert.Harris@Sun.COM * after the snapshot was created. Consumers use the iterator function 8610910SRobert.Harris@Sun.COM * mntfs_get_next_elem() to identify the next suitable element in the database. 8710910SRobert.Harris@Sun.COM * 8810910SRobert.Harris@Sun.COM * Each snapshot has a hold on its corresponding database elements, effected by 8910910SRobert.Harris@Sun.COM * a per-element reference count. At last close(), a snapshot is destroyed in 9010910SRobert.Harris@Sun.COM * mntfs_freesnap() by releasing all of its holds; an element is destroyed if 9110910SRobert.Harris@Sun.COM * its reference count becomes zero. Therefore the database never exists unless 9210910SRobert.Harris@Sun.COM * there is at least one active consumer of /etc/mnttab. 9310910SRobert.Harris@Sun.COM * 9410910SRobert.Harris@Sun.COM * getmntent(3C) et al. "do not open, close or rewind the file." This implies 9510910SRobert.Harris@Sun.COM * that getmntent() and read() must be able to operate without interaction on 9610910SRobert.Harris@Sun.COM * the same file descriptor; this is accomplished by the use of separate 9710910SRobert.Harris@Sun.COM * mntsnap_ts for both read() and ioctl(). 9810910SRobert.Harris@Sun.COM * 9911757SRobert.Harris@Sun.COM * mntfs observes the following lock-ordering: 10011757SRobert.Harris@Sun.COM * 10111757SRobert.Harris@Sun.COM * mnp->mnt_contents -> vfslist -> zonep->zone_mntfs_db_lock 10211757SRobert.Harris@Sun.COM * 1030Sstevel@tonic-gate * NOTE: The following variable enables the generation of the "dev=xxx" 1040Sstevel@tonic-gate * in the option string for a mounted file system. Really this should 1050Sstevel@tonic-gate * be gotten rid of altogether, but for the sake of backwards compatibility 1060Sstevel@tonic-gate * we had to leave it in. It is defined as a 32-bit device number. This 1070Sstevel@tonic-gate * means that when 64-bit device numbers are in use, if either the major or 1080Sstevel@tonic-gate * minor part of the device number will not fit in a 16 bit quantity, the 1090Sstevel@tonic-gate * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and 1100Sstevel@tonic-gate * 1999/131 for details. The cmpldev() function used to generate the 32-bit 1110Sstevel@tonic-gate * device number handles this check and assigns the proper value. 1120Sstevel@tonic-gate */ 1130Sstevel@tonic-gate int mntfs_enabledev = 1; /* enable old "dev=xxx" option */ 1140Sstevel@tonic-gate 11510910SRobert.Harris@Sun.COM extern void vfs_mono_time(timespec_t *); 11610910SRobert.Harris@Sun.COM enum { MNTFS_FIRST, MNTFS_SECOND, MNTFS_NEITHER }; 11710910SRobert.Harris@Sun.COM 11810910SRobert.Harris@Sun.COM /* 11910910SRobert.Harris@Sun.COM * Determine whether a field within a line from /etc/mnttab contains actual 12010910SRobert.Harris@Sun.COM * content or simply the marker string "-". This never applies to the time, 12110910SRobert.Harris@Sun.COM * therefore the delimiter must be a tab. 12210910SRobert.Harris@Sun.COM */ 12310910SRobert.Harris@Sun.COM #define MNTFS_REAL_FIELD(x) (*(x) != '-' || *((x) + 1) != '\t') 12410910SRobert.Harris@Sun.COM 1250Sstevel@tonic-gate static int 1260Sstevel@tonic-gate mntfs_devsize(struct vfs *vfsp) 1270Sstevel@tonic-gate { 1280Sstevel@tonic-gate dev32_t odev; 1290Sstevel@tonic-gate 1300Sstevel@tonic-gate (void) cmpldev(&odev, vfsp->vfs_dev); 1310Sstevel@tonic-gate return (snprintf(NULL, 0, "dev=%x", odev)); 1320Sstevel@tonic-gate } 1330Sstevel@tonic-gate 1340Sstevel@tonic-gate static int 1350Sstevel@tonic-gate mntfs_devprint(struct vfs *vfsp, char *buf) 1360Sstevel@tonic-gate { 1370Sstevel@tonic-gate dev32_t odev; 1380Sstevel@tonic-gate 1390Sstevel@tonic-gate (void) cmpldev(&odev, vfsp->vfs_dev); 1400Sstevel@tonic-gate return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev)); 1410Sstevel@tonic-gate } 1420Sstevel@tonic-gate 14310910SRobert.Harris@Sun.COM /* Identify which, if either, of two supplied timespec structs is newer. */ 14410910SRobert.Harris@Sun.COM static int 14510910SRobert.Harris@Sun.COM mntfs_newest(timespec_t *a, timespec_t *b) 14610910SRobert.Harris@Sun.COM { 14710910SRobert.Harris@Sun.COM if (a->tv_sec == b->tv_sec && 14810910SRobert.Harris@Sun.COM a->tv_nsec == b->tv_nsec) { 14910910SRobert.Harris@Sun.COM return (MNTFS_NEITHER); 15010910SRobert.Harris@Sun.COM } else if (b->tv_sec > a->tv_sec || 15110910SRobert.Harris@Sun.COM (b->tv_sec == a->tv_sec && 15210910SRobert.Harris@Sun.COM b->tv_nsec > a->tv_nsec)) { 15310910SRobert.Harris@Sun.COM return (MNTFS_SECOND); 15410910SRobert.Harris@Sun.COM } else { 15510910SRobert.Harris@Sun.COM return (MNTFS_FIRST); 15610910SRobert.Harris@Sun.COM } 15710910SRobert.Harris@Sun.COM } 15810910SRobert.Harris@Sun.COM 1590Sstevel@tonic-gate static int 1600Sstevel@tonic-gate mntfs_optsize(struct vfs *vfsp) 1610Sstevel@tonic-gate { 1620Sstevel@tonic-gate int i, size = 0; 1630Sstevel@tonic-gate mntopt_t *mop; 1640Sstevel@tonic-gate 1650Sstevel@tonic-gate for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 1660Sstevel@tonic-gate mop = &vfsp->vfs_mntopts.mo_list[i]; 1670Sstevel@tonic-gate if (mop->mo_flags & MO_NODISPLAY) 1680Sstevel@tonic-gate continue; 1690Sstevel@tonic-gate if (mop->mo_flags & MO_SET) { 1700Sstevel@tonic-gate if (size) 1710Sstevel@tonic-gate size++; /* space for comma */ 1720Sstevel@tonic-gate size += strlen(mop->mo_name); 1730Sstevel@tonic-gate /* 1740Sstevel@tonic-gate * count option value if there is one 1750Sstevel@tonic-gate */ 1760Sstevel@tonic-gate if (mop->mo_arg != NULL) { 1770Sstevel@tonic-gate size += strlen(mop->mo_arg) + 1; 1780Sstevel@tonic-gate } 1790Sstevel@tonic-gate } 1800Sstevel@tonic-gate } 1810Sstevel@tonic-gate if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 1820Sstevel@tonic-gate /* 1830Sstevel@tonic-gate * Add space for "zone=<zone_name>" if required. 1840Sstevel@tonic-gate */ 1850Sstevel@tonic-gate if (size) 1860Sstevel@tonic-gate size++; /* space for comma */ 1870Sstevel@tonic-gate size += sizeof ("zone=") - 1; 1880Sstevel@tonic-gate size += strlen(vfsp->vfs_zone->zone_name); 1890Sstevel@tonic-gate } 1900Sstevel@tonic-gate if (mntfs_enabledev) { 1910Sstevel@tonic-gate if (size != 0) 1920Sstevel@tonic-gate size++; /* space for comma */ 1930Sstevel@tonic-gate size += mntfs_devsize(vfsp); 1940Sstevel@tonic-gate } 1950Sstevel@tonic-gate if (size == 0) 1960Sstevel@tonic-gate size = strlen("-"); 1970Sstevel@tonic-gate return (size); 1980Sstevel@tonic-gate } 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate static int 2010Sstevel@tonic-gate mntfs_optprint(struct vfs *vfsp, char *buf) 2020Sstevel@tonic-gate { 2030Sstevel@tonic-gate int i, optinbuf = 0; 2040Sstevel@tonic-gate mntopt_t *mop; 2050Sstevel@tonic-gate char *origbuf = buf; 2060Sstevel@tonic-gate 2070Sstevel@tonic-gate for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 2080Sstevel@tonic-gate mop = &vfsp->vfs_mntopts.mo_list[i]; 2090Sstevel@tonic-gate if (mop->mo_flags & MO_NODISPLAY) 2100Sstevel@tonic-gate continue; 2110Sstevel@tonic-gate if (mop->mo_flags & MO_SET) { 2120Sstevel@tonic-gate if (optinbuf) 2130Sstevel@tonic-gate *buf++ = ','; 2140Sstevel@tonic-gate else 2150Sstevel@tonic-gate optinbuf = 1; 2160Sstevel@tonic-gate buf += snprintf(buf, MAX_MNTOPT_STR, 2176036Smec "%s", mop->mo_name); 2180Sstevel@tonic-gate /* 2190Sstevel@tonic-gate * print option value if there is one 2200Sstevel@tonic-gate */ 2210Sstevel@tonic-gate if (mop->mo_arg != NULL) { 2220Sstevel@tonic-gate buf += snprintf(buf, MAX_MNTOPT_STR, "=%s", 2236036Smec mop->mo_arg); 2240Sstevel@tonic-gate } 2250Sstevel@tonic-gate } 2260Sstevel@tonic-gate } 2270Sstevel@tonic-gate if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 2280Sstevel@tonic-gate if (optinbuf) 2290Sstevel@tonic-gate *buf++ = ','; 2300Sstevel@tonic-gate else 2310Sstevel@tonic-gate optinbuf = 1; 2320Sstevel@tonic-gate buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s", 2330Sstevel@tonic-gate vfsp->vfs_zone->zone_name); 2340Sstevel@tonic-gate } 2350Sstevel@tonic-gate if (mntfs_enabledev) { 2360Sstevel@tonic-gate if (optinbuf++) 2370Sstevel@tonic-gate *buf++ = ','; 2380Sstevel@tonic-gate buf += mntfs_devprint(vfsp, buf); 2390Sstevel@tonic-gate } 2400Sstevel@tonic-gate if (!optinbuf) { 2410Sstevel@tonic-gate buf += snprintf(buf, MAX_MNTOPT_STR, "-"); 2420Sstevel@tonic-gate } 2430Sstevel@tonic-gate return (buf - origbuf); 2440Sstevel@tonic-gate } 2450Sstevel@tonic-gate 24610910SRobert.Harris@Sun.COM void 24710910SRobert.Harris@Sun.COM mntfs_populate_text(vfs_t *vfsp, zone_t *zonep, mntelem_t *elemp) 24810910SRobert.Harris@Sun.COM { 24910910SRobert.Harris@Sun.COM struct extmnttab *tabp = &elemp->mnte_tab; 25010910SRobert.Harris@Sun.COM const char *resource, *mntpt; 25110910SRobert.Harris@Sun.COM char *cp = elemp->mnte_text; 25210910SRobert.Harris@Sun.COM mntpt = refstr_value(vfsp->vfs_mntpt); 25310910SRobert.Harris@Sun.COM resource = refstr_value(vfsp->vfs_resource); 25410910SRobert.Harris@Sun.COM 25510910SRobert.Harris@Sun.COM tabp->mnt_special = 0; 25610910SRobert.Harris@Sun.COM if (resource != NULL && resource[0] != '\0') { 25710910SRobert.Harris@Sun.COM if (resource[0] != '/') { 25810910SRobert.Harris@Sun.COM cp += snprintf(cp, MAXPATHLEN, "%s\t", resource); 25910910SRobert.Harris@Sun.COM } else if (!ZONE_PATH_VISIBLE(resource, zonep)) { 26010910SRobert.Harris@Sun.COM /* 26110910SRobert.Harris@Sun.COM * Use the mount point as the resource. 26210910SRobert.Harris@Sun.COM */ 26310910SRobert.Harris@Sun.COM cp += snprintf(cp, MAXPATHLEN, "%s\t", 26410910SRobert.Harris@Sun.COM ZONE_PATH_TRANSLATE(mntpt, zonep)); 26510910SRobert.Harris@Sun.COM } else { 26610910SRobert.Harris@Sun.COM cp += snprintf(cp, MAXPATHLEN, "%s\t", 26710910SRobert.Harris@Sun.COM ZONE_PATH_TRANSLATE(resource, zonep)); 26810910SRobert.Harris@Sun.COM } 26910910SRobert.Harris@Sun.COM } else { 27010910SRobert.Harris@Sun.COM cp += snprintf(cp, MAXPATHLEN, "-\t"); 27110910SRobert.Harris@Sun.COM } 27210910SRobert.Harris@Sun.COM 27310910SRobert.Harris@Sun.COM tabp->mnt_mountp = (char *)(cp - elemp->mnte_text); 27410910SRobert.Harris@Sun.COM if (mntpt != NULL && mntpt[0] != '\0') { 27510910SRobert.Harris@Sun.COM /* 27610910SRobert.Harris@Sun.COM * We know the mount point is visible from within the zone, 27710910SRobert.Harris@Sun.COM * otherwise it wouldn't be on the zone's vfs list. 27810910SRobert.Harris@Sun.COM */ 27910910SRobert.Harris@Sun.COM cp += snprintf(cp, MAXPATHLEN, "%s\t", 28010910SRobert.Harris@Sun.COM ZONE_PATH_TRANSLATE(mntpt, zonep)); 28110910SRobert.Harris@Sun.COM } else { 28210910SRobert.Harris@Sun.COM cp += snprintf(cp, MAXPATHLEN, "-\t"); 28310910SRobert.Harris@Sun.COM } 28410910SRobert.Harris@Sun.COM 28510910SRobert.Harris@Sun.COM tabp->mnt_fstype = (char *)(cp - elemp->mnte_text); 28610910SRobert.Harris@Sun.COM cp += snprintf(cp, MAXPATHLEN, "%s\t", 28710910SRobert.Harris@Sun.COM vfssw[vfsp->vfs_fstype].vsw_name); 28810910SRobert.Harris@Sun.COM 28910910SRobert.Harris@Sun.COM tabp->mnt_mntopts = (char *)(cp - elemp->mnte_text); 29010910SRobert.Harris@Sun.COM cp += mntfs_optprint(vfsp, cp); 29110910SRobert.Harris@Sun.COM *cp++ = '\t'; 29210910SRobert.Harris@Sun.COM 29310910SRobert.Harris@Sun.COM tabp->mnt_time = (char *)(cp - elemp->mnte_text); 29410910SRobert.Harris@Sun.COM cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime); 29510910SRobert.Harris@Sun.COM *cp++ = '\n'; /* over-write snprintf's trailing null-byte */ 29610910SRobert.Harris@Sun.COM 29710910SRobert.Harris@Sun.COM tabp->mnt_major = getmajor(vfsp->vfs_dev); 29810910SRobert.Harris@Sun.COM tabp->mnt_minor = getminor(vfsp->vfs_dev); 29910910SRobert.Harris@Sun.COM 30010910SRobert.Harris@Sun.COM elemp->mnte_text_size = cp - elemp->mnte_text; 30110910SRobert.Harris@Sun.COM elemp->mnte_vfs_ctime = vfsp->vfs_hrctime; 30210910SRobert.Harris@Sun.COM elemp->mnte_hidden = vfsp->vfs_flag & VFS_NOMNTTAB; 30310910SRobert.Harris@Sun.COM } 30410910SRobert.Harris@Sun.COM 30510910SRobert.Harris@Sun.COM /* Determine the length of the /etc/mnttab entry for this vfs_t. */ 3060Sstevel@tonic-gate static size_t 30710910SRobert.Harris@Sun.COM mntfs_text_len(vfs_t *vfsp, zone_t *zone) 3080Sstevel@tonic-gate { 3090Sstevel@tonic-gate size_t size = 0; 3100Sstevel@tonic-gate const char *resource, *mntpt; 31110910SRobert.Harris@Sun.COM size_t mntsize; 3120Sstevel@tonic-gate 3130Sstevel@tonic-gate mntpt = refstr_value(vfsp->vfs_mntpt); 3140Sstevel@tonic-gate if (mntpt != NULL && mntpt[0] != '\0') { 31510910SRobert.Harris@Sun.COM mntsize = strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 3160Sstevel@tonic-gate } else { 31710910SRobert.Harris@Sun.COM mntsize = 2; /* "-\t" */ 3180Sstevel@tonic-gate } 31910910SRobert.Harris@Sun.COM size += mntsize; 3200Sstevel@tonic-gate 3210Sstevel@tonic-gate resource = refstr_value(vfsp->vfs_resource); 3220Sstevel@tonic-gate if (resource != NULL && resource[0] != '\0') { 3230Sstevel@tonic-gate if (resource[0] != '/') { 3240Sstevel@tonic-gate size += strlen(resource) + 1; 3250Sstevel@tonic-gate } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 3260Sstevel@tonic-gate /* 3270Sstevel@tonic-gate * Same as the zone's view of the mount point. 3280Sstevel@tonic-gate */ 32910910SRobert.Harris@Sun.COM size += mntsize; 3300Sstevel@tonic-gate } else { 3310Sstevel@tonic-gate size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1; 3320Sstevel@tonic-gate } 3330Sstevel@tonic-gate } else { 33410910SRobert.Harris@Sun.COM size += 2; /* "-\t" */ 3350Sstevel@tonic-gate } 3360Sstevel@tonic-gate size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1; 3370Sstevel@tonic-gate size += mntfs_optsize(vfsp); 3380Sstevel@tonic-gate size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime); 3390Sstevel@tonic-gate return (size); 3400Sstevel@tonic-gate } 3410Sstevel@tonic-gate 34210910SRobert.Harris@Sun.COM /* Destroy the resources associated with a snapshot element. */ 3430Sstevel@tonic-gate static void 34410910SRobert.Harris@Sun.COM mntfs_destroy_elem(mntelem_t *elemp) 3450Sstevel@tonic-gate { 34610910SRobert.Harris@Sun.COM kmem_free(elemp->mnte_text, elemp->mnte_text_size); 34710910SRobert.Harris@Sun.COM kmem_free(elemp, sizeof (mntelem_t)); 3480Sstevel@tonic-gate } 3490Sstevel@tonic-gate 35010910SRobert.Harris@Sun.COM /* 35110910SRobert.Harris@Sun.COM * Return 1 if the given snapshot is in the range of the given element; return 35210910SRobert.Harris@Sun.COM * 0 otherwise. 35310910SRobert.Harris@Sun.COM */ 35410910SRobert.Harris@Sun.COM static int 35510910SRobert.Harris@Sun.COM mntfs_elem_in_range(mntsnap_t *snapp, mntelem_t *elemp) 3560Sstevel@tonic-gate { 35710910SRobert.Harris@Sun.COM timespec_t *stimep = &snapp->mnts_time; 35810910SRobert.Harris@Sun.COM timespec_t *btimep = &elemp->mnte_birth; 35910910SRobert.Harris@Sun.COM timespec_t *dtimep = &elemp->mnte_death; 3600Sstevel@tonic-gate 3610Sstevel@tonic-gate /* 36210910SRobert.Harris@Sun.COM * If a snapshot is in range of an element then the snapshot must have 36310910SRobert.Harris@Sun.COM * been created after the birth of the element, and either the element 36410910SRobert.Harris@Sun.COM * is still alive or it died after the snapshot was created. 3650Sstevel@tonic-gate */ 36610910SRobert.Harris@Sun.COM if (mntfs_newest(btimep, stimep) == MNTFS_SECOND && 36710910SRobert.Harris@Sun.COM (MNTFS_ELEM_IS_ALIVE(elemp) || 36810910SRobert.Harris@Sun.COM mntfs_newest(stimep, dtimep) == MNTFS_SECOND)) 36910910SRobert.Harris@Sun.COM return (1); 37010910SRobert.Harris@Sun.COM else 37110910SRobert.Harris@Sun.COM return (0); 3720Sstevel@tonic-gate } 3730Sstevel@tonic-gate 37410910SRobert.Harris@Sun.COM /* 37510910SRobert.Harris@Sun.COM * Return the next valid database element, after the one provided, for a given 37610910SRobert.Harris@Sun.COM * snapshot; return NULL if none exists. The caller must hold the zone's 37710910SRobert.Harris@Sun.COM * database lock as a reader before calling this function. 37810910SRobert.Harris@Sun.COM */ 37910910SRobert.Harris@Sun.COM static mntelem_t * 38010910SRobert.Harris@Sun.COM mntfs_get_next_elem(mntsnap_t *snapp, mntelem_t *elemp) 3810Sstevel@tonic-gate { 38210910SRobert.Harris@Sun.COM int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN; 3830Sstevel@tonic-gate 3840Sstevel@tonic-gate do { 38510910SRobert.Harris@Sun.COM elemp = elemp->mnte_next; 38610910SRobert.Harris@Sun.COM } while (elemp && 38710910SRobert.Harris@Sun.COM (!mntfs_elem_in_range(snapp, elemp) || 38810910SRobert.Harris@Sun.COM (!show_hidden && elemp->mnte_hidden))); 38910910SRobert.Harris@Sun.COM return (elemp); 3900Sstevel@tonic-gate } 3910Sstevel@tonic-gate 39210910SRobert.Harris@Sun.COM /* 39310910SRobert.Harris@Sun.COM * This function frees the resources associated with a mntsnap_t. It walks 39410910SRobert.Harris@Sun.COM * through the database, decrementing the reference count of any element that 39510910SRobert.Harris@Sun.COM * satisfies the snapshot. If the reference count of an element becomes zero 39610910SRobert.Harris@Sun.COM * then it is removed from the database. 39710910SRobert.Harris@Sun.COM */ 3980Sstevel@tonic-gate static void 39910910SRobert.Harris@Sun.COM mntfs_freesnap(mntnode_t *mnp, mntsnap_t *snapp) 4000Sstevel@tonic-gate { 40110910SRobert.Harris@Sun.COM zone_t *zonep = MTOD(mnp)->mnt_zone; 40210910SRobert.Harris@Sun.COM krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 40310910SRobert.Harris@Sun.COM mntelem_t **elempp = &zonep->zone_mntfs_db; 40410910SRobert.Harris@Sun.COM mntelem_t *elemp; 40510910SRobert.Harris@Sun.COM int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN; 40610910SRobert.Harris@Sun.COM size_t number_decremented = 0; 4070Sstevel@tonic-gate 40810910SRobert.Harris@Sun.COM ASSERT(RW_WRITE_HELD(&mnp->mnt_contents)); 4090Sstevel@tonic-gate 41010910SRobert.Harris@Sun.COM /* Ignore an uninitialised snapshot. */ 41110910SRobert.Harris@Sun.COM if (snapp->mnts_nmnts == 0) 41210910SRobert.Harris@Sun.COM return; 4130Sstevel@tonic-gate 41410910SRobert.Harris@Sun.COM /* Drop the holds on any matching database elements. */ 41510910SRobert.Harris@Sun.COM rw_enter(dblockp, RW_WRITER); 41610910SRobert.Harris@Sun.COM while ((elemp = *elempp) != NULL) { 41710910SRobert.Harris@Sun.COM if (mntfs_elem_in_range(snapp, elemp) && 41810910SRobert.Harris@Sun.COM (!elemp->mnte_hidden || show_hidden) && 41910910SRobert.Harris@Sun.COM ++number_decremented && --elemp->mnte_refcnt == 0) { 42010910SRobert.Harris@Sun.COM if ((*elempp = elemp->mnte_next) != NULL) 42110910SRobert.Harris@Sun.COM (*elempp)->mnte_prev = elemp->mnte_prev; 42210910SRobert.Harris@Sun.COM mntfs_destroy_elem(elemp); 42310910SRobert.Harris@Sun.COM } else { 42410910SRobert.Harris@Sun.COM elempp = &elemp->mnte_next; 42510910SRobert.Harris@Sun.COM } 42610910SRobert.Harris@Sun.COM } 42710910SRobert.Harris@Sun.COM rw_exit(dblockp); 42810910SRobert.Harris@Sun.COM ASSERT(number_decremented == snapp->mnts_nmnts); 4290Sstevel@tonic-gate 43010910SRobert.Harris@Sun.COM /* Clear the snapshot data. */ 43110910SRobert.Harris@Sun.COM bzero(snapp, sizeof (mntsnap_t)); 4320Sstevel@tonic-gate } 4330Sstevel@tonic-gate 43410910SRobert.Harris@Sun.COM /* Insert the new database element newp after the existing element prevp. */ 4350Sstevel@tonic-gate static void 43610910SRobert.Harris@Sun.COM mntfs_insert_after(mntelem_t *newp, mntelem_t *prevp) 4370Sstevel@tonic-gate { 43810910SRobert.Harris@Sun.COM newp->mnte_prev = prevp; 43910910SRobert.Harris@Sun.COM newp->mnte_next = prevp->mnte_next; 44010910SRobert.Harris@Sun.COM prevp->mnte_next = newp; 44110910SRobert.Harris@Sun.COM if (newp->mnte_next != NULL) 44210910SRobert.Harris@Sun.COM newp->mnte_next->mnte_prev = newp; 44310910SRobert.Harris@Sun.COM } 44410910SRobert.Harris@Sun.COM 44510910SRobert.Harris@Sun.COM /* Create and return a copy of a given database element. */ 44610910SRobert.Harris@Sun.COM static mntelem_t * 44710910SRobert.Harris@Sun.COM mntfs_copy(mntelem_t *origp) 44810910SRobert.Harris@Sun.COM { 44910910SRobert.Harris@Sun.COM mntelem_t *copyp; 4500Sstevel@tonic-gate 45110910SRobert.Harris@Sun.COM copyp = kmem_zalloc(sizeof (mntelem_t), KM_SLEEP); 45210910SRobert.Harris@Sun.COM copyp->mnte_vfs_ctime = origp->mnte_vfs_ctime; 45310910SRobert.Harris@Sun.COM copyp->mnte_text_size = origp->mnte_text_size; 45410910SRobert.Harris@Sun.COM copyp->mnte_text = kmem_alloc(copyp->mnte_text_size, KM_SLEEP); 45510910SRobert.Harris@Sun.COM bcopy(origp->mnte_text, copyp->mnte_text, copyp->mnte_text_size); 45610910SRobert.Harris@Sun.COM copyp->mnte_tab = origp->mnte_tab; 45710910SRobert.Harris@Sun.COM copyp->mnte_hidden = origp->mnte_hidden; 45810910SRobert.Harris@Sun.COM 45910910SRobert.Harris@Sun.COM return (copyp); 46010910SRobert.Harris@Sun.COM } 46110910SRobert.Harris@Sun.COM 46210910SRobert.Harris@Sun.COM /* 46310910SRobert.Harris@Sun.COM * Compare two database elements and determine whether or not the vfs_t payload 46410910SRobert.Harris@Sun.COM * data of each are the same. Return 1 if so and 0 otherwise. 46510910SRobert.Harris@Sun.COM */ 46610910SRobert.Harris@Sun.COM static int 46710910SRobert.Harris@Sun.COM mntfs_is_same_element(mntelem_t *a, mntelem_t *b) 46810910SRobert.Harris@Sun.COM { 46910910SRobert.Harris@Sun.COM if (a->mnte_hidden == b->mnte_hidden && 47010910SRobert.Harris@Sun.COM a->mnte_text_size == b->mnte_text_size && 47110910SRobert.Harris@Sun.COM bcmp(a->mnte_text, b->mnte_text, a->mnte_text_size) == 0 && 47210910SRobert.Harris@Sun.COM bcmp(&a->mnte_tab, &b->mnte_tab, sizeof (struct extmnttab)) == 0) 47310910SRobert.Harris@Sun.COM return (1); 47410910SRobert.Harris@Sun.COM else 47510910SRobert.Harris@Sun.COM return (0); 4760Sstevel@tonic-gate } 4770Sstevel@tonic-gate 47810910SRobert.Harris@Sun.COM /* 47910910SRobert.Harris@Sun.COM * mntfs_snapshot() updates the database, creating it if necessary, so that it 48010910SRobert.Harris@Sun.COM * accurately reflects the state of the in-kernel mnttab. It also increments 48110910SRobert.Harris@Sun.COM * the reference count on all database elements that correspond to currently- 48210910SRobert.Harris@Sun.COM * mounted resources. Finally, it initialises the appropriate snapshot 48310910SRobert.Harris@Sun.COM * structure. 48410910SRobert.Harris@Sun.COM * 48510910SRobert.Harris@Sun.COM * Each vfs_t is given a high-resolution time stamp, for the benefit of mntfs, 48610910SRobert.Harris@Sun.COM * when it is inserted into the in-kernel mnttab. This time stamp is copied into 48710910SRobert.Harris@Sun.COM * the corresponding database element when it is created, allowing the element 48810910SRobert.Harris@Sun.COM * and the vfs_t to be identified as a pair. It is possible that some file 48910910SRobert.Harris@Sun.COM * systems may make unadvertised changes to, for example, a resource's mount 49010910SRobert.Harris@Sun.COM * options. Therefore, in order to determine whether a database element is an 49110910SRobert.Harris@Sun.COM * up-to-date representation of a given vfs_t, it is compared with a temporary 49210910SRobert.Harris@Sun.COM * element generated for this purpose. Although less efficient, this is safer 49310910SRobert.Harris@Sun.COM * than implementing an mtime for a vfs_t. 49410910SRobert.Harris@Sun.COM * 49510910SRobert.Harris@Sun.COM * Some mounted resources are marked as "hidden" with a VFS_NOMNTTAB flag. These 49610910SRobert.Harris@Sun.COM * are considered invisible unless the user has already set the MNT_SHOWHIDDEN 49710910SRobert.Harris@Sun.COM * flag in the vnode using the MNTIOC_SHOWHIDDEN ioctl. 49810910SRobert.Harris@Sun.COM */ 4990Sstevel@tonic-gate static void 50010910SRobert.Harris@Sun.COM mntfs_snapshot(mntnode_t *mnp, mntsnap_t *snapp) 5010Sstevel@tonic-gate { 50211757SRobert.Harris@Sun.COM mntdata_t *mnd = MTOD(mnp); 50311757SRobert.Harris@Sun.COM zone_t *zonep = mnd->mnt_zone; 50410910SRobert.Harris@Sun.COM int is_global_zone = (zonep == global_zone); 50510910SRobert.Harris@Sun.COM int show_hidden = mnp->mnt_flags & MNT_SHOWHIDDEN; 50610910SRobert.Harris@Sun.COM vfs_t *vfsp, *firstvfsp, *lastvfsp; 50710910SRobert.Harris@Sun.COM vfs_t dummyvfs; 50810910SRobert.Harris@Sun.COM vfs_t *dummyvfsp = NULL; 50910910SRobert.Harris@Sun.COM krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 51010910SRobert.Harris@Sun.COM mntelem_t **headpp = &zonep->zone_mntfs_db; 51110910SRobert.Harris@Sun.COM mntelem_t *elemp; 51210910SRobert.Harris@Sun.COM mntelem_t *prevp = NULL; 51310910SRobert.Harris@Sun.COM int order; 51410910SRobert.Harris@Sun.COM mntelem_t *tempelemp; 51510910SRobert.Harris@Sun.COM mntelem_t *newp; 51610910SRobert.Harris@Sun.COM mntelem_t *firstp = NULL; 51710910SRobert.Harris@Sun.COM size_t nmnts = 0; 51811757SRobert.Harris@Sun.COM size_t total_text_size = 0; 51911757SRobert.Harris@Sun.COM size_t normal_text_size = 0; 52010910SRobert.Harris@Sun.COM int insert_before; 52110910SRobert.Harris@Sun.COM timespec_t last_mtime; 52210910SRobert.Harris@Sun.COM size_t entry_length, new_entry_length; 5230Sstevel@tonic-gate 52410910SRobert.Harris@Sun.COM 52510910SRobert.Harris@Sun.COM ASSERT(RW_WRITE_HELD(&mnp->mnt_contents)); 52610910SRobert.Harris@Sun.COM vfs_list_read_lock(); 52710910SRobert.Harris@Sun.COM vfs_mnttab_modtime(&last_mtime); 5280Sstevel@tonic-gate 52910910SRobert.Harris@Sun.COM /* 53010910SRobert.Harris@Sun.COM * If this snapshot already exists then we must have been asked to 53110910SRobert.Harris@Sun.COM * rewind the file, i.e. discard the snapshot and create a new one in 53210910SRobert.Harris@Sun.COM * its place. In this case we first see if the in-kernel mnttab has 53310910SRobert.Harris@Sun.COM * advertised a change; if not then we simply reinitialise the metadata. 53410910SRobert.Harris@Sun.COM */ 53510910SRobert.Harris@Sun.COM if (snapp->mnts_nmnts) { 53610910SRobert.Harris@Sun.COM if (mntfs_newest(&last_mtime, &snapp->mnts_last_mtime) == 53710910SRobert.Harris@Sun.COM MNTFS_NEITHER) { 53810910SRobert.Harris@Sun.COM /* 53910910SRobert.Harris@Sun.COM * An unchanged mtime is no guarantee that the 54010910SRobert.Harris@Sun.COM * in-kernel mnttab is unchanged; for example, a 54110910SRobert.Harris@Sun.COM * concurrent remount may be between calls to 54210910SRobert.Harris@Sun.COM * vfs_setmntopt_nolock() and vfs_mnttab_modtimeupd(). 54310910SRobert.Harris@Sun.COM * It follows that the database may have changed, and 54410910SRobert.Harris@Sun.COM * in particular that some elements in this snapshot 54510910SRobert.Harris@Sun.COM * may have been killed by another call to 54610910SRobert.Harris@Sun.COM * mntfs_snapshot(). It is therefore not merely 54710910SRobert.Harris@Sun.COM * unnecessary to update the snapshot's time but in 54810910SRobert.Harris@Sun.COM * fact dangerous; it needs to be left alone. 54910910SRobert.Harris@Sun.COM */ 55010910SRobert.Harris@Sun.COM snapp->mnts_next = snapp->mnts_first; 55110910SRobert.Harris@Sun.COM snapp->mnts_flags &= ~MNTS_REWIND; 55210910SRobert.Harris@Sun.COM snapp->mnts_foffset = snapp->mnts_ieoffset = 0; 55310910SRobert.Harris@Sun.COM vfs_list_unlock(); 55410910SRobert.Harris@Sun.COM return; 55510910SRobert.Harris@Sun.COM } else { 55610910SRobert.Harris@Sun.COM mntfs_freesnap(mnp, snapp); 55710910SRobert.Harris@Sun.COM } 55810910SRobert.Harris@Sun.COM } 5590Sstevel@tonic-gate 56010910SRobert.Harris@Sun.COM /* 56110910SRobert.Harris@Sun.COM * Create a temporary database element. For each vfs_t, the temporary 56210910SRobert.Harris@Sun.COM * element will be populated with the corresponding text. If the vfs_t 56310910SRobert.Harris@Sun.COM * does not have a corresponding element within the database, or if 56410910SRobert.Harris@Sun.COM * there is such an element but it is stale, a copy of the temporary 56510910SRobert.Harris@Sun.COM * element is inserted into the database at the appropriate location. 56610910SRobert.Harris@Sun.COM */ 56710910SRobert.Harris@Sun.COM tempelemp = kmem_alloc(sizeof (mntelem_t), KM_SLEEP); 56810910SRobert.Harris@Sun.COM entry_length = MNT_LINE_MAX; 56910910SRobert.Harris@Sun.COM tempelemp->mnte_text = kmem_alloc(entry_length, KM_SLEEP); 5700Sstevel@tonic-gate 57110910SRobert.Harris@Sun.COM /* Find the first and last vfs_t for the given zone. */ 57210910SRobert.Harris@Sun.COM if (is_global_zone) { 57310910SRobert.Harris@Sun.COM firstvfsp = rootvfs; 57410910SRobert.Harris@Sun.COM lastvfsp = firstvfsp->vfs_prev; 57510910SRobert.Harris@Sun.COM } else { 57610910SRobert.Harris@Sun.COM firstvfsp = zonep->zone_vfslist; 57710910SRobert.Harris@Sun.COM /* 57810910SRobert.Harris@Sun.COM * If there isn't already a vfs_t for root then we create a 57910910SRobert.Harris@Sun.COM * dummy which will be used as the head of the list (which will 58010910SRobert.Harris@Sun.COM * therefore no longer be circular). 58110910SRobert.Harris@Sun.COM */ 58210910SRobert.Harris@Sun.COM if (firstvfsp == NULL || 58310910SRobert.Harris@Sun.COM strcmp(refstr_value(firstvfsp->vfs_mntpt), 58410910SRobert.Harris@Sun.COM zonep->zone_rootpath) != 0) { 58510910SRobert.Harris@Sun.COM /* 58610910SRobert.Harris@Sun.COM * The zone's vfs_ts will have mount points relative to 58710910SRobert.Harris@Sun.COM * the zone's root path. The vfs_t for the zone's 58810910SRobert.Harris@Sun.COM * root file system would therefore have a mount point 58910910SRobert.Harris@Sun.COM * equal to the zone's root path. Since the zone's root 59010910SRobert.Harris@Sun.COM * path isn't a mount point, we copy the vfs_t of the 59110910SRobert.Harris@Sun.COM * zone's root vnode, and provide it with a fake mount 592*12999Slori.alt@oracle.com * and resource. However, if the zone's root is a 593*12999Slori.alt@oracle.com * zfs dataset, use the dataset name as the resource. 59410910SRobert.Harris@Sun.COM * 59510910SRobert.Harris@Sun.COM * Note that by cloning another vfs_t we also acquire 59610910SRobert.Harris@Sun.COM * its high-resolution ctime. This might appear to 59710910SRobert.Harris@Sun.COM * violate the requirement that the ctimes in the list 59810910SRobert.Harris@Sun.COM * of vfs_ts are unique and monotonically increasing; 59910910SRobert.Harris@Sun.COM * this is not the case. The dummy vfs_t appears in only 60010910SRobert.Harris@Sun.COM * a non-global zone's vfs_t list, where the cloned 60110910SRobert.Harris@Sun.COM * vfs_t would not ordinarily be visible; the ctimes are 60210910SRobert.Harris@Sun.COM * therefore unique. The zone's root path must be 60310910SRobert.Harris@Sun.COM * available before the zone boots, and so its root 60410910SRobert.Harris@Sun.COM * vnode's vfs_t's ctime must be lower than those of any 60510910SRobert.Harris@Sun.COM * resources subsequently mounted by the zone. The 60610910SRobert.Harris@Sun.COM * ctimes are therefore monotonically increasing. 60710910SRobert.Harris@Sun.COM */ 60810910SRobert.Harris@Sun.COM dummyvfs = *zonep->zone_rootvp->v_vfsp; 60910910SRobert.Harris@Sun.COM dummyvfs.vfs_mntpt = refstr_alloc(zonep->zone_rootpath); 610*12999Slori.alt@oracle.com if (strcmp(vfssw[dummyvfs.vfs_fstype].vsw_name, "zfs") 611*12999Slori.alt@oracle.com != 0) 612*12999Slori.alt@oracle.com dummyvfs.vfs_resource = dummyvfs.vfs_mntpt; 61310910SRobert.Harris@Sun.COM dummyvfsp = &dummyvfs; 61410910SRobert.Harris@Sun.COM if (firstvfsp == NULL) { 61510910SRobert.Harris@Sun.COM lastvfsp = dummyvfsp; 61610910SRobert.Harris@Sun.COM } else { 61710910SRobert.Harris@Sun.COM lastvfsp = firstvfsp->vfs_zone_prev; 61810910SRobert.Harris@Sun.COM dummyvfsp->vfs_zone_next = firstvfsp; 61910910SRobert.Harris@Sun.COM } 62010910SRobert.Harris@Sun.COM firstvfsp = dummyvfsp; 62110910SRobert.Harris@Sun.COM } else { 62210910SRobert.Harris@Sun.COM lastvfsp = firstvfsp->vfs_zone_prev; 62310910SRobert.Harris@Sun.COM } 6240Sstevel@tonic-gate } 6250Sstevel@tonic-gate 62610910SRobert.Harris@Sun.COM /* 62710910SRobert.Harris@Sun.COM * Now walk through all the vfs_ts for this zone. For each one, find the 62810910SRobert.Harris@Sun.COM * corresponding database element, creating it first if necessary, and 62910910SRobert.Harris@Sun.COM * increment its reference count. 63010910SRobert.Harris@Sun.COM */ 63110910SRobert.Harris@Sun.COM rw_enter(dblockp, RW_WRITER); 63210910SRobert.Harris@Sun.COM elemp = zonep->zone_mntfs_db; 63310910SRobert.Harris@Sun.COM /* CSTYLED */ 63410910SRobert.Harris@Sun.COM for (vfsp = firstvfsp;; 63510910SRobert.Harris@Sun.COM vfsp = is_global_zone ? vfsp->vfs_next : vfsp->vfs_zone_next) { 63610910SRobert.Harris@Sun.COM DTRACE_PROBE1(new__vfs, vfs_t *, vfsp); 63710910SRobert.Harris@Sun.COM /* Consider only visible entries. */ 63810910SRobert.Harris@Sun.COM if ((vfsp->vfs_flag & VFS_NOMNTTAB) == 0 || show_hidden) { 63910910SRobert.Harris@Sun.COM /* 64010910SRobert.Harris@Sun.COM * Walk through the existing database looking for either 64110910SRobert.Harris@Sun.COM * an element that matches the current vfs_t, or for the 64210910SRobert.Harris@Sun.COM * correct place in which to insert a new element. 64310910SRobert.Harris@Sun.COM */ 64410910SRobert.Harris@Sun.COM insert_before = 0; 64510910SRobert.Harris@Sun.COM for (; elemp; prevp = elemp, elemp = elemp->mnte_next) { 64610910SRobert.Harris@Sun.COM DTRACE_PROBE1(considering__elem, mntelem_t *, 64710910SRobert.Harris@Sun.COM elemp); 6480Sstevel@tonic-gate 64910910SRobert.Harris@Sun.COM /* Compare the vfs_t with the element. */ 65010910SRobert.Harris@Sun.COM order = mntfs_newest(&elemp->mnte_vfs_ctime, 65110910SRobert.Harris@Sun.COM &vfsp->vfs_hrctime); 65210910SRobert.Harris@Sun.COM 65310910SRobert.Harris@Sun.COM /* 65410910SRobert.Harris@Sun.COM * If we encounter a database element newer than 65510910SRobert.Harris@Sun.COM * this vfs_t then we've stepped over a gap 65610910SRobert.Harris@Sun.COM * where the element for this vfs_t must be 65710910SRobert.Harris@Sun.COM * inserted. 65810910SRobert.Harris@Sun.COM */ 65910910SRobert.Harris@Sun.COM if (order == MNTFS_FIRST) { 66010910SRobert.Harris@Sun.COM insert_before = 1; 66110910SRobert.Harris@Sun.COM break; 66210910SRobert.Harris@Sun.COM } 66310910SRobert.Harris@Sun.COM 66410910SRobert.Harris@Sun.COM /* Dead elements no longer interest us. */ 66510910SRobert.Harris@Sun.COM if (MNTFS_ELEM_IS_DEAD(elemp)) 66610910SRobert.Harris@Sun.COM continue; 6670Sstevel@tonic-gate 66810910SRobert.Harris@Sun.COM /* 66910910SRobert.Harris@Sun.COM * If the time stamps are the same then the 67010910SRobert.Harris@Sun.COM * element is potential match for the vfs_t, 67110910SRobert.Harris@Sun.COM * although it may later prove to be stale. 67210910SRobert.Harris@Sun.COM */ 67310910SRobert.Harris@Sun.COM if (order == MNTFS_NEITHER) 67410910SRobert.Harris@Sun.COM break; 67510910SRobert.Harris@Sun.COM 67610910SRobert.Harris@Sun.COM /* 67710910SRobert.Harris@Sun.COM * This element must be older than the vfs_t. 67810910SRobert.Harris@Sun.COM * It must, therefore, correspond to a vfs_t 67910910SRobert.Harris@Sun.COM * that has been unmounted. Since the element is 68010910SRobert.Harris@Sun.COM * still alive, we kill it if it is visible. 68110910SRobert.Harris@Sun.COM */ 68210910SRobert.Harris@Sun.COM if (!elemp->mnte_hidden || show_hidden) 68310910SRobert.Harris@Sun.COM vfs_mono_time(&elemp->mnte_death); 68410910SRobert.Harris@Sun.COM } 68510910SRobert.Harris@Sun.COM DTRACE_PROBE2(possible__match, vfs_t *, vfsp, 68610910SRobert.Harris@Sun.COM mntelem_t *, elemp); 6870Sstevel@tonic-gate 68810910SRobert.Harris@Sun.COM /* Create a new database element if required. */ 68910910SRobert.Harris@Sun.COM new_entry_length = mntfs_text_len(vfsp, zonep); 69010910SRobert.Harris@Sun.COM if (new_entry_length > entry_length) { 69110910SRobert.Harris@Sun.COM kmem_free(tempelemp->mnte_text, entry_length); 69210910SRobert.Harris@Sun.COM tempelemp->mnte_text = 69310910SRobert.Harris@Sun.COM kmem_alloc(new_entry_length, KM_SLEEP); 69410910SRobert.Harris@Sun.COM entry_length = new_entry_length; 69510910SRobert.Harris@Sun.COM } 69610910SRobert.Harris@Sun.COM mntfs_populate_text(vfsp, zonep, tempelemp); 69710910SRobert.Harris@Sun.COM ASSERT(tempelemp->mnte_text_size == new_entry_length); 69810910SRobert.Harris@Sun.COM if (elemp == NULL) { 69910910SRobert.Harris@Sun.COM /* 70010910SRobert.Harris@Sun.COM * We ran off the end of the database. Insert a 70110910SRobert.Harris@Sun.COM * new element at the end. 70210910SRobert.Harris@Sun.COM */ 70310910SRobert.Harris@Sun.COM newp = mntfs_copy(tempelemp); 70410910SRobert.Harris@Sun.COM vfs_mono_time(&newp->mnte_birth); 70510910SRobert.Harris@Sun.COM if (prevp) { 70610910SRobert.Harris@Sun.COM mntfs_insert_after(newp, prevp); 70710910SRobert.Harris@Sun.COM } else { 70810910SRobert.Harris@Sun.COM newp->mnte_next = NULL; 70910910SRobert.Harris@Sun.COM newp->mnte_prev = NULL; 71010910SRobert.Harris@Sun.COM ASSERT(*headpp == NULL); 71110910SRobert.Harris@Sun.COM *headpp = newp; 71210910SRobert.Harris@Sun.COM } 71310910SRobert.Harris@Sun.COM elemp = newp; 71410910SRobert.Harris@Sun.COM } else if (insert_before) { 71510910SRobert.Harris@Sun.COM /* 71610910SRobert.Harris@Sun.COM * Insert a new element before the current one. 71710910SRobert.Harris@Sun.COM */ 71810910SRobert.Harris@Sun.COM newp = mntfs_copy(tempelemp); 71910910SRobert.Harris@Sun.COM vfs_mono_time(&newp->mnte_birth); 72010910SRobert.Harris@Sun.COM if (prevp) { 72110910SRobert.Harris@Sun.COM mntfs_insert_after(newp, prevp); 72210910SRobert.Harris@Sun.COM } else { 72310910SRobert.Harris@Sun.COM newp->mnte_next = elemp; 72410910SRobert.Harris@Sun.COM newp->mnte_prev = NULL; 72510910SRobert.Harris@Sun.COM elemp->mnte_prev = newp; 72610910SRobert.Harris@Sun.COM ASSERT(*headpp == elemp); 72710910SRobert.Harris@Sun.COM *headpp = newp; 72810910SRobert.Harris@Sun.COM } 72910910SRobert.Harris@Sun.COM elemp = newp; 73010910SRobert.Harris@Sun.COM } else if (!mntfs_is_same_element(elemp, tempelemp)) { 73110910SRobert.Harris@Sun.COM /* 73210910SRobert.Harris@Sun.COM * The element corresponds to the vfs_t, but the 73310910SRobert.Harris@Sun.COM * vfs_t has changed; it must have been 73410910SRobert.Harris@Sun.COM * remounted. Kill the old element and insert a 73510910SRobert.Harris@Sun.COM * new one after it. 73610910SRobert.Harris@Sun.COM */ 73710910SRobert.Harris@Sun.COM vfs_mono_time(&elemp->mnte_death); 73810910SRobert.Harris@Sun.COM newp = mntfs_copy(tempelemp); 73910910SRobert.Harris@Sun.COM vfs_mono_time(&newp->mnte_birth); 74010910SRobert.Harris@Sun.COM mntfs_insert_after(newp, elemp); 74110910SRobert.Harris@Sun.COM elemp = newp; 74210910SRobert.Harris@Sun.COM } 7430Sstevel@tonic-gate 74410910SRobert.Harris@Sun.COM /* We've found the corresponding element. Hold it. */ 74510910SRobert.Harris@Sun.COM DTRACE_PROBE1(incrementing, mntelem_t *, elemp); 74610910SRobert.Harris@Sun.COM elemp->mnte_refcnt++; 7470Sstevel@tonic-gate 74810910SRobert.Harris@Sun.COM /* 74910910SRobert.Harris@Sun.COM * Update the parameters used to initialise the 75010910SRobert.Harris@Sun.COM * snapshot. 75110910SRobert.Harris@Sun.COM */ 75210910SRobert.Harris@Sun.COM nmnts++; 75311757SRobert.Harris@Sun.COM total_text_size += elemp->mnte_text_size; 75411757SRobert.Harris@Sun.COM if (!elemp->mnte_hidden) 75511757SRobert.Harris@Sun.COM normal_text_size += elemp->mnte_text_size; 75610910SRobert.Harris@Sun.COM if (!firstp) 75710910SRobert.Harris@Sun.COM firstp = elemp; 7580Sstevel@tonic-gate 75910910SRobert.Harris@Sun.COM prevp = elemp; 76010910SRobert.Harris@Sun.COM elemp = elemp->mnte_next; 76110910SRobert.Harris@Sun.COM } 76210910SRobert.Harris@Sun.COM 76310910SRobert.Harris@Sun.COM if (vfsp == lastvfsp) 76410910SRobert.Harris@Sun.COM break; 76510910SRobert.Harris@Sun.COM } 7660Sstevel@tonic-gate 7670Sstevel@tonic-gate /* 76810910SRobert.Harris@Sun.COM * Any remaining visible database elements that are still alive must be 76910910SRobert.Harris@Sun.COM * killed now, because their corresponding vfs_ts must have been 77010910SRobert.Harris@Sun.COM * unmounted. 7710Sstevel@tonic-gate */ 77210910SRobert.Harris@Sun.COM for (; elemp; elemp = elemp->mnte_next) { 77310910SRobert.Harris@Sun.COM if (MNTFS_ELEM_IS_ALIVE(elemp) && 77410910SRobert.Harris@Sun.COM (!elemp->mnte_hidden || show_hidden)) 77510910SRobert.Harris@Sun.COM vfs_mono_time(&elemp->mnte_death); 7760Sstevel@tonic-gate } 7770Sstevel@tonic-gate 77810910SRobert.Harris@Sun.COM /* Initialise the snapshot. */ 77910910SRobert.Harris@Sun.COM vfs_mono_time(&snapp->mnts_time); 78010910SRobert.Harris@Sun.COM snapp->mnts_last_mtime = last_mtime; 78110910SRobert.Harris@Sun.COM snapp->mnts_first = snapp->mnts_next = firstp; 78210910SRobert.Harris@Sun.COM snapp->mnts_flags = show_hidden ? MNTS_SHOWHIDDEN : 0; 78310910SRobert.Harris@Sun.COM snapp->mnts_nmnts = nmnts; 78411757SRobert.Harris@Sun.COM snapp->mnts_text_size = total_text_size; 78510910SRobert.Harris@Sun.COM snapp->mnts_foffset = snapp->mnts_ieoffset = 0; 7860Sstevel@tonic-gate 78711757SRobert.Harris@Sun.COM /* 78811757SRobert.Harris@Sun.COM * Record /etc/mnttab's current size and mtime for possible future use 78911757SRobert.Harris@Sun.COM * by mntgetattr(). 79011757SRobert.Harris@Sun.COM */ 79111757SRobert.Harris@Sun.COM mnd->mnt_size = normal_text_size; 79211757SRobert.Harris@Sun.COM mnd->mnt_mtime = last_mtime; 79311757SRobert.Harris@Sun.COM if (show_hidden) { 79411757SRobert.Harris@Sun.COM mnd->mnt_hidden_size = total_text_size; 79511757SRobert.Harris@Sun.COM mnd->mnt_hidden_mtime = last_mtime; 79611757SRobert.Harris@Sun.COM } 79711757SRobert.Harris@Sun.COM 79810910SRobert.Harris@Sun.COM /* Clean up. */ 79910910SRobert.Harris@Sun.COM rw_exit(dblockp); 80010910SRobert.Harris@Sun.COM vfs_list_unlock(); 80110910SRobert.Harris@Sun.COM if (dummyvfsp != NULL) 80210910SRobert.Harris@Sun.COM refstr_rele(dummyvfsp->vfs_mntpt); 80310910SRobert.Harris@Sun.COM kmem_free(tempelemp->mnte_text, entry_length); 80410910SRobert.Harris@Sun.COM kmem_free(tempelemp, sizeof (mntelem_t)); 8050Sstevel@tonic-gate } 8060Sstevel@tonic-gate 8070Sstevel@tonic-gate /* 8080Sstevel@tonic-gate * Public function to convert vfs_mntopts into a string. 8090Sstevel@tonic-gate * A buffer of sufficient size is allocated, which is returned via bufp, 8100Sstevel@tonic-gate * and whose length is returned via lenp. 8110Sstevel@tonic-gate */ 8120Sstevel@tonic-gate void 8130Sstevel@tonic-gate mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp) 8140Sstevel@tonic-gate { 8150Sstevel@tonic-gate size_t len; 8160Sstevel@tonic-gate char *buf; 8170Sstevel@tonic-gate 8180Sstevel@tonic-gate vfs_list_read_lock(); 8190Sstevel@tonic-gate 8200Sstevel@tonic-gate len = mntfs_optsize(vfsp) + 1; 8210Sstevel@tonic-gate buf = kmem_alloc(len, KM_NOSLEEP); 8220Sstevel@tonic-gate if (buf == NULL) { 8230Sstevel@tonic-gate *bufp = NULL; 8240Sstevel@tonic-gate vfs_list_unlock(); 8250Sstevel@tonic-gate return; 8260Sstevel@tonic-gate } 8270Sstevel@tonic-gate buf[len - 1] = '\0'; 8280Sstevel@tonic-gate (void) mntfs_optprint(vfsp, buf); 8290Sstevel@tonic-gate ASSERT(buf[len - 1] == '\0'); 8300Sstevel@tonic-gate 8310Sstevel@tonic-gate vfs_list_unlock(); 8320Sstevel@tonic-gate *bufp = buf; 8330Sstevel@tonic-gate *lenp = len; 8340Sstevel@tonic-gate } 8350Sstevel@tonic-gate 8360Sstevel@tonic-gate /* ARGSUSED */ 8370Sstevel@tonic-gate static int 8385331Samw mntopen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 8390Sstevel@tonic-gate { 8400Sstevel@tonic-gate vnode_t *vp = *vpp; 8410Sstevel@tonic-gate mntnode_t *nmnp; 8420Sstevel@tonic-gate 8430Sstevel@tonic-gate /* 8440Sstevel@tonic-gate * Not allowed to open for writing, return error. 8450Sstevel@tonic-gate */ 8460Sstevel@tonic-gate if (flag & FWRITE) 8470Sstevel@tonic-gate return (EPERM); 8480Sstevel@tonic-gate /* 8490Sstevel@tonic-gate * Create a new mnt/vnode for each open, this will give us a handle to 8500Sstevel@tonic-gate * hang the snapshot on. 8510Sstevel@tonic-gate */ 8520Sstevel@tonic-gate nmnp = mntgetnode(vp); 8530Sstevel@tonic-gate 8540Sstevel@tonic-gate *vpp = MTOV(nmnp); 8550Sstevel@tonic-gate atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1); 8560Sstevel@tonic-gate VN_RELE(vp); 8570Sstevel@tonic-gate return (0); 8580Sstevel@tonic-gate } 8590Sstevel@tonic-gate 8600Sstevel@tonic-gate /* ARGSUSED */ 8610Sstevel@tonic-gate static int 8625331Samw mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 8635331Samw caller_context_t *ct) 8640Sstevel@tonic-gate { 8650Sstevel@tonic-gate mntnode_t *mnp = VTOM(vp); 8660Sstevel@tonic-gate 8670Sstevel@tonic-gate /* Clean up any locks or shares held by the current process */ 8680Sstevel@tonic-gate cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 8690Sstevel@tonic-gate cleanshares(vp, ttoproc(curthread)->p_pid); 8700Sstevel@tonic-gate 8710Sstevel@tonic-gate if (count > 1) 8720Sstevel@tonic-gate return (0); 8730Sstevel@tonic-gate if (vp->v_count == 1) { 87410910SRobert.Harris@Sun.COM rw_enter(&mnp->mnt_contents, RW_WRITER); 87510910SRobert.Harris@Sun.COM mntfs_freesnap(mnp, &mnp->mnt_read); 87610910SRobert.Harris@Sun.COM mntfs_freesnap(mnp, &mnp->mnt_ioctl); 87710910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 8780Sstevel@tonic-gate atomic_add_32(&MTOD(mnp)->mnt_nopen, -1); 8790Sstevel@tonic-gate } 8800Sstevel@tonic-gate return (0); 8810Sstevel@tonic-gate } 8820Sstevel@tonic-gate 8830Sstevel@tonic-gate /* ARGSUSED */ 8840Sstevel@tonic-gate static int 8850Sstevel@tonic-gate mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) 8860Sstevel@tonic-gate { 88710910SRobert.Harris@Sun.COM mntnode_t *mnp = VTOM(vp); 88810910SRobert.Harris@Sun.COM zone_t *zonep = MTOD(mnp)->mnt_zone; 88910910SRobert.Harris@Sun.COM mntsnap_t *snapp = &mnp->mnt_read; 8900Sstevel@tonic-gate off_t off = uio->uio_offset; 8910Sstevel@tonic-gate size_t len = uio->uio_resid; 89210910SRobert.Harris@Sun.COM char *bufferp; 89310910SRobert.Harris@Sun.COM size_t available, copylen; 89410910SRobert.Harris@Sun.COM size_t written = 0; 89510910SRobert.Harris@Sun.COM mntelem_t *elemp; 89610910SRobert.Harris@Sun.COM krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 89710910SRobert.Harris@Sun.COM int error = 0; 89810910SRobert.Harris@Sun.COM off_t ieoffset; 8990Sstevel@tonic-gate 90010910SRobert.Harris@Sun.COM rw_enter(&mnp->mnt_contents, RW_WRITER); 90110910SRobert.Harris@Sun.COM if (snapp->mnts_nmnts == 0 || (off == (off_t)0)) 90210910SRobert.Harris@Sun.COM mntfs_snapshot(mnp, snapp); 9030Sstevel@tonic-gate 90410910SRobert.Harris@Sun.COM if ((size_t)(off + len) > snapp->mnts_text_size) 90510910SRobert.Harris@Sun.COM len = snapp->mnts_text_size - off; 90610910SRobert.Harris@Sun.COM 90710910SRobert.Harris@Sun.COM if (off < 0 || len > snapp->mnts_text_size) { 9088004SViswanathan.Kannappan@Sun.COM rw_exit(&mnp->mnt_contents); 9090Sstevel@tonic-gate return (EFAULT); 9108004SViswanathan.Kannappan@Sun.COM } 9110Sstevel@tonic-gate 9128004SViswanathan.Kannappan@Sun.COM if (len == 0) { 9138004SViswanathan.Kannappan@Sun.COM rw_exit(&mnp->mnt_contents); 9140Sstevel@tonic-gate return (0); 9158004SViswanathan.Kannappan@Sun.COM } 9160Sstevel@tonic-gate 9170Sstevel@tonic-gate /* 91810910SRobert.Harris@Sun.COM * For the file offset provided, locate the corresponding database 91910910SRobert.Harris@Sun.COM * element and calculate the corresponding offset within its text. If 92010910SRobert.Harris@Sun.COM * the file offset is the same as that reached during the last read(2) 92110910SRobert.Harris@Sun.COM * then use the saved element and intra-element offset. 9220Sstevel@tonic-gate */ 92310910SRobert.Harris@Sun.COM rw_enter(dblockp, RW_READER); 92410910SRobert.Harris@Sun.COM if (off == 0 || (off == snapp->mnts_foffset)) { 92510910SRobert.Harris@Sun.COM elemp = snapp->mnts_next; 92610910SRobert.Harris@Sun.COM ieoffset = snapp->mnts_ieoffset; 92710910SRobert.Harris@Sun.COM } else { 92810910SRobert.Harris@Sun.COM off_t total_off; 92910910SRobert.Harris@Sun.COM /* 93010910SRobert.Harris@Sun.COM * Find the element corresponding to the requested file offset 93110910SRobert.Harris@Sun.COM * by walking through the database and summing the text sizes 93210910SRobert.Harris@Sun.COM * of the individual elements. If the requested file offset is 93310910SRobert.Harris@Sun.COM * greater than that reached on the last visit then we can start 93410910SRobert.Harris@Sun.COM * at the last seen element; otherwise, we have to start at the 93510910SRobert.Harris@Sun.COM * beginning. 93610910SRobert.Harris@Sun.COM */ 93710910SRobert.Harris@Sun.COM if (off > snapp->mnts_foffset) { 93810910SRobert.Harris@Sun.COM elemp = snapp->mnts_next; 93910910SRobert.Harris@Sun.COM total_off = snapp->mnts_foffset - snapp->mnts_ieoffset; 94010910SRobert.Harris@Sun.COM } else { 94110910SRobert.Harris@Sun.COM elemp = snapp->mnts_first; 94210910SRobert.Harris@Sun.COM total_off = 0; 94310910SRobert.Harris@Sun.COM } 94410910SRobert.Harris@Sun.COM while (off > total_off + elemp->mnte_text_size) { 94510910SRobert.Harris@Sun.COM total_off += elemp->mnte_text_size; 94610910SRobert.Harris@Sun.COM elemp = mntfs_get_next_elem(snapp, elemp); 94710910SRobert.Harris@Sun.COM ASSERT(elemp != NULL); 94810910SRobert.Harris@Sun.COM } 94910910SRobert.Harris@Sun.COM /* Calculate the intra-element offset. */ 95010910SRobert.Harris@Sun.COM if (off > total_off) 95110910SRobert.Harris@Sun.COM ieoffset = off - total_off; 95210910SRobert.Harris@Sun.COM else 95310910SRobert.Harris@Sun.COM ieoffset = 0; 9540Sstevel@tonic-gate } 95510910SRobert.Harris@Sun.COM 95610910SRobert.Harris@Sun.COM /* 95710910SRobert.Harris@Sun.COM * Create a buffer and populate it with the text from successive 95810910SRobert.Harris@Sun.COM * database elements until it is full. 95910910SRobert.Harris@Sun.COM */ 96010910SRobert.Harris@Sun.COM bufferp = kmem_alloc(len, KM_SLEEP); 96110910SRobert.Harris@Sun.COM while (written < len) { 96210910SRobert.Harris@Sun.COM available = elemp->mnte_text_size - ieoffset; 96310910SRobert.Harris@Sun.COM copylen = MIN(len - written, available); 96410910SRobert.Harris@Sun.COM bcopy(elemp->mnte_text + ieoffset, bufferp + written, copylen); 96510910SRobert.Harris@Sun.COM written += copylen; 96610910SRobert.Harris@Sun.COM if (copylen == available) { 96710910SRobert.Harris@Sun.COM elemp = mntfs_get_next_elem(snapp, elemp); 96810910SRobert.Harris@Sun.COM ASSERT(elemp != NULL || written == len); 96910910SRobert.Harris@Sun.COM ieoffset = 0; 97010910SRobert.Harris@Sun.COM } else { 97110910SRobert.Harris@Sun.COM ieoffset += copylen; 97210910SRobert.Harris@Sun.COM } 97310910SRobert.Harris@Sun.COM } 97410910SRobert.Harris@Sun.COM rw_exit(dblockp); 97510910SRobert.Harris@Sun.COM 97610910SRobert.Harris@Sun.COM /* 97710910SRobert.Harris@Sun.COM * Write the populated buffer, update the snapshot's state if 97810910SRobert.Harris@Sun.COM * successful and then advertise our read. 97910910SRobert.Harris@Sun.COM */ 98010910SRobert.Harris@Sun.COM error = uiomove(bufferp, len, UIO_READ, uio); 98110910SRobert.Harris@Sun.COM if (error == 0) { 98210910SRobert.Harris@Sun.COM snapp->mnts_next = elemp; 98310910SRobert.Harris@Sun.COM snapp->mnts_foffset = off + len; 98410910SRobert.Harris@Sun.COM snapp->mnts_ieoffset = ieoffset; 98510910SRobert.Harris@Sun.COM } 9864863Spraks vfs_mnttab_readop(); 9878004SViswanathan.Kannappan@Sun.COM rw_exit(&mnp->mnt_contents); 98810910SRobert.Harris@Sun.COM 98910910SRobert.Harris@Sun.COM /* Clean up. */ 99010910SRobert.Harris@Sun.COM kmem_free(bufferp, len); 9910Sstevel@tonic-gate return (error); 9920Sstevel@tonic-gate } 9930Sstevel@tonic-gate 9940Sstevel@tonic-gate static int 9955331Samw mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 9965331Samw caller_context_t *ct) 9970Sstevel@tonic-gate { 99811757SRobert.Harris@Sun.COM int mask = vap->va_mask; 9990Sstevel@tonic-gate int error; 100011757SRobert.Harris@Sun.COM mntnode_t *mnp = VTOM(vp); 100111757SRobert.Harris@Sun.COM timespec_t mtime, old_mtime; 100211757SRobert.Harris@Sun.COM size_t size, old_size; 100311757SRobert.Harris@Sun.COM mntdata_t *mntdata = MTOD(VTOM(vp)); 100411757SRobert.Harris@Sun.COM mntsnap_t *rsnapp, *isnapp; 10050Sstevel@tonic-gate extern timespec_t vfs_mnttab_ctime; 10060Sstevel@tonic-gate 100711757SRobert.Harris@Sun.COM 100811757SRobert.Harris@Sun.COM /* AT_MODE, AT_UID and AT_GID are derived from the underlying file. */ 100911757SRobert.Harris@Sun.COM if (mask & AT_MODE|AT_UID|AT_GID) { 101011757SRobert.Harris@Sun.COM if (error = VOP_GETATTR(mnp->mnt_mountvp, vap, flags, cr, ct)) 101111757SRobert.Harris@Sun.COM return (error); 101210910SRobert.Harris@Sun.COM } 10130Sstevel@tonic-gate 10140Sstevel@tonic-gate /* 101511757SRobert.Harris@Sun.COM * There are some minor subtleties in the determination of 101611757SRobert.Harris@Sun.COM * /etc/mnttab's size and mtime. We wish to avoid any condition in 101711757SRobert.Harris@Sun.COM * which, in the vicinity of a change to the in-kernel mnttab, we 101811757SRobert.Harris@Sun.COM * return an old value for one but a new value for the other. We cannot 101911757SRobert.Harris@Sun.COM * simply hold vfslist for the entire calculation because we might need 102011757SRobert.Harris@Sun.COM * to call mntfs_snapshot(), which calls vfs_list_read_lock(). 10210Sstevel@tonic-gate */ 102211757SRobert.Harris@Sun.COM if (mask & AT_SIZE|AT_NBLOCKS) { 102311757SRobert.Harris@Sun.COM rw_enter(&mnp->mnt_contents, RW_WRITER); 102411757SRobert.Harris@Sun.COM 102511757SRobert.Harris@Sun.COM vfs_list_read_lock(); 102611757SRobert.Harris@Sun.COM vfs_mnttab_modtime(&mtime); 102711757SRobert.Harris@Sun.COM if (mnp->mnt_flags & MNT_SHOWHIDDEN) { 102811757SRobert.Harris@Sun.COM old_mtime = mntdata->mnt_hidden_mtime; 102911757SRobert.Harris@Sun.COM old_size = mntdata->mnt_hidden_size; 103011757SRobert.Harris@Sun.COM } else { 103111757SRobert.Harris@Sun.COM old_mtime = mntdata->mnt_mtime; 103211757SRobert.Harris@Sun.COM old_size = mntdata->mnt_size; 103311757SRobert.Harris@Sun.COM } 103411757SRobert.Harris@Sun.COM vfs_list_unlock(); 103511757SRobert.Harris@Sun.COM 103611757SRobert.Harris@Sun.COM rsnapp = &mnp->mnt_read; 103711757SRobert.Harris@Sun.COM isnapp = &mnp->mnt_ioctl; 103811757SRobert.Harris@Sun.COM if (rsnapp->mnts_nmnts || isnapp->mnts_nmnts) { 103911757SRobert.Harris@Sun.COM /* 104011757SRobert.Harris@Sun.COM * The mntnode already has at least one snapshot from 104111757SRobert.Harris@Sun.COM * which to take the size; the user will understand from 104211757SRobert.Harris@Sun.COM * mnttab(4) that the current size of the in-kernel 104311757SRobert.Harris@Sun.COM * mnttab is irrelevant. 104411757SRobert.Harris@Sun.COM */ 104511757SRobert.Harris@Sun.COM size = rsnapp->mnts_nmnts ? rsnapp->mnts_text_size : 104611757SRobert.Harris@Sun.COM isnapp->mnts_text_size; 104711757SRobert.Harris@Sun.COM } else if (mntfs_newest(&mtime, &old_mtime) == MNTFS_NEITHER) { 104811757SRobert.Harris@Sun.COM /* 104911757SRobert.Harris@Sun.COM * There is no existing valid snapshot but the in-kernel 105011757SRobert.Harris@Sun.COM * mnttab has not changed since the time that the last 105111757SRobert.Harris@Sun.COM * one was generated. Use the old file size; note that 105211757SRobert.Harris@Sun.COM * it is guaranteed to be consistent with mtime, which 105311757SRobert.Harris@Sun.COM * may be returned to the user later. 105411757SRobert.Harris@Sun.COM */ 105511757SRobert.Harris@Sun.COM size = old_size; 105611757SRobert.Harris@Sun.COM } else { 105711757SRobert.Harris@Sun.COM /* 105811757SRobert.Harris@Sun.COM * There is no snapshot and the in-kernel mnttab has 105911757SRobert.Harris@Sun.COM * changed since the last one was created. We generate a 106011757SRobert.Harris@Sun.COM * new snapshot which we use for not only the size but 106111757SRobert.Harris@Sun.COM * also the mtime, thereby ensuring that the two are 106211757SRobert.Harris@Sun.COM * consistent. 106311757SRobert.Harris@Sun.COM */ 106411757SRobert.Harris@Sun.COM mntfs_snapshot(mnp, rsnapp); 106511757SRobert.Harris@Sun.COM size = rsnapp->mnts_text_size; 106611757SRobert.Harris@Sun.COM mtime = rsnapp->mnts_last_mtime; 106711757SRobert.Harris@Sun.COM mntfs_freesnap(mnp, rsnapp); 106811757SRobert.Harris@Sun.COM } 106911757SRobert.Harris@Sun.COM 107011757SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 107111757SRobert.Harris@Sun.COM } else if (mask & AT_ATIME|AT_MTIME) { 107211757SRobert.Harris@Sun.COM vfs_list_read_lock(); 107311757SRobert.Harris@Sun.COM vfs_mnttab_modtime(&mtime); 107411757SRobert.Harris@Sun.COM vfs_list_unlock(); 107511757SRobert.Harris@Sun.COM } 107611757SRobert.Harris@Sun.COM 107711757SRobert.Harris@Sun.COM /* Always look like a regular file. */ 107811757SRobert.Harris@Sun.COM if (mask & AT_TYPE) 107911757SRobert.Harris@Sun.COM vap->va_type = VREG; 108011757SRobert.Harris@Sun.COM /* Mode should basically be read only. */ 108111757SRobert.Harris@Sun.COM if (mask & AT_MODE) 108211757SRobert.Harris@Sun.COM vap->va_mode &= 07444; 108311757SRobert.Harris@Sun.COM if (mask & AT_FSID) 108411757SRobert.Harris@Sun.COM vap->va_fsid = vp->v_vfsp->vfs_dev; 108511757SRobert.Harris@Sun.COM /* Nodeid is always ROOTINO. */ 108611757SRobert.Harris@Sun.COM if (mask & AT_NODEID) 108711757SRobert.Harris@Sun.COM vap->va_nodeid = (ino64_t)MNTROOTINO; 10880Sstevel@tonic-gate /* 10890Sstevel@tonic-gate * Set nlink to the number of open vnodes for mnttab info 10900Sstevel@tonic-gate * plus one for existing. 10910Sstevel@tonic-gate */ 109211757SRobert.Harris@Sun.COM if (mask & AT_NLINK) 109311757SRobert.Harris@Sun.COM vap->va_nlink = mntdata->mnt_nopen + 1; 109411757SRobert.Harris@Sun.COM if (mask & AT_SIZE) 109511757SRobert.Harris@Sun.COM vap->va_size = size; 109611757SRobert.Harris@Sun.COM if (mask & AT_ATIME) 109711757SRobert.Harris@Sun.COM vap->va_atime = mtime; 109811757SRobert.Harris@Sun.COM if (mask & AT_MTIME) 109911757SRobert.Harris@Sun.COM vap->va_mtime = mtime; 110011757SRobert.Harris@Sun.COM if (mask & AT_CTIME) 110111757SRobert.Harris@Sun.COM vap->va_ctime = vfs_mnttab_ctime; 110211757SRobert.Harris@Sun.COM if (mask & AT_RDEV) 110311757SRobert.Harris@Sun.COM vap->va_rdev = 0; 110411757SRobert.Harris@Sun.COM if (mask & AT_BLKSIZE) 110511757SRobert.Harris@Sun.COM vap->va_blksize = DEV_BSIZE; 110611757SRobert.Harris@Sun.COM if (mask & AT_NBLOCKS) 110711757SRobert.Harris@Sun.COM vap->va_nblocks = btod(size); 110811757SRobert.Harris@Sun.COM if (mask & AT_SEQ) 110911757SRobert.Harris@Sun.COM vap->va_seq = 0; 111011757SRobert.Harris@Sun.COM 11110Sstevel@tonic-gate return (0); 11120Sstevel@tonic-gate } 11130Sstevel@tonic-gate 11140Sstevel@tonic-gate static int 11155331Samw mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr, 11165331Samw caller_context_t *ct) 11170Sstevel@tonic-gate { 11180Sstevel@tonic-gate mntnode_t *mnp = VTOM(vp); 11190Sstevel@tonic-gate 11200Sstevel@tonic-gate if (mode & (VWRITE|VEXEC)) 11210Sstevel@tonic-gate return (EROFS); 11220Sstevel@tonic-gate 11230Sstevel@tonic-gate /* 11240Sstevel@tonic-gate * Do access check on the underlying directory vnode. 11250Sstevel@tonic-gate */ 11265331Samw return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr, ct)); 11270Sstevel@tonic-gate } 11280Sstevel@tonic-gate 11290Sstevel@tonic-gate 11300Sstevel@tonic-gate /* 11310Sstevel@tonic-gate * New /mntfs vnode required; allocate it and fill in most of the fields. 11320Sstevel@tonic-gate */ 11330Sstevel@tonic-gate static mntnode_t * 11340Sstevel@tonic-gate mntgetnode(vnode_t *dp) 11350Sstevel@tonic-gate { 11360Sstevel@tonic-gate mntnode_t *mnp; 11370Sstevel@tonic-gate vnode_t *vp; 11380Sstevel@tonic-gate 11390Sstevel@tonic-gate mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP); 11400Sstevel@tonic-gate mnp->mnt_vnode = vn_alloc(KM_SLEEP); 11410Sstevel@tonic-gate mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp; 11428004SViswanathan.Kannappan@Sun.COM rw_init(&mnp->mnt_contents, NULL, RW_DEFAULT, NULL); 11430Sstevel@tonic-gate vp = MTOV(mnp); 11440Sstevel@tonic-gate vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; 11450Sstevel@tonic-gate vn_setops(vp, mntvnodeops); 11460Sstevel@tonic-gate vp->v_vfsp = dp->v_vfsp; 11470Sstevel@tonic-gate vp->v_type = VREG; 11480Sstevel@tonic-gate vp->v_data = (caddr_t)mnp; 11490Sstevel@tonic-gate 11500Sstevel@tonic-gate return (mnp); 11510Sstevel@tonic-gate } 11520Sstevel@tonic-gate 11530Sstevel@tonic-gate /* 11540Sstevel@tonic-gate * Free the storage obtained from mntgetnode(). 11550Sstevel@tonic-gate */ 11560Sstevel@tonic-gate static void 11570Sstevel@tonic-gate mntfreenode(mntnode_t *mnp) 11580Sstevel@tonic-gate { 11590Sstevel@tonic-gate vnode_t *vp = MTOV(mnp); 11600Sstevel@tonic-gate 11618004SViswanathan.Kannappan@Sun.COM rw_destroy(&mnp->mnt_contents); 11620Sstevel@tonic-gate vn_invalid(vp); 11630Sstevel@tonic-gate vn_free(vp); 11640Sstevel@tonic-gate kmem_free(mnp, sizeof (*mnp)); 11650Sstevel@tonic-gate } 11660Sstevel@tonic-gate 11670Sstevel@tonic-gate 11680Sstevel@tonic-gate /* ARGSUSED */ 11690Sstevel@tonic-gate static int 11705331Samw mntfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 11710Sstevel@tonic-gate { 11720Sstevel@tonic-gate return (0); 11730Sstevel@tonic-gate } 11740Sstevel@tonic-gate 11750Sstevel@tonic-gate /* ARGSUSED */ 11760Sstevel@tonic-gate static void 11775331Samw mntinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 11780Sstevel@tonic-gate { 11790Sstevel@tonic-gate mntnode_t *mnp = VTOM(vp); 11800Sstevel@tonic-gate 11810Sstevel@tonic-gate mntfreenode(mnp); 11820Sstevel@tonic-gate } 11830Sstevel@tonic-gate 118410910SRobert.Harris@Sun.COM /* 118511757SRobert.Harris@Sun.COM * lseek(2) is supported only to rewind the file by resetmnttab(3C). Rewinding 118611757SRobert.Harris@Sun.COM * has a special meaning for /etc/mnttab: it forces mntfs to refresh the 118711757SRobert.Harris@Sun.COM * snapshot at the next ioctl(). 118810910SRobert.Harris@Sun.COM * 118911757SRobert.Harris@Sun.COM * mnttab(4) explains that "the snapshot...is taken any time a read(2) is 119011757SRobert.Harris@Sun.COM * performed at offset 0". We therefore ignore the read snapshot here. 119110910SRobert.Harris@Sun.COM */ 11920Sstevel@tonic-gate /* ARGSUSED */ 11930Sstevel@tonic-gate static int 119410910SRobert.Harris@Sun.COM mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 11950Sstevel@tonic-gate { 11968004SViswanathan.Kannappan@Sun.COM mntnode_t *mnp = VTOM(vp); 11978004SViswanathan.Kannappan@Sun.COM 11988004SViswanathan.Kannappan@Sun.COM if (*noffp == 0) { 11998004SViswanathan.Kannappan@Sun.COM rw_enter(&mnp->mnt_contents, RW_WRITER); 120010910SRobert.Harris@Sun.COM mnp->mnt_ioctl.mnts_flags |= MNTS_REWIND; 12018004SViswanathan.Kannappan@Sun.COM rw_exit(&mnp->mnt_contents); 12028004SViswanathan.Kannappan@Sun.COM } 12030Sstevel@tonic-gate 12040Sstevel@tonic-gate return (0); 12050Sstevel@tonic-gate } 12060Sstevel@tonic-gate 12070Sstevel@tonic-gate /* 12080Sstevel@tonic-gate * Return the answer requested to poll(). 12090Sstevel@tonic-gate * POLLRDBAND will return when the mtime of the mnttab 12100Sstevel@tonic-gate * information is newer than the latest one read for this open. 12110Sstevel@tonic-gate */ 12120Sstevel@tonic-gate /* ARGSUSED */ 12130Sstevel@tonic-gate static int 12145331Samw mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp, 12155331Samw caller_context_t *ct) 12160Sstevel@tonic-gate { 12170Sstevel@tonic-gate mntnode_t *mnp = VTOM(vp); 121810910SRobert.Harris@Sun.COM mntsnap_t *snapp; 12190Sstevel@tonic-gate 12208004SViswanathan.Kannappan@Sun.COM rw_enter(&mnp->mnt_contents, RW_READER); 122110910SRobert.Harris@Sun.COM if (mntfs_newest(&mnp->mnt_ioctl.mnts_last_mtime, 122210910SRobert.Harris@Sun.COM &mnp->mnt_read.mnts_last_mtime) == MNTFS_FIRST) 122310910SRobert.Harris@Sun.COM snapp = &mnp->mnt_ioctl; 122410910SRobert.Harris@Sun.COM else 122510910SRobert.Harris@Sun.COM snapp = &mnp->mnt_read; 12260Sstevel@tonic-gate 12270Sstevel@tonic-gate *revp = 0; 12280Sstevel@tonic-gate *phpp = (pollhead_t *)NULL; 12290Sstevel@tonic-gate if (ev & POLLIN) 12300Sstevel@tonic-gate *revp |= POLLIN; 12310Sstevel@tonic-gate 12320Sstevel@tonic-gate if (ev & POLLRDNORM) 12330Sstevel@tonic-gate *revp |= POLLRDNORM; 12340Sstevel@tonic-gate 12350Sstevel@tonic-gate if (ev & POLLRDBAND) { 123610910SRobert.Harris@Sun.COM vfs_mnttab_poll(&snapp->mnts_last_mtime, phpp); 12370Sstevel@tonic-gate if (*phpp == (pollhead_t *)NULL) 12380Sstevel@tonic-gate *revp |= POLLRDBAND; 12390Sstevel@tonic-gate } 12408004SViswanathan.Kannappan@Sun.COM rw_exit(&mnp->mnt_contents); 12418004SViswanathan.Kannappan@Sun.COM 12420Sstevel@tonic-gate if (*revp || *phpp != NULL || any) { 12430Sstevel@tonic-gate return (0); 12440Sstevel@tonic-gate } 12450Sstevel@tonic-gate /* 12460Sstevel@tonic-gate * If someone is polling an unsupported poll events (e.g. 12470Sstevel@tonic-gate * POLLOUT, POLLPRI, etc.), just return POLLERR revents. 12480Sstevel@tonic-gate * That way we will ensure that we don't return a 0 12490Sstevel@tonic-gate * revents with a NULL pollhead pointer. 12500Sstevel@tonic-gate */ 12510Sstevel@tonic-gate *revp = POLLERR; 12520Sstevel@tonic-gate return (0); 12530Sstevel@tonic-gate } 125410910SRobert.Harris@Sun.COM 125510910SRobert.Harris@Sun.COM /* 125610910SRobert.Harris@Sun.COM * mntfs_same_word() returns 1 if two words are the same in the context of 125710910SRobert.Harris@Sun.COM * MNTIOC_GETMNTANY and 0 otherwise. 125810910SRobert.Harris@Sun.COM * 125910910SRobert.Harris@Sun.COM * worda is a memory address that lies somewhere in the buffer bufa; it cannot 126010910SRobert.Harris@Sun.COM * be NULL since this is used to indicate to getmntany(3C) that the user does 126110910SRobert.Harris@Sun.COM * not wish to match a particular field. The text to which worda points is 126210910SRobert.Harris@Sun.COM * supplied by the user; if it is not null-terminated then it cannot match. 126310910SRobert.Harris@Sun.COM * 126410910SRobert.Harris@Sun.COM * Buffer bufb contains a line from /etc/mnttab, in which the fields are 126510910SRobert.Harris@Sun.COM * delimited by tab or new-line characters. offb is the offset of the second 126610910SRobert.Harris@Sun.COM * word within this buffer. 126710910SRobert.Harris@Sun.COM * 126810910SRobert.Harris@Sun.COM * mntfs_same_word() returns 1 if the words are the same and 0 otherwise. 126910910SRobert.Harris@Sun.COM */ 127010910SRobert.Harris@Sun.COM int 127110910SRobert.Harris@Sun.COM mntfs_same_word(char *worda, char *bufa, size_t sizea, off_t offb, char *bufb, 127210910SRobert.Harris@Sun.COM size_t sizeb) 127310910SRobert.Harris@Sun.COM { 127410910SRobert.Harris@Sun.COM char *wordb = bufb + offb; 127510910SRobert.Harris@Sun.COM int bytes_remaining; 127610910SRobert.Harris@Sun.COM 127710910SRobert.Harris@Sun.COM ASSERT(worda != NULL); 127810910SRobert.Harris@Sun.COM 127910910SRobert.Harris@Sun.COM bytes_remaining = MIN(((bufa + sizea) - worda), 128010910SRobert.Harris@Sun.COM ((bufb + sizeb) - wordb)); 128110910SRobert.Harris@Sun.COM while (bytes_remaining && *worda == *wordb) { 128210910SRobert.Harris@Sun.COM worda++; 128310910SRobert.Harris@Sun.COM wordb++; 128410910SRobert.Harris@Sun.COM bytes_remaining--; 128510910SRobert.Harris@Sun.COM } 128610910SRobert.Harris@Sun.COM if (bytes_remaining && 128710910SRobert.Harris@Sun.COM *worda == '\0' && (*wordb == '\t' || *wordb == '\n')) 128810910SRobert.Harris@Sun.COM return (1); 128910910SRobert.Harris@Sun.COM else 129010910SRobert.Harris@Sun.COM return (0); 129110910SRobert.Harris@Sun.COM } 129210910SRobert.Harris@Sun.COM 129310910SRobert.Harris@Sun.COM /* 129410910SRobert.Harris@Sun.COM * mntfs_special_info_string() returns which, if either, of VBLK or VCHR 129510910SRobert.Harris@Sun.COM * corresponds to a supplied path. If the path is a special device then the 129610910SRobert.Harris@Sun.COM * function optionally sets the major and minor numbers. 129710910SRobert.Harris@Sun.COM */ 129810910SRobert.Harris@Sun.COM vtype_t 129910910SRobert.Harris@Sun.COM mntfs_special_info_string(char *path, uint_t *major, uint_t *minor, cred_t *cr) 130010910SRobert.Harris@Sun.COM { 130110910SRobert.Harris@Sun.COM vattr_t vattr; 130210910SRobert.Harris@Sun.COM vnode_t *vp; 130310910SRobert.Harris@Sun.COM vtype_t type; 130410910SRobert.Harris@Sun.COM int error; 130510910SRobert.Harris@Sun.COM 130610910SRobert.Harris@Sun.COM if (path == NULL || *path != '/' || 130710910SRobert.Harris@Sun.COM lookupnameat(path + 1, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) 130810910SRobert.Harris@Sun.COM return (0); 130910910SRobert.Harris@Sun.COM 131010910SRobert.Harris@Sun.COM vattr.va_mask = AT_TYPE | AT_RDEV; 131110910SRobert.Harris@Sun.COM error = VOP_GETATTR(vp, &vattr, ATTR_REAL, cr, NULL); 131210910SRobert.Harris@Sun.COM VN_RELE(vp); 131310910SRobert.Harris@Sun.COM 131410910SRobert.Harris@Sun.COM if (error == 0 && ((type = vattr.va_type) == VBLK || type == VCHR)) { 131510910SRobert.Harris@Sun.COM if (major && minor) { 131610910SRobert.Harris@Sun.COM *major = getmajor(vattr.va_rdev); 131710910SRobert.Harris@Sun.COM *minor = getminor(vattr.va_rdev); 131810910SRobert.Harris@Sun.COM } 131910910SRobert.Harris@Sun.COM return (type); 132010910SRobert.Harris@Sun.COM } else { 132110910SRobert.Harris@Sun.COM return (0); 132210910SRobert.Harris@Sun.COM } 132310910SRobert.Harris@Sun.COM } 132410910SRobert.Harris@Sun.COM 132510910SRobert.Harris@Sun.COM /* 132610910SRobert.Harris@Sun.COM * mntfs_special_info_element() extracts the name of the mounted resource 132710910SRobert.Harris@Sun.COM * for a given element and copies it into a null-terminated string, which it 132810910SRobert.Harris@Sun.COM * then passes to mntfs_special_info_string(). 132910910SRobert.Harris@Sun.COM */ 133010910SRobert.Harris@Sun.COM vtype_t 133110910SRobert.Harris@Sun.COM mntfs_special_info_element(mntelem_t *elemp, cred_t *cr) 133210910SRobert.Harris@Sun.COM { 133310910SRobert.Harris@Sun.COM char *newpath; 133410910SRobert.Harris@Sun.COM vtype_t type; 133510910SRobert.Harris@Sun.COM 133610910SRobert.Harris@Sun.COM newpath = kmem_alloc(elemp->mnte_text_size, KM_SLEEP); 133710910SRobert.Harris@Sun.COM bcopy(elemp->mnte_text, newpath, (off_t)(elemp->mnte_tab.mnt_mountp)); 133810910SRobert.Harris@Sun.COM *(newpath + (off_t)elemp->mnte_tab.mnt_mountp - 1) = '\0'; 133910910SRobert.Harris@Sun.COM type = mntfs_special_info_string(newpath, NULL, NULL, cr); 134010910SRobert.Harris@Sun.COM kmem_free(newpath, elemp->mnte_text_size); 134110910SRobert.Harris@Sun.COM 134210910SRobert.Harris@Sun.COM return (type); 134310910SRobert.Harris@Sun.COM } 134410910SRobert.Harris@Sun.COM 134510910SRobert.Harris@Sun.COM /* 134610910SRobert.Harris@Sun.COM * Convert an address that points to a byte within a user buffer into an 134710910SRobert.Harris@Sun.COM * address that points to the corresponding offset within a kernel buffer. If 134810910SRobert.Harris@Sun.COM * the user address is NULL then make no conversion. If the address does not 134910910SRobert.Harris@Sun.COM * lie within the buffer then reset it to NULL. 135010910SRobert.Harris@Sun.COM */ 135110910SRobert.Harris@Sun.COM char * 135210910SRobert.Harris@Sun.COM mntfs_import_addr(char *uaddr, char *ubufp, char *kbufp, size_t bufsize) 135310910SRobert.Harris@Sun.COM { 135410910SRobert.Harris@Sun.COM if (uaddr < ubufp || uaddr >= ubufp + bufsize) 135510910SRobert.Harris@Sun.COM return (NULL); 135610910SRobert.Harris@Sun.COM else 135710910SRobert.Harris@Sun.COM return (kbufp + (uaddr - ubufp)); 135810910SRobert.Harris@Sun.COM } 135910910SRobert.Harris@Sun.COM 136010910SRobert.Harris@Sun.COM /* 136110910SRobert.Harris@Sun.COM * These 32-bit versions are to support STRUCT_DECL(9F) etc. in 136210910SRobert.Harris@Sun.COM * mntfs_copyout_element() and mntioctl(). 136310910SRobert.Harris@Sun.COM */ 136410910SRobert.Harris@Sun.COM #ifdef _SYSCALL32_IMPL 136510910SRobert.Harris@Sun.COM typedef struct extmnttab32 { 136610910SRobert.Harris@Sun.COM uint32_t mnt_special; 136710910SRobert.Harris@Sun.COM uint32_t mnt_mountp; 136810910SRobert.Harris@Sun.COM uint32_t mnt_fstype; 136910910SRobert.Harris@Sun.COM uint32_t mnt_mntopts; 137010910SRobert.Harris@Sun.COM uint32_t mnt_time; 137110910SRobert.Harris@Sun.COM uint_t mnt_major; 137210910SRobert.Harris@Sun.COM uint_t mnt_minor; 137310910SRobert.Harris@Sun.COM } extmnttab32_t; 137410910SRobert.Harris@Sun.COM 137510910SRobert.Harris@Sun.COM typedef struct mnttab32 { 137610910SRobert.Harris@Sun.COM uint32_t mnt_special; 137710910SRobert.Harris@Sun.COM uint32_t mnt_mountp; 137810910SRobert.Harris@Sun.COM uint32_t mnt_fstype; 137910910SRobert.Harris@Sun.COM uint32_t mnt_mntopts; 138010910SRobert.Harris@Sun.COM uint32_t mnt_time; 138110910SRobert.Harris@Sun.COM } mnttab32_t; 138210910SRobert.Harris@Sun.COM 138310910SRobert.Harris@Sun.COM struct mntentbuf32 { 138410910SRobert.Harris@Sun.COM uint32_t mbuf_emp; 138510910SRobert.Harris@Sun.COM uint_t mbuf_bufsize; 138610910SRobert.Harris@Sun.COM uint32_t mbuf_buf; 138710910SRobert.Harris@Sun.COM }; 138810910SRobert.Harris@Sun.COM #endif 138910910SRobert.Harris@Sun.COM 139010910SRobert.Harris@Sun.COM /* 139110910SRobert.Harris@Sun.COM * mntfs_copyout_element() is common code for the MNTIOC_GETMNTENT, 139210910SRobert.Harris@Sun.COM * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY ioctls. Having identifed the 139310910SRobert.Harris@Sun.COM * database element desired by the user, this function copies out the text and 139410910SRobert.Harris@Sun.COM * the pointers to the relevant userland addresses. It returns 0 on success 139510910SRobert.Harris@Sun.COM * and non-zero otherwise. 139610910SRobert.Harris@Sun.COM */ 139710910SRobert.Harris@Sun.COM int 139810910SRobert.Harris@Sun.COM mntfs_copyout_elem(mntelem_t *elemp, struct extmnttab *uemp, 139910910SRobert.Harris@Sun.COM char *ubufp, int cmd, int datamodel) 140010910SRobert.Harris@Sun.COM { 140110910SRobert.Harris@Sun.COM STRUCT_DECL(extmnttab, ktab); 140210910SRobert.Harris@Sun.COM char *dbbufp = elemp->mnte_text; 140310910SRobert.Harris@Sun.COM size_t dbbufsize = elemp->mnte_text_size; 140410910SRobert.Harris@Sun.COM struct extmnttab *dbtabp = &elemp->mnte_tab; 140510910SRobert.Harris@Sun.COM size_t ssize; 140610910SRobert.Harris@Sun.COM char *kbufp; 140710910SRobert.Harris@Sun.COM int error = 0; 140810910SRobert.Harris@Sun.COM 140910910SRobert.Harris@Sun.COM 141010910SRobert.Harris@Sun.COM /* 141110910SRobert.Harris@Sun.COM * We create a struct extmnttab within the kernel of the size 141210910SRobert.Harris@Sun.COM * determined by the user's data model. We then populate its 141310910SRobert.Harris@Sun.COM * fields by combining the start address of the text buffer 141410910SRobert.Harris@Sun.COM * supplied by the user, ubufp, with the offsets stored for 141510910SRobert.Harris@Sun.COM * this database element within dbtabp, a pointer to a struct 141610910SRobert.Harris@Sun.COM * extmnttab. 141710910SRobert.Harris@Sun.COM * 141810910SRobert.Harris@Sun.COM * Note that if the corresponding field is "-" this signifies 141910910SRobert.Harris@Sun.COM * no real content, and we set the address to NULL. This does 142010910SRobert.Harris@Sun.COM * not apply to mnt_time. 142110910SRobert.Harris@Sun.COM */ 142210910SRobert.Harris@Sun.COM STRUCT_INIT(ktab, datamodel); 142310910SRobert.Harris@Sun.COM STRUCT_FSETP(ktab, mnt_special, 142410910SRobert.Harris@Sun.COM MNTFS_REAL_FIELD(dbbufp) ? ubufp : NULL); 142510910SRobert.Harris@Sun.COM STRUCT_FSETP(ktab, mnt_mountp, 142610910SRobert.Harris@Sun.COM MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mountp) ? 142710910SRobert.Harris@Sun.COM ubufp + (off_t)dbtabp->mnt_mountp : NULL); 142810910SRobert.Harris@Sun.COM STRUCT_FSETP(ktab, mnt_fstype, 142910910SRobert.Harris@Sun.COM MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_fstype) ? 143010910SRobert.Harris@Sun.COM ubufp + (off_t)dbtabp->mnt_fstype : NULL); 143110910SRobert.Harris@Sun.COM STRUCT_FSETP(ktab, mnt_mntopts, 143210910SRobert.Harris@Sun.COM MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mntopts) ? 143310910SRobert.Harris@Sun.COM ubufp + (off_t)dbtabp->mnt_mntopts : NULL); 143410910SRobert.Harris@Sun.COM STRUCT_FSETP(ktab, mnt_time, 143510910SRobert.Harris@Sun.COM ubufp + (off_t)dbtabp->mnt_time); 143610910SRobert.Harris@Sun.COM if (cmd == MNTIOC_GETEXTMNTENT) { 143710910SRobert.Harris@Sun.COM STRUCT_FSETP(ktab, mnt_major, dbtabp->mnt_major); 143810910SRobert.Harris@Sun.COM STRUCT_FSETP(ktab, mnt_minor, dbtabp->mnt_minor); 143910910SRobert.Harris@Sun.COM ssize = SIZEOF_STRUCT(extmnttab, datamodel); 144010910SRobert.Harris@Sun.COM } else { 144110910SRobert.Harris@Sun.COM ssize = SIZEOF_STRUCT(mnttab, datamodel); 144210910SRobert.Harris@Sun.COM } 144310910SRobert.Harris@Sun.COM if (copyout(STRUCT_BUF(ktab), uemp, ssize)) 144410910SRobert.Harris@Sun.COM return (EFAULT); 144510910SRobert.Harris@Sun.COM 144610910SRobert.Harris@Sun.COM /* 144710910SRobert.Harris@Sun.COM * We create a text buffer in the kernel into which we copy the 144810910SRobert.Harris@Sun.COM * /etc/mnttab entry for this element. We change the tab and 144910910SRobert.Harris@Sun.COM * new-line delimiters to null bytes before copying out the 145010910SRobert.Harris@Sun.COM * buffer. 145110910SRobert.Harris@Sun.COM */ 145210910SRobert.Harris@Sun.COM kbufp = kmem_alloc(dbbufsize, KM_SLEEP); 145310910SRobert.Harris@Sun.COM bcopy(elemp->mnte_text, kbufp, dbbufsize); 145410910SRobert.Harris@Sun.COM *(kbufp + (off_t)dbtabp->mnt_mountp - 1) = 145510910SRobert.Harris@Sun.COM *(kbufp + (off_t)dbtabp->mnt_fstype - 1) = 145610910SRobert.Harris@Sun.COM *(kbufp + (off_t)dbtabp->mnt_mntopts - 1) = 145710910SRobert.Harris@Sun.COM *(kbufp + (off_t)dbtabp->mnt_time - 1) = 145810910SRobert.Harris@Sun.COM *(kbufp + dbbufsize - 1) = '\0'; 145910910SRobert.Harris@Sun.COM if (copyout(kbufp, ubufp, dbbufsize)) 146010910SRobert.Harris@Sun.COM error = EFAULT; 146110910SRobert.Harris@Sun.COM 146210910SRobert.Harris@Sun.COM kmem_free(kbufp, dbbufsize); 146310910SRobert.Harris@Sun.COM return (error); 146410910SRobert.Harris@Sun.COM } 146510910SRobert.Harris@Sun.COM 14660Sstevel@tonic-gate /* ARGSUSED */ 14670Sstevel@tonic-gate static int 146810910SRobert.Harris@Sun.COM mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cr, 146910910SRobert.Harris@Sun.COM int *rvalp, caller_context_t *ct) 14700Sstevel@tonic-gate { 14710Sstevel@tonic-gate uint_t *up = (uint_t *)arg; 14720Sstevel@tonic-gate mntnode_t *mnp = VTOM(vp); 147310910SRobert.Harris@Sun.COM mntsnap_t *snapp = &mnp->mnt_ioctl; 147410910SRobert.Harris@Sun.COM int error = 0; 147510910SRobert.Harris@Sun.COM zone_t *zonep = MTOD(mnp)->mnt_zone; 147610910SRobert.Harris@Sun.COM krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 147710910SRobert.Harris@Sun.COM model_t datamodel = flag & DATAMODEL_MASK; 14780Sstevel@tonic-gate 14790Sstevel@tonic-gate switch (cmd) { 14800Sstevel@tonic-gate 148110910SRobert.Harris@Sun.COM case MNTIOC_NMNTS: /* get no. of mounted resources */ 148210910SRobert.Harris@Sun.COM { 148310910SRobert.Harris@Sun.COM rw_enter(&mnp->mnt_contents, RW_READER); 148410910SRobert.Harris@Sun.COM if (snapp->mnts_nmnts == 0 || 148510910SRobert.Harris@Sun.COM (snapp->mnts_flags & MNTS_REWIND)) { 14868004SViswanathan.Kannappan@Sun.COM if (!rw_tryupgrade(&mnp->mnt_contents)) { 14878004SViswanathan.Kannappan@Sun.COM rw_exit(&mnp->mnt_contents); 14888004SViswanathan.Kannappan@Sun.COM rw_enter(&mnp->mnt_contents, RW_WRITER); 14898004SViswanathan.Kannappan@Sun.COM } 149010910SRobert.Harris@Sun.COM if (snapp->mnts_nmnts == 0 || 149110910SRobert.Harris@Sun.COM (snapp->mnts_flags & MNTS_REWIND)) 149210910SRobert.Harris@Sun.COM mntfs_snapshot(mnp, snapp); 14930Sstevel@tonic-gate } 149410910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 149510910SRobert.Harris@Sun.COM 149610910SRobert.Harris@Sun.COM if (suword32(up, snapp->mnts_nmnts) != 0) 14970Sstevel@tonic-gate error = EFAULT; 14980Sstevel@tonic-gate break; 14990Sstevel@tonic-gate } 15000Sstevel@tonic-gate 150110910SRobert.Harris@Sun.COM case MNTIOC_GETDEVLIST: /* get mounted device major/minor nos */ 150210910SRobert.Harris@Sun.COM { 150310910SRobert.Harris@Sun.COM size_t len; 15040Sstevel@tonic-gate uint_t *devlist; 150510910SRobert.Harris@Sun.COM mntelem_t *elemp; 150610910SRobert.Harris@Sun.COM int i = 0; 15070Sstevel@tonic-gate 150810910SRobert.Harris@Sun.COM rw_enter(&mnp->mnt_contents, RW_READER); 150910910SRobert.Harris@Sun.COM if (snapp->mnts_nmnts == 0 || 151010910SRobert.Harris@Sun.COM (snapp->mnts_flags & MNTS_REWIND)) { 15118004SViswanathan.Kannappan@Sun.COM if (!rw_tryupgrade(&mnp->mnt_contents)) { 15128004SViswanathan.Kannappan@Sun.COM rw_exit(&mnp->mnt_contents); 15138004SViswanathan.Kannappan@Sun.COM rw_enter(&mnp->mnt_contents, RW_WRITER); 15148004SViswanathan.Kannappan@Sun.COM } 151510910SRobert.Harris@Sun.COM if (snapp->mnts_nmnts == 0 || 151610910SRobert.Harris@Sun.COM (snapp->mnts_flags & MNTS_REWIND)) 151710910SRobert.Harris@Sun.COM mntfs_snapshot(mnp, snapp); 15188004SViswanathan.Kannappan@Sun.COM rw_downgrade(&mnp->mnt_contents); 15190Sstevel@tonic-gate } 15200Sstevel@tonic-gate 152110910SRobert.Harris@Sun.COM /* Create a local buffer to hold the device numbers. */ 152210910SRobert.Harris@Sun.COM len = 2 * snapp->mnts_nmnts * sizeof (uint_t); 15230Sstevel@tonic-gate devlist = kmem_alloc(len, KM_SLEEP); 15240Sstevel@tonic-gate 152510910SRobert.Harris@Sun.COM /* 152610910SRobert.Harris@Sun.COM * Walk the database elements for this snapshot and add their 152710910SRobert.Harris@Sun.COM * major and minor numbers. 152810910SRobert.Harris@Sun.COM */ 152910910SRobert.Harris@Sun.COM rw_enter(dblockp, RW_READER); 153010910SRobert.Harris@Sun.COM for (elemp = snapp->mnts_first; elemp; 153110910SRobert.Harris@Sun.COM elemp = mntfs_get_next_elem(snapp, elemp)) { 153210910SRobert.Harris@Sun.COM devlist[2 * i] = elemp->mnte_tab.mnt_major; 153310910SRobert.Harris@Sun.COM devlist[2 * i + 1] = elemp->mnte_tab.mnt_minor; 153410910SRobert.Harris@Sun.COM i++; 153510910SRobert.Harris@Sun.COM } 153610910SRobert.Harris@Sun.COM rw_exit(dblockp); 153710910SRobert.Harris@Sun.COM ASSERT(i == snapp->mnts_nmnts); 153810910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 15390Sstevel@tonic-gate 154010910SRobert.Harris@Sun.COM error = xcopyout(devlist, up, len); 15410Sstevel@tonic-gate kmem_free(devlist, len); 15420Sstevel@tonic-gate break; 15430Sstevel@tonic-gate } 15440Sstevel@tonic-gate 15450Sstevel@tonic-gate case MNTIOC_SETTAG: /* set tag on mounted file system */ 15460Sstevel@tonic-gate case MNTIOC_CLRTAG: /* clear tag on mounted file system */ 15470Sstevel@tonic-gate { 15480Sstevel@tonic-gate struct mnttagdesc *dp = (struct mnttagdesc *)arg; 15490Sstevel@tonic-gate STRUCT_DECL(mnttagdesc, tagdesc); 15500Sstevel@tonic-gate char *cptr; 15510Sstevel@tonic-gate uint32_t major, minor; 15520Sstevel@tonic-gate char tagbuf[MAX_MNTOPT_TAG]; 15530Sstevel@tonic-gate char *pbuf; 15540Sstevel@tonic-gate size_t len; 15550Sstevel@tonic-gate uint_t start = 0; 15560Sstevel@tonic-gate mntdata_t *mntdata = MTOD(mnp); 15570Sstevel@tonic-gate zone_t *zone = mntdata->mnt_zone; 15580Sstevel@tonic-gate 15590Sstevel@tonic-gate STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK); 15600Sstevel@tonic-gate if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) { 15610Sstevel@tonic-gate error = EFAULT; 15620Sstevel@tonic-gate break; 15630Sstevel@tonic-gate } 15640Sstevel@tonic-gate pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 15650Sstevel@tonic-gate if (zone != global_zone) { 15660Sstevel@tonic-gate (void) strcpy(pbuf, zone->zone_rootpath); 15670Sstevel@tonic-gate /* truncate "/" and nul */ 15680Sstevel@tonic-gate start = zone->zone_rootpathlen - 2; 15690Sstevel@tonic-gate ASSERT(pbuf[start] == '/'); 15700Sstevel@tonic-gate } 15710Sstevel@tonic-gate cptr = STRUCT_FGETP(tagdesc, mtd_mntpt); 15720Sstevel@tonic-gate error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len); 15730Sstevel@tonic-gate if (error) { 15740Sstevel@tonic-gate kmem_free(pbuf, MAXPATHLEN); 15750Sstevel@tonic-gate break; 15760Sstevel@tonic-gate } 15770Sstevel@tonic-gate if (start != 0 && pbuf[start] != '/') { 15780Sstevel@tonic-gate kmem_free(pbuf, MAXPATHLEN); 15790Sstevel@tonic-gate error = EINVAL; 15800Sstevel@tonic-gate break; 15810Sstevel@tonic-gate } 15820Sstevel@tonic-gate cptr = STRUCT_FGETP(tagdesc, mtd_tag); 15830Sstevel@tonic-gate if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) { 15840Sstevel@tonic-gate kmem_free(pbuf, MAXPATHLEN); 15850Sstevel@tonic-gate break; 15860Sstevel@tonic-gate } 15870Sstevel@tonic-gate major = STRUCT_FGET(tagdesc, mtd_major); 15880Sstevel@tonic-gate minor = STRUCT_FGET(tagdesc, mtd_minor); 15890Sstevel@tonic-gate if (cmd == MNTIOC_SETTAG) 15900Sstevel@tonic-gate error = vfs_settag(major, minor, pbuf, tagbuf, cr); 15910Sstevel@tonic-gate else 15920Sstevel@tonic-gate error = vfs_clrtag(major, minor, pbuf, tagbuf, cr); 15930Sstevel@tonic-gate kmem_free(pbuf, MAXPATHLEN); 15940Sstevel@tonic-gate break; 15950Sstevel@tonic-gate } 15960Sstevel@tonic-gate 15970Sstevel@tonic-gate case MNTIOC_SHOWHIDDEN: 15980Sstevel@tonic-gate { 159911757SRobert.Harris@Sun.COM rw_enter(&mnp->mnt_contents, RW_WRITER); 16000Sstevel@tonic-gate mnp->mnt_flags |= MNT_SHOWHIDDEN; 160111757SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 16020Sstevel@tonic-gate break; 16030Sstevel@tonic-gate } 16040Sstevel@tonic-gate 160510910SRobert.Harris@Sun.COM case MNTIOC_GETMNTANY: 16060Sstevel@tonic-gate { 160710910SRobert.Harris@Sun.COM STRUCT_DECL(mntentbuf, embuf); /* Our copy of user's embuf */ 160810910SRobert.Harris@Sun.COM STRUCT_DECL(extmnttab, ktab); /* Out copy of user's emp */ 160910910SRobert.Harris@Sun.COM struct extmnttab *uemp; /* uaddr of user's emp */ 161010910SRobert.Harris@Sun.COM char *ubufp; /* uaddr of user's text buf */ 161110910SRobert.Harris@Sun.COM size_t ubufsize; /* size of the above */ 161210910SRobert.Harris@Sun.COM struct extmnttab preftab; /* our version of user's emp */ 161310910SRobert.Harris@Sun.COM char *prefbuf; /* our copy of user's text */ 161410910SRobert.Harris@Sun.COM mntelem_t *elemp; /* a database element */ 161510910SRobert.Harris@Sun.COM struct extmnttab *dbtabp; /* element's extmnttab */ 161610910SRobert.Harris@Sun.COM char *dbbufp; /* element's text buf */ 161710910SRobert.Harris@Sun.COM size_t dbbufsize; /* size of the above */ 161810910SRobert.Harris@Sun.COM vtype_t type; /* type, if any, of special */ 16190Sstevel@tonic-gate 162010910SRobert.Harris@Sun.COM 162110910SRobert.Harris@Sun.COM /* 162210910SRobert.Harris@Sun.COM * embuf is a struct embuf within the kernel. We copy into it 162310910SRobert.Harris@Sun.COM * the struct embuf supplied by the user. 162410910SRobert.Harris@Sun.COM */ 162510910SRobert.Harris@Sun.COM STRUCT_INIT(embuf, datamodel); 162610910SRobert.Harris@Sun.COM if (copyin((void *) arg, STRUCT_BUF(embuf), 162710910SRobert.Harris@Sun.COM STRUCT_SIZE(embuf))) { 162810910SRobert.Harris@Sun.COM error = EFAULT; 162910910SRobert.Harris@Sun.COM break; 16308004SViswanathan.Kannappan@Sun.COM } 163110910SRobert.Harris@Sun.COM uemp = STRUCT_FGETP(embuf, mbuf_emp); 163210910SRobert.Harris@Sun.COM ubufp = STRUCT_FGETP(embuf, mbuf_buf); 163310910SRobert.Harris@Sun.COM ubufsize = STRUCT_FGET(embuf, mbuf_bufsize); 163410910SRobert.Harris@Sun.COM 163510910SRobert.Harris@Sun.COM /* 163610910SRobert.Harris@Sun.COM * Check that the text buffer offered by the user is the 163710910SRobert.Harris@Sun.COM * agreed size. 163810910SRobert.Harris@Sun.COM */ 163910910SRobert.Harris@Sun.COM if (ubufsize != MNT_LINE_MAX) { 164010910SRobert.Harris@Sun.COM error = EINVAL; 164110910SRobert.Harris@Sun.COM break; 16420Sstevel@tonic-gate } 164310910SRobert.Harris@Sun.COM 164410910SRobert.Harris@Sun.COM /* Copy the user-supplied entry into a local buffer. */ 164510910SRobert.Harris@Sun.COM prefbuf = kmem_alloc(MNT_LINE_MAX, KM_SLEEP); 164610910SRobert.Harris@Sun.COM if (copyin(ubufp, prefbuf, MNT_LINE_MAX)) { 164710910SRobert.Harris@Sun.COM kmem_free(prefbuf, MNT_LINE_MAX); 164810910SRobert.Harris@Sun.COM error = EFAULT; 164910910SRobert.Harris@Sun.COM break; 165010910SRobert.Harris@Sun.COM } 165110910SRobert.Harris@Sun.COM 165210910SRobert.Harris@Sun.COM /* Ensure that any string within it is null-terminated. */ 165310910SRobert.Harris@Sun.COM *(prefbuf + MNT_LINE_MAX - 1) = 0; 165410910SRobert.Harris@Sun.COM 165510910SRobert.Harris@Sun.COM /* Copy in the user-supplied mpref */ 165610910SRobert.Harris@Sun.COM STRUCT_INIT(ktab, datamodel); 165710910SRobert.Harris@Sun.COM if (copyin(uemp, STRUCT_BUF(ktab), 165810910SRobert.Harris@Sun.COM SIZEOF_STRUCT(mnttab, datamodel))) { 165910910SRobert.Harris@Sun.COM kmem_free(prefbuf, MNT_LINE_MAX); 166010910SRobert.Harris@Sun.COM error = EFAULT; 166110910SRobert.Harris@Sun.COM break; 16620Sstevel@tonic-gate } 16630Sstevel@tonic-gate 166410910SRobert.Harris@Sun.COM /* 166510910SRobert.Harris@Sun.COM * Copy the members of the user's pref struct into a local 166610910SRobert.Harris@Sun.COM * struct. The pointers need to be offset and verified to 166710910SRobert.Harris@Sun.COM * ensure that they lie within the bounds of the buffer. 166810910SRobert.Harris@Sun.COM */ 166910910SRobert.Harris@Sun.COM preftab.mnt_special = mntfs_import_addr(STRUCT_FGETP(ktab, 167010910SRobert.Harris@Sun.COM mnt_special), ubufp, prefbuf, MNT_LINE_MAX); 167110910SRobert.Harris@Sun.COM preftab.mnt_mountp = mntfs_import_addr(STRUCT_FGETP(ktab, 167210910SRobert.Harris@Sun.COM mnt_mountp), ubufp, prefbuf, MNT_LINE_MAX); 167310910SRobert.Harris@Sun.COM preftab.mnt_fstype = mntfs_import_addr(STRUCT_FGETP(ktab, 167410910SRobert.Harris@Sun.COM mnt_fstype), ubufp, prefbuf, MNT_LINE_MAX); 167510910SRobert.Harris@Sun.COM preftab.mnt_mntopts = mntfs_import_addr(STRUCT_FGETP(ktab, 167610910SRobert.Harris@Sun.COM mnt_mntopts), ubufp, prefbuf, MNT_LINE_MAX); 167710910SRobert.Harris@Sun.COM preftab.mnt_time = mntfs_import_addr(STRUCT_FGETP(ktab, 167810910SRobert.Harris@Sun.COM mnt_time), ubufp, prefbuf, MNT_LINE_MAX); 167910910SRobert.Harris@Sun.COM 168010910SRobert.Harris@Sun.COM /* 168110910SRobert.Harris@Sun.COM * If the user specifies a mounted resource that is a special 168210910SRobert.Harris@Sun.COM * device then we capture its mode and major and minor numbers; 168311757SRobert.Harris@Sun.COM * cf. the block comment below. 168410910SRobert.Harris@Sun.COM */ 168510910SRobert.Harris@Sun.COM type = mntfs_special_info_string(preftab.mnt_special, 168610910SRobert.Harris@Sun.COM &preftab.mnt_major, &preftab.mnt_minor, cr); 168710910SRobert.Harris@Sun.COM 168810910SRobert.Harris@Sun.COM rw_enter(&mnp->mnt_contents, RW_WRITER); 168910910SRobert.Harris@Sun.COM if (snapp->mnts_nmnts == 0 || 169010910SRobert.Harris@Sun.COM (snapp->mnts_flags & MNTS_REWIND)) 169110910SRobert.Harris@Sun.COM mntfs_snapshot(mnp, snapp); 16920Sstevel@tonic-gate 169310910SRobert.Harris@Sun.COM /* 169410910SRobert.Harris@Sun.COM * This is the core functionality that implements getmntany(). 169510910SRobert.Harris@Sun.COM * We walk through the mntfs database until we find an element 169610910SRobert.Harris@Sun.COM * matching the user's preferences that are contained in 169710910SRobert.Harris@Sun.COM * preftab. Typically, this means checking that the text 169810910SRobert.Harris@Sun.COM * matches. However, the mounted resource is special: if the 169910910SRobert.Harris@Sun.COM * user is looking for a special device then we must find a 170010910SRobert.Harris@Sun.COM * database element with the same major and minor numbers and 170110910SRobert.Harris@Sun.COM * the same type, i.e. VBLK or VCHR. The type is not recorded 170210910SRobert.Harris@Sun.COM * in the element because it cannot be inferred from the vfs_t. 170310910SRobert.Harris@Sun.COM * We therefore check the type of suitable candidates via 170410910SRobert.Harris@Sun.COM * mntfs_special_info_element(); since this calls into the 170510910SRobert.Harris@Sun.COM * underlying file system we make sure to drop the database lock 170610910SRobert.Harris@Sun.COM * first. 170710910SRobert.Harris@Sun.COM */ 170810910SRobert.Harris@Sun.COM elemp = snapp->mnts_next; 170910910SRobert.Harris@Sun.COM rw_enter(dblockp, RW_READER); 171010910SRobert.Harris@Sun.COM for (;;) { 171110910SRobert.Harris@Sun.COM for (; elemp; elemp = mntfs_get_next_elem(snapp, 171210910SRobert.Harris@Sun.COM elemp)) { 171310910SRobert.Harris@Sun.COM dbtabp = &elemp->mnte_tab; 171410910SRobert.Harris@Sun.COM dbbufp = elemp->mnte_text; 171510910SRobert.Harris@Sun.COM dbbufsize = elemp->mnte_text_size; 171610910SRobert.Harris@Sun.COM 171710910SRobert.Harris@Sun.COM if (((type && 171810910SRobert.Harris@Sun.COM dbtabp->mnt_major == preftab.mnt_major && 171910910SRobert.Harris@Sun.COM dbtabp->mnt_minor == preftab.mnt_minor && 172010910SRobert.Harris@Sun.COM MNTFS_REAL_FIELD(dbbufp)) || 172110910SRobert.Harris@Sun.COM (!type && (!preftab.mnt_special || 172210910SRobert.Harris@Sun.COM mntfs_same_word(preftab.mnt_special, 172310910SRobert.Harris@Sun.COM prefbuf, MNT_LINE_MAX, (off_t)0, dbbufp, 172410910SRobert.Harris@Sun.COM dbbufsize)))) && 172510910SRobert.Harris@Sun.COM 172610910SRobert.Harris@Sun.COM (!preftab.mnt_mountp || mntfs_same_word( 172710910SRobert.Harris@Sun.COM preftab.mnt_mountp, prefbuf, MNT_LINE_MAX, 172810910SRobert.Harris@Sun.COM (off_t)dbtabp->mnt_mountp, dbbufp, 172910910SRobert.Harris@Sun.COM dbbufsize)) && 173010910SRobert.Harris@Sun.COM 173110910SRobert.Harris@Sun.COM (!preftab.mnt_fstype || mntfs_same_word( 173210910SRobert.Harris@Sun.COM preftab.mnt_fstype, prefbuf, MNT_LINE_MAX, 173310910SRobert.Harris@Sun.COM (off_t)dbtabp->mnt_fstype, dbbufp, 173410910SRobert.Harris@Sun.COM dbbufsize)) && 173510910SRobert.Harris@Sun.COM 173610910SRobert.Harris@Sun.COM (!preftab.mnt_mntopts || mntfs_same_word( 173710910SRobert.Harris@Sun.COM preftab.mnt_mntopts, prefbuf, MNT_LINE_MAX, 173810910SRobert.Harris@Sun.COM (off_t)dbtabp->mnt_mntopts, dbbufp, 173910910SRobert.Harris@Sun.COM dbbufsize)) && 174010910SRobert.Harris@Sun.COM 174110910SRobert.Harris@Sun.COM (!preftab.mnt_time || mntfs_same_word( 174210910SRobert.Harris@Sun.COM preftab.mnt_time, prefbuf, MNT_LINE_MAX, 174310910SRobert.Harris@Sun.COM (off_t)dbtabp->mnt_time, dbbufp, 174410910SRobert.Harris@Sun.COM dbbufsize))) 174510910SRobert.Harris@Sun.COM break; 174610910SRobert.Harris@Sun.COM } 174710910SRobert.Harris@Sun.COM rw_exit(dblockp); 174810910SRobert.Harris@Sun.COM 174910910SRobert.Harris@Sun.COM if (elemp == NULL || type == 0 || 175010910SRobert.Harris@Sun.COM type == mntfs_special_info_element(elemp, cr)) 175110910SRobert.Harris@Sun.COM break; 175210910SRobert.Harris@Sun.COM 175310910SRobert.Harris@Sun.COM rw_enter(dblockp, RW_READER); 175410910SRobert.Harris@Sun.COM elemp = mntfs_get_next_elem(snapp, elemp); 17558004SViswanathan.Kannappan@Sun.COM } 17560Sstevel@tonic-gate 175710910SRobert.Harris@Sun.COM kmem_free(prefbuf, MNT_LINE_MAX); 175810910SRobert.Harris@Sun.COM 175910910SRobert.Harris@Sun.COM /* If we failed to find a match then return EOF. */ 176010910SRobert.Harris@Sun.COM if (elemp == NULL) { 176110910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 176210910SRobert.Harris@Sun.COM *rvalp = MNTFS_EOF; 176310910SRobert.Harris@Sun.COM break; 176410910SRobert.Harris@Sun.COM } 176510910SRobert.Harris@Sun.COM 176610910SRobert.Harris@Sun.COM /* 176710910SRobert.Harris@Sun.COM * Check that the text buffer offered by the user will be large 176810910SRobert.Harris@Sun.COM * enough to accommodate the text for this entry. 176910910SRobert.Harris@Sun.COM */ 177010910SRobert.Harris@Sun.COM if (elemp->mnte_text_size > MNT_LINE_MAX) { 177110910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 177210910SRobert.Harris@Sun.COM *rvalp = MNTFS_TOOLONG; 177310910SRobert.Harris@Sun.COM break; 177410910SRobert.Harris@Sun.COM } 177510910SRobert.Harris@Sun.COM 177610910SRobert.Harris@Sun.COM /* 177710910SRobert.Harris@Sun.COM * Populate the user's struct mnttab and text buffer using the 177810910SRobert.Harris@Sun.COM * element's contents. 177910910SRobert.Harris@Sun.COM */ 178010910SRobert.Harris@Sun.COM if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) { 178110910SRobert.Harris@Sun.COM error = EFAULT; 178210910SRobert.Harris@Sun.COM } else { 178310910SRobert.Harris@Sun.COM rw_enter(dblockp, RW_READER); 178410910SRobert.Harris@Sun.COM elemp = mntfs_get_next_elem(snapp, elemp); 178510910SRobert.Harris@Sun.COM rw_exit(dblockp); 178610910SRobert.Harris@Sun.COM snapp->mnts_next = elemp; 178710910SRobert.Harris@Sun.COM } 178810910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 178910910SRobert.Harris@Sun.COM break; 179010910SRobert.Harris@Sun.COM } 179110910SRobert.Harris@Sun.COM 179210910SRobert.Harris@Sun.COM case MNTIOC_GETMNTENT: 179310910SRobert.Harris@Sun.COM case MNTIOC_GETEXTMNTENT: 179410910SRobert.Harris@Sun.COM { 179510910SRobert.Harris@Sun.COM STRUCT_DECL(mntentbuf, embuf); /* Our copy of user's embuf */ 179610910SRobert.Harris@Sun.COM struct extmnttab *uemp; /* uaddr of user's emp */ 179710910SRobert.Harris@Sun.COM char *ubufp; /* uaddr of user's text buf */ 179810910SRobert.Harris@Sun.COM size_t ubufsize; /* size of the above */ 179910910SRobert.Harris@Sun.COM mntelem_t *elemp; /* a database element */ 180010910SRobert.Harris@Sun.COM 180110910SRobert.Harris@Sun.COM 180210910SRobert.Harris@Sun.COM rw_enter(&mnp->mnt_contents, RW_WRITER); 180310910SRobert.Harris@Sun.COM if (snapp->mnts_nmnts == 0 || 180410910SRobert.Harris@Sun.COM (snapp->mnts_flags & MNTS_REWIND)) 180510910SRobert.Harris@Sun.COM mntfs_snapshot(mnp, snapp); 180610910SRobert.Harris@Sun.COM if ((elemp = snapp->mnts_next) == NULL) { 180710910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 180810910SRobert.Harris@Sun.COM *rvalp = MNTFS_EOF; 180910910SRobert.Harris@Sun.COM break; 181010910SRobert.Harris@Sun.COM } 181110910SRobert.Harris@Sun.COM 181210910SRobert.Harris@Sun.COM /* 181310910SRobert.Harris@Sun.COM * embuf is a struct embuf within the kernel. We copy into it 181410910SRobert.Harris@Sun.COM * the struct embuf supplied by the user. 181510910SRobert.Harris@Sun.COM */ 181610910SRobert.Harris@Sun.COM STRUCT_INIT(embuf, datamodel); 181710910SRobert.Harris@Sun.COM if (copyin((void *) arg, STRUCT_BUF(embuf), 181810910SRobert.Harris@Sun.COM STRUCT_SIZE(embuf))) { 181910910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 182010910SRobert.Harris@Sun.COM error = EFAULT; 182110910SRobert.Harris@Sun.COM break; 182210910SRobert.Harris@Sun.COM } 182310910SRobert.Harris@Sun.COM uemp = STRUCT_FGETP(embuf, mbuf_emp); 182410910SRobert.Harris@Sun.COM ubufp = STRUCT_FGETP(embuf, mbuf_buf); 182510910SRobert.Harris@Sun.COM ubufsize = STRUCT_FGET(embuf, mbuf_bufsize); 182610910SRobert.Harris@Sun.COM 182710910SRobert.Harris@Sun.COM /* 182810910SRobert.Harris@Sun.COM * Check that the text buffer offered by the user will be large 182910910SRobert.Harris@Sun.COM * enough to accommodate the text for this entry. 183010910SRobert.Harris@Sun.COM */ 183110910SRobert.Harris@Sun.COM if (elemp->mnte_text_size > ubufsize) { 183210910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 183310910SRobert.Harris@Sun.COM *rvalp = MNTFS_TOOLONG; 183410910SRobert.Harris@Sun.COM break; 183510910SRobert.Harris@Sun.COM } 183610910SRobert.Harris@Sun.COM 183710910SRobert.Harris@Sun.COM /* 183810910SRobert.Harris@Sun.COM * Populate the user's struct mnttab and text buffer using the 183910910SRobert.Harris@Sun.COM * element's contents. 184010910SRobert.Harris@Sun.COM */ 184110910SRobert.Harris@Sun.COM if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) { 184210910SRobert.Harris@Sun.COM error = EFAULT; 184310910SRobert.Harris@Sun.COM } else { 184410910SRobert.Harris@Sun.COM rw_enter(dblockp, RW_READER); 184510910SRobert.Harris@Sun.COM elemp = mntfs_get_next_elem(snapp, elemp); 184610910SRobert.Harris@Sun.COM rw_exit(dblockp); 184710910SRobert.Harris@Sun.COM snapp->mnts_next = elemp; 184810910SRobert.Harris@Sun.COM } 184910910SRobert.Harris@Sun.COM rw_exit(&mnp->mnt_contents); 18500Sstevel@tonic-gate break; 18510Sstevel@tonic-gate } 18520Sstevel@tonic-gate 18530Sstevel@tonic-gate default: 18540Sstevel@tonic-gate error = EINVAL; 18550Sstevel@tonic-gate break; 18560Sstevel@tonic-gate } 18570Sstevel@tonic-gate 18580Sstevel@tonic-gate return (error); 18590Sstevel@tonic-gate } 18600Sstevel@tonic-gate 18610Sstevel@tonic-gate /* 186211757SRobert.Harris@Sun.COM * mntfs provides a new vnode for each open(2). Two vnodes will represent the 186311757SRobert.Harris@Sun.COM * same instance of /etc/mnttab if they share the same (zone-specific) vfs. 186411757SRobert.Harris@Sun.COM */ 186511757SRobert.Harris@Sun.COM /* ARGSUSED */ 186611757SRobert.Harris@Sun.COM int 186711757SRobert.Harris@Sun.COM mntcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct) 186811757SRobert.Harris@Sun.COM { 186911757SRobert.Harris@Sun.COM return (vp1 != NULL && vp2 != NULL && vp1->v_vfsp == vp2->v_vfsp); 187011757SRobert.Harris@Sun.COM } 187111757SRobert.Harris@Sun.COM 187211757SRobert.Harris@Sun.COM /* 18730Sstevel@tonic-gate * /mntfs vnode operations vector 18740Sstevel@tonic-gate */ 18750Sstevel@tonic-gate const fs_operation_def_t mnt_vnodeops_template[] = { 18763898Srsb VOPNAME_OPEN, { .vop_open = mntopen }, 18773898Srsb VOPNAME_CLOSE, { .vop_close = mntclose }, 18783898Srsb VOPNAME_READ, { .vop_read = mntread }, 18793898Srsb VOPNAME_IOCTL, { .vop_ioctl = mntioctl }, 18803898Srsb VOPNAME_GETATTR, { .vop_getattr = mntgetattr }, 18813898Srsb VOPNAME_ACCESS, { .vop_access = mntaccess }, 18823898Srsb VOPNAME_FSYNC, { .vop_fsync = mntfsync }, 18833898Srsb VOPNAME_INACTIVE, { .vop_inactive = mntinactive }, 18843898Srsb VOPNAME_SEEK, { .vop_seek = mntseek }, 18853898Srsb VOPNAME_POLL, { .vop_poll = mntpoll }, 188611757SRobert.Harris@Sun.COM VOPNAME_CMP, { .vop_cmp = mntcmp }, 18873898Srsb VOPNAME_DISPOSE, { .error = fs_error }, 18883898Srsb VOPNAME_SHRLOCK, { .error = fs_error }, 18893898Srsb NULL, NULL 18900Sstevel@tonic-gate }; 1891