xref: /onnv-gate/usr/src/uts/common/fs/nfs/nfs_subr.c (revision 11888:542e7ffc22d6)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51676Sjpk  * Common Development and Distribution License (the "License").
61676Sjpk  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*11888SPavel.Filipensky@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #include <sys/param.h>
270Sstevel@tonic-gate #include <sys/types.h>
280Sstevel@tonic-gate #include <sys/systm.h>
2911134SCasper.Dik@Sun.COM #include <sys/cred.h>
300Sstevel@tonic-gate #include <sys/proc.h>
310Sstevel@tonic-gate #include <sys/user.h>
320Sstevel@tonic-gate #include <sys/time.h>
330Sstevel@tonic-gate #include <sys/buf.h>
340Sstevel@tonic-gate #include <sys/vfs.h>
350Sstevel@tonic-gate #include <sys/vnode.h>
360Sstevel@tonic-gate #include <sys/socket.h>
370Sstevel@tonic-gate #include <sys/uio.h>
380Sstevel@tonic-gate #include <sys/tiuser.h>
390Sstevel@tonic-gate #include <sys/swap.h>
400Sstevel@tonic-gate #include <sys/errno.h>
410Sstevel@tonic-gate #include <sys/debug.h>
420Sstevel@tonic-gate #include <sys/kmem.h>
430Sstevel@tonic-gate #include <sys/kstat.h>
440Sstevel@tonic-gate #include <sys/cmn_err.h>
450Sstevel@tonic-gate #include <sys/vtrace.h>
460Sstevel@tonic-gate #include <sys/session.h>
470Sstevel@tonic-gate #include <sys/dnlc.h>
480Sstevel@tonic-gate #include <sys/bitmap.h>
490Sstevel@tonic-gate #include <sys/acl.h>
500Sstevel@tonic-gate #include <sys/ddi.h>
510Sstevel@tonic-gate #include <sys/pathname.h>
520Sstevel@tonic-gate #include <sys/flock.h>
530Sstevel@tonic-gate #include <sys/dirent.h>
540Sstevel@tonic-gate #include <sys/flock.h>
550Sstevel@tonic-gate #include <sys/callb.h>
560Sstevel@tonic-gate #include <sys/atomic.h>
570Sstevel@tonic-gate #include <sys/list.h>
581676Sjpk #include <sys/tsol/tnet.h>
591676Sjpk #include <sys/priv.h>
604971Sjarrett #include <sys/sdt.h>
617067Smarks #include <sys/attr.h>
621676Sjpk 
631676Sjpk #include <inet/ip6.h>
640Sstevel@tonic-gate 
650Sstevel@tonic-gate #include <rpc/types.h>
660Sstevel@tonic-gate #include <rpc/xdr.h>
670Sstevel@tonic-gate #include <rpc/auth.h>
680Sstevel@tonic-gate #include <rpc/clnt.h>
690Sstevel@tonic-gate 
700Sstevel@tonic-gate #include <nfs/nfs.h>
710Sstevel@tonic-gate #include <nfs/nfs4.h>
720Sstevel@tonic-gate #include <nfs/nfs_clnt.h>
730Sstevel@tonic-gate #include <nfs/rnode.h>
740Sstevel@tonic-gate #include <nfs/nfs_acl.h>
750Sstevel@tonic-gate 
764971Sjarrett #include <sys/tsol/label.h>
774971Sjarrett 
780Sstevel@tonic-gate /*
790Sstevel@tonic-gate  * The hash queues for the access to active and cached rnodes
800Sstevel@tonic-gate  * are organized as doubly linked lists.  A reader/writer lock
810Sstevel@tonic-gate  * for each hash bucket is used to control access and to synchronize
820Sstevel@tonic-gate  * lookups, additions, and deletions from the hash queue.
830Sstevel@tonic-gate  *
840Sstevel@tonic-gate  * The rnode freelist is organized as a doubly linked list with
850Sstevel@tonic-gate  * a head pointer.  Additions and deletions are synchronized via
860Sstevel@tonic-gate  * a single mutex.
870Sstevel@tonic-gate  *
880Sstevel@tonic-gate  * In order to add an rnode to the free list, it must be hashed into
890Sstevel@tonic-gate  * a hash queue and the exclusive lock to the hash queue be held.
900Sstevel@tonic-gate  * If an rnode is not hashed into a hash queue, then it is destroyed
910Sstevel@tonic-gate  * because it represents no valuable information that can be reused
920Sstevel@tonic-gate  * about the file.  The exclusive lock to the hash queue must be
930Sstevel@tonic-gate  * held in order to prevent a lookup in the hash queue from finding
940Sstevel@tonic-gate  * the rnode and using it and assuming that the rnode is not on the
950Sstevel@tonic-gate  * freelist.  The lookup in the hash queue will have the hash queue
960Sstevel@tonic-gate  * locked, either exclusive or shared.
970Sstevel@tonic-gate  *
980Sstevel@tonic-gate  * The vnode reference count for each rnode is not allowed to drop
990Sstevel@tonic-gate  * below 1.  This prevents external entities, such as the VM
1000Sstevel@tonic-gate  * subsystem, from acquiring references to vnodes already on the
1010Sstevel@tonic-gate  * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
1030Sstevel@tonic-gate  * rnode is looked up in the hash queues, then either the rnode
1045331Samw  * is removed from the freelist and that reference is transferred to
1050Sstevel@tonic-gate  * the new reference or the vnode reference count must be incremented
1060Sstevel@tonic-gate  * accordingly.  The mutex for the freelist must be held in order to
1070Sstevel@tonic-gate  * accurately test to see if the rnode is on the freelist or not.
1080Sstevel@tonic-gate  * The hash queue lock might be held shared and it is possible that
1090Sstevel@tonic-gate  * two different threads may race to remove the rnode from the
1100Sstevel@tonic-gate  * freelist.  This race can be resolved by holding the mutex for the
1110Sstevel@tonic-gate  * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the rnode is not on the freelist.  It can not be
1130Sstevel@tonic-gate  * placed on the freelist due to the requirement that the thread
1140Sstevel@tonic-gate  * putting the rnode on the freelist must hold the exclusive lock
1150Sstevel@tonic-gate  * to the hash queue and the thread doing the lookup in the hash
1160Sstevel@tonic-gate  * queue is holding either a shared or exclusive lock to the hash
1170Sstevel@tonic-gate  * queue.
1180Sstevel@tonic-gate  *
1190Sstevel@tonic-gate  * The lock ordering is:
1200Sstevel@tonic-gate  *
1210Sstevel@tonic-gate  *	hash bucket lock -> vnode lock
1220Sstevel@tonic-gate  *	hash bucket lock -> freelist lock
1230Sstevel@tonic-gate  */
/* Hash table of active and cached rnodes (see lock-ordering comment above). */
static rhashq_t *rtable;

/* Freelist of cached-but-unreferenced rnodes; protected by rpfreelist_lock. */
static kmutex_t rpfreelist_lock;
static rnode_t *rpfreelist = NULL;
static long rnew = 0;		/* NOTE(review): appears to count live rnodes; confirm in alloc path */
long nrnode = 0;		/* tunable bound related to rnode allocation */

/* rtable geometry; rtablemask is presumably rtablesize - 1 — confirm in init code */
static int rtablesize;
static int rtablemask;

/* target average hash-chain length, used when sizing the table */
static int hashlen = 4;

/* kmem cache for rnode_t allocations */
static struct kmem_cache *rnode_cache;

/*
 * Mutex to protect the following variables:
 *	nfs_major
 *	nfs_minor
 */
kmutex_t nfs_minor_lock;
int nfs_major;
int nfs_minor;

/* Do we allow preepoch (negative) time values otw? */
bool_t nfs_allow_preepoch_time = FALSE;	/* default: do not allow preepoch */

/*
 * Access cache
 */
static acache_hash_t *acache;
static long nacache;	/* used strictly to size the number of hash queues */

/* acache geometry, analogous to rtablesize/rtablemask above */
static int acachesize;
static int acachemask;
static struct kmem_cache *acache_cache;
1590Sstevel@tonic-gate 
1600Sstevel@tonic-gate /*
1610Sstevel@tonic-gate  * Client side utilities
1620Sstevel@tonic-gate  */
1630Sstevel@tonic-gate 
/*
 * client side statistics
 *
 * Kstat template for per-client RPC counters; presumably instantiated
 * per zone (readers access it via nfscl->nfscl_stat) — confirm against
 * the zone init code.  DEBUG kernels carry four extra counters.
 */
static const struct clstat clstat_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "clgets",	KSTAT_DATA_UINT64 },
	{ "cltoomany",	KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "clalloc",	KSTAT_DATA_UINT64 },
	{ "noresponse",	KSTAT_DATA_UINT64 },
	{ "failover",	KSTAT_DATA_UINT64 },
	{ "remap",	KSTAT_DATA_UINT64 },
#endif
};
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate /*
1810Sstevel@tonic-gate  * The following are statistics that describe behavior of the system as a whole
1820Sstevel@tonic-gate  * and doesn't correspond to any one particular zone.
1830Sstevel@tonic-gate  */
#ifdef DEBUG
/* System-wide (not per-zone) debug counters; DEBUG kernels only. */
static struct clstat_debug {
	kstat_named_t	nrnode;			/* number of allocated rnodes */
	kstat_named_t	access;			/* size of access cache */
	kstat_named_t	dirent;			/* size of readdir cache */
	kstat_named_t	dirents;		/* size of readdir buf cache */
	kstat_named_t	reclaim;		/* number of reclaims */
	kstat_named_t	clreclaim;		/* number of cl reclaims */
	kstat_named_t	f_reclaim;		/* number of free reclaims */
	kstat_named_t	a_reclaim;		/* number of active reclaims */
	kstat_named_t	r_reclaim;		/* number of rnode reclaims */
	kstat_named_t	rpath;			/* bytes used to store rpaths */
} clstat_debug = {
	{ "nrnode",	KSTAT_DATA_UINT64 },
	{ "access",	KSTAT_DATA_UINT64 },
	{ "dirent",	KSTAT_DATA_UINT64 },
	{ "dirents",	KSTAT_DATA_UINT64 },
	{ "reclaim",	KSTAT_DATA_UINT64 },
	{ "clreclaim",	KSTAT_DATA_UINT64 },
	{ "f_reclaim",	KSTAT_DATA_UINT64 },
	{ "a_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_path",	KSTAT_DATA_UINT64 },
};
#endif	/* DEBUG */
2090Sstevel@tonic-gate 
/*
 * We keep a global list of per-zone client data, so we can clean up all zones
 * if we get low on memory.
 */
static list_t nfs_clnt_list;
static kmutex_t nfs_clnt_list_lock;	/* protects nfs_clnt_list */
static zone_key_t nfsclnt_zone_key;	/* key for zone_getspecific() lookups */

/* kmem cache for struct chtab client-handle cache entries */
static struct kmem_cache *chtab_cache;

/*
 * Some servers do not properly update the attributes of the
 * directory when changes are made.  To allow interoperability
 * with these broken servers, the nfs_disable_rddir_cache
 * parameter must be set in /etc/system
 */
int nfs_disable_rddir_cache = 0;
2270Sstevel@tonic-gate 
/* Client handle cache (chtab) management. */
int		clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **);
void		clfree(CLIENT *, struct chtab *);
static int	acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **, struct nfs_clnt *);
static int	nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **, struct nfs_clnt *);
static void	clreclaim(void *);

/* RPC call engines for NFS and NFS ACL. */
static int	nfs_feedback(int, int, mntinfo_t *);
static int	rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
		    caddr_t, cred_t *, int *, enum clnt_stat *, int,
		    failinfo_t *);
static int	aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
		    caddr_t, cred_t *, int *, int, failinfo_t *);

/* rnode hash table / freelist management and memory reclaim callbacks. */
static void	rinactive(rnode_t *, cred_t *);
static int	rtablehash(nfs_fhandle *);
static vnode_t	*make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
		    struct vnodeops *,
		    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
			cred_t *),
		    int (*)(const void *, const void *), int *, cred_t *,
		    char *, char *);
static void	rp_rmfree(rnode_t *);
static void	rp_addhash(rnode_t *);
static void	rp_rmhash_locked(rnode_t *);
static rnode_t	*rfind(rhashq_t *, nfs_fhandle *, struct vfs *);
static void	destroy_rnode(rnode_t *);
static void	rddir_cache_free(rddir_cache *);
static int	nfs_free_data_reclaim(rnode_t *);
static int	nfs_active_data_reclaim(rnode_t *);
static int	nfs_free_reclaim(void);
static int	nfs_active_reclaim(void);
static int	nfs_rnode_reclaim(void);
static void	nfs_reclaim(void *);

/* Server failover support. */
static int	failover_safe(failinfo_t *);
static void	failover_newserver(mntinfo_t *mi);
static void	failover_thread(mntinfo_t *mi);
static int	failover_wait(mntinfo_t *);
static int	failover_remap(failinfo_t *);
static int	failover_lookup(char *, vnode_t *,
		    int (*)(vnode_t *, char *, vnode_t **,
			struct pathname *, int, vnode_t *, cred_t *, int),
		    int (*)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
		    vnode_t **);
static void	nfs_free_r_path(rnode_t *);
static void	nfs_set_vroot(vnode_t *);
static char	*nfs_getsrvnames(mntinfo_t *, size_t *);
2750Sstevel@tonic-gate 
/*
 * from rpcsec module (common/rpcsec)
 *
 * sec_clnt_geth() attaches an AUTH handle matching the mount's security
 * data to a client handle; sec_clnt_freeh() releases it.
 */
extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
extern void sec_clnt_freeh(AUTH *);
extern void sec_clnt_freeinfo(struct sec_data *);

/*
 * used in mount policy (Trusted Extensions label lookup)
 */
extern ts_label_t *getflabel_cipso(vfs_t *);
2871676Sjpk 
/*
 * EIO or EINTR are not recoverable errors.
 *
 * Argument and expansion are fully parenthesized so the macro is safe
 * with any expression argument (e.g. IS_RECOVERABLE_ERROR(a ? b : c)).
 */
#define	IS_RECOVERABLE_ERROR(error)	\
	(!(((error) == EINTR) || ((error) == EIO)))
2920Sstevel@tonic-gate 
/*
 * Console messages for a slow/unresponsive server.  The DEBUG variants
 * carry an extra %d for the NFS protocol version; the message text is
 * long-established and must not be reworded (tools grep for it).
 */
#ifdef DEBUG
#define	SRV_QFULL_MSG	"send queue to NFS%d server %s is full; still trying\n"
#define	SRV_NOTRESP_MSG	"NFS%d server %s not responding still trying\n"
#else
#define	SRV_QFULL_MSG	"send queue to NFS server %s is full still trying\n"
#define	SRV_NOTRESP_MSG	"NFS server %s not responding still trying\n"
#endif
/*
 * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
 *
 * Returns a CLIENT handle in *newcl and its cache entry in *chp.
 * Handles are cached on nfscl->nfscl_chtable, keyed by the quadruple
 * (RPC program, version, transport rdev, protocol family); a cached
 * free handle is re-initialized and reused when available, otherwise a
 * new one is created via clnt_tli_kcreate().
 *
 * Returns 0 on success, EINVAL on bad arguments, or an errno from
 * handle creation / sec_clnt_geth() (EINTR if auth setup yielded no
 * handle but no error code).
 */
static int
clget_impl(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
    struct chtab **chp, struct nfs_clnt *nfscl)
{
	struct chhead *ch, *newch;
	struct chhead **plistp;
	struct chtab *cp;
	int error;
	k_sigset_t smask;

	if (newcl == NULL || chp == NULL || ci == NULL)
		return (EINVAL);

	*newcl = NULL;
	*chp = NULL;

	/*
	 * Find an unused handle or create one
	 */
	newch = NULL;
	nfscl->nfscl_stat.clgets.value.ui64++;
top:
	/*
	 * Find the correct entry in the cache to check for free
	 * client handles.  The search is based on the RPC program
	 * number, program version number, dev_t for the transport
	 * device, and the protocol family.
	 */
	mutex_enter(&nfscl->nfscl_chtable_lock);
	plistp = &nfscl->nfscl_chtable;
	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_prog == ci->cl_prog &&
		    ch->ch_vers == ci->cl_vers &&
		    ch->ch_dev == svp->sv_knconf->knc_rdev &&
		    (strcmp(ch->ch_protofmly,
		    svp->sv_knconf->knc_protofmly) == 0))
			break;
		plistp = &ch->ch_next;
	}

	/*
	 * If we didn't find a cache entry for this quadruple, then
	 * create one.  If we don't have one already preallocated,
	 * then drop the cache lock, create one, and then start over.
	 * If we did have a preallocated entry, then just add it to
	 * the front of the list.
	 */
	if (ch == NULL) {
		if (newch == NULL) {
			/*
			 * Allocate outside the lock (KM_SLEEP may block),
			 * then retry the lookup from the top: another
			 * thread may have inserted the same quadruple
			 * while the lock was dropped.
			 */
			mutex_exit(&nfscl->nfscl_chtable_lock);
			newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
			newch->ch_timesused = 0;
			newch->ch_prog = ci->cl_prog;
			newch->ch_vers = ci->cl_vers;
			newch->ch_dev = svp->sv_knconf->knc_rdev;
			newch->ch_protofmly = kmem_alloc(
			    strlen(svp->sv_knconf->knc_protofmly) + 1,
			    KM_SLEEP);
			(void) strcpy(newch->ch_protofmly,
			    svp->sv_knconf->knc_protofmly);
			newch->ch_list = NULL;
			goto top;
		}
		ch = newch;
		newch = NULL;
		ch->ch_next = nfscl->nfscl_chtable;
		nfscl->nfscl_chtable = ch;
	/*
	 * We found a cache entry, but if it isn't on the front of the
	 * list, then move it to the front of the list to try to take
	 * advantage of locality of operations.
	 */
	} else if (ch != nfscl->nfscl_chtable) {
		*plistp = ch->ch_next;
		ch->ch_next = nfscl->nfscl_chtable;
		nfscl->nfscl_chtable = ch;
	}

	/*
	 * If there was a free client handle cached, then remove it
	 * from the list, init it, and use it.
	 */
	if (ch->ch_list != NULL) {
		cp = ch->ch_list;
		ch->ch_list = cp->ch_list;
		mutex_exit(&nfscl->nfscl_chtable_lock);
		/* The preallocated chhead lost the race; discard it. */
		if (newch != NULL) {
			kmem_free(newch->ch_protofmly,
			    strlen(newch->ch_protofmly) + 1);
			kmem_free(newch, sizeof (*newch));
		}
		(void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
		    &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
		error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
		    &cp->ch_client->cl_auth);
		if (error || cp->ch_client->cl_auth == NULL) {
			CLNT_DESTROY(cp->ch_client);
			kmem_cache_free(chtab_cache, cp);
			return ((error != 0) ? error : EINTR);
		}
		ch->ch_timesused++;
		*newcl = cp->ch_client;
		*chp = cp;
		return (0);
	}

	/*
	 * There weren't any free client handles which fit, so allocate
	 * a new one and use that.
	 */
#ifdef DEBUG
	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, 1);
#endif
	mutex_exit(&nfscl->nfscl_chtable_lock);

	nfscl->nfscl_stat.cltoomany.value.ui64++;
	if (newch != NULL) {
		kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
		kmem_free(newch, sizeof (*newch));
	}

	cp = kmem_cache_alloc(chtab_cache, KM_SLEEP);
	cp->ch_head = ch;

	/* Allow signals to interrupt handle creation if MI_INT is set. */
	sigintr(&smask, (int)ci->cl_flags & MI_INT);
	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
	    ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
	sigunintr(&smask);

	if (error != 0) {
		kmem_cache_free(chtab_cache, cp);
#ifdef DEBUG
		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
#endif
		/*
		 * Warning is unnecessary if error is EINTR.
		 */
		if (error != EINTR) {
			nfs_cmn_err(error, CE_WARN,
			    "clget: couldn't create handle: %m\n");
		}
		return (error);
	}
	(void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
	/* Replace the default auth with one matching the mount's secdata. */
	auth_destroy(cp->ch_client->cl_auth);
	error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
	    &cp->ch_client->cl_auth);
	if (error || cp->ch_client->cl_auth == NULL) {
		CLNT_DESTROY(cp->ch_client);
		kmem_cache_free(chtab_cache, cp);
#ifdef DEBUG
		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
#endif
		return ((error != 0) ? error : EINTR);
	}
	ch->ch_timesused++;
	*newcl = cp->ch_client;
	ASSERT(cp->ch_client->cl_nosignal == FALSE);
	*chp = cp;
	return (0);
}
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate int
clget(clinfo_t * ci,servinfo_t * svp,cred_t * cr,CLIENT ** newcl,struct chtab ** chp)4660Sstevel@tonic-gate clget(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
4670Sstevel@tonic-gate     struct chtab **chp)
4680Sstevel@tonic-gate {
4690Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
4700Sstevel@tonic-gate 
471766Scarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
4720Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate 	return (clget_impl(ci, svp, cr, newcl, chp, nfscl));
4750Sstevel@tonic-gate }
4760Sstevel@tonic-gate 
4770Sstevel@tonic-gate static int
acl_clget(mntinfo_t * mi,servinfo_t * svp,cred_t * cr,CLIENT ** newcl,struct chtab ** chp,struct nfs_clnt * nfscl)4780Sstevel@tonic-gate acl_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
4790Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
4800Sstevel@tonic-gate {
4810Sstevel@tonic-gate 	clinfo_t ci;
4820Sstevel@tonic-gate 	int error;
4830Sstevel@tonic-gate 
4840Sstevel@tonic-gate 	/*
4850Sstevel@tonic-gate 	 * Set read buffer size to rsize
4860Sstevel@tonic-gate 	 * and add room for RPC headers.
4870Sstevel@tonic-gate 	 */
4880Sstevel@tonic-gate 	ci.cl_readsize = mi->mi_tsize;
4890Sstevel@tonic-gate 	if (ci.cl_readsize != 0)
4900Sstevel@tonic-gate 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
4910Sstevel@tonic-gate 
4920Sstevel@tonic-gate 	/*
4930Sstevel@tonic-gate 	 * If soft mount and server is down just try once.
4940Sstevel@tonic-gate 	 * meaning: do not retransmit.
4950Sstevel@tonic-gate 	 */
4960Sstevel@tonic-gate 	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
4970Sstevel@tonic-gate 		ci.cl_retrans = 0;
4980Sstevel@tonic-gate 	else
4990Sstevel@tonic-gate 		ci.cl_retrans = mi->mi_retrans;
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate 	ci.cl_prog = NFS_ACL_PROGRAM;
5020Sstevel@tonic-gate 	ci.cl_vers = mi->mi_vers;
5030Sstevel@tonic-gate 	ci.cl_flags = mi->mi_flags;
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate 	/*
5060Sstevel@tonic-gate 	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
5070Sstevel@tonic-gate 	 * security flavor, the client tries to establish a security context
5080Sstevel@tonic-gate 	 * by contacting the server. If the connection is timed out or reset,
5090Sstevel@tonic-gate 	 * e.g. server reboot, we will try again.
5100Sstevel@tonic-gate 	 */
5110Sstevel@tonic-gate 	do {
5120Sstevel@tonic-gate 		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);
5130Sstevel@tonic-gate 
5140Sstevel@tonic-gate 		if (error == 0)
5150Sstevel@tonic-gate 			break;
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate 		/*
5180Sstevel@tonic-gate 		 * For forced unmount or zone shutdown, bail out, no retry.
5190Sstevel@tonic-gate 		 */
5200Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
5210Sstevel@tonic-gate 			error = EIO;
5220Sstevel@tonic-gate 			break;
5230Sstevel@tonic-gate 		}
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate 		/* do not retry for softmount */
5260Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_HARD))
5270Sstevel@tonic-gate 			break;
5280Sstevel@tonic-gate 
5290Sstevel@tonic-gate 		/* let the caller deal with the failover case */
5300Sstevel@tonic-gate 		if (FAILOVER_MOUNT(mi))
5310Sstevel@tonic-gate 			break;
5320Sstevel@tonic-gate 
5330Sstevel@tonic-gate 	} while (error == ETIMEDOUT || error == ECONNRESET);
5340Sstevel@tonic-gate 
5350Sstevel@tonic-gate 	return (error);
5360Sstevel@tonic-gate }
5370Sstevel@tonic-gate 
5380Sstevel@tonic-gate static int
nfs_clget(mntinfo_t * mi,servinfo_t * svp,cred_t * cr,CLIENT ** newcl,struct chtab ** chp,struct nfs_clnt * nfscl)5390Sstevel@tonic-gate nfs_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
5400Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
5410Sstevel@tonic-gate {
5420Sstevel@tonic-gate 	clinfo_t ci;
5430Sstevel@tonic-gate 	int error;
5440Sstevel@tonic-gate 
5450Sstevel@tonic-gate 	/*
5460Sstevel@tonic-gate 	 * Set read buffer size to rsize
5470Sstevel@tonic-gate 	 * and add room for RPC headers.
5480Sstevel@tonic-gate 	 */
5490Sstevel@tonic-gate 	ci.cl_readsize = mi->mi_tsize;
5500Sstevel@tonic-gate 	if (ci.cl_readsize != 0)
5510Sstevel@tonic-gate 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
5520Sstevel@tonic-gate 
5530Sstevel@tonic-gate 	/*
5540Sstevel@tonic-gate 	 * If soft mount and server is down just try once.
5550Sstevel@tonic-gate 	 * meaning: do not retransmit.
5560Sstevel@tonic-gate 	 */
5570Sstevel@tonic-gate 	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
5580Sstevel@tonic-gate 		ci.cl_retrans = 0;
5590Sstevel@tonic-gate 	else
5600Sstevel@tonic-gate 		ci.cl_retrans = mi->mi_retrans;
5610Sstevel@tonic-gate 
5620Sstevel@tonic-gate 	ci.cl_prog = mi->mi_prog;
5630Sstevel@tonic-gate 	ci.cl_vers = mi->mi_vers;
5640Sstevel@tonic-gate 	ci.cl_flags = mi->mi_flags;
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate 	/*
5670Sstevel@tonic-gate 	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
5680Sstevel@tonic-gate 	 * security flavor, the client tries to establish a security context
5690Sstevel@tonic-gate 	 * by contacting the server. If the connection is timed out or reset,
5700Sstevel@tonic-gate 	 * e.g. server reboot, we will try again.
5710Sstevel@tonic-gate 	 */
5720Sstevel@tonic-gate 	do {
5730Sstevel@tonic-gate 		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate 		if (error == 0)
5760Sstevel@tonic-gate 			break;
5770Sstevel@tonic-gate 
5780Sstevel@tonic-gate 		/*
5790Sstevel@tonic-gate 		 * For forced unmount or zone shutdown, bail out, no retry.
5800Sstevel@tonic-gate 		 */
5810Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
5820Sstevel@tonic-gate 			error = EIO;
5830Sstevel@tonic-gate 			break;
5840Sstevel@tonic-gate 		}
5850Sstevel@tonic-gate 
5860Sstevel@tonic-gate 		/* do not retry for softmount */
5870Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_HARD))
5880Sstevel@tonic-gate 			break;
5890Sstevel@tonic-gate 
5900Sstevel@tonic-gate 		/* let the caller deal with the failover case */
5910Sstevel@tonic-gate 		if (FAILOVER_MOUNT(mi))
5920Sstevel@tonic-gate 			break;
5930Sstevel@tonic-gate 
5940Sstevel@tonic-gate 	} while (error == ETIMEDOUT || error == ECONNRESET);
5950Sstevel@tonic-gate 
5960Sstevel@tonic-gate 	return (error);
5970Sstevel@tonic-gate }
5980Sstevel@tonic-gate 
/*
 * Return a client handle obtained via clget_impl()/nfs_clget() to the
 * per-zone client handle cache.  The auth handle attached to the CLIENT
 * is per-call state and is freed here; the chtab entry itself is
 * timestamped and pushed back onto the front of its cache list so that
 * clreclaim_zone() can age out entries that have not been used recently.
 */
static void
clfree_impl(CLIENT *cl, struct chtab *cp, struct nfs_clnt *nfscl)
{
	/* Drop the per-call security (auth) handle, if one is attached. */
	if (cl->cl_auth != NULL) {
		sec_clnt_freeh(cl->cl_auth);
		cl->cl_auth = NULL;
	}

	/*
	 * Timestamp this cache entry so that we know when it was last
	 * used.
	 */
	cp->ch_freed = gethrestime_sec();

	/*
	 * Add the free client handle to the front of the list.
	 * This way, the list will be sorted in youngest to oldest
	 * order.
	 */
	mutex_enter(&nfscl->nfscl_chtable_lock);
	cp->ch_list = cp->ch_head->ch_list;
	cp->ch_head->ch_list = cp;
	mutex_exit(&nfscl->nfscl_chtable_lock);
}
6230Sstevel@tonic-gate 
6240Sstevel@tonic-gate void
clfree(CLIENT * cl,struct chtab * cp)6250Sstevel@tonic-gate clfree(CLIENT *cl, struct chtab *cp)
6260Sstevel@tonic-gate {
6270Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
6280Sstevel@tonic-gate 
629766Scarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
6300Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate 	clfree_impl(cl, cp, nfscl);
6330Sstevel@tonic-gate }
6340Sstevel@tonic-gate 
6350Sstevel@tonic-gate #define	CL_HOLDTIME	60	/* time to hold client handles */
6360Sstevel@tonic-gate 
/*
 * Reclaim aged client handles from one zone's client handle cache.
 * Any chtab entry that has sat unused (see clfree_impl()) for at least
 * cl_holdtime seconds is unlinked from its cache list while holding
 * nfscl_chtable_lock, and destroyed only after the lock is dropped.
 * Called when the system needs memory back (see clreclaim()).
 */
static void
clreclaim_zone(struct nfs_clnt *nfscl, uint_t cl_holdtime)
{
	struct chhead *ch;
	struct chtab *cp;	/* list of objects that can be reclaimed */
	struct chtab *cpe;
	struct chtab *cpl;
	struct chtab **cpp;
#ifdef DEBUG
	int n = 0;		/* entries reclaimed, for the clalloc stat */
#endif

	/*
	 * Need to reclaim some memory, so step through the cache
	 * looking through the lists for entries which can be freed.
	 */
	cp = NULL;

	mutex_enter(&nfscl->nfscl_chtable_lock);

	/*
	 * Here we step through each non-NULL quadruple and start to
	 * construct the reclaim list pointed to by cp.  Note that
	 * cp will contain all eligible chtab entries.  When this traversal
	 * completes, chtab entries from the last quadruple will be at the
	 * front of cp and entries from previously inspected quadruples have
	 * been appended to the rear of cp.
	 */
	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_list == NULL)
			continue;
		/*
		 * Search each list for entries older than
		 * cl_holdtime seconds.  The lists are maintained
		 * in youngest to oldest order so that when the
		 * first entry is found which is old enough, then
		 * all of the rest of the entries on the list will
		 * be old enough as well.
		 */
		cpl = ch->ch_list;
		cpp = &ch->ch_list;
		while (cpl != NULL &&
		    cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
			cpp = &cpl->ch_list;
			cpl = cpl->ch_list;
		}
		if (cpl != NULL) {
			/*
			 * cpl is the first entry old enough to reclaim.
			 * Detach the tail of this list here (*cpp = NULL)
			 * and splice it onto the front of the reclaim
			 * list cp, appending any previously collected
			 * entries behind it.
			 */
			*cpp = NULL;
			if (cp != NULL) {
				cpe = cpl;
				while (cpe->ch_list != NULL)
					cpe = cpe->ch_list;
				cpe->ch_list = cp;
			}
			cp = cpl;
		}
	}

	mutex_exit(&nfscl->nfscl_chtable_lock);

	/*
	 * If cp is empty, then there is nothing to reclaim here.
	 */
	if (cp == NULL)
		return;

	/*
	 * Step through the list of entries to free, destroying each client
	 * handle and kmem_free'ing the memory for each entry.  This is
	 * done with nfscl_chtable_lock already dropped.
	 */
	while (cp != NULL) {
#ifdef DEBUG
		n++;
#endif
		CLNT_DESTROY(cp->ch_client);
		cpl = cp->ch_list;
		kmem_cache_free(chtab_cache, cp);
		cp = cpl;
	}

#ifdef DEBUG
	/*
	 * Update clalloc so that nfsstat shows the current number
	 * of allocated client handles.
	 */
	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
#endif
}
7250Sstevel@tonic-gate 
7260Sstevel@tonic-gate /* ARGSUSED */
7270Sstevel@tonic-gate static void
clreclaim(void * all)7280Sstevel@tonic-gate clreclaim(void *all)
7290Sstevel@tonic-gate {
7300Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
7310Sstevel@tonic-gate 
7320Sstevel@tonic-gate #ifdef DEBUG
7330Sstevel@tonic-gate 	clstat_debug.clreclaim.value.ui64++;
7340Sstevel@tonic-gate #endif
7350Sstevel@tonic-gate 	/*
7360Sstevel@tonic-gate 	 * The system is low on memory; go through and try to reclaim some from
7370Sstevel@tonic-gate 	 * every zone on the system.
7380Sstevel@tonic-gate 	 */
7390Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
7400Sstevel@tonic-gate 	nfscl = list_head(&nfs_clnt_list);
7410Sstevel@tonic-gate 	for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl))
7420Sstevel@tonic-gate 		clreclaim_zone(nfscl, CL_HOLDTIME);
7430Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
7440Sstevel@tonic-gate }
7450Sstevel@tonic-gate 
/*
 * Minimum time-out values indexed by call type
 * These units are in "eights" of a second to avoid multiplies
 */
static unsigned int minimum_timeo[] = {
	6, 7, 10
};

/*
 * Back off for retransmission timeout, MAXTIMO is in hz of a sec
 */
#define	MAXTIMO	(20*hz)
/* backoff(): double the timeout, clamping the result at MAXTIMO. */
#define	backoff(tim)	(((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
#define	dobackoff(tim)	((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))

#define	MIN_NFS_TSIZE 512	/* minimum "chunk" of NFS IO */
#define	REDUCE_NFS_TIME (hz/2)	/* rtxcur we try to keep under */
#define	INCREASE_NFS_TIME (hz/3*8) /* srtt we try to keep under (scaled*8) */
7640Sstevel@tonic-gate 
7650Sstevel@tonic-gate /*
7660Sstevel@tonic-gate  * Function called when rfscall notices that we have been
7670Sstevel@tonic-gate  * re-transmitting, or when we get a response without retransmissions.
7680Sstevel@tonic-gate  * Return 1 if the transfer size was adjusted down - 0 if no change.
7690Sstevel@tonic-gate  */
7700Sstevel@tonic-gate static int
nfs_feedback(int flag,int which,mntinfo_t * mi)7710Sstevel@tonic-gate nfs_feedback(int flag, int which, mntinfo_t *mi)
7720Sstevel@tonic-gate {
7730Sstevel@tonic-gate 	int kind;
7740Sstevel@tonic-gate 	int r = 0;
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
7770Sstevel@tonic-gate 	if (flag == FEEDBACK_REXMIT1) {
7780Sstevel@tonic-gate 		if (mi->mi_timers[NFS_CALLTYPES].rt_rtxcur != 0 &&
7790Sstevel@tonic-gate 		    mi->mi_timers[NFS_CALLTYPES].rt_rtxcur < REDUCE_NFS_TIME)
7800Sstevel@tonic-gate 			goto done;
7810Sstevel@tonic-gate 		if (mi->mi_curread > MIN_NFS_TSIZE) {
7820Sstevel@tonic-gate 			mi->mi_curread /= 2;
7830Sstevel@tonic-gate 			if (mi->mi_curread < MIN_NFS_TSIZE)
7840Sstevel@tonic-gate 				mi->mi_curread = MIN_NFS_TSIZE;
7850Sstevel@tonic-gate 			r = 1;
7860Sstevel@tonic-gate 		}
7870Sstevel@tonic-gate 
7880Sstevel@tonic-gate 		if (mi->mi_curwrite > MIN_NFS_TSIZE) {
7890Sstevel@tonic-gate 			mi->mi_curwrite /= 2;
7900Sstevel@tonic-gate 			if (mi->mi_curwrite < MIN_NFS_TSIZE)
7910Sstevel@tonic-gate 				mi->mi_curwrite = MIN_NFS_TSIZE;
7920Sstevel@tonic-gate 			r = 1;
7930Sstevel@tonic-gate 		}
7940Sstevel@tonic-gate 	} else if (flag == FEEDBACK_OK) {
7950Sstevel@tonic-gate 		kind = mi->mi_timer_type[which];
7960Sstevel@tonic-gate 		if (kind == 0 ||
7970Sstevel@tonic-gate 		    mi->mi_timers[kind].rt_srtt >= INCREASE_NFS_TIME)
7980Sstevel@tonic-gate 			goto done;
7990Sstevel@tonic-gate 		if (kind == 1) {
8000Sstevel@tonic-gate 			if (mi->mi_curread >= mi->mi_tsize)
8010Sstevel@tonic-gate 				goto done;
8020Sstevel@tonic-gate 			mi->mi_curread +=  MIN_NFS_TSIZE;
8030Sstevel@tonic-gate 			if (mi->mi_curread > mi->mi_tsize/2)
8040Sstevel@tonic-gate 				mi->mi_curread = mi->mi_tsize;
8050Sstevel@tonic-gate 		} else if (kind == 2) {
8060Sstevel@tonic-gate 			if (mi->mi_curwrite >= mi->mi_stsize)
8070Sstevel@tonic-gate 				goto done;
8080Sstevel@tonic-gate 			mi->mi_curwrite += MIN_NFS_TSIZE;
8090Sstevel@tonic-gate 			if (mi->mi_curwrite > mi->mi_stsize/2)
8100Sstevel@tonic-gate 				mi->mi_curwrite = mi->mi_stsize;
8110Sstevel@tonic-gate 		}
8120Sstevel@tonic-gate 	}
8130Sstevel@tonic-gate done:
8140Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
8150Sstevel@tonic-gate 	return (r);
8160Sstevel@tonic-gate }
8170Sstevel@tonic-gate 
8180Sstevel@tonic-gate #ifdef DEBUG
8190Sstevel@tonic-gate static int rfs2call_hits = 0;
8200Sstevel@tonic-gate static int rfs2call_misses = 0;
8210Sstevel@tonic-gate #endif
8220Sstevel@tonic-gate 
8230Sstevel@tonic-gate int
rfs2call(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * cr,int * douprintf,enum nfsstat * statusp,int flags,failinfo_t * fi)8240Sstevel@tonic-gate rfs2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
8250Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
8260Sstevel@tonic-gate     enum nfsstat *statusp, int flags, failinfo_t *fi)
8270Sstevel@tonic-gate {
8280Sstevel@tonic-gate 	int rpcerror;
8290Sstevel@tonic-gate 	enum clnt_stat rpc_status;
8300Sstevel@tonic-gate 
8310Sstevel@tonic-gate 	ASSERT(statusp != NULL);
8320Sstevel@tonic-gate 
8330Sstevel@tonic-gate 	rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
8340Sstevel@tonic-gate 	    cr, douprintf, &rpc_status, flags, fi);
8350Sstevel@tonic-gate 	if (!rpcerror) {
8360Sstevel@tonic-gate 		/*
8370Sstevel@tonic-gate 		 * See crnetadjust() for comments.
8380Sstevel@tonic-gate 		 */
8390Sstevel@tonic-gate 		if (*statusp == NFSERR_ACCES &&
8400Sstevel@tonic-gate 		    (cr = crnetadjust(cr)) != NULL) {
8410Sstevel@tonic-gate #ifdef DEBUG
8420Sstevel@tonic-gate 			rfs2call_hits++;
8430Sstevel@tonic-gate #endif
8440Sstevel@tonic-gate 			rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres,
8450Sstevel@tonic-gate 			    resp, cr, douprintf, NULL, flags, fi);
8460Sstevel@tonic-gate 			crfree(cr);
8470Sstevel@tonic-gate #ifdef DEBUG
8480Sstevel@tonic-gate 			if (*statusp == NFSERR_ACCES)
8490Sstevel@tonic-gate 				rfs2call_misses++;
8500Sstevel@tonic-gate #endif
8510Sstevel@tonic-gate 		}
8520Sstevel@tonic-gate 	} else if (rpc_status == RPC_PROCUNAVAIL) {
8530Sstevel@tonic-gate 		*statusp = NFSERR_OPNOTSUPP;
8540Sstevel@tonic-gate 		rpcerror = 0;
8550Sstevel@tonic-gate 	}
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate 	return (rpcerror);
8580Sstevel@tonic-gate }
8590Sstevel@tonic-gate 
8600Sstevel@tonic-gate #define	NFS3_JUKEBOX_DELAY	10 * hz
8610Sstevel@tonic-gate 
8620Sstevel@tonic-gate static clock_t nfs3_jukebox_delay = 0;
8630Sstevel@tonic-gate 
8640Sstevel@tonic-gate #ifdef DEBUG
8650Sstevel@tonic-gate static int rfs3call_hits = 0;
8660Sstevel@tonic-gate static int rfs3call_misses = 0;
8670Sstevel@tonic-gate #endif
8680Sstevel@tonic-gate 
8690Sstevel@tonic-gate int
rfs3call(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * cr,int * douprintf,nfsstat3 * statusp,int flags,failinfo_t * fi)8700Sstevel@tonic-gate rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
8710Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
8720Sstevel@tonic-gate     nfsstat3 *statusp, int flags, failinfo_t *fi)
8730Sstevel@tonic-gate {
8740Sstevel@tonic-gate 	int rpcerror;
8750Sstevel@tonic-gate 	int user_informed;
8760Sstevel@tonic-gate 
8770Sstevel@tonic-gate 	user_informed = 0;
8780Sstevel@tonic-gate 	do {
8790Sstevel@tonic-gate 		rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
8800Sstevel@tonic-gate 		    cr, douprintf, NULL, flags, fi);
8810Sstevel@tonic-gate 		if (!rpcerror) {
8820Sstevel@tonic-gate 			cred_t *crr;
8830Sstevel@tonic-gate 			if (*statusp == NFS3ERR_JUKEBOX) {
8840Sstevel@tonic-gate 				if (ttoproc(curthread) == &p0) {
8850Sstevel@tonic-gate 					rpcerror = EAGAIN;
8860Sstevel@tonic-gate 					break;
8870Sstevel@tonic-gate 				}
8880Sstevel@tonic-gate 				if (!user_informed) {
8890Sstevel@tonic-gate 					user_informed = 1;
8900Sstevel@tonic-gate 					uprintf(
8910Sstevel@tonic-gate 		"file temporarily unavailable on the server, retrying...\n");
8920Sstevel@tonic-gate 				}
8930Sstevel@tonic-gate 				delay(nfs3_jukebox_delay);
8940Sstevel@tonic-gate 			}
8950Sstevel@tonic-gate 			/*
8960Sstevel@tonic-gate 			 * See crnetadjust() for comments.
8970Sstevel@tonic-gate 			 */
8980Sstevel@tonic-gate 			else if (*statusp == NFS3ERR_ACCES &&
8990Sstevel@tonic-gate 			    (crr = crnetadjust(cr)) != NULL) {
9000Sstevel@tonic-gate #ifdef DEBUG
9010Sstevel@tonic-gate 				rfs3call_hits++;
9020Sstevel@tonic-gate #endif
9030Sstevel@tonic-gate 				rpcerror = rfscall(mi, which, xdrargs, argsp,
9040Sstevel@tonic-gate 				    xdrres, resp, crr, douprintf,
9050Sstevel@tonic-gate 				    NULL, flags, fi);
9060Sstevel@tonic-gate 
9070Sstevel@tonic-gate 				crfree(crr);
9080Sstevel@tonic-gate #ifdef DEBUG
9090Sstevel@tonic-gate 				if (*statusp == NFS3ERR_ACCES)
9100Sstevel@tonic-gate 					rfs3call_misses++;
9110Sstevel@tonic-gate #endif
9120Sstevel@tonic-gate 			}
9130Sstevel@tonic-gate 		}
9140Sstevel@tonic-gate 	} while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);
9150Sstevel@tonic-gate 
9160Sstevel@tonic-gate 	return (rpcerror);
9170Sstevel@tonic-gate }
9180Sstevel@tonic-gate 
/*
 * VALID_FH() is true when the rnode's filehandle still belongs to the
 * mount's current server, i.e. no failover has changed mi_curr_serv
 * since the filehandle was last mapped.
 */
#define	VALID_FH(fi)	(VTOR(fi->vp)->r_server == VTOMI(fi->vp)->mi_curr_serv)
/*
 * INC_READERS()/DEC_READERS() bracket regions that must not see the
 * current server change underneath them; the last reader out wakes any
 * thread blocked on mi_failover_cv.  Callers invoke these while
 * holding mi_lock (see rfscall()).
 */
#define	INC_READERS(mi)		{ \
	mi->mi_readers++; \
}
#define	DEC_READERS(mi)		{ \
	mi->mi_readers--; \
	if (mi->mi_readers == 0) \
		cv_broadcast(&mi->mi_failover_cv); \
}
9280Sstevel@tonic-gate 
9290Sstevel@tonic-gate static int
rfscall(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * icr,int * douprintf,enum clnt_stat * rpc_status,int flags,failinfo_t * fi)9300Sstevel@tonic-gate rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
9311676Sjpk     xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *douprintf,
9320Sstevel@tonic-gate     enum clnt_stat *rpc_status, int flags, failinfo_t *fi)
9330Sstevel@tonic-gate {
9340Sstevel@tonic-gate 	CLIENT *client;
9350Sstevel@tonic-gate 	struct chtab *ch;
9361676Sjpk 	cred_t *cr = icr;
9370Sstevel@tonic-gate 	enum clnt_stat status;
9389675Sdai.ngo@sun.com 	struct rpc_err rpcerr, rpcerr_tmp;
9390Sstevel@tonic-gate 	struct timeval wait;
9400Sstevel@tonic-gate 	int timeo;		/* in units of hz */
9410Sstevel@tonic-gate 	int my_rsize, my_wsize;
9420Sstevel@tonic-gate 	bool_t tryagain;
9431676Sjpk 	bool_t cred_cloned = FALSE;
9440Sstevel@tonic-gate 	k_sigset_t smask;
9450Sstevel@tonic-gate 	servinfo_t *svp;
9460Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
9470Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
9489675Sdai.ngo@sun.com 	char *msg;
9490Sstevel@tonic-gate #ifdef DEBUG
9500Sstevel@tonic-gate 	char *bufp;
9510Sstevel@tonic-gate #endif
9520Sstevel@tonic-gate 
9530Sstevel@tonic-gate 
9540Sstevel@tonic-gate 	TRACE_2(TR_FAC_NFS, TR_RFSCALL_START,
9554300Smarks 	    "rfscall_start:which %d mi %p", which, mi);
9560Sstevel@tonic-gate 
957766Scarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
9580Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
9590Sstevel@tonic-gate 
9600Sstevel@tonic-gate 	nfscl->nfscl_stat.calls.value.ui64++;
9610Sstevel@tonic-gate 	mi->mi_reqs[which].value.ui64++;
9620Sstevel@tonic-gate 
9630Sstevel@tonic-gate 	rpcerr.re_status = RPC_SUCCESS;
9640Sstevel@tonic-gate 
9650Sstevel@tonic-gate 	/*
9660Sstevel@tonic-gate 	 * In case of forced unmount or zone shutdown, return EIO.
9670Sstevel@tonic-gate 	 */
9680Sstevel@tonic-gate 
9690Sstevel@tonic-gate 	if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
9700Sstevel@tonic-gate 		rpcerr.re_status = RPC_FAILED;
9710Sstevel@tonic-gate 		rpcerr.re_errno = EIO;
9720Sstevel@tonic-gate 		return (rpcerr.re_errno);
9730Sstevel@tonic-gate 	}
9740Sstevel@tonic-gate 
9750Sstevel@tonic-gate 	/*
9760Sstevel@tonic-gate 	 * Remember the transfer sizes in case
9770Sstevel@tonic-gate 	 * nfs_feedback changes them underneath us.
9780Sstevel@tonic-gate 	 */
9790Sstevel@tonic-gate 	my_rsize = mi->mi_curread;
9800Sstevel@tonic-gate 	my_wsize = mi->mi_curwrite;
9810Sstevel@tonic-gate 
9820Sstevel@tonic-gate 	/*
9830Sstevel@tonic-gate 	 * NFS client failover support
9840Sstevel@tonic-gate 	 *
9850Sstevel@tonic-gate 	 * If this rnode is not in sync with the current server (VALID_FH),
9860Sstevel@tonic-gate 	 * we'd like to do a remap to get in sync.  We can be interrupted
9870Sstevel@tonic-gate 	 * in failover_remap(), and if so we'll bail.  Otherwise, we'll
9880Sstevel@tonic-gate 	 * use the best info we have to try the RPC.  Part of that is
9890Sstevel@tonic-gate 	 * unconditionally updating the filehandle copy kept for V3.
9900Sstevel@tonic-gate 	 *
	 * Locking: INC_READERS/DEC_READERS is a poor man's interruptible
9920Sstevel@tonic-gate 	 * rw_enter(); we're trying to keep the current server from being
9930Sstevel@tonic-gate 	 * changed on us until we're done with the remapping and have a
	 * matching client handle.  We don't want to send a filehandle
9950Sstevel@tonic-gate 	 * to the wrong host.
9960Sstevel@tonic-gate 	 */
9970Sstevel@tonic-gate failoverretry:
9980Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
9990Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
10000Sstevel@tonic-gate 		if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) {
10010Sstevel@tonic-gate 			if (failover_wait(mi)) {
10020Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
10030Sstevel@tonic-gate 				return (EINTR);
10040Sstevel@tonic-gate 			}
10050Sstevel@tonic-gate 		}
10060Sstevel@tonic-gate 		INC_READERS(mi);
10070Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
10080Sstevel@tonic-gate 		if (fi) {
10090Sstevel@tonic-gate 			if (!VALID_FH(fi) &&
10100Sstevel@tonic-gate 			    !(flags & RFSCALL_SOFT) && failover_safe(fi)) {
10110Sstevel@tonic-gate 				int remaperr;
10120Sstevel@tonic-gate 
10130Sstevel@tonic-gate 				svp = mi->mi_curr_serv;
10140Sstevel@tonic-gate 				remaperr = failover_remap(fi);
10150Sstevel@tonic-gate 				if (remaperr != 0) {
10160Sstevel@tonic-gate #ifdef DEBUG
10170Sstevel@tonic-gate 					if (remaperr != EINTR)
10180Sstevel@tonic-gate 						nfs_cmn_err(remaperr, CE_WARN,
10190Sstevel@tonic-gate 					    "rfscall couldn't failover: %m");
10200Sstevel@tonic-gate #endif
10210Sstevel@tonic-gate 					mutex_enter(&mi->mi_lock);
10220Sstevel@tonic-gate 					DEC_READERS(mi);
10230Sstevel@tonic-gate 					mutex_exit(&mi->mi_lock);
10240Sstevel@tonic-gate 					/*
10250Sstevel@tonic-gate 					 * If failover_remap returns ETIMEDOUT
10260Sstevel@tonic-gate 					 * and the filesystem is hard mounted
10270Sstevel@tonic-gate 					 * we have to retry the call with a new
10280Sstevel@tonic-gate 					 * server.
10290Sstevel@tonic-gate 					 */
10300Sstevel@tonic-gate 					if ((mi->mi_flags & MI_HARD) &&
10310Sstevel@tonic-gate 					    IS_RECOVERABLE_ERROR(remaperr)) {
10320Sstevel@tonic-gate 						if (svp == mi->mi_curr_serv)
10330Sstevel@tonic-gate 							failover_newserver(mi);
10340Sstevel@tonic-gate 						rpcerr.re_status = RPC_SUCCESS;
10350Sstevel@tonic-gate 						goto failoverretry;
10360Sstevel@tonic-gate 					}
10370Sstevel@tonic-gate 					rpcerr.re_errno = remaperr;
10380Sstevel@tonic-gate 					return (remaperr);
10390Sstevel@tonic-gate 				}
10400Sstevel@tonic-gate 			}
10410Sstevel@tonic-gate 			if (fi->fhp && fi->copyproc)
10420Sstevel@tonic-gate 				(*fi->copyproc)(fi->fhp, fi->vp);
10430Sstevel@tonic-gate 		}
10440Sstevel@tonic-gate 	}
10450Sstevel@tonic-gate 
10461676Sjpk 	/* For TSOL, use a new cred which has net_mac_aware flag */
10471676Sjpk 	if (!cred_cloned && is_system_labeled()) {
10481676Sjpk 		cred_cloned = TRUE;
10491676Sjpk 		cr = crdup(icr);
10501676Sjpk 		(void) setpflags(NET_MAC_AWARE, 1, cr);
10511676Sjpk 	}
10521676Sjpk 
10530Sstevel@tonic-gate 	/*
10540Sstevel@tonic-gate 	 * clget() calls clnt_tli_kinit() which clears the xid, so we
10550Sstevel@tonic-gate 	 * are guaranteed to reprocess the retry as a new request.
10560Sstevel@tonic-gate 	 */
10570Sstevel@tonic-gate 	svp = mi->mi_curr_serv;
10580Sstevel@tonic-gate 	rpcerr.re_errno = nfs_clget(mi, svp, cr, &client, &ch, nfscl);
10590Sstevel@tonic-gate 
10600Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
10610Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
10620Sstevel@tonic-gate 		DEC_READERS(mi);
10630Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
10640Sstevel@tonic-gate 
10650Sstevel@tonic-gate 		if ((rpcerr.re_errno == ETIMEDOUT ||
10664300Smarks 		    rpcerr.re_errno == ECONNRESET) &&
10674300Smarks 		    failover_safe(fi)) {
10680Sstevel@tonic-gate 			if (svp == mi->mi_curr_serv)
10690Sstevel@tonic-gate 				failover_newserver(mi);
10700Sstevel@tonic-gate 			goto failoverretry;
10710Sstevel@tonic-gate 		}
10720Sstevel@tonic-gate 	}
10730Sstevel@tonic-gate 	if (rpcerr.re_errno != 0)
10740Sstevel@tonic-gate 		return (rpcerr.re_errno);
10750Sstevel@tonic-gate 
10760Sstevel@tonic-gate 	if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
10770Sstevel@tonic-gate 	    svp->sv_knconf->knc_semantics == NC_TPI_COTS) {
10780Sstevel@tonic-gate 		timeo = (mi->mi_timeo * hz) / 10;
10790Sstevel@tonic-gate 	} else {
10800Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
10810Sstevel@tonic-gate 		timeo = CLNT_SETTIMERS(client,
10820Sstevel@tonic-gate 		    &(mi->mi_timers[mi->mi_timer_type[which]]),
10830Sstevel@tonic-gate 		    &(mi->mi_timers[NFS_CALLTYPES]),
10840Sstevel@tonic-gate 		    (minimum_timeo[mi->mi_call_type[which]]*hz)>>3,
10850Sstevel@tonic-gate 		    (void (*)())NULL, (caddr_t)mi, 0);
10860Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
10870Sstevel@tonic-gate 	}
10880Sstevel@tonic-gate 
10890Sstevel@tonic-gate 	/*
10900Sstevel@tonic-gate 	 * If hard mounted fs, retry call forever unless hard error occurs.
10910Sstevel@tonic-gate 	 */
10920Sstevel@tonic-gate 	do {
10930Sstevel@tonic-gate 		tryagain = FALSE;
10940Sstevel@tonic-gate 
10950Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
10960Sstevel@tonic-gate 			status = RPC_FAILED;
10970Sstevel@tonic-gate 			rpcerr.re_status = RPC_FAILED;
10980Sstevel@tonic-gate 			rpcerr.re_errno = EIO;
10990Sstevel@tonic-gate 			break;
11000Sstevel@tonic-gate 		}
11010Sstevel@tonic-gate 
11020Sstevel@tonic-gate 		TICK_TO_TIMEVAL(timeo, &wait);
11030Sstevel@tonic-gate 
11040Sstevel@tonic-gate 		/*
11050Sstevel@tonic-gate 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
11060Sstevel@tonic-gate 		 * and SIGTERM. (Preserving the existing masks).
11070Sstevel@tonic-gate 		 * Mask out SIGINT if mount option nointr is specified.
11080Sstevel@tonic-gate 		 */
11090Sstevel@tonic-gate 		sigintr(&smask, (int)mi->mi_flags & MI_INT);
11100Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
11110Sstevel@tonic-gate 			client->cl_nosignal = TRUE;
11120Sstevel@tonic-gate 
11130Sstevel@tonic-gate 		/*
11140Sstevel@tonic-gate 		 * If there is a current signal, then don't bother
11150Sstevel@tonic-gate 		 * even trying to send out the request because we
11160Sstevel@tonic-gate 		 * won't be able to block waiting for the response.
11170Sstevel@tonic-gate 		 * Simply assume RPC_INTR and get on with it.
11180Sstevel@tonic-gate 		 */
11190Sstevel@tonic-gate 		if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
11200Sstevel@tonic-gate 			status = RPC_INTR;
11210Sstevel@tonic-gate 		else {
11220Sstevel@tonic-gate 			status = CLNT_CALL(client, which, xdrargs, argsp,
11230Sstevel@tonic-gate 			    xdrres, resp, wait);
11240Sstevel@tonic-gate 		}
11250Sstevel@tonic-gate 
11260Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
11270Sstevel@tonic-gate 			client->cl_nosignal = FALSE;
11280Sstevel@tonic-gate 		/*
11290Sstevel@tonic-gate 		 * restore original signal mask
11300Sstevel@tonic-gate 		 */
11310Sstevel@tonic-gate 		sigunintr(&smask);
11320Sstevel@tonic-gate 
11330Sstevel@tonic-gate 		switch (status) {
11340Sstevel@tonic-gate 		case RPC_SUCCESS:
11350Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
11360Sstevel@tonic-gate 			    mi->mi_timer_type[which] != 0 &&
11370Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
11380Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize))
11390Sstevel@tonic-gate 				(void) nfs_feedback(FEEDBACK_OK, which, mi);
11400Sstevel@tonic-gate 			break;
11410Sstevel@tonic-gate 
11420Sstevel@tonic-gate 		case RPC_INTR:
11430Sstevel@tonic-gate 			/*
11440Sstevel@tonic-gate 			 * There is no way to recover from this error,
11450Sstevel@tonic-gate 			 * even if mount option nointr is specified.
11460Sstevel@tonic-gate 			 * SIGKILL, for example, cannot be blocked.
11470Sstevel@tonic-gate 			 */
11480Sstevel@tonic-gate 			rpcerr.re_status = RPC_INTR;
11490Sstevel@tonic-gate 			rpcerr.re_errno = EINTR;
11500Sstevel@tonic-gate 			break;
11510Sstevel@tonic-gate 
11520Sstevel@tonic-gate 		case RPC_UDERROR:
11530Sstevel@tonic-gate 			/*
11540Sstevel@tonic-gate 			 * If the NFS server is local (vold) and
11550Sstevel@tonic-gate 			 * it goes away then we get RPC_UDERROR.
11560Sstevel@tonic-gate 			 * This is a retryable error, so we would
11570Sstevel@tonic-gate 			 * loop, so check to see if the specific
11580Sstevel@tonic-gate 			 * error was ECONNRESET, indicating that
11590Sstevel@tonic-gate 			 * target did not exist at all.  If so,
11600Sstevel@tonic-gate 			 * return with RPC_PROGUNAVAIL and
11610Sstevel@tonic-gate 			 * ECONNRESET to indicate why.
11620Sstevel@tonic-gate 			 */
11630Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
11640Sstevel@tonic-gate 			if (rpcerr.re_errno == ECONNRESET) {
11650Sstevel@tonic-gate 				rpcerr.re_status = RPC_PROGUNAVAIL;
11660Sstevel@tonic-gate 				rpcerr.re_errno = ECONNRESET;
11670Sstevel@tonic-gate 				break;
11680Sstevel@tonic-gate 			}
11690Sstevel@tonic-gate 			/*FALLTHROUGH*/
11700Sstevel@tonic-gate 
11710Sstevel@tonic-gate 		default:		/* probably RPC_TIMEDOUT */
11720Sstevel@tonic-gate 			if (IS_UNRECOVERABLE_RPC(status))
11730Sstevel@tonic-gate 				break;
11740Sstevel@tonic-gate 
11750Sstevel@tonic-gate 			/*
11760Sstevel@tonic-gate 			 * increment server not responding count
11770Sstevel@tonic-gate 			 */
11780Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
11790Sstevel@tonic-gate 			mi->mi_noresponse++;
11800Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
11810Sstevel@tonic-gate #ifdef DEBUG
11820Sstevel@tonic-gate 			nfscl->nfscl_stat.noresponse.value.ui64++;
11830Sstevel@tonic-gate #endif
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_HARD)) {
11860Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_SEMISOFT) ||
11870Sstevel@tonic-gate 				    (mi->mi_ss_call_type[which] == 0))
11880Sstevel@tonic-gate 					break;
11890Sstevel@tonic-gate 			}
11900Sstevel@tonic-gate 
11910Sstevel@tonic-gate 			/*
11920Sstevel@tonic-gate 			 * The call is in progress (over COTS).
11930Sstevel@tonic-gate 			 * Try the CLNT_CALL again, but don't
11940Sstevel@tonic-gate 			 * print a noisy error message.
11950Sstevel@tonic-gate 			 */
11960Sstevel@tonic-gate 			if (status == RPC_INPROGRESS) {
11970Sstevel@tonic-gate 				tryagain = TRUE;
11980Sstevel@tonic-gate 				break;
11990Sstevel@tonic-gate 			}
12000Sstevel@tonic-gate 
12010Sstevel@tonic-gate 			if (flags & RFSCALL_SOFT)
12020Sstevel@tonic-gate 				break;
12030Sstevel@tonic-gate 
12040Sstevel@tonic-gate 			/*
12050Sstevel@tonic-gate 			 * On zone shutdown, just move on.
12060Sstevel@tonic-gate 			 */
12070Sstevel@tonic-gate 			if (zone_status_get(curproc->p_zone) >=
12080Sstevel@tonic-gate 			    ZONE_IS_SHUTTING_DOWN) {
12090Sstevel@tonic-gate 				rpcerr.re_status = RPC_FAILED;
12100Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
12110Sstevel@tonic-gate 				break;
12120Sstevel@tonic-gate 			}
12130Sstevel@tonic-gate 
12140Sstevel@tonic-gate 			/*
12150Sstevel@tonic-gate 			 * NFS client failover support
12160Sstevel@tonic-gate 			 *
12170Sstevel@tonic-gate 			 * If the current server just failed us, we'll
12180Sstevel@tonic-gate 			 * start the process of finding a new server.
12190Sstevel@tonic-gate 			 * After that, we can just retry.
12200Sstevel@tonic-gate 			 */
12210Sstevel@tonic-gate 			if (FAILOVER_MOUNT(mi) && failover_safe(fi)) {
12220Sstevel@tonic-gate 				if (svp == mi->mi_curr_serv)
12230Sstevel@tonic-gate 					failover_newserver(mi);
12240Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
12250Sstevel@tonic-gate 				goto failoverretry;
12260Sstevel@tonic-gate 			}
12270Sstevel@tonic-gate 
12280Sstevel@tonic-gate 			tryagain = TRUE;
12290Sstevel@tonic-gate 			timeo = backoff(timeo);
12309675Sdai.ngo@sun.com 
12319675Sdai.ngo@sun.com 			CLNT_GETERR(client, &rpcerr_tmp);
12329675Sdai.ngo@sun.com 			if ((status == RPC_CANTSEND) &&
12339675Sdai.ngo@sun.com 			    (rpcerr_tmp.re_errno == ENOBUFS))
12349675Sdai.ngo@sun.com 				msg = SRV_QFULL_MSG;
12359675Sdai.ngo@sun.com 			else
12369675Sdai.ngo@sun.com 				msg = SRV_NOTRESP_MSG;
12379675Sdai.ngo@sun.com 
12380Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
12390Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_PRINTED)) {
12400Sstevel@tonic-gate 				mi->mi_flags |= MI_PRINTED;
12410Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
12420Sstevel@tonic-gate #ifdef DEBUG
12439675Sdai.ngo@sun.com 				zprintf(zoneid, msg, mi->mi_vers,
12449675Sdai.ngo@sun.com 				    svp->sv_hostname);
12450Sstevel@tonic-gate #else
12469675Sdai.ngo@sun.com 				zprintf(zoneid, msg, svp->sv_hostname);
12470Sstevel@tonic-gate #endif
12480Sstevel@tonic-gate 			} else
12490Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
12502712Snn35248 			if (*douprintf && nfs_has_ctty()) {
12510Sstevel@tonic-gate 				*douprintf = 0;
12520Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT))
12530Sstevel@tonic-gate #ifdef DEBUG
12549675Sdai.ngo@sun.com 					uprintf(msg, mi->mi_vers,
12559675Sdai.ngo@sun.com 					    svp->sv_hostname);
12560Sstevel@tonic-gate #else
12579675Sdai.ngo@sun.com 					uprintf(msg, svp->sv_hostname);
12580Sstevel@tonic-gate #endif
12590Sstevel@tonic-gate 			}
12600Sstevel@tonic-gate 
12610Sstevel@tonic-gate 			/*
12620Sstevel@tonic-gate 			 * If doing dynamic adjustment of transfer
12630Sstevel@tonic-gate 			 * size and if it's a read or write call
12640Sstevel@tonic-gate 			 * and if the transfer size changed while
12650Sstevel@tonic-gate 			 * retransmitting or if the feedback routine
12660Sstevel@tonic-gate 			 * changed the transfer size,
12670Sstevel@tonic-gate 			 * then exit rfscall so that the transfer
12680Sstevel@tonic-gate 			 * size can be adjusted at the vnops level.
12690Sstevel@tonic-gate 			 */
12700Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
12710Sstevel@tonic-gate 			    mi->mi_timer_type[which] != 0 &&
12720Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
12730Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize ||
12740Sstevel@tonic-gate 			    nfs_feedback(FEEDBACK_REXMIT1, which, mi))) {
12750Sstevel@tonic-gate 				/*
12760Sstevel@tonic-gate 				 * On read or write calls, return
12770Sstevel@tonic-gate 				 * back to the vnode ops level if
12780Sstevel@tonic-gate 				 * the transfer size changed.
12790Sstevel@tonic-gate 				 */
12800Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
12811676Sjpk 				if (cred_cloned)
12821676Sjpk 					crfree(cr);
12830Sstevel@tonic-gate 				return (ENFS_TRYAGAIN);
12840Sstevel@tonic-gate 			}
12850Sstevel@tonic-gate 		}
12860Sstevel@tonic-gate 	} while (tryagain);
12870Sstevel@tonic-gate 
12880Sstevel@tonic-gate 	if (status != RPC_SUCCESS) {
12890Sstevel@tonic-gate 		/*
12900Sstevel@tonic-gate 		 * Let soft mounts use the timed out message.
12910Sstevel@tonic-gate 		 */
12920Sstevel@tonic-gate 		if (status == RPC_INPROGRESS)
12930Sstevel@tonic-gate 			status = RPC_TIMEDOUT;
12940Sstevel@tonic-gate 		nfscl->nfscl_stat.badcalls.value.ui64++;
12950Sstevel@tonic-gate 		if (status != RPC_INTR) {
12960Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
12970Sstevel@tonic-gate 			mi->mi_flags |= MI_DOWN;
12980Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
12990Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
13000Sstevel@tonic-gate #ifdef DEBUG
13010Sstevel@tonic-gate 			bufp = clnt_sperror(client, svp->sv_hostname);
13020Sstevel@tonic-gate 			zprintf(zoneid, "NFS%d %s failed for %s\n",
13030Sstevel@tonic-gate 			    mi->mi_vers, mi->mi_rfsnames[which], bufp);
13042712Snn35248 			if (nfs_has_ctty()) {
13050Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT)) {
13060Sstevel@tonic-gate 					uprintf("NFS%d %s failed for %s\n",
13070Sstevel@tonic-gate 					    mi->mi_vers, mi->mi_rfsnames[which],
13080Sstevel@tonic-gate 					    bufp);
13090Sstevel@tonic-gate 				}
13100Sstevel@tonic-gate 			}
13110Sstevel@tonic-gate 			kmem_free(bufp, MAXPATHLEN);
13120Sstevel@tonic-gate #else
13130Sstevel@tonic-gate 			zprintf(zoneid,
13140Sstevel@tonic-gate 			    "NFS %s failed for server %s: error %d (%s)\n",
13150Sstevel@tonic-gate 			    mi->mi_rfsnames[which], svp->sv_hostname,
13160Sstevel@tonic-gate 			    status, clnt_sperrno(status));
13172712Snn35248 			if (nfs_has_ctty()) {
13180Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT)) {
13190Sstevel@tonic-gate 					uprintf(
13200Sstevel@tonic-gate 				"NFS %s failed for server %s: error %d (%s)\n",
13210Sstevel@tonic-gate 					    mi->mi_rfsnames[which],
13220Sstevel@tonic-gate 					    svp->sv_hostname, status,
13230Sstevel@tonic-gate 					    clnt_sperrno(status));
13240Sstevel@tonic-gate 				}
13250Sstevel@tonic-gate 			}
13260Sstevel@tonic-gate #endif
13270Sstevel@tonic-gate 			/*
13280Sstevel@tonic-gate 			 * when CLNT_CALL() fails with RPC_AUTHERROR,
13290Sstevel@tonic-gate 			 * re_errno is set appropriately depending on
13300Sstevel@tonic-gate 			 * the authentication error
13310Sstevel@tonic-gate 			 */
13320Sstevel@tonic-gate 			if (status == RPC_VERSMISMATCH ||
13330Sstevel@tonic-gate 			    status == RPC_PROGVERSMISMATCH)
13340Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
13350Sstevel@tonic-gate 		}
13360Sstevel@tonic-gate 	} else {
13370Sstevel@tonic-gate 		/*
13380Sstevel@tonic-gate 		 * Test the value of mi_down and mi_printed without
13390Sstevel@tonic-gate 		 * holding the mi_lock mutex.  If they are both zero,
13400Sstevel@tonic-gate 		 * then it is okay to skip the down and printed
13410Sstevel@tonic-gate 		 * processing.  This saves on a mutex_enter and
13420Sstevel@tonic-gate 		 * mutex_exit pair for a normal, successful RPC.
13430Sstevel@tonic-gate 		 * This was just complete overhead.
13440Sstevel@tonic-gate 		 */
13450Sstevel@tonic-gate 		if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) {
13460Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
13470Sstevel@tonic-gate 			mi->mi_flags &= ~MI_DOWN;
13480Sstevel@tonic-gate 			if (mi->mi_flags & MI_PRINTED) {
13490Sstevel@tonic-gate 				mi->mi_flags &= ~MI_PRINTED;
13500Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
13510Sstevel@tonic-gate #ifdef DEBUG
13520Sstevel@tonic-gate 			if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13530Sstevel@tonic-gate 				zprintf(zoneid, "NFS%d server %s ok\n",
13540Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
13550Sstevel@tonic-gate #else
13560Sstevel@tonic-gate 			if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13570Sstevel@tonic-gate 				zprintf(zoneid, "NFS server %s ok\n",
13580Sstevel@tonic-gate 				    svp->sv_hostname);
13590Sstevel@tonic-gate #endif
13600Sstevel@tonic-gate 			} else
13610Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
13620Sstevel@tonic-gate 		}
13630Sstevel@tonic-gate 
13640Sstevel@tonic-gate 		if (*douprintf == 0) {
13650Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_NOPRINT))
13660Sstevel@tonic-gate #ifdef DEBUG
13670Sstevel@tonic-gate 				if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13680Sstevel@tonic-gate 					uprintf("NFS%d server %s ok\n",
13690Sstevel@tonic-gate 					    mi->mi_vers, svp->sv_hostname);
13700Sstevel@tonic-gate #else
13710Sstevel@tonic-gate 			if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13720Sstevel@tonic-gate 				uprintf("NFS server %s ok\n", svp->sv_hostname);
13730Sstevel@tonic-gate #endif
13740Sstevel@tonic-gate 			*douprintf = 1;
13750Sstevel@tonic-gate 		}
13760Sstevel@tonic-gate 	}
13770Sstevel@tonic-gate 
13780Sstevel@tonic-gate 	clfree_impl(client, ch, nfscl);
13791676Sjpk 	if (cred_cloned)
13801676Sjpk 		crfree(cr);
13810Sstevel@tonic-gate 
13820Sstevel@tonic-gate 	ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
13830Sstevel@tonic-gate 
13840Sstevel@tonic-gate 	if (rpc_status != NULL)
13850Sstevel@tonic-gate 		*rpc_status = rpcerr.re_status;
13860Sstevel@tonic-gate 
13870Sstevel@tonic-gate 	TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d",
13880Sstevel@tonic-gate 	    rpcerr.re_errno);
13890Sstevel@tonic-gate 
13900Sstevel@tonic-gate 	return (rpcerr.re_errno);
13910Sstevel@tonic-gate }
13920Sstevel@tonic-gate 
#ifdef DEBUG
/*
 * Debug counters for acl2call(): "hits" counts NFSERR_ACCES replies that
 * were retried with an adjusted credential, "misses" counts retries that
 * still came back NFSERR_ACCES.
 */
static int acl2call_hits = 0;
static int acl2call_misses = 0;
#endif
13970Sstevel@tonic-gate 
13980Sstevel@tonic-gate int
acl2call(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * cr,int * douprintf,enum nfsstat * statusp,int flags,failinfo_t * fi)13990Sstevel@tonic-gate acl2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
14000Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
14010Sstevel@tonic-gate     enum nfsstat *statusp, int flags, failinfo_t *fi)
14020Sstevel@tonic-gate {
14030Sstevel@tonic-gate 	int rpcerror;
14040Sstevel@tonic-gate 
14050Sstevel@tonic-gate 	rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
14060Sstevel@tonic-gate 	    cr, douprintf, flags, fi);
14070Sstevel@tonic-gate 	if (!rpcerror) {
14080Sstevel@tonic-gate 		/*
14090Sstevel@tonic-gate 		 * See comments with crnetadjust().
14100Sstevel@tonic-gate 		 */
14110Sstevel@tonic-gate 		if (*statusp == NFSERR_ACCES &&
14120Sstevel@tonic-gate 		    (cr = crnetadjust(cr)) != NULL) {
14130Sstevel@tonic-gate #ifdef DEBUG
14140Sstevel@tonic-gate 			acl2call_hits++;
14150Sstevel@tonic-gate #endif
14160Sstevel@tonic-gate 			rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres,
14170Sstevel@tonic-gate 			    resp, cr, douprintf, flags, fi);
14180Sstevel@tonic-gate 			crfree(cr);
14190Sstevel@tonic-gate #ifdef DEBUG
14200Sstevel@tonic-gate 			if (*statusp == NFSERR_ACCES)
14210Sstevel@tonic-gate 				acl2call_misses++;
14220Sstevel@tonic-gate #endif
14230Sstevel@tonic-gate 		}
14240Sstevel@tonic-gate 	}
14250Sstevel@tonic-gate 
14260Sstevel@tonic-gate 	return (rpcerror);
14270Sstevel@tonic-gate }
14280Sstevel@tonic-gate 
#ifdef DEBUG
/*
 * Debug counters for acl3call(): "hits" counts NFS3ERR_ACCES replies that
 * were retried with an adjusted credential, "misses" counts retries that
 * still came back NFS3ERR_ACCES.
 */
static int acl3call_hits = 0;
static int acl3call_misses = 0;
#endif
14330Sstevel@tonic-gate 
14340Sstevel@tonic-gate int
acl3call(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * cr,int * douprintf,nfsstat3 * statusp,int flags,failinfo_t * fi)14350Sstevel@tonic-gate acl3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
14360Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
14370Sstevel@tonic-gate     nfsstat3 *statusp, int flags, failinfo_t *fi)
14380Sstevel@tonic-gate {
14390Sstevel@tonic-gate 	int rpcerror;
14400Sstevel@tonic-gate 	int user_informed;
14410Sstevel@tonic-gate 
14420Sstevel@tonic-gate 	user_informed = 0;
14430Sstevel@tonic-gate 
14440Sstevel@tonic-gate 	do {
14450Sstevel@tonic-gate 		rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
14460Sstevel@tonic-gate 		    cr, douprintf, flags, fi);
14470Sstevel@tonic-gate 		if (!rpcerror) {
14480Sstevel@tonic-gate 			cred_t *crr;
14490Sstevel@tonic-gate 			if (*statusp == NFS3ERR_JUKEBOX) {
14500Sstevel@tonic-gate 				if (!user_informed) {
14510Sstevel@tonic-gate 					user_informed = 1;
14520Sstevel@tonic-gate 					uprintf(
14530Sstevel@tonic-gate 		"file temporarily unavailable on the server, retrying...\n");
14540Sstevel@tonic-gate 				}
14550Sstevel@tonic-gate 				delay(nfs3_jukebox_delay);
14560Sstevel@tonic-gate 			}
14570Sstevel@tonic-gate 			/*
14580Sstevel@tonic-gate 			 * See crnetadjust() for comments.
14590Sstevel@tonic-gate 			 */
14600Sstevel@tonic-gate 			else if (*statusp == NFS3ERR_ACCES &&
14610Sstevel@tonic-gate 			    (crr = crnetadjust(cr)) != NULL) {
14620Sstevel@tonic-gate #ifdef DEBUG
14630Sstevel@tonic-gate 				acl3call_hits++;
14640Sstevel@tonic-gate #endif
14650Sstevel@tonic-gate 				rpcerror = aclcall(mi, which, xdrargs, argsp,
14660Sstevel@tonic-gate 				    xdrres, resp, crr, douprintf, flags, fi);
14670Sstevel@tonic-gate 
14680Sstevel@tonic-gate 				crfree(crr);
14690Sstevel@tonic-gate #ifdef DEBUG
14700Sstevel@tonic-gate 				if (*statusp == NFS3ERR_ACCES)
14710Sstevel@tonic-gate 					acl3call_misses++;
14720Sstevel@tonic-gate #endif
14730Sstevel@tonic-gate 			}
14740Sstevel@tonic-gate 		}
14750Sstevel@tonic-gate 	} while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);
14760Sstevel@tonic-gate 
14770Sstevel@tonic-gate 	return (rpcerror);
14780Sstevel@tonic-gate }
14790Sstevel@tonic-gate 
14800Sstevel@tonic-gate static int
aclcall(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * icr,int * douprintf,int flags,failinfo_t * fi)14810Sstevel@tonic-gate aclcall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
14821676Sjpk     xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *douprintf,
14830Sstevel@tonic-gate     int flags, failinfo_t *fi)
14840Sstevel@tonic-gate {
14850Sstevel@tonic-gate 	CLIENT *client;
14860Sstevel@tonic-gate 	struct chtab *ch;
14871676Sjpk 	cred_t *cr = icr;
14881676Sjpk 	bool_t cred_cloned = FALSE;
14890Sstevel@tonic-gate 	enum clnt_stat status;
14900Sstevel@tonic-gate 	struct rpc_err rpcerr;
14910Sstevel@tonic-gate 	struct timeval wait;
14920Sstevel@tonic-gate 	int timeo;		/* in units of hz */
14930Sstevel@tonic-gate #if 0 /* notyet */
14940Sstevel@tonic-gate 	int my_rsize, my_wsize;
14950Sstevel@tonic-gate #endif
14960Sstevel@tonic-gate 	bool_t tryagain;
14970Sstevel@tonic-gate 	k_sigset_t smask;
14980Sstevel@tonic-gate 	servinfo_t *svp;
14990Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
15000Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
15010Sstevel@tonic-gate #ifdef DEBUG
15020Sstevel@tonic-gate 	char *bufp;
15030Sstevel@tonic-gate #endif
15040Sstevel@tonic-gate 
15050Sstevel@tonic-gate #if 0 /* notyet */
15060Sstevel@tonic-gate 	TRACE_2(TR_FAC_NFS, TR_RFSCALL_START,
15074300Smarks 	    "rfscall_start:which %d mi %p", which, mi);
15080Sstevel@tonic-gate #endif
15090Sstevel@tonic-gate 
1510766Scarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
15110Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
15120Sstevel@tonic-gate 
15130Sstevel@tonic-gate 	nfscl->nfscl_stat.calls.value.ui64++;
15140Sstevel@tonic-gate 	mi->mi_aclreqs[which].value.ui64++;
15150Sstevel@tonic-gate 
15160Sstevel@tonic-gate 	rpcerr.re_status = RPC_SUCCESS;
15170Sstevel@tonic-gate 
15180Sstevel@tonic-gate 	if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
15190Sstevel@tonic-gate 		rpcerr.re_status = RPC_FAILED;
15200Sstevel@tonic-gate 		rpcerr.re_errno = EIO;
15210Sstevel@tonic-gate 		return (rpcerr.re_errno);
15220Sstevel@tonic-gate 	}
15230Sstevel@tonic-gate 
15240Sstevel@tonic-gate #if 0 /* notyet */
15250Sstevel@tonic-gate 	/*
15260Sstevel@tonic-gate 	 * Remember the transfer sizes in case
15270Sstevel@tonic-gate 	 * nfs_feedback changes them underneath us.
15280Sstevel@tonic-gate 	 */
15290Sstevel@tonic-gate 	my_rsize = mi->mi_curread;
15300Sstevel@tonic-gate 	my_wsize = mi->mi_curwrite;
15310Sstevel@tonic-gate #endif
15320Sstevel@tonic-gate 
15330Sstevel@tonic-gate 	/*
15340Sstevel@tonic-gate 	 * NFS client failover support
15350Sstevel@tonic-gate 	 *
15360Sstevel@tonic-gate 	 * If this rnode is not in sync with the current server (VALID_FH),
15370Sstevel@tonic-gate 	 * we'd like to do a remap to get in sync.  We can be interrupted
15380Sstevel@tonic-gate 	 * in failover_remap(), and if so we'll bail.  Otherwise, we'll
15390Sstevel@tonic-gate 	 * use the best info we have to try the RPC.  Part of that is
15400Sstevel@tonic-gate 	 * unconditionally updating the filehandle copy kept for V3.
15410Sstevel@tonic-gate 	 *
15420Sstevel@tonic-gate 	 * Locking: INC_READERS/DEC_READERS is a poor man's interrruptible
15430Sstevel@tonic-gate 	 * rw_enter(); we're trying to keep the current server from being
15440Sstevel@tonic-gate 	 * changed on us until we're done with the remapping and have a
15450Sstevel@tonic-gate 	 * matching client handle.  We don't want to sending a filehandle
15460Sstevel@tonic-gate 	 * to the wrong host.
15470Sstevel@tonic-gate 	 */
15480Sstevel@tonic-gate failoverretry:
15490Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
15500Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
15510Sstevel@tonic-gate 		if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) {
15520Sstevel@tonic-gate 			if (failover_wait(mi)) {
15530Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
15540Sstevel@tonic-gate 				return (EINTR);
15550Sstevel@tonic-gate 			}
15560Sstevel@tonic-gate 		}
15570Sstevel@tonic-gate 		INC_READERS(mi);
15580Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
15590Sstevel@tonic-gate 		if (fi) {
15600Sstevel@tonic-gate 			if (!VALID_FH(fi) &&
15610Sstevel@tonic-gate 			    !(flags & RFSCALL_SOFT) && failover_safe(fi)) {
15620Sstevel@tonic-gate 				int remaperr;
15630Sstevel@tonic-gate 
15640Sstevel@tonic-gate 				svp = mi->mi_curr_serv;
15650Sstevel@tonic-gate 				remaperr = failover_remap(fi);
15660Sstevel@tonic-gate 				if (remaperr != 0) {
15670Sstevel@tonic-gate #ifdef DEBUG
15680Sstevel@tonic-gate 					if (remaperr != EINTR)
15690Sstevel@tonic-gate 						nfs_cmn_err(remaperr, CE_WARN,
15700Sstevel@tonic-gate 					    "aclcall couldn't failover: %m");
15710Sstevel@tonic-gate #endif
15720Sstevel@tonic-gate 					mutex_enter(&mi->mi_lock);
15730Sstevel@tonic-gate 					DEC_READERS(mi);
15740Sstevel@tonic-gate 					mutex_exit(&mi->mi_lock);
15750Sstevel@tonic-gate 
15760Sstevel@tonic-gate 					/*
15770Sstevel@tonic-gate 					 * If failover_remap returns ETIMEDOUT
15780Sstevel@tonic-gate 					 * and the filesystem is hard mounted
15790Sstevel@tonic-gate 					 * we have to retry the call with a new
15800Sstevel@tonic-gate 					 * server.
15810Sstevel@tonic-gate 					 */
15820Sstevel@tonic-gate 					if ((mi->mi_flags & MI_HARD) &&
15830Sstevel@tonic-gate 					    IS_RECOVERABLE_ERROR(remaperr)) {
15840Sstevel@tonic-gate 						if (svp == mi->mi_curr_serv)
15850Sstevel@tonic-gate 							failover_newserver(mi);
15860Sstevel@tonic-gate 						rpcerr.re_status = RPC_SUCCESS;
15870Sstevel@tonic-gate 						goto failoverretry;
15880Sstevel@tonic-gate 					}
15890Sstevel@tonic-gate 					return (remaperr);
15900Sstevel@tonic-gate 				}
15910Sstevel@tonic-gate 			}
15920Sstevel@tonic-gate 			if (fi->fhp && fi->copyproc)
15930Sstevel@tonic-gate 				(*fi->copyproc)(fi->fhp, fi->vp);
15940Sstevel@tonic-gate 		}
15950Sstevel@tonic-gate 	}
15960Sstevel@tonic-gate 
15971676Sjpk 	/* For TSOL, use a new cred which has net_mac_aware flag */
15981676Sjpk 	if (!cred_cloned && is_system_labeled()) {
15991676Sjpk 		cred_cloned = TRUE;
16001676Sjpk 		cr = crdup(icr);
16011676Sjpk 		(void) setpflags(NET_MAC_AWARE, 1, cr);
16021676Sjpk 	}
16031676Sjpk 
16040Sstevel@tonic-gate 	/*
16050Sstevel@tonic-gate 	 * acl_clget() calls clnt_tli_kinit() which clears the xid, so we
16060Sstevel@tonic-gate 	 * are guaranteed to reprocess the retry as a new request.
16070Sstevel@tonic-gate 	 */
16080Sstevel@tonic-gate 	svp = mi->mi_curr_serv;
16090Sstevel@tonic-gate 	rpcerr.re_errno = acl_clget(mi, svp, cr, &client, &ch, nfscl);
16100Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
16110Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
16120Sstevel@tonic-gate 		DEC_READERS(mi);
16130Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
16140Sstevel@tonic-gate 
16150Sstevel@tonic-gate 		if ((rpcerr.re_errno == ETIMEDOUT ||
16164300Smarks 		    rpcerr.re_errno == ECONNRESET) &&
16174300Smarks 		    failover_safe(fi)) {
16180Sstevel@tonic-gate 			if (svp == mi->mi_curr_serv)
16190Sstevel@tonic-gate 				failover_newserver(mi);
16200Sstevel@tonic-gate 			goto failoverretry;
16210Sstevel@tonic-gate 		}
16220Sstevel@tonic-gate 	}
16231676Sjpk 	if (rpcerr.re_errno != 0) {
16241676Sjpk 		if (cred_cloned)
16251676Sjpk 			crfree(cr);
16260Sstevel@tonic-gate 		return (rpcerr.re_errno);
16271676Sjpk 	}
16280Sstevel@tonic-gate 
16290Sstevel@tonic-gate 	if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
16300Sstevel@tonic-gate 	    svp->sv_knconf->knc_semantics == NC_TPI_COTS) {
16310Sstevel@tonic-gate 		timeo = (mi->mi_timeo * hz) / 10;
16320Sstevel@tonic-gate 	} else {
16330Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
16340Sstevel@tonic-gate 		timeo = CLNT_SETTIMERS(client,
16350Sstevel@tonic-gate 		    &(mi->mi_timers[mi->mi_acl_timer_type[which]]),
16360Sstevel@tonic-gate 		    &(mi->mi_timers[NFS_CALLTYPES]),
16370Sstevel@tonic-gate 		    (minimum_timeo[mi->mi_acl_call_type[which]]*hz)>>3,
16380Sstevel@tonic-gate 		    (void (*)()) 0, (caddr_t)mi, 0);
16390Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
16400Sstevel@tonic-gate 	}
16410Sstevel@tonic-gate 
16420Sstevel@tonic-gate 	/*
16430Sstevel@tonic-gate 	 * If hard mounted fs, retry call forever unless hard error occurs.
16440Sstevel@tonic-gate 	 */
16450Sstevel@tonic-gate 	do {
16460Sstevel@tonic-gate 		tryagain = FALSE;
16470Sstevel@tonic-gate 
16480Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
16490Sstevel@tonic-gate 			status = RPC_FAILED;
16500Sstevel@tonic-gate 			rpcerr.re_status = RPC_FAILED;
16510Sstevel@tonic-gate 			rpcerr.re_errno = EIO;
16520Sstevel@tonic-gate 			break;
16530Sstevel@tonic-gate 		}
16540Sstevel@tonic-gate 
16550Sstevel@tonic-gate 		TICK_TO_TIMEVAL(timeo, &wait);
16560Sstevel@tonic-gate 
16570Sstevel@tonic-gate 		/*
16580Sstevel@tonic-gate 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
16590Sstevel@tonic-gate 		 * and SIGTERM. (Preserving the existing masks).
16600Sstevel@tonic-gate 		 * Mask out SIGINT if mount option nointr is specified.
16610Sstevel@tonic-gate 		 */
16620Sstevel@tonic-gate 		sigintr(&smask, (int)mi->mi_flags & MI_INT);
16630Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
16640Sstevel@tonic-gate 			client->cl_nosignal = TRUE;
16650Sstevel@tonic-gate 
16660Sstevel@tonic-gate 		/*
16670Sstevel@tonic-gate 		 * If there is a current signal, then don't bother
16680Sstevel@tonic-gate 		 * even trying to send out the request because we
16690Sstevel@tonic-gate 		 * won't be able to block waiting for the response.
16700Sstevel@tonic-gate 		 * Simply assume RPC_INTR and get on with it.
16710Sstevel@tonic-gate 		 */
16720Sstevel@tonic-gate 		if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
16730Sstevel@tonic-gate 			status = RPC_INTR;
16740Sstevel@tonic-gate 		else {
16750Sstevel@tonic-gate 			status = CLNT_CALL(client, which, xdrargs, argsp,
16760Sstevel@tonic-gate 			    xdrres, resp, wait);
16770Sstevel@tonic-gate 		}
16780Sstevel@tonic-gate 
16790Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
16800Sstevel@tonic-gate 			client->cl_nosignal = FALSE;
16810Sstevel@tonic-gate 		/*
16820Sstevel@tonic-gate 		 * restore original signal mask
16830Sstevel@tonic-gate 		 */
16840Sstevel@tonic-gate 		sigunintr(&smask);
16850Sstevel@tonic-gate 
16860Sstevel@tonic-gate 		switch (status) {
16870Sstevel@tonic-gate 		case RPC_SUCCESS:
16880Sstevel@tonic-gate #if 0 /* notyet */
16890Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
16900Sstevel@tonic-gate 			    mi->mi_timer_type[which] != 0 &&
16910Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
16920Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize))
16930Sstevel@tonic-gate 				(void) nfs_feedback(FEEDBACK_OK, which, mi);
16940Sstevel@tonic-gate #endif
16950Sstevel@tonic-gate 			break;
16960Sstevel@tonic-gate 
16970Sstevel@tonic-gate 		/*
16980Sstevel@tonic-gate 		 * Unfortunately, there are servers in the world which
16990Sstevel@tonic-gate 		 * are not coded correctly.  They are not prepared to
17000Sstevel@tonic-gate 		 * handle RPC requests to the NFS port which are not
17010Sstevel@tonic-gate 		 * NFS requests.  Thus, they may try to process the
17020Sstevel@tonic-gate 		 * NFS_ACL request as if it were an NFS request.  This
17030Sstevel@tonic-gate 		 * does not work.  Generally, an error will be generated
17040Sstevel@tonic-gate 		 * on the client because it will not be able to decode
17050Sstevel@tonic-gate 		 * the response from the server.  However, it seems
17060Sstevel@tonic-gate 		 * possible that the server may not be able to decode
17070Sstevel@tonic-gate 		 * the arguments.  Thus, the criteria for deciding
17080Sstevel@tonic-gate 		 * whether the server supports NFS_ACL or not is whether
17090Sstevel@tonic-gate 		 * the following RPC errors are returned from CLNT_CALL.
17100Sstevel@tonic-gate 		 */
17110Sstevel@tonic-gate 		case RPC_CANTDECODERES:
17120Sstevel@tonic-gate 		case RPC_PROGUNAVAIL:
17130Sstevel@tonic-gate 		case RPC_CANTDECODEARGS:
17140Sstevel@tonic-gate 		case RPC_PROGVERSMISMATCH:
17150Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
17160Sstevel@tonic-gate 			mi->mi_flags &= ~(MI_ACL | MI_EXTATTR);
17170Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
17180Sstevel@tonic-gate 			break;
17190Sstevel@tonic-gate 
17200Sstevel@tonic-gate 		/*
17210Sstevel@tonic-gate 		 * If the server supports NFS_ACL but not the new ops
17220Sstevel@tonic-gate 		 * for extended attributes, make sure we don't retry.
17230Sstevel@tonic-gate 		 */
17240Sstevel@tonic-gate 		case RPC_PROCUNAVAIL:
17250Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
17260Sstevel@tonic-gate 			mi->mi_flags &= ~MI_EXTATTR;
17270Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
17280Sstevel@tonic-gate 			break;
17290Sstevel@tonic-gate 
17300Sstevel@tonic-gate 		case RPC_INTR:
17310Sstevel@tonic-gate 			/*
17320Sstevel@tonic-gate 			 * There is no way to recover from this error,
17330Sstevel@tonic-gate 			 * even if mount option nointr is specified.
17340Sstevel@tonic-gate 			 * SIGKILL, for example, cannot be blocked.
17350Sstevel@tonic-gate 			 */
17360Sstevel@tonic-gate 			rpcerr.re_status = RPC_INTR;
17370Sstevel@tonic-gate 			rpcerr.re_errno = EINTR;
17380Sstevel@tonic-gate 			break;
17390Sstevel@tonic-gate 
17400Sstevel@tonic-gate 		case RPC_UDERROR:
17410Sstevel@tonic-gate 			/*
17420Sstevel@tonic-gate 			 * If the NFS server is local (vold) and
17430Sstevel@tonic-gate 			 * it goes away then we get RPC_UDERROR.
17440Sstevel@tonic-gate 			 * This is a retryable error, so we would
17450Sstevel@tonic-gate 			 * loop, so check to see if the specific
17460Sstevel@tonic-gate 			 * error was ECONNRESET, indicating that
17470Sstevel@tonic-gate 			 * target did not exist at all.  If so,
17480Sstevel@tonic-gate 			 * return with RPC_PROGUNAVAIL and
17490Sstevel@tonic-gate 			 * ECONNRESET to indicate why.
17500Sstevel@tonic-gate 			 */
17510Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
17520Sstevel@tonic-gate 			if (rpcerr.re_errno == ECONNRESET) {
17530Sstevel@tonic-gate 				rpcerr.re_status = RPC_PROGUNAVAIL;
17540Sstevel@tonic-gate 				rpcerr.re_errno = ECONNRESET;
17550Sstevel@tonic-gate 				break;
17560Sstevel@tonic-gate 			}
17570Sstevel@tonic-gate 			/*FALLTHROUGH*/
17580Sstevel@tonic-gate 
17590Sstevel@tonic-gate 		default:		/* probably RPC_TIMEDOUT */
17600Sstevel@tonic-gate 			if (IS_UNRECOVERABLE_RPC(status))
17610Sstevel@tonic-gate 				break;
17620Sstevel@tonic-gate 
17630Sstevel@tonic-gate 			/*
17640Sstevel@tonic-gate 			 * increment server not responding count
17650Sstevel@tonic-gate 			 */
17660Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
17670Sstevel@tonic-gate 			mi->mi_noresponse++;
17680Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
17690Sstevel@tonic-gate #ifdef DEBUG
17700Sstevel@tonic-gate 			nfscl->nfscl_stat.noresponse.value.ui64++;
17710Sstevel@tonic-gate #endif
17720Sstevel@tonic-gate 
17730Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_HARD)) {
17740Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_SEMISOFT) ||
17750Sstevel@tonic-gate 				    (mi->mi_acl_ss_call_type[which] == 0))
17760Sstevel@tonic-gate 					break;
17770Sstevel@tonic-gate 			}
17780Sstevel@tonic-gate 
17790Sstevel@tonic-gate 			/*
17800Sstevel@tonic-gate 			 * The call is in progress (over COTS).
17810Sstevel@tonic-gate 			 * Try the CLNT_CALL again, but don't
17820Sstevel@tonic-gate 			 * print a noisy error message.
17830Sstevel@tonic-gate 			 */
17840Sstevel@tonic-gate 			if (status == RPC_INPROGRESS) {
17850Sstevel@tonic-gate 				tryagain = TRUE;
17860Sstevel@tonic-gate 				break;
17870Sstevel@tonic-gate 			}
17880Sstevel@tonic-gate 
17890Sstevel@tonic-gate 			if (flags & RFSCALL_SOFT)
17900Sstevel@tonic-gate 				break;
17910Sstevel@tonic-gate 
17920Sstevel@tonic-gate 			/*
17930Sstevel@tonic-gate 			 * On zone shutdown, just move on.
17940Sstevel@tonic-gate 			 */
17950Sstevel@tonic-gate 			if (zone_status_get(curproc->p_zone) >=
17960Sstevel@tonic-gate 			    ZONE_IS_SHUTTING_DOWN) {
17970Sstevel@tonic-gate 				rpcerr.re_status = RPC_FAILED;
17980Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
17990Sstevel@tonic-gate 				break;
18000Sstevel@tonic-gate 			}
18010Sstevel@tonic-gate 
18020Sstevel@tonic-gate 			/*
18030Sstevel@tonic-gate 			 * NFS client failover support
18040Sstevel@tonic-gate 			 *
18050Sstevel@tonic-gate 			 * If the current server just failed us, we'll
18060Sstevel@tonic-gate 			 * start the process of finding a new server.
18070Sstevel@tonic-gate 			 * After that, we can just retry.
18080Sstevel@tonic-gate 			 */
18090Sstevel@tonic-gate 			if (FAILOVER_MOUNT(mi) && failover_safe(fi)) {
18100Sstevel@tonic-gate 				if (svp == mi->mi_curr_serv)
18110Sstevel@tonic-gate 					failover_newserver(mi);
18120Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
18130Sstevel@tonic-gate 				goto failoverretry;
18140Sstevel@tonic-gate 			}
18150Sstevel@tonic-gate 
18160Sstevel@tonic-gate 			tryagain = TRUE;
18170Sstevel@tonic-gate 			timeo = backoff(timeo);
18180Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
18190Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_PRINTED)) {
18200Sstevel@tonic-gate 				mi->mi_flags |= MI_PRINTED;
18210Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
18220Sstevel@tonic-gate #ifdef DEBUG
18230Sstevel@tonic-gate 				zprintf(zoneid,
18240Sstevel@tonic-gate 			"NFS_ACL%d server %s not responding still trying\n",
18250Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
18260Sstevel@tonic-gate #else
18270Sstevel@tonic-gate 				zprintf(zoneid,
18280Sstevel@tonic-gate 			    "NFS server %s not responding still trying\n",
18290Sstevel@tonic-gate 				    svp->sv_hostname);
18300Sstevel@tonic-gate #endif
18310Sstevel@tonic-gate 			} else
18320Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
18332712Snn35248 			if (*douprintf && nfs_has_ctty()) {
18340Sstevel@tonic-gate 				*douprintf = 0;
18350Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT))
18360Sstevel@tonic-gate #ifdef DEBUG
18370Sstevel@tonic-gate 					uprintf(
18380Sstevel@tonic-gate 			"NFS_ACL%d server %s not responding still trying\n",
18390Sstevel@tonic-gate 					    mi->mi_vers, svp->sv_hostname);
18400Sstevel@tonic-gate #else
18410Sstevel@tonic-gate 					uprintf(
18420Sstevel@tonic-gate 			    "NFS server %s not responding still trying\n",
18430Sstevel@tonic-gate 					    svp->sv_hostname);
18440Sstevel@tonic-gate #endif
18450Sstevel@tonic-gate 			}
18460Sstevel@tonic-gate 
18470Sstevel@tonic-gate #if 0 /* notyet */
18480Sstevel@tonic-gate 			/*
18490Sstevel@tonic-gate 			 * If doing dynamic adjustment of transfer
18500Sstevel@tonic-gate 			 * size and if it's a read or write call
18510Sstevel@tonic-gate 			 * and if the transfer size changed while
18520Sstevel@tonic-gate 			 * retransmitting or if the feedback routine
18530Sstevel@tonic-gate 			 * changed the transfer size,
18540Sstevel@tonic-gate 			 * then exit rfscall so that the transfer
18550Sstevel@tonic-gate 			 * size can be adjusted at the vnops level.
18560Sstevel@tonic-gate 			 */
18570Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
18580Sstevel@tonic-gate 			    mi->mi_acl_timer_type[which] != 0 &&
18590Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
18600Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize ||
18610Sstevel@tonic-gate 			    nfs_feedback(FEEDBACK_REXMIT1, which, mi))) {
18620Sstevel@tonic-gate 				/*
18630Sstevel@tonic-gate 				 * On read or write calls, return
18640Sstevel@tonic-gate 				 * back to the vnode ops level if
18650Sstevel@tonic-gate 				 * the transfer size changed.
18660Sstevel@tonic-gate 				 */
18670Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
18681676Sjpk 				if (cred_cloned)
18691676Sjpk 					crfree(cr);
18700Sstevel@tonic-gate 				return (ENFS_TRYAGAIN);
18710Sstevel@tonic-gate 			}
18720Sstevel@tonic-gate #endif
18730Sstevel@tonic-gate 		}
18740Sstevel@tonic-gate 	} while (tryagain);
18750Sstevel@tonic-gate 
18760Sstevel@tonic-gate 	if (status != RPC_SUCCESS) {
18770Sstevel@tonic-gate 		/*
18780Sstevel@tonic-gate 		 * Let soft mounts use the timed out message.
18790Sstevel@tonic-gate 		 */
18800Sstevel@tonic-gate 		if (status == RPC_INPROGRESS)
18810Sstevel@tonic-gate 			status = RPC_TIMEDOUT;
18820Sstevel@tonic-gate 		nfscl->nfscl_stat.badcalls.value.ui64++;
18830Sstevel@tonic-gate 		if (status == RPC_CANTDECODERES ||
18840Sstevel@tonic-gate 		    status == RPC_PROGUNAVAIL ||
18850Sstevel@tonic-gate 		    status == RPC_PROCUNAVAIL ||
18860Sstevel@tonic-gate 		    status == RPC_CANTDECODEARGS ||
18870Sstevel@tonic-gate 		    status == RPC_PROGVERSMISMATCH)
18880Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
18890Sstevel@tonic-gate 		else if (status != RPC_INTR) {
18900Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
18910Sstevel@tonic-gate 			mi->mi_flags |= MI_DOWN;
18920Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
18930Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
18940Sstevel@tonic-gate #ifdef DEBUG
18950Sstevel@tonic-gate 			bufp = clnt_sperror(client, svp->sv_hostname);
18960Sstevel@tonic-gate 			zprintf(zoneid, "NFS_ACL%d %s failed for %s\n",
18970Sstevel@tonic-gate 			    mi->mi_vers, mi->mi_aclnames[which], bufp);
18982712Snn35248 			if (nfs_has_ctty()) {
18990Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT)) {
19000Sstevel@tonic-gate 					uprintf("NFS_ACL%d %s failed for %s\n",
19010Sstevel@tonic-gate 					    mi->mi_vers, mi->mi_aclnames[which],
19020Sstevel@tonic-gate 					    bufp);
19030Sstevel@tonic-gate 				}
19040Sstevel@tonic-gate 			}
19050Sstevel@tonic-gate 			kmem_free(bufp, MAXPATHLEN);
19060Sstevel@tonic-gate #else
19070Sstevel@tonic-gate 			zprintf(zoneid,
19080Sstevel@tonic-gate 			    "NFS %s failed for server %s: error %d (%s)\n",
19090Sstevel@tonic-gate 			    mi->mi_aclnames[which], svp->sv_hostname,
19100Sstevel@tonic-gate 			    status, clnt_sperrno(status));
19112712Snn35248 			if (nfs_has_ctty()) {
19120Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT))
19130Sstevel@tonic-gate 					uprintf(
19140Sstevel@tonic-gate 				"NFS %s failed for server %s: error %d (%s)\n",
19150Sstevel@tonic-gate 					    mi->mi_aclnames[which],
19160Sstevel@tonic-gate 					    svp->sv_hostname, status,
19170Sstevel@tonic-gate 					    clnt_sperrno(status));
19180Sstevel@tonic-gate 			}
19190Sstevel@tonic-gate #endif
19200Sstevel@tonic-gate 			/*
19210Sstevel@tonic-gate 			 * when CLNT_CALL() fails with RPC_AUTHERROR,
19220Sstevel@tonic-gate 			 * re_errno is set appropriately depending on
19230Sstevel@tonic-gate 			 * the authentication error
19240Sstevel@tonic-gate 			 */
19250Sstevel@tonic-gate 			if (status == RPC_VERSMISMATCH ||
19260Sstevel@tonic-gate 			    status == RPC_PROGVERSMISMATCH)
19270Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
19280Sstevel@tonic-gate 		}
19290Sstevel@tonic-gate 	} else {
19300Sstevel@tonic-gate 		/*
19310Sstevel@tonic-gate 		 * Test the value of mi_down and mi_printed without
19320Sstevel@tonic-gate 		 * holding the mi_lock mutex.  If they are both zero,
19330Sstevel@tonic-gate 		 * then it is okay to skip the down and printed
19340Sstevel@tonic-gate 		 * processing.  This saves on a mutex_enter and
19350Sstevel@tonic-gate 		 * mutex_exit pair for a normal, successful RPC.
19360Sstevel@tonic-gate 		 * This was just complete overhead.
19370Sstevel@tonic-gate 		 */
19380Sstevel@tonic-gate 		if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) {
19390Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
19400Sstevel@tonic-gate 			mi->mi_flags &= ~MI_DOWN;
19410Sstevel@tonic-gate 			if (mi->mi_flags & MI_PRINTED) {
19420Sstevel@tonic-gate 				mi->mi_flags &= ~MI_PRINTED;
19430Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
19440Sstevel@tonic-gate #ifdef DEBUG
19450Sstevel@tonic-gate 				zprintf(zoneid, "NFS_ACL%d server %s ok\n",
19460Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
19470Sstevel@tonic-gate #else
19480Sstevel@tonic-gate 				zprintf(zoneid, "NFS server %s ok\n",
19490Sstevel@tonic-gate 				    svp->sv_hostname);
19500Sstevel@tonic-gate #endif
19510Sstevel@tonic-gate 			} else
19520Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
19530Sstevel@tonic-gate 		}
19540Sstevel@tonic-gate 
19550Sstevel@tonic-gate 		if (*douprintf == 0) {
19560Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_NOPRINT))
19570Sstevel@tonic-gate #ifdef DEBUG
19580Sstevel@tonic-gate 				uprintf("NFS_ACL%d server %s ok\n",
19590Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
19600Sstevel@tonic-gate #else
19610Sstevel@tonic-gate 				uprintf("NFS server %s ok\n", svp->sv_hostname);
19620Sstevel@tonic-gate #endif
19630Sstevel@tonic-gate 			*douprintf = 1;
19640Sstevel@tonic-gate 		}
19650Sstevel@tonic-gate 	}
19660Sstevel@tonic-gate 
19670Sstevel@tonic-gate 	clfree_impl(client, ch, nfscl);
19681676Sjpk 	if (cred_cloned)
19691676Sjpk 		crfree(cr);
19700Sstevel@tonic-gate 
19710Sstevel@tonic-gate 	ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
19720Sstevel@tonic-gate 
19730Sstevel@tonic-gate #if 0 /* notyet */
19740Sstevel@tonic-gate 	TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d",
19750Sstevel@tonic-gate 	    rpcerr.re_errno);
19760Sstevel@tonic-gate #endif
19770Sstevel@tonic-gate 
19780Sstevel@tonic-gate 	return (rpcerr.re_errno);
19790Sstevel@tonic-gate }
19800Sstevel@tonic-gate 
19810Sstevel@tonic-gate int
vattr_to_sattr(struct vattr * vap,struct nfssattr * sa)19820Sstevel@tonic-gate vattr_to_sattr(struct vattr *vap, struct nfssattr *sa)
19830Sstevel@tonic-gate {
19840Sstevel@tonic-gate 	uint_t mask = vap->va_mask;
19850Sstevel@tonic-gate 
19860Sstevel@tonic-gate 	if (!(mask & AT_MODE))
19870Sstevel@tonic-gate 		sa->sa_mode = (uint32_t)-1;
19880Sstevel@tonic-gate 	else
19890Sstevel@tonic-gate 		sa->sa_mode = vap->va_mode;
19900Sstevel@tonic-gate 	if (!(mask & AT_UID))
19910Sstevel@tonic-gate 		sa->sa_uid = (uint32_t)-1;
19920Sstevel@tonic-gate 	else
19930Sstevel@tonic-gate 		sa->sa_uid = (uint32_t)vap->va_uid;
19940Sstevel@tonic-gate 	if (!(mask & AT_GID))
19950Sstevel@tonic-gate 		sa->sa_gid = (uint32_t)-1;
19960Sstevel@tonic-gate 	else
19970Sstevel@tonic-gate 		sa->sa_gid = (uint32_t)vap->va_gid;
19980Sstevel@tonic-gate 	if (!(mask & AT_SIZE))
19990Sstevel@tonic-gate 		sa->sa_size = (uint32_t)-1;
20000Sstevel@tonic-gate 	else
20010Sstevel@tonic-gate 		sa->sa_size = (uint32_t)vap->va_size;
20020Sstevel@tonic-gate 	if (!(mask & AT_ATIME))
20030Sstevel@tonic-gate 		sa->sa_atime.tv_sec = sa->sa_atime.tv_usec = (int32_t)-1;
20040Sstevel@tonic-gate 	else {
20050Sstevel@tonic-gate 		/* check time validity */
20060Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) {
20070Sstevel@tonic-gate 			return (EOVERFLOW);
20080Sstevel@tonic-gate 		}
20090Sstevel@tonic-gate 		sa->sa_atime.tv_sec = vap->va_atime.tv_sec;
20100Sstevel@tonic-gate 		sa->sa_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
20110Sstevel@tonic-gate 	}
20120Sstevel@tonic-gate 	if (!(mask & AT_MTIME))
20130Sstevel@tonic-gate 		sa->sa_mtime.tv_sec = sa->sa_mtime.tv_usec = (int32_t)-1;
20140Sstevel@tonic-gate 	else {
20150Sstevel@tonic-gate 		/* check time validity */
20160Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) {
20170Sstevel@tonic-gate 			return (EOVERFLOW);
20180Sstevel@tonic-gate 		}
20190Sstevel@tonic-gate 		sa->sa_mtime.tv_sec = vap->va_mtime.tv_sec;
20200Sstevel@tonic-gate 		sa->sa_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
20210Sstevel@tonic-gate 	}
20220Sstevel@tonic-gate 	return (0);
20230Sstevel@tonic-gate }
20240Sstevel@tonic-gate 
20250Sstevel@tonic-gate int
vattr_to_sattr3(struct vattr * vap,sattr3 * sa)20260Sstevel@tonic-gate vattr_to_sattr3(struct vattr *vap, sattr3 *sa)
20270Sstevel@tonic-gate {
20280Sstevel@tonic-gate 	uint_t mask = vap->va_mask;
20290Sstevel@tonic-gate 
20300Sstevel@tonic-gate 	if (!(mask & AT_MODE))
20310Sstevel@tonic-gate 		sa->mode.set_it = FALSE;
20320Sstevel@tonic-gate 	else {
20330Sstevel@tonic-gate 		sa->mode.set_it = TRUE;
20340Sstevel@tonic-gate 		sa->mode.mode = (mode3)vap->va_mode;
20350Sstevel@tonic-gate 	}
20360Sstevel@tonic-gate 	if (!(mask & AT_UID))
20370Sstevel@tonic-gate 		sa->uid.set_it = FALSE;
20380Sstevel@tonic-gate 	else {
20390Sstevel@tonic-gate 		sa->uid.set_it = TRUE;
20400Sstevel@tonic-gate 		sa->uid.uid = (uid3)vap->va_uid;
20410Sstevel@tonic-gate 	}
20420Sstevel@tonic-gate 	if (!(mask & AT_GID))
20430Sstevel@tonic-gate 		sa->gid.set_it = FALSE;
20440Sstevel@tonic-gate 	else {
20450Sstevel@tonic-gate 		sa->gid.set_it = TRUE;
20460Sstevel@tonic-gate 		sa->gid.gid = (gid3)vap->va_gid;
20470Sstevel@tonic-gate 	}
20480Sstevel@tonic-gate 	if (!(mask & AT_SIZE))
20490Sstevel@tonic-gate 		sa->size.set_it = FALSE;
20500Sstevel@tonic-gate 	else {
20510Sstevel@tonic-gate 		sa->size.set_it = TRUE;
20520Sstevel@tonic-gate 		sa->size.size = (size3)vap->va_size;
20530Sstevel@tonic-gate 	}
20540Sstevel@tonic-gate 	if (!(mask & AT_ATIME))
20550Sstevel@tonic-gate 		sa->atime.set_it = DONT_CHANGE;
20560Sstevel@tonic-gate 	else {
20570Sstevel@tonic-gate 		/* check time validity */
20580Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) {
20590Sstevel@tonic-gate 			return (EOVERFLOW);
20600Sstevel@tonic-gate 		}
20610Sstevel@tonic-gate 		sa->atime.set_it = SET_TO_CLIENT_TIME;
20620Sstevel@tonic-gate 		sa->atime.atime.seconds = (uint32)vap->va_atime.tv_sec;
20630Sstevel@tonic-gate 		sa->atime.atime.nseconds = (uint32)vap->va_atime.tv_nsec;
20640Sstevel@tonic-gate 	}
20650Sstevel@tonic-gate 	if (!(mask & AT_MTIME))
20660Sstevel@tonic-gate 		sa->mtime.set_it = DONT_CHANGE;
20670Sstevel@tonic-gate 	else {
20680Sstevel@tonic-gate 		/* check time validity */
20690Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) {
20700Sstevel@tonic-gate 			return (EOVERFLOW);
20710Sstevel@tonic-gate 		}
20720Sstevel@tonic-gate 		sa->mtime.set_it = SET_TO_CLIENT_TIME;
20730Sstevel@tonic-gate 		sa->mtime.mtime.seconds = (uint32)vap->va_mtime.tv_sec;
20740Sstevel@tonic-gate 		sa->mtime.mtime.nseconds = (uint32)vap->va_mtime.tv_nsec;
20750Sstevel@tonic-gate 	}
20760Sstevel@tonic-gate 	return (0);
20770Sstevel@tonic-gate }
20780Sstevel@tonic-gate 
20790Sstevel@tonic-gate void
setdiropargs(struct nfsdiropargs * da,char * nm,vnode_t * dvp)20800Sstevel@tonic-gate setdiropargs(struct nfsdiropargs *da, char *nm, vnode_t *dvp)
20810Sstevel@tonic-gate {
20820Sstevel@tonic-gate 
20830Sstevel@tonic-gate 	da->da_fhandle = VTOFH(dvp);
20840Sstevel@tonic-gate 	da->da_name = nm;
20850Sstevel@tonic-gate 	da->da_flags = 0;
20860Sstevel@tonic-gate }
20870Sstevel@tonic-gate 
20880Sstevel@tonic-gate void
setdiropargs3(diropargs3 * da,char * nm,vnode_t * dvp)20890Sstevel@tonic-gate setdiropargs3(diropargs3 *da, char *nm, vnode_t *dvp)
20900Sstevel@tonic-gate {
20910Sstevel@tonic-gate 
20920Sstevel@tonic-gate 	da->dirp = VTOFH3(dvp);
20930Sstevel@tonic-gate 	da->name = nm;
20940Sstevel@tonic-gate }
20950Sstevel@tonic-gate 
20960Sstevel@tonic-gate int
setdirgid(vnode_t * dvp,gid_t * gidp,cred_t * cr)20970Sstevel@tonic-gate setdirgid(vnode_t *dvp, gid_t *gidp, cred_t *cr)
20980Sstevel@tonic-gate {
20990Sstevel@tonic-gate 	int error;
21000Sstevel@tonic-gate 	rnode_t *rp;
21010Sstevel@tonic-gate 	struct vattr va;
21020Sstevel@tonic-gate 
21030Sstevel@tonic-gate 	va.va_mask = AT_MODE | AT_GID;
21045331Samw 	error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
21050Sstevel@tonic-gate 	if (error)
21060Sstevel@tonic-gate 		return (error);
21070Sstevel@tonic-gate 
21080Sstevel@tonic-gate 	/*
21090Sstevel@tonic-gate 	 * To determine the expected group-id of the created file:
21100Sstevel@tonic-gate 	 *  1)	If the filesystem was not mounted with the Old-BSD-compatible
21110Sstevel@tonic-gate 	 *	GRPID option, and the directory's set-gid bit is clear,
21120Sstevel@tonic-gate 	 *	then use the process's gid.
21130Sstevel@tonic-gate 	 *  2)	Otherwise, set the group-id to the gid of the parent directory.
21140Sstevel@tonic-gate 	 */
21150Sstevel@tonic-gate 	rp = VTOR(dvp);
21160Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
21170Sstevel@tonic-gate 	if (!(VTOMI(dvp)->mi_flags & MI_GRPID) && !(va.va_mode & VSGID))
21180Sstevel@tonic-gate 		*gidp = crgetgid(cr);
21190Sstevel@tonic-gate 	else
21200Sstevel@tonic-gate 		*gidp = va.va_gid;
21210Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
21220Sstevel@tonic-gate 	return (0);
21230Sstevel@tonic-gate }
21240Sstevel@tonic-gate 
21250Sstevel@tonic-gate int
setdirmode(vnode_t * dvp,mode_t * omp,cred_t * cr)21260Sstevel@tonic-gate setdirmode(vnode_t *dvp, mode_t *omp, cred_t *cr)
21270Sstevel@tonic-gate {
21280Sstevel@tonic-gate 	int error;
21290Sstevel@tonic-gate 	struct vattr va;
21300Sstevel@tonic-gate 
21310Sstevel@tonic-gate 	va.va_mask = AT_MODE;
21325331Samw 	error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
21330Sstevel@tonic-gate 	if (error)
21340Sstevel@tonic-gate 		return (error);
21350Sstevel@tonic-gate 
21360Sstevel@tonic-gate 	/*
21370Sstevel@tonic-gate 	 * Modify the expected mode (om) so that the set-gid bit matches
21380Sstevel@tonic-gate 	 * that of the parent directory (dvp).
21390Sstevel@tonic-gate 	 */
21400Sstevel@tonic-gate 	if (va.va_mode & VSGID)
21410Sstevel@tonic-gate 		*omp |= VSGID;
21420Sstevel@tonic-gate 	else
21430Sstevel@tonic-gate 		*omp &= ~VSGID;
21440Sstevel@tonic-gate 	return (0);
21450Sstevel@tonic-gate }
21460Sstevel@tonic-gate 
21470Sstevel@tonic-gate void
nfs_setswaplike(vnode_t * vp,vattr_t * vap)21480Sstevel@tonic-gate nfs_setswaplike(vnode_t *vp, vattr_t *vap)
21490Sstevel@tonic-gate {
21500Sstevel@tonic-gate 
21510Sstevel@tonic-gate 	if (vp->v_type == VREG && (vap->va_mode & (VEXEC | VSVTX)) == VSVTX) {
21520Sstevel@tonic-gate 		if (!(vp->v_flag & VSWAPLIKE)) {
21530Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
21540Sstevel@tonic-gate 			vp->v_flag |= VSWAPLIKE;
21550Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
21560Sstevel@tonic-gate 		}
21570Sstevel@tonic-gate 	} else {
21580Sstevel@tonic-gate 		if (vp->v_flag & VSWAPLIKE) {
21590Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
21600Sstevel@tonic-gate 			vp->v_flag &= ~VSWAPLIKE;
21610Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
21620Sstevel@tonic-gate 		}
21630Sstevel@tonic-gate 	}
21640Sstevel@tonic-gate }
21650Sstevel@tonic-gate 
/*
 * Free the resources associated with an rnode: quiesce async i/o,
 * flush/invalidate cached pages, then release the held credential
 * and the per-rnode caches (access, readdir, symlink, ACL,
 * pathconf).  Errors from the final page flush (other than
 * ENOSPC/EDQUOT, which are latched in r_error) are discarded —
 * there is no caller left to report them to.
 */
static void
rinactive(rnode_t *rp, cred_t *cr)
{
	vnode_t *vp;
	cred_t *cred;
	char *contents;
	int size;
	vsecattr_t *vsp;
	int error;
	nfs3_pathconf_info *info;

	/*
	 * Before freeing anything, wait until all asynchronous
	 * activity is done on this rnode.  This will allow all
	 * asynchronous read ahead and write behind i/o's to
	 * finish.
	 */
	mutex_enter(&rp->r_statelock);
	while (rp->r_count > 0)
		cv_wait(&rp->r_cv, &rp->r_statelock);
	mutex_exit(&rp->r_statelock);

	/*
	 * Flush and invalidate all pages associated with the vnode.
	 */
	vp = RTOV(rp);
	if (vn_has_cached_data(vp)) {
		ASSERT(vp->v_type != VCHR);
		if ((rp->r_flags & RDIRTY) && !rp->r_error) {
			error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
			if (error && (error == ENOSPC || error == EDQUOT)) {
				/*
				 * Latch the first out-of-space error in
				 * r_error so it is not silently lost.
				 */
				mutex_enter(&rp->r_statelock);
				if (!rp->r_error)
					rp->r_error = error;
				mutex_exit(&rp->r_statelock);
			}
		}
		nfs_invalidate_pages(vp, (u_offset_t)0, cr);
	}

	/*
	 * Free any held credentials and caches which may be associated
	 * with this rnode.  Detach everything under r_statelock, then
	 * do the actual freeing below without the lock held.
	 */
	mutex_enter(&rp->r_statelock);
	cred = rp->r_cred;
	rp->r_cred = NULL;
	contents = rp->r_symlink.contents;
	size = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;
	vsp = rp->r_secattr;
	rp->r_secattr = NULL;
	info = rp->r_pathconf;
	rp->r_pathconf = NULL;
	mutex_exit(&rp->r_statelock);

	/*
	 * Free the held credential.
	 */
	if (cred != NULL)
		crfree(cred);

	/*
	 * Free the access cache entries.
	 */
	(void) nfs_access_purge_rp(rp);

	/*
	 * Free the readdir cache entries.
	 */
	if (HAVE_RDDIR_CACHE(rp))
		nfs_purge_rddir_cache(vp);

	/*
	 * Free the symbolic link cache.
	 */
	if (contents != NULL) {

		kmem_free((void *)contents, size);
	}

	/*
	 * Free any cached ACL.
	 */
	if (vsp != NULL)
		nfs_acl_free(vsp);

	/*
	 * Free any cached pathconf information.
	 */
	if (info != NULL)
		kmem_free(info, sizeof (*info));
}
22620Sstevel@tonic-gate 
/*
 * Return a vnode for the given NFS Version 2 file handle.
 * If no rnode exists for this fhandle, create one and put it
 * into the hash queues.  If the rnode for this fhandle
 * already exists, return it.
 *
 * If attr is non-NULL the attributes are cached: for an existing
 * node via the full consistency path (nfs_cache_fattr), for a new
 * node by initializing v_type/v_rdev and seeding the attribute
 * cache directly while the hash bucket lock is still held.
 *
 * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
 */
vnode_t *
makenfsnode(fhandle_t *fh, struct nfsfattr *attr, struct vfs *vfsp,
    hrtime_t t, cred_t *cr, char *dnm, char *nm)
{
	int newnode;
	int index;
	vnode_t *vp;
	nfs_fhandle nfh;
	vattr_t va;

	/* repackage the fixed-size v2 handle into the generic form */
	nfh.fh_len = NFS_FHSIZE;
	bcopy(fh, nfh.fh_buf, NFS_FHSIZE);

	index = rtablehash(&nfh);
	rw_enter(&rtable[index].r_lock, RW_READER);

	/* returns with rtable[index].r_lock still held */
	vp = make_rnode(&nfh, &rtable[index], vfsp, nfs_vnodeops,
	    nfs_putapage, nfs_rddir_compar, &newnode, cr, dnm, nm);

	if (attr != NULL) {
		if (!newnode) {
			/*
			 * Existing node: drop the bucket lock before the
			 * cache-consistency check, which may do more work.
			 */
			rw_exit(&rtable[index].r_lock);
			(void) nfs_cache_fattr(vp, attr, &va, t, cr);
		} else {
			if (attr->na_type < NFNON || attr->na_type > NFSOC)
				vp->v_type = VBAD;
			else
				vp->v_type = n2v_type(attr);
			/*
			 * A translation here seems to be necessary
			 * because this function can be called
			 * with `attr' that has come from the wire,
			 * and been operated on by vattr_to_nattr().
			 * See nfsrootvp()->VOP_GETTATTR()->nfsgetattr()
			 * ->nfs_getattr_otw()->rfscall()->vattr_to_nattr()
			 * ->makenfsnode().
			 */
			if ((attr->na_rdev & 0xffff0000) == 0)
				vp->v_rdev = nfsv2_expdev(attr->na_rdev);
			else
				vp->v_rdev = expldev(n2v_rdev(attr));
			nfs_attrcache(vp, attr, t);
			rw_exit(&rtable[index].r_lock);
		}
	} else {
		/* no attributes: a brand-new node must not claim any */
		if (newnode) {
			PURGE_ATTRCACHE(vp);
		}
		rw_exit(&rtable[index].r_lock);
	}

	return (vp);
}
23240Sstevel@tonic-gate 
/*
 * Return a vnode for the given NFS Version 3 file handle.
 * If no rnode exists for this fhandle, create one and put it
 * into the hash queues.  If the rnode for this fhandle
 * already exists, return it.
 *
 * This variant takes attributes already translated to vattr form
 * (vap may be NULL, in which case a new node's attribute cache is
 * purged).  For a new node the attributes are only cached if they
 * are not older than what is already there (r_mtime <= t).
 *
 * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
 */
vnode_t *
makenfs3node_va(nfs_fh3 *fh, vattr_t *vap, struct vfs *vfsp, hrtime_t t,
    cred_t *cr, char *dnm, char *nm)
{
	int newnode;
	int index;
	vnode_t *vp;

	index = rtablehash((nfs_fhandle *)fh);
	rw_enter(&rtable[index].r_lock, RW_READER);

	/* returns with rtable[index].r_lock still held */
	vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp,
	    nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr,
	    dnm, nm);

	if (vap == NULL) {
		/* no attributes: a brand-new node must not claim any */
		if (newnode) {
			PURGE_ATTRCACHE(vp);
		}
		rw_exit(&rtable[index].r_lock);
		return (vp);
	}

	if (!newnode) {
		/*
		 * Existing node: drop the bucket lock before the
		 * cache-consistency check, which may do more work.
		 */
		rw_exit(&rtable[index].r_lock);
		nfs_attr_cache(vp, vap, t, cr);
	} else {
		rnode_t *rp = VTOR(vp);

		vp->v_type = vap->va_type;
		vp->v_rdev = vap->va_rdev;

		/* only seed the cache if these attributes are not stale */
		mutex_enter(&rp->r_statelock);
		if (rp->r_mtime <= t)
			nfs_attrcache_va(vp, vap);
		mutex_exit(&rp->r_statelock);
		rw_exit(&rtable[index].r_lock);
	}

	return (vp);
}
23740Sstevel@tonic-gate 
/*
 * Return a vnode for the given NFS Version 3 file handle, taking the
 * attributes in raw over-the-wire fattr3 form (attr may be NULL).
 * Same hashing/caching scheme as makenfs3node_va(): an existing
 * rnode is returned after a cache-consistency update; a new one has
 * v_type/v_rdev initialized from the wire attributes and its
 * attribute cache seeded while the bucket lock is still held.
 *
 * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
 */
vnode_t *
makenfs3node(nfs_fh3 *fh, fattr3 *attr, struct vfs *vfsp, hrtime_t t,
    cred_t *cr, char *dnm, char *nm)
{
	int newnode;
	int index;
	vnode_t *vp;
	vattr_t va;

	index = rtablehash((nfs_fhandle *)fh);
	rw_enter(&rtable[index].r_lock, RW_READER);

	/* returns with rtable[index].r_lock still held */
	vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp,
	    nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr,
	    dnm, nm);

	if (attr == NULL) {
		/* no attributes: a brand-new node must not claim any */
		if (newnode) {
			PURGE_ATTRCACHE(vp);
		}
		rw_exit(&rtable[index].r_lock);
		return (vp);
	}

	if (!newnode) {
		/*
		 * Existing node: drop the bucket lock before the
		 * cache-consistency check, which may do more work.
		 */
		rw_exit(&rtable[index].r_lock);
		(void) nfs3_cache_fattr3(vp, attr, &va, t, cr);
	} else {
		/* reject wire file types outside the defined NF3 range */
		if (attr->type < NF3REG || attr->type > NF3FIFO)
			vp->v_type = VBAD;
		else
			vp->v_type = nf3_to_vt[attr->type];
		vp->v_rdev = makedevice(attr->rdev.specdata1,
		    attr->rdev.specdata2);
		nfs3_attrcache(vp, attr, t);
		rw_exit(&rtable[index].r_lock);
	}

	return (vp);
}
24150Sstevel@tonic-gate 
24160Sstevel@tonic-gate /*
24170Sstevel@tonic-gate  * Read this comment before making changes to rtablehash()!
24180Sstevel@tonic-gate  * This is a hash function in which seemingly obvious and harmless
24190Sstevel@tonic-gate  * changes can cause escalations costing million dollars!
24200Sstevel@tonic-gate  * Know what you are doing.
24210Sstevel@tonic-gate  *
24220Sstevel@tonic-gate  * rtablehash() implements Jenkins' one-at-a-time hash algorithm.  The
24230Sstevel@tonic-gate  * algorithm is currently detailed here:
24240Sstevel@tonic-gate  *
24250Sstevel@tonic-gate  *   http://burtleburtle.net/bob/hash/doobs.html
24260Sstevel@tonic-gate  *
24270Sstevel@tonic-gate  * Of course, the above link may not be valid by the time you are reading
24280Sstevel@tonic-gate  * this, but suffice it to say that the one-at-a-time algorithm works well in
24290Sstevel@tonic-gate  * almost all cases.  If you are changing the algorithm be sure to verify that
24300Sstevel@tonic-gate  * the hash algorithm still provides even distribution in all cases and with
24310Sstevel@tonic-gate  * any server returning filehandles in whatever order (sequential or random).
24320Sstevel@tonic-gate  */
24330Sstevel@tonic-gate static int
rtablehash(nfs_fhandle * fh)24340Sstevel@tonic-gate rtablehash(nfs_fhandle *fh)
24350Sstevel@tonic-gate {
24360Sstevel@tonic-gate 	ulong_t hash, len, i;
24370Sstevel@tonic-gate 	char *key;
24380Sstevel@tonic-gate 
24390Sstevel@tonic-gate 	key = fh->fh_buf;
24400Sstevel@tonic-gate 	len = (ulong_t)fh->fh_len;
24410Sstevel@tonic-gate 	for (hash = 0, i = 0; i < len; i++) {
24420Sstevel@tonic-gate 		hash += key[i];
24430Sstevel@tonic-gate 		hash += (hash << 10);
24440Sstevel@tonic-gate 		hash ^= (hash >> 6);
24450Sstevel@tonic-gate 	}
24460Sstevel@tonic-gate 	hash += (hash << 3);
24470Sstevel@tonic-gate 	hash ^= (hash >> 11);
24480Sstevel@tonic-gate 	hash += (hash << 15);
24490Sstevel@tonic-gate 	return (hash & rtablemask);
24500Sstevel@tonic-gate }
24510Sstevel@tonic-gate 
24520Sstevel@tonic-gate static vnode_t *
make_rnode(nfs_fhandle * fh,rhashq_t * rhtp,struct vfs * vfsp,struct vnodeops * vops,int (* putapage)(vnode_t *,page_t *,u_offset_t *,size_t *,int,cred_t *),int (* compar)(const void *,const void *),int * newnode,cred_t * cr,char * dnm,char * nm)24530Sstevel@tonic-gate make_rnode(nfs_fhandle *fh, rhashq_t *rhtp, struct vfs *vfsp,
24540Sstevel@tonic-gate     struct vnodeops *vops,
24550Sstevel@tonic-gate     int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
24560Sstevel@tonic-gate     int (*compar)(const void *, const void *),
24570Sstevel@tonic-gate     int *newnode, cred_t *cr, char *dnm, char *nm)
24580Sstevel@tonic-gate {
24590Sstevel@tonic-gate 	rnode_t *rp;
24600Sstevel@tonic-gate 	rnode_t *trp;
24610Sstevel@tonic-gate 	vnode_t *vp;
24620Sstevel@tonic-gate 	mntinfo_t *mi;
24630Sstevel@tonic-gate 
24640Sstevel@tonic-gate 	ASSERT(RW_READ_HELD(&rhtp->r_lock));
24650Sstevel@tonic-gate 
24660Sstevel@tonic-gate 	mi = VFTOMI(vfsp);
24670Sstevel@tonic-gate start:
24680Sstevel@tonic-gate 	if ((rp = rfind(rhtp, fh, vfsp)) != NULL) {
24690Sstevel@tonic-gate 		vp = RTOV(rp);
24700Sstevel@tonic-gate 		nfs_set_vroot(vp);
24710Sstevel@tonic-gate 		*newnode = 0;
24720Sstevel@tonic-gate 		return (vp);
24730Sstevel@tonic-gate 	}
24740Sstevel@tonic-gate 	rw_exit(&rhtp->r_lock);
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 	mutex_enter(&rpfreelist_lock);
24770Sstevel@tonic-gate 	if (rpfreelist != NULL && rnew >= nrnode) {
24780Sstevel@tonic-gate 		rp = rpfreelist;
24790Sstevel@tonic-gate 		rp_rmfree(rp);
24800Sstevel@tonic-gate 		mutex_exit(&rpfreelist_lock);
24810Sstevel@tonic-gate 
24820Sstevel@tonic-gate 		vp = RTOV(rp);
24830Sstevel@tonic-gate 
24840Sstevel@tonic-gate 		if (rp->r_flags & RHASHED) {
24850Sstevel@tonic-gate 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
24860Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
24870Sstevel@tonic-gate 			if (vp->v_count > 1) {
24880Sstevel@tonic-gate 				vp->v_count--;
24890Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
24900Sstevel@tonic-gate 				rw_exit(&rp->r_hashq->r_lock);
24910Sstevel@tonic-gate 				rw_enter(&rhtp->r_lock, RW_READER);
24920Sstevel@tonic-gate 				goto start;
24930Sstevel@tonic-gate 			}
24940Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
24950Sstevel@tonic-gate 			rp_rmhash_locked(rp);
24960Sstevel@tonic-gate 			rw_exit(&rp->r_hashq->r_lock);
24970Sstevel@tonic-gate 		}
24980Sstevel@tonic-gate 
24990Sstevel@tonic-gate 		rinactive(rp, cr);
25000Sstevel@tonic-gate 
25010Sstevel@tonic-gate 		mutex_enter(&vp->v_lock);
25020Sstevel@tonic-gate 		if (vp->v_count > 1) {
25030Sstevel@tonic-gate 			vp->v_count--;
25040Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
25050Sstevel@tonic-gate 			rw_enter(&rhtp->r_lock, RW_READER);
25060Sstevel@tonic-gate 			goto start;
25070Sstevel@tonic-gate 		}
25080Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
25090Sstevel@tonic-gate 		vn_invalid(vp);
25100Sstevel@tonic-gate 		/*
25110Sstevel@tonic-gate 		 * destroy old locks before bzero'ing and
25120Sstevel@tonic-gate 		 * recreating the locks below.
25130Sstevel@tonic-gate 		 */
25140Sstevel@tonic-gate 		nfs_rw_destroy(&rp->r_rwlock);
25150Sstevel@tonic-gate 		nfs_rw_destroy(&rp->r_lkserlock);
25160Sstevel@tonic-gate 		mutex_destroy(&rp->r_statelock);
25170Sstevel@tonic-gate 		cv_destroy(&rp->r_cv);
25180Sstevel@tonic-gate 		cv_destroy(&rp->r_commit.c_cv);
25190Sstevel@tonic-gate 		nfs_free_r_path(rp);
25200Sstevel@tonic-gate 		avl_destroy(&rp->r_dir);
25210Sstevel@tonic-gate 		/*
25220Sstevel@tonic-gate 		 * Make sure that if rnode is recycled then
25230Sstevel@tonic-gate 		 * VFS count is decremented properly before
25240Sstevel@tonic-gate 		 * reuse.
25250Sstevel@tonic-gate 		 */
25260Sstevel@tonic-gate 		VFS_RELE(vp->v_vfsp);
25270Sstevel@tonic-gate 		vn_reinit(vp);
25280Sstevel@tonic-gate 	} else {
25290Sstevel@tonic-gate 		vnode_t *new_vp;
25300Sstevel@tonic-gate 
25310Sstevel@tonic-gate 		mutex_exit(&rpfreelist_lock);
25320Sstevel@tonic-gate 
25330Sstevel@tonic-gate 		rp = kmem_cache_alloc(rnode_cache, KM_SLEEP);
25340Sstevel@tonic-gate 		new_vp = vn_alloc(KM_SLEEP);
25350Sstevel@tonic-gate 
25360Sstevel@tonic-gate 		atomic_add_long((ulong_t *)&rnew, 1);
25370Sstevel@tonic-gate #ifdef DEBUG
25380Sstevel@tonic-gate 		clstat_debug.nrnode.value.ui64++;
25390Sstevel@tonic-gate #endif
25400Sstevel@tonic-gate 		vp = new_vp;
25410Sstevel@tonic-gate 	}
25420Sstevel@tonic-gate 
25430Sstevel@tonic-gate 	bzero(rp, sizeof (*rp));
25440Sstevel@tonic-gate 	rp->r_vnode = vp;
25450Sstevel@tonic-gate 	nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
25460Sstevel@tonic-gate 	nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
25470Sstevel@tonic-gate 	mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
25480Sstevel@tonic-gate 	cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
25490Sstevel@tonic-gate 	cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
25500Sstevel@tonic-gate 	rp->r_fh.fh_len = fh->fh_len;
25510Sstevel@tonic-gate 	bcopy(fh->fh_buf, rp->r_fh.fh_buf, fh->fh_len);
25520Sstevel@tonic-gate 	rp->r_server = mi->mi_curr_serv;
25530Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
25540Sstevel@tonic-gate 		/*
25550Sstevel@tonic-gate 		 * If replicated servers, stash pathnames
25560Sstevel@tonic-gate 		 */
25570Sstevel@tonic-gate 		if (dnm != NULL && nm != NULL) {
25580Sstevel@tonic-gate 			char *s, *p;
25590Sstevel@tonic-gate 			uint_t len;
25600Sstevel@tonic-gate 
25610Sstevel@tonic-gate 			len = (uint_t)(strlen(dnm) + strlen(nm) + 2);
25620Sstevel@tonic-gate 			rp->r_path = kmem_alloc(len, KM_SLEEP);
25630Sstevel@tonic-gate #ifdef DEBUG
25640Sstevel@tonic-gate 			clstat_debug.rpath.value.ui64 += len;
25650Sstevel@tonic-gate #endif
25660Sstevel@tonic-gate 			s = rp->r_path;
25670Sstevel@tonic-gate 			for (p = dnm; *p; p++)
25680Sstevel@tonic-gate 				*s++ = *p;
25690Sstevel@tonic-gate 			*s++ = '/';
25700Sstevel@tonic-gate 			for (p = nm; *p; p++)
25710Sstevel@tonic-gate 				*s++ = *p;
25720Sstevel@tonic-gate 			*s = '\0';
25730Sstevel@tonic-gate 		} else {
25740Sstevel@tonic-gate 			/* special case for root */
25750Sstevel@tonic-gate 			rp->r_path = kmem_alloc(2, KM_SLEEP);
25760Sstevel@tonic-gate #ifdef DEBUG
25770Sstevel@tonic-gate 			clstat_debug.rpath.value.ui64 += 2;
25780Sstevel@tonic-gate #endif
25790Sstevel@tonic-gate 			*rp->r_path = '.';
25800Sstevel@tonic-gate 			*(rp->r_path + 1) = '\0';
25810Sstevel@tonic-gate 		}
25820Sstevel@tonic-gate 	}
25830Sstevel@tonic-gate 	VFS_HOLD(vfsp);
25840Sstevel@tonic-gate 	rp->r_putapage = putapage;
25850Sstevel@tonic-gate 	rp->r_hashq = rhtp;
25860Sstevel@tonic-gate 	rp->r_flags = RREADDIRPLUS;
25870Sstevel@tonic-gate 	avl_create(&rp->r_dir, compar, sizeof (rddir_cache),
25880Sstevel@tonic-gate 	    offsetof(rddir_cache, tree));
25890Sstevel@tonic-gate 	vn_setops(vp, vops);
25900Sstevel@tonic-gate 	vp->v_data = (caddr_t)rp;
25910Sstevel@tonic-gate 	vp->v_vfsp = vfsp;
25920Sstevel@tonic-gate 	vp->v_type = VNON;
2593*11888SPavel.Filipensky@Sun.COM 	vp->v_flag |= VMODSORT;
25940Sstevel@tonic-gate 	nfs_set_vroot(vp);
25950Sstevel@tonic-gate 
25960Sstevel@tonic-gate 	/*
25970Sstevel@tonic-gate 	 * There is a race condition if someone else
25980Sstevel@tonic-gate 	 * alloc's the rnode while no locks are held, so we
25990Sstevel@tonic-gate 	 * check again and recover if found.
26000Sstevel@tonic-gate 	 */
26010Sstevel@tonic-gate 	rw_enter(&rhtp->r_lock, RW_WRITER);
26020Sstevel@tonic-gate 	if ((trp = rfind(rhtp, fh, vfsp)) != NULL) {
26030Sstevel@tonic-gate 		vp = RTOV(trp);
26040Sstevel@tonic-gate 		nfs_set_vroot(vp);
26050Sstevel@tonic-gate 		*newnode = 0;
26060Sstevel@tonic-gate 		rw_exit(&rhtp->r_lock);
26070Sstevel@tonic-gate 		rp_addfree(rp, cr);
26080Sstevel@tonic-gate 		rw_enter(&rhtp->r_lock, RW_READER);
26090Sstevel@tonic-gate 		return (vp);
26100Sstevel@tonic-gate 	}
26110Sstevel@tonic-gate 	rp_addhash(rp);
26120Sstevel@tonic-gate 	*newnode = 1;
26130Sstevel@tonic-gate 	return (vp);
26140Sstevel@tonic-gate }
26150Sstevel@tonic-gate 
2616*11888SPavel.Filipensky@Sun.COM /*
2617*11888SPavel.Filipensky@Sun.COM  * Callback function to check if the page should be marked as
2618*11888SPavel.Filipensky@Sun.COM  * modified. In the positive case, p_fsdata is set to C_NOCOMMIT.
2619*11888SPavel.Filipensky@Sun.COM  */
2620*11888SPavel.Filipensky@Sun.COM int
nfs_setmod_check(page_t * pp)2621*11888SPavel.Filipensky@Sun.COM nfs_setmod_check(page_t *pp)
2622*11888SPavel.Filipensky@Sun.COM {
2623*11888SPavel.Filipensky@Sun.COM 	if (pp->p_fsdata != C_NOCOMMIT) {
2624*11888SPavel.Filipensky@Sun.COM 		pp->p_fsdata = C_NOCOMMIT;
2625*11888SPavel.Filipensky@Sun.COM 		return (1);
2626*11888SPavel.Filipensky@Sun.COM 	}
2627*11888SPavel.Filipensky@Sun.COM 	return (0);
2628*11888SPavel.Filipensky@Sun.COM }
2629*11888SPavel.Filipensky@Sun.COM 
26300Sstevel@tonic-gate static void
nfs_set_vroot(vnode_t * vp)26310Sstevel@tonic-gate nfs_set_vroot(vnode_t *vp)
26320Sstevel@tonic-gate {
26330Sstevel@tonic-gate 	rnode_t *rp;
26340Sstevel@tonic-gate 	nfs_fhandle *rootfh;
26350Sstevel@tonic-gate 
26360Sstevel@tonic-gate 	rp = VTOR(vp);
26370Sstevel@tonic-gate 	rootfh = &rp->r_server->sv_fhandle;
26380Sstevel@tonic-gate 	if (rootfh->fh_len == rp->r_fh.fh_len &&
26390Sstevel@tonic-gate 	    bcmp(rootfh->fh_buf, rp->r_fh.fh_buf, rp->r_fh.fh_len) == 0) {
26400Sstevel@tonic-gate 		if (!(vp->v_flag & VROOT)) {
26410Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
26420Sstevel@tonic-gate 			vp->v_flag |= VROOT;
26430Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
26440Sstevel@tonic-gate 		}
26450Sstevel@tonic-gate 	}
26460Sstevel@tonic-gate }
26470Sstevel@tonic-gate 
26480Sstevel@tonic-gate static void
nfs_free_r_path(rnode_t * rp)26490Sstevel@tonic-gate nfs_free_r_path(rnode_t *rp)
26500Sstevel@tonic-gate {
26510Sstevel@tonic-gate 	char *path;
26520Sstevel@tonic-gate 	size_t len;
26530Sstevel@tonic-gate 
26540Sstevel@tonic-gate 	path = rp->r_path;
26550Sstevel@tonic-gate 	if (path) {
26560Sstevel@tonic-gate 		rp->r_path = NULL;
26570Sstevel@tonic-gate 		len = strlen(path) + 1;
26580Sstevel@tonic-gate 		kmem_free(path, len);
26590Sstevel@tonic-gate #ifdef DEBUG
26600Sstevel@tonic-gate 		clstat_debug.rpath.value.ui64 -= len;
26610Sstevel@tonic-gate #endif
26620Sstevel@tonic-gate 	}
26630Sstevel@tonic-gate }
26640Sstevel@tonic-gate 
/*
 * Put an rnode on the free list.
 *
 * Rnodes which were allocated above and beyond the normal limit
 * are immediately freed.
 *
 * The caller holds a reference on the vnode (v_count >= 1); this
 * routine consumes that reference, either by destroying the rnode,
 * by turning the reference into the freelist's reference, or by
 * dropping it when another holder is detected.
 */
void
rp_addfree(rnode_t *rp, cred_t *cr)
{
	vnode_t *vp;
	struct vfs *vfsp;

	vp = RTOV(rp);
	ASSERT(vp->v_count >= 1);
	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);

	/*
	 * If we have too many rnodes allocated and there are no
	 * references to this rnode, or if the rnode is no longer
	 * accessible because it does not reside in the hash queues,
	 * or if an i/o error occurred while writing to the file,
	 * then just free it instead of putting it on the rnode
	 * freelist.
	 */
	vfsp = vp->v_vfsp;
	if (((rnew > nrnode || !(rp->r_flags & RHASHED) || rp->r_error ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
		if (rp->r_flags & RHASHED) {
			/*
			 * Hold the hash bucket lock across the reference
			 * check and the unhash so a concurrent rfind()
			 * cannot hand out this rnode mid-teardown.
			 */
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * Someone else holds a reference; just
				 * drop ours and let that holder dispose
				 * of the rnode when it is done.
				 */
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&rp->r_hashq->r_lock);
				return;
			}
			mutex_exit(&vp->v_lock);
			rp_rmhash_locked(rp);
			rw_exit(&rp->r_hashq->r_lock);
		}

		/* Flush/invalidate cached state before tearing down. */
		rinactive(rp, cr);

		/*
		 * Recheck the vnode reference count.  We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock.  The
		 * rnode is not in the rnode hash queues, so the
		 * only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the rnode was marked
		 * with RDIRTY or for a modified page.  This
		 * reference may have been acquired before our call
		 * to rinactive.  The i/o may have been completed,
		 * thus allowing rinactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet.  In any case, the rnode can not be destroyed
		 * until the other references to this vnode have been
		 * released.  The other references will take care of
		 * either destroying the rnode or placing it on the
		 * rnode freelist.  If there are no other references,
		 * then the rnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			vp->v_count--;
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		destroy_rnode(rp);
		return;
	}

	/*
	 * Lock the hash queue and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the rnode should not be placed on the
	 * freelist.  If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this rnode to the freelist.
	 */
	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		vp->v_count--;
		mutex_exit(&vp->v_lock);
		rw_exit(&rp->r_hashq->r_lock);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * If there is no cached data or metadata for this file, then
	 * put the rnode on the front of the freelist so that it will
	 * be reused before other rnodes which may have cached data or
	 * metadata associated with them.
	 */
	mutex_enter(&rpfreelist_lock);
	if (rpfreelist == NULL) {
		/* First entry: a circular list of one. */
		rp->r_freef = rp;
		rp->r_freeb = rp;
		rpfreelist = rp;
	} else {
		/* Link in just behind the current list head (the tail). */
		rp->r_freef = rpfreelist;
		rp->r_freeb = rpfreelist->r_freeb;
		rpfreelist->r_freeb->r_freef = rp;
		rpfreelist->r_freeb = rp;
		/*
		 * If this rnode carries no cached pages, readdir
		 * entries, symlink contents, ACLs, or pathconf data,
		 * make it the head so it is the next reuse candidate.
		 */
		if (!vn_has_cached_data(vp) &&
		    !HAVE_RDDIR_CACHE(rp) &&
		    rp->r_symlink.contents == NULL &&
		    rp->r_secattr == NULL &&
		    rp->r_pathconf == NULL)
			rpfreelist = rp;
	}
	mutex_exit(&rpfreelist_lock);

	rw_exit(&rp->r_hashq->r_lock);
}
27850Sstevel@tonic-gate 
27860Sstevel@tonic-gate /*
27870Sstevel@tonic-gate  * Remove an rnode from the free list.
27880Sstevel@tonic-gate  *
27890Sstevel@tonic-gate  * The caller must be holding rpfreelist_lock and the rnode
27900Sstevel@tonic-gate  * must be on the freelist.
27910Sstevel@tonic-gate  */
27920Sstevel@tonic-gate static void
rp_rmfree(rnode_t * rp)27930Sstevel@tonic-gate rp_rmfree(rnode_t *rp)
27940Sstevel@tonic-gate {
27950Sstevel@tonic-gate 
27960Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&rpfreelist_lock));
27970Sstevel@tonic-gate 	ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);
27980Sstevel@tonic-gate 
27990Sstevel@tonic-gate 	if (rp == rpfreelist) {
28000Sstevel@tonic-gate 		rpfreelist = rp->r_freef;
28010Sstevel@tonic-gate 		if (rp == rpfreelist)
28020Sstevel@tonic-gate 			rpfreelist = NULL;
28030Sstevel@tonic-gate 	}
28040Sstevel@tonic-gate 
28050Sstevel@tonic-gate 	rp->r_freeb->r_freef = rp->r_freef;
28060Sstevel@tonic-gate 	rp->r_freef->r_freeb = rp->r_freeb;
28070Sstevel@tonic-gate 
28080Sstevel@tonic-gate 	rp->r_freef = rp->r_freeb = NULL;
28090Sstevel@tonic-gate }
28100Sstevel@tonic-gate 
28110Sstevel@tonic-gate /*
28120Sstevel@tonic-gate  * Put a rnode in the hash table.
28130Sstevel@tonic-gate  *
28140Sstevel@tonic-gate  * The caller must be holding the exclusive hash queue lock.
28150Sstevel@tonic-gate  */
28160Sstevel@tonic-gate static void
rp_addhash(rnode_t * rp)28170Sstevel@tonic-gate rp_addhash(rnode_t *rp)
28180Sstevel@tonic-gate {
28190Sstevel@tonic-gate 
28200Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
28210Sstevel@tonic-gate 	ASSERT(!(rp->r_flags & RHASHED));
28220Sstevel@tonic-gate 
28230Sstevel@tonic-gate 	rp->r_hashf = rp->r_hashq->r_hashf;
28240Sstevel@tonic-gate 	rp->r_hashq->r_hashf = rp;
28250Sstevel@tonic-gate 	rp->r_hashb = (rnode_t *)rp->r_hashq;
28260Sstevel@tonic-gate 	rp->r_hashf->r_hashb = rp;
28270Sstevel@tonic-gate 
28280Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
28290Sstevel@tonic-gate 	rp->r_flags |= RHASHED;
28300Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
28310Sstevel@tonic-gate }
28320Sstevel@tonic-gate 
28330Sstevel@tonic-gate /*
28340Sstevel@tonic-gate  * Remove a rnode from the hash table.
28350Sstevel@tonic-gate  *
28360Sstevel@tonic-gate  * The caller must be holding the hash queue lock.
28370Sstevel@tonic-gate  */
28380Sstevel@tonic-gate static void
rp_rmhash_locked(rnode_t * rp)28390Sstevel@tonic-gate rp_rmhash_locked(rnode_t *rp)
28400Sstevel@tonic-gate {
28410Sstevel@tonic-gate 
28420Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
28430Sstevel@tonic-gate 	ASSERT(rp->r_flags & RHASHED);
28440Sstevel@tonic-gate 
28450Sstevel@tonic-gate 	rp->r_hashb->r_hashf = rp->r_hashf;
28460Sstevel@tonic-gate 	rp->r_hashf->r_hashb = rp->r_hashb;
28470Sstevel@tonic-gate 
28480Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
28490Sstevel@tonic-gate 	rp->r_flags &= ~RHASHED;
28500Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
28510Sstevel@tonic-gate }
28520Sstevel@tonic-gate 
28530Sstevel@tonic-gate /*
28540Sstevel@tonic-gate  * Remove a rnode from the hash table.
28550Sstevel@tonic-gate  *
28560Sstevel@tonic-gate  * The caller must not be holding the hash queue lock.
28570Sstevel@tonic-gate  */
28580Sstevel@tonic-gate void
rp_rmhash(rnode_t * rp)28590Sstevel@tonic-gate rp_rmhash(rnode_t *rp)
28600Sstevel@tonic-gate {
28610Sstevel@tonic-gate 
28620Sstevel@tonic-gate 	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
28630Sstevel@tonic-gate 	rp_rmhash_locked(rp);
28640Sstevel@tonic-gate 	rw_exit(&rp->r_hashq->r_lock);
28650Sstevel@tonic-gate }
28660Sstevel@tonic-gate 
/*
 * Lookup a rnode by fhandle.
 *
 * On success the rnode is returned with a reference held: either the
 * reference recovered by removing it from the freelist, or a fresh
 * VN_HOLD.  Returns NULL if no matching rnode is on this hash queue.
 *
 * The caller must be holding the hash queue lock, either shared or exclusive.
 */
static rnode_t *
rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp)
{
	rnode_t *rp;
	vnode_t *vp;

	ASSERT(RW_LOCK_HELD(&rhtp->r_lock));

	for (rp = rhtp->r_hashf; rp != (rnode_t *)rhtp; rp = rp->r_hashf) {
		vp = RTOV(rp);
		/* Match requires same vfs and identical file handle bytes. */
		if (vp->v_vfsp == vfsp &&
		    rp->r_fh.fh_len == fh->fh_len &&
		    bcmp(rp->r_fh.fh_buf, fh->fh_buf, fh->fh_len) == 0) {
			/*
			 * remove rnode from free list, if necessary.
			 * This outer r_freef check is done without
			 * rpfreelist_lock, so it must be repeated
			 * under the lock before being acted on.
			 */
			if (rp->r_freef != NULL) {
				mutex_enter(&rpfreelist_lock);
				/*
				 * If the rnode is on the freelist,
				 * then remove it and use that reference
				 * as the new reference.  Otherwise,
				 * need to increment the reference count.
				 */
				if (rp->r_freef != NULL) {
					rp_rmfree(rp);
					mutex_exit(&rpfreelist_lock);
				} else {
					mutex_exit(&rpfreelist_lock);
					VN_HOLD(vp);
				}
			} else
				VN_HOLD(vp);
			return (rp);
		}
	}
	return (NULL);
}
29100Sstevel@tonic-gate 
29110Sstevel@tonic-gate /*
29120Sstevel@tonic-gate  * Return 1 if there is a active vnode belonging to this vfs in the
29130Sstevel@tonic-gate  * rtable cache.
29140Sstevel@tonic-gate  *
29150Sstevel@tonic-gate  * Several of these checks are done without holding the usual
29160Sstevel@tonic-gate  * locks.  This is safe because destroy_rtable(), rp_addfree(),
29170Sstevel@tonic-gate  * etc. will redo the necessary checks before actually destroying
29180Sstevel@tonic-gate  * any rnodes.
29190Sstevel@tonic-gate  */
29200Sstevel@tonic-gate int
check_rtable(struct vfs * vfsp)29210Sstevel@tonic-gate check_rtable(struct vfs *vfsp)
29220Sstevel@tonic-gate {
29230Sstevel@tonic-gate 	int index;
29240Sstevel@tonic-gate 	rnode_t *rp;
29250Sstevel@tonic-gate 	vnode_t *vp;
29260Sstevel@tonic-gate 
29270Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
29280Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_READER);
29290Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
29300Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
29310Sstevel@tonic-gate 		    rp = rp->r_hashf) {
29320Sstevel@tonic-gate 			vp = RTOV(rp);
29330Sstevel@tonic-gate 			if (vp->v_vfsp == vfsp) {
29340Sstevel@tonic-gate 				if (rp->r_freef == NULL ||
29350Sstevel@tonic-gate 				    (vn_has_cached_data(vp) &&
29360Sstevel@tonic-gate 				    (rp->r_flags & RDIRTY)) ||
29370Sstevel@tonic-gate 				    rp->r_count > 0) {
29380Sstevel@tonic-gate 					rw_exit(&rtable[index].r_lock);
29390Sstevel@tonic-gate 					return (1);
29400Sstevel@tonic-gate 				}
29410Sstevel@tonic-gate 			}
29420Sstevel@tonic-gate 		}
29430Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
29440Sstevel@tonic-gate 	}
29450Sstevel@tonic-gate 	return (0);
29460Sstevel@tonic-gate }
29470Sstevel@tonic-gate 
/*
 * Destroy inactive vnodes from the hash queues which belong to this
 * vfs.  It is essential that we destroy all inactive vnodes during a
 * forced unmount as well as during a normal unmount.
 *
 * Works in two passes: first, every freelist rnode of this vfs is
 * pulled off the freelist and the hash queue and collected on a
 * private list (reusing r_hashf as the link, which is safe once the
 * rnode has been unhashed); then the collected rnodes are released
 * through rp_addfree() with no hash locks held.
 */
void
destroy_rtable(struct vfs *vfsp, cred_t *cr)
{
	int index;
	rnode_t *rp;
	rnode_t *rlist;		/* private list of collected rnodes */
	rnode_t *r_hashf;	/* next-pointer saved across unhash */
	vnode_t *vp;

	rlist = NULL;

	for (index = 0; index < rtablesize; index++) {
		rw_enter(&rtable[index].r_lock, RW_WRITER);
		for (rp = rtable[index].r_hashf;
		    rp != (rnode_t *)(&rtable[index]);
		    rp = r_hashf) {
			/* save the hash pointer before destroying */
			r_hashf = rp->r_hashf;
			vp = RTOV(rp);
			if (vp->v_vfsp == vfsp) {
				mutex_enter(&rpfreelist_lock);
				if (rp->r_freef != NULL) {
					/*
					 * On the freelist, hence inactive:
					 * take over the freelist's
					 * reference and stash the rnode.
					 */
					rp_rmfree(rp);
					mutex_exit(&rpfreelist_lock);
					rp_rmhash_locked(rp);
					rp->r_hashf = rlist;
					rlist = rp;
				} else
					mutex_exit(&rpfreelist_lock);
			}
		}
		rw_exit(&rtable[index].r_lock);
	}

	for (rp = rlist; rp != NULL; rp = rlist) {
		rlist = rp->r_hashf;
		/*
		 * This call to rp_addfree will end up destroying the
		 * rnode, but in a safe way with the appropriate set
		 * of checks done.
		 */
		rp_addfree(rp, cr);
	}

}
29980Sstevel@tonic-gate 
/*
 * This routine destroys all the resources associated with the rnode
 * and then the rnode itself.
 *
 * The caller holds the sole remaining reference (v_count == 1) and
 * the rnode must already be off both the hash queue and the freelist,
 * with no operations in progress on it.
 */
static void
destroy_rnode(rnode_t *rp)
{
	vnode_t *vp;
	vfs_t *vfsp;

	vp = RTOV(rp);
	vfsp = vp->v_vfsp;	/* saved: vp is freed before the VFS_RELE */

	ASSERT(vp->v_count == 1);
	ASSERT(rp->r_count == 0);
	ASSERT(rp->r_lmpl == NULL);
	ASSERT(rp->r_mapcnt == 0);
	ASSERT(!(rp->r_flags & RHASHED));
	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
	atomic_add_long((ulong_t *)&rnew, -1);
#ifdef DEBUG
	clstat_debug.nrnode.value.ui64--;
#endif
	/* Tear down the synchronization objects embedded in the rnode. */
	nfs_rw_destroy(&rp->r_rwlock);
	nfs_rw_destroy(&rp->r_lkserlock);
	mutex_destroy(&rp->r_statelock);
	cv_destroy(&rp->r_cv);
	cv_destroy(&rp->r_commit.c_cv);
	/* The delmap list only exists if RDELMAPLIST was ever set. */
	if (rp->r_flags & RDELMAPLIST)
		list_destroy(&rp->r_indelmap);
	nfs_free_r_path(rp);
	avl_destroy(&rp->r_dir);
	vn_invalid(vp);
	vn_free(vp);
	kmem_cache_free(rnode_cache, rp);
	/* Drop the hold placed on the vfs when the rnode was created. */
	VFS_RELE(vfsp);
}
30360Sstevel@tonic-gate 
/*
 * Flush all vnodes in this (or every) vfs.
 * Used by nfs_sync and by nfs_unmount.
 *
 * A vfsp of NULL means flush every mounted NFS file system.  Vnodes
 * needing a flush are first collected (with holds) under the hash
 * locks, then flushed asynchronously with no locks held.
 */
void
rflush(struct vfs *vfsp, cred_t *cr)
{
	int index;
	rnode_t *rp;
	vnode_t *vp, **vplist;
	long num, cnt;

	/*
	 * Check to see whether there is anything to do.
	 */
	num = rnew;
	if (num == 0)
		return;

	/*
	 * Allocate a slot for all currently active rnodes on the
	 * supposition that they all may need flushing.
	 */
	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
	cnt = 0;

	/*
	 * Walk the hash queues looking for rnodes with page
	 * lists associated with them.  Make a list of these
	 * files.
	 */
	for (index = 0; index < rtablesize; index++) {
		rw_enter(&rtable[index].r_lock, RW_READER);
		for (rp = rtable[index].r_hashf;
		    rp != (rnode_t *)(&rtable[index]);
		    rp = rp->r_hashf) {
			vp = RTOV(rp);
			/*
			 * Don't bother sync'ing a vp if it
			 * is part of virtual swap device or
			 * if VFS is read-only
			 */
			if (IS_SWAPVP(vp) || vn_is_readonly(vp))
				continue;
			/*
			 * If flushing all mounted file systems or
			 * the vnode belongs to this vfs, has pages
			 * and is marked as either dirty or mmap'd,
			 * hold and add this vnode to the list of
			 * vnodes to flush.
			 */
			if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
			    vn_has_cached_data(vp) &&
			    ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) {
				VN_HOLD(vp);
				vplist[cnt++] = vp;
				if (cnt == num) {
					/*
					 * rnew grew after we snapshot it;
					 * the list is full, so flush what
					 * we have collected so far.
					 */
					rw_exit(&rtable[index].r_lock);
					goto toomany;
				}
			}
		}
		rw_exit(&rtable[index].r_lock);
	}
toomany:

	/*
	 * Flush and release all of the files on the list.
	 */
	while (cnt-- > 0) {
		vp = vplist[cnt];
		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
		VN_RELE(vp);
	}

	/*
	 * Free the space allocated to hold the list.
	 */
	kmem_free(vplist, num * sizeof (*vplist));
}
31170Sstevel@tonic-gate 
31180Sstevel@tonic-gate /*
31190Sstevel@tonic-gate  * This probably needs to be larger than or equal to
31200Sstevel@tonic-gate  * log2(sizeof (struct rnode)) due to the way that rnodes are
31210Sstevel@tonic-gate  * allocated.
31220Sstevel@tonic-gate  */
31230Sstevel@tonic-gate #define	ACACHE_SHIFT_BITS	9
31240Sstevel@tonic-gate 
31250Sstevel@tonic-gate static int
acachehash(rnode_t * rp,cred_t * cr)31260Sstevel@tonic-gate acachehash(rnode_t *rp, cred_t *cr)
31270Sstevel@tonic-gate {
31280Sstevel@tonic-gate 
31290Sstevel@tonic-gate 	return ((((intptr_t)rp >> ACACHE_SHIFT_BITS) + crgetuid(cr)) &
31300Sstevel@tonic-gate 	    acachemask);
31310Sstevel@tonic-gate }
31320Sstevel@tonic-gate 
31330Sstevel@tonic-gate #ifdef DEBUG
31340Sstevel@tonic-gate static long nfs_access_cache_hits = 0;
31350Sstevel@tonic-gate static long nfs_access_cache_misses = 0;
31360Sstevel@tonic-gate #endif
31370Sstevel@tonic-gate 
31380Sstevel@tonic-gate nfs_access_type_t
nfs_access_check(rnode_t * rp,uint32_t acc,cred_t * cr)31390Sstevel@tonic-gate nfs_access_check(rnode_t *rp, uint32_t acc, cred_t *cr)
31400Sstevel@tonic-gate {
31410Sstevel@tonic-gate 	vnode_t *vp;
31420Sstevel@tonic-gate 	acache_t *ap;
31430Sstevel@tonic-gate 	acache_hash_t *hp;
31440Sstevel@tonic-gate 	nfs_access_type_t all;
31450Sstevel@tonic-gate 
31460Sstevel@tonic-gate 	vp = RTOV(rp);
31470Sstevel@tonic-gate 	if (!ATTRCACHE_VALID(vp) || nfs_waitfor_purge_complete(vp))
31480Sstevel@tonic-gate 		return (NFS_ACCESS_UNKNOWN);
31490Sstevel@tonic-gate 
31500Sstevel@tonic-gate 	if (rp->r_acache != NULL) {
31510Sstevel@tonic-gate 		hp = &acache[acachehash(rp, cr)];
31520Sstevel@tonic-gate 		rw_enter(&hp->lock, RW_READER);
31530Sstevel@tonic-gate 		ap = hp->next;
31540Sstevel@tonic-gate 		while (ap != (acache_t *)hp) {
31550Sstevel@tonic-gate 			if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) {
31560Sstevel@tonic-gate 				if ((ap->known & acc) == acc) {
31570Sstevel@tonic-gate #ifdef DEBUG
31580Sstevel@tonic-gate 					nfs_access_cache_hits++;
31590Sstevel@tonic-gate #endif
31600Sstevel@tonic-gate 					if ((ap->allowed & acc) == acc)
31610Sstevel@tonic-gate 						all = NFS_ACCESS_ALLOWED;
31620Sstevel@tonic-gate 					else
31630Sstevel@tonic-gate 						all = NFS_ACCESS_DENIED;
31640Sstevel@tonic-gate 				} else {
31650Sstevel@tonic-gate #ifdef DEBUG
31660Sstevel@tonic-gate 					nfs_access_cache_misses++;
31670Sstevel@tonic-gate #endif
31680Sstevel@tonic-gate 					all = NFS_ACCESS_UNKNOWN;
31690Sstevel@tonic-gate 				}
31700Sstevel@tonic-gate 				rw_exit(&hp->lock);
31710Sstevel@tonic-gate 				return (all);
31720Sstevel@tonic-gate 			}
31730Sstevel@tonic-gate 			ap = ap->next;
31740Sstevel@tonic-gate 		}
31750Sstevel@tonic-gate 		rw_exit(&hp->lock);
31760Sstevel@tonic-gate 	}
31770Sstevel@tonic-gate 
31780Sstevel@tonic-gate #ifdef DEBUG
31790Sstevel@tonic-gate 	nfs_access_cache_misses++;
31800Sstevel@tonic-gate #endif
31810Sstevel@tonic-gate 	return (NFS_ACCESS_UNKNOWN);
31820Sstevel@tonic-gate }
31830Sstevel@tonic-gate 
/*
 * Record in the access cache that, for rnode rp and credential cr,
 * the server answered the access bits in `acc' with the result bits
 * in `resacc'.  An existing entry for (rp, cr) is updated in place;
 * otherwise a new entry is linked into both the hash bucket chain
 * and the rnode's r_acache list.  If the (non-blocking) allocation
 * of a new entry fails, the result is silently not cached.
 */
void
nfs_access_cache(rnode_t *rp, uint32_t acc, uint32_t resacc, cred_t *cr)
{
	acache_t *ap;
	acache_t *nap;
	acache_hash_t *hp;

	hp = &acache[acachehash(rp, cr)];

	/*
	 * Allocate now assuming that mostly an allocation will be
	 * required.  This allows the allocation to happen without
	 * holding the hash bucket locked.
	 */
	nap = kmem_cache_alloc(acache_cache, KM_NOSLEEP);
	if (nap != NULL) {
		nap->known = acc;
		nap->allowed = resacc;
		nap->rnode = rp;
		crhold(cr);
		nap->cred = cr;
		nap->hashq = hp;
	}

	rw_enter(&hp->lock, RW_WRITER);

	/* Look for an existing entry for this (rnode, cred) pair. */
	if (rp->r_acache != NULL) {
		ap = hp->next;
		while (ap != (acache_t *)hp) {
			if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) {
				/*
				 * Merge: the requested bits become known,
				 * and their allowed state is replaced by
				 * the fresh server answer.
				 */
				ap->known |= acc;
				ap->allowed &= ~acc;
				ap->allowed |= resacc;
				rw_exit(&hp->lock);
				/* Discard the unused preallocated entry. */
				if (nap != NULL) {
					crfree(nap->cred);
					kmem_cache_free(acache_cache, nap);
				}
				return;
			}
			ap = ap->next;
		}
	}

	/* No existing entry: insert the new one (if allocation worked). */
	if (nap != NULL) {
#ifdef DEBUG
		clstat_debug.access.value.ui64++;
#endif
		/* Link at the front of the hash bucket chain. */
		nap->next = hp->next;
		hp->next = nap;
		nap->next->prev = nap;
		nap->prev = (acache_t *)hp;

		/* And onto the rnode's own list, under r_statelock. */
		mutex_enter(&rp->r_statelock);
		nap->list = rp->r_acache;
		rp->r_acache = nap;
		mutex_exit(&rp->r_statelock);
	}

	rw_exit(&hp->lock);
}
32450Sstevel@tonic-gate 
/*
 * Purge all cached access entries belonging to this rnode.
 * Returns 1 if any entries were freed, 0 if nothing was cached.
 */
int
nfs_access_purge_rp(rnode_t *rp)
{
	acache_t *ap;
	acache_t *tmpap;
	acache_t *rplist;

	/*
	 * If there aren't any cached entries, then there is nothing
	 * to free.
	 */
	if (rp->r_acache == NULL)
		return (0);

	/* Detach the whole list from the rnode under r_statelock. */
	mutex_enter(&rp->r_statelock);
	rplist = rp->r_acache;
	rp->r_acache = NULL;
	mutex_exit(&rp->r_statelock);

	/*
	 * Loop through each entry in the list pointed to in the
	 * rnode.  Remove each of these entries from the hash
	 * queue that it is on and remove it from the list in
	 * the rnode.
	 */
	for (ap = rplist; ap != NULL; ap = tmpap) {
		rw_enter(&ap->hashq->lock, RW_WRITER);
		ap->prev->next = ap->next;
		ap->next->prev = ap->prev;
		rw_exit(&ap->hashq->lock);

		/* Grab the list successor before freeing the entry. */
		tmpap = ap->list;
		crfree(ap->cred);
		kmem_cache_free(acache_cache, ap);
#ifdef DEBUG
		clstat_debug.access.value.ui64--;
#endif
	}

	return (1);
}
32870Sstevel@tonic-gate 
32880Sstevel@tonic-gate static const char prefix[] = ".nfs";
32890Sstevel@tonic-gate 
32900Sstevel@tonic-gate static kmutex_t newnum_lock;
32910Sstevel@tonic-gate 
32920Sstevel@tonic-gate int
newnum(void)32930Sstevel@tonic-gate newnum(void)
32940Sstevel@tonic-gate {
32950Sstevel@tonic-gate 	static uint_t newnum = 0;
32960Sstevel@tonic-gate 	uint_t id;
32970Sstevel@tonic-gate 
32980Sstevel@tonic-gate 	mutex_enter(&newnum_lock);
32990Sstevel@tonic-gate 	if (newnum == 0)
33000Sstevel@tonic-gate 		newnum = gethrestime_sec() & 0xffff;
33010Sstevel@tonic-gate 	id = newnum++;
33020Sstevel@tonic-gate 	mutex_exit(&newnum_lock);
33030Sstevel@tonic-gate 	return (id);
33040Sstevel@tonic-gate }
33050Sstevel@tonic-gate 
33060Sstevel@tonic-gate char *
newname(void)33070Sstevel@tonic-gate newname(void)
33080Sstevel@tonic-gate {
33090Sstevel@tonic-gate 	char *news;
33100Sstevel@tonic-gate 	char *s;
33110Sstevel@tonic-gate 	const char *p;
33120Sstevel@tonic-gate 	uint_t id;
33130Sstevel@tonic-gate 
33140Sstevel@tonic-gate 	id = newnum();
33150Sstevel@tonic-gate 	news = kmem_alloc(MAXNAMELEN, KM_SLEEP);
33160Sstevel@tonic-gate 	s = news;
33170Sstevel@tonic-gate 	p = prefix;
33180Sstevel@tonic-gate 	while (*p != '\0')
33190Sstevel@tonic-gate 		*s++ = *p++;
33200Sstevel@tonic-gate 	while (id != 0) {
33210Sstevel@tonic-gate 		*s++ = "0123456789ABCDEF"[id & 0x0f];
33220Sstevel@tonic-gate 		id >>= 4;
33230Sstevel@tonic-gate 	}
33240Sstevel@tonic-gate 	*s = '\0';
33250Sstevel@tonic-gate 	return (news);
33260Sstevel@tonic-gate }
33270Sstevel@tonic-gate 
33280Sstevel@tonic-gate /*
33290Sstevel@tonic-gate  * Snapshot callback for nfs:0:nfs_client as registered with the kstat
33300Sstevel@tonic-gate  * framework.
33310Sstevel@tonic-gate  */
static int
cl_snapshot(kstat_t *ksp, void *buf, int rw)
{
	ksp->ks_snaptime = gethrtime();
	if (rw == KSTAT_WRITE) {
		/* Copy caller-supplied values into this zone's stats. */
		bcopy(buf, ksp->ks_private, sizeof (clstat_tmpl));
#ifdef DEBUG
		/*
		 * Currently only the global zone can write to kstats, but we
		 * add the check just for paranoia.
		 */
		if (INGLOBALZONE(curproc))
			bcopy((char *)buf + sizeof (clstat_tmpl), &clstat_debug,
			    sizeof (clstat_debug));
#endif
	} else {
		/* KSTAT_READ: copy this zone's stats out to the caller. */
		bcopy(ksp->ks_private, buf, sizeof (clstat_tmpl));
#ifdef DEBUG
		/*
		 * If we're displaying the "global" debug kstat values, we
		 * display them as-is to all zones since in fact they apply to
		 * the system as a whole.
		 */
		bcopy(&clstat_debug, (char *)buf + sizeof (clstat_tmpl),
		    sizeof (clstat_debug));
#endif
	}
	return (0);
}
33610Sstevel@tonic-gate 
/*
 * ZSD constructor: allocate and initialize the per-zone NFS client
 * state (client handle table, stats and the "nfs_client" kstat) and
 * link it onto the global nfs_clnt_list.  The returned pointer is
 * later passed to clfini_zone() as its `arg'.
 */
static void *
clinit_zone(zoneid_t zoneid)
{
	kstat_t *nfs_client_kstat;
	struct nfs_clnt *nfscl;
	uint_t ndata;

	nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
	mutex_init(&nfscl->nfscl_chtable_lock, NULL, MUTEX_DEFAULT, NULL);
	nfscl->nfscl_chtable = NULL;
	nfscl->nfscl_zoneid = zoneid;

	/* Start from the template; cl_snapshot() serves the live values. */
	bcopy(&clstat_tmpl, &nfscl->nfscl_stat, sizeof (clstat_tmpl));
	ndata = sizeof (clstat_tmpl) / sizeof (kstat_named_t);
#ifdef DEBUG
	/* DEBUG kernels export the extra debug counters as well. */
	ndata += sizeof (clstat_debug) / sizeof (kstat_named_t);
#endif
	if ((nfs_client_kstat = kstat_create_zone("nfs", 0, "nfs_client",
	    "misc", KSTAT_TYPE_NAMED, ndata,
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
		nfs_client_kstat->ks_private = &nfscl->nfscl_stat;
		nfs_client_kstat->ks_snapshot = cl_snapshot;
		kstat_install(nfs_client_kstat);
	}
	mutex_enter(&nfs_clnt_list_lock);
	list_insert_head(&nfs_clnt_list, nfscl);
	mutex_exit(&nfs_clnt_list_lock);
	return (nfscl);
}
33910Sstevel@tonic-gate 
33920Sstevel@tonic-gate /*ARGSUSED*/
/*
 * ZSD destructor: tear down the per-zone NFS client state created by
 * clinit_zone().  Unlinks it from the global list, reclaims all
 * cached client handles, frees the handle table and deletes the
 * per-zone kstat.
 */
static void
clfini_zone(zoneid_t zoneid, void *arg)
{
	struct nfs_clnt *nfscl = arg;
	chhead_t *chp, *next;

	if (nfscl == NULL)
		return;
	mutex_enter(&nfs_clnt_list_lock);
	list_remove(&nfs_clnt_list, nfscl);
	mutex_exit(&nfs_clnt_list_lock);
	/* Release every cached client handle belonging to this zone. */
	clreclaim_zone(nfscl, 0);
	for (chp = nfscl->nfscl_chtable; chp != NULL; chp = next) {
		ASSERT(chp->ch_list == NULL);
		kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
		/* Grab the successor before freeing the element. */
		next = chp->ch_next;
		kmem_free(chp, sizeof (*chp));
	}
	kstat_delete_byname_zone("nfs", 0, "nfs_client", zoneid);
	mutex_destroy(&nfscl->nfscl_chtable_lock);
	kmem_free(nfscl, sizeof (*nfscl));
}
34150Sstevel@tonic-gate 
34160Sstevel@tonic-gate /*
34170Sstevel@tonic-gate  * Called by endpnt_destructor to make sure the client handles are
34180Sstevel@tonic-gate  * cleaned up before the RPC endpoints.  This becomes a no-op if
34190Sstevel@tonic-gate  * clfini_zone (above) is called first.  This function is needed
34200Sstevel@tonic-gate  * (rather than relying on clfini_zone to clean up) because the ZSD
34210Sstevel@tonic-gate  * callbacks have no ordering mechanism, so we have no way to ensure
34220Sstevel@tonic-gate  * that clfini_zone is called before endpnt_destructor.
34230Sstevel@tonic-gate  */
34240Sstevel@tonic-gate void
clcleanup_zone(zoneid_t zoneid)34250Sstevel@tonic-gate clcleanup_zone(zoneid_t zoneid)
34260Sstevel@tonic-gate {
34270Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
34280Sstevel@tonic-gate 
34290Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
34300Sstevel@tonic-gate 	nfscl = list_head(&nfs_clnt_list);
34310Sstevel@tonic-gate 	for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl)) {
34320Sstevel@tonic-gate 		if (nfscl->nfscl_zoneid == zoneid) {
34330Sstevel@tonic-gate 			clreclaim_zone(nfscl, 0);
34340Sstevel@tonic-gate 			break;
34350Sstevel@tonic-gate 		}
34360Sstevel@tonic-gate 	}
34370Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
34380Sstevel@tonic-gate }
34390Sstevel@tonic-gate 
/*
 * One-time initialization of the NFS client subsystem: sizes and
 * allocates the rnode hash table and the access cache, creates the
 * kmem caches, registers the per-zone ZSD key and initializes the
 * global locks and the NFS device major number.  Returns 0.
 */
int
nfs_subrinit(void)
{
	int i;
	ulong_t nrnode_max;

	/*
	 * Allocate and initialize the rnode hash queues
	 */
	if (nrnode <= 0)
		nrnode = ncsize;
	/* Cap nrnode so rnodes use at most 1/4 of available memory. */
	nrnode_max = (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode));
	if (nrnode > nrnode_max || (nrnode == 0 && ncsize == 0)) {
		zcmn_err(GLOBAL_ZONEID, CE_NOTE,
		    "setting nrnode to max value of %ld", nrnode_max);
		nrnode = nrnode_max;
	}

	/* Round the table size up to a power of two for cheap masking. */
	rtablesize = 1 << highbit(nrnode / hashlen);
	rtablemask = rtablesize - 1;
	rtable = kmem_alloc(rtablesize * sizeof (*rtable), KM_SLEEP);
	for (i = 0; i < rtablesize; i++) {
		/* Each bucket starts out as an empty circular list. */
		rtable[i].r_hashf = (rnode_t *)(&rtable[i]);
		rtable[i].r_hashb = (rnode_t *)(&rtable[i]);
		rw_init(&rtable[i].r_lock, NULL, RW_DEFAULT, NULL);
	}
	rnode_cache = kmem_cache_create("rnode_cache", sizeof (rnode_t),
	    0, NULL, NULL, nfs_reclaim, NULL, NULL, 0);

	/*
	 * Allocate and initialize the access cache
	 */

	/*
	 * Initial guess is one access cache entry per rnode unless
	 * nacache is set to a non-zero value and then it is used to
	 * indicate a guess at the number of access cache entries.
	 */
	if (nacache > 0)
		acachesize = 1 << highbit(nacache / hashlen);
	else
		acachesize = rtablesize;
	acachemask = acachesize - 1;
	acache = kmem_alloc(acachesize * sizeof (*acache), KM_SLEEP);
	for (i = 0; i < acachesize; i++) {
		acache[i].next = (acache_t *)&acache[i];
		acache[i].prev = (acache_t *)&acache[i];
		rw_init(&acache[i].lock, NULL, RW_DEFAULT, NULL);
	}
	acache_cache = kmem_cache_create("nfs_access_cache",
	    sizeof (acache_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	/*
	 * Allocate and initialize the client handle cache
	 */
	chtab_cache = kmem_cache_create("client_handle_cache",
	    sizeof (struct chtab), 0, NULL, NULL, clreclaim, NULL, NULL, 0);
	/*
	 * Initialize the list of per-zone client handles (and associated data).
	 * This needs to be done before we call zone_key_create().
	 */
	list_create(&nfs_clnt_list, sizeof (struct nfs_clnt),
	    offsetof(struct nfs_clnt, nfscl_node));
	/*
	 * Initialize the zone_key for per-zone client handle lists.
	 */
	zone_key_create(&nfsclnt_zone_key, clinit_zone, NULL, clfini_zone);
	/*
	 * Initialize the various mutexes and reader/writer locks
	 */
	mutex_init(&rpfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&newnum_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&nfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Assign unique major number for all nfs mounts
	 */
	if ((nfs_major = getudev()) == -1) {
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "nfs: init: can't get unique device number");
		nfs_major = 0;
	}
	nfs_minor = 0;

	if (nfs3_jukebox_delay == 0)
		nfs3_jukebox_delay = NFS3_JUKEBOX_DELAY;

	return (0);
}
35280Sstevel@tonic-gate 
/*
 * Tear down everything set up by nfs_subrinit(): the kmem caches, the
 * rnode hash table, the access cache, the global locks and the ZSD key.
 */
void
nfs_subrfini(void)
{
	int i;

	/*
	 * Deallocate the rnode hash queues
	 */
	kmem_cache_destroy(rnode_cache);

	for (i = 0; i < rtablesize; i++)
		rw_destroy(&rtable[i].r_lock);
	kmem_free(rtable, rtablesize * sizeof (*rtable));

	/*
	 * Deallocate the access cache
	 */
	kmem_cache_destroy(acache_cache);

	for (i = 0; i < acachesize; i++)
		rw_destroy(&acache[i].lock);
	kmem_free(acache, acachesize * sizeof (*acache));

	/*
	 * Deallocate the client handle cache
	 */
	kmem_cache_destroy(chtab_cache);

	/*
	 * Destroy the various mutexes and reader/writer locks
	 */
	mutex_destroy(&rpfreelist_lock);
	mutex_destroy(&newnum_lock);
	mutex_destroy(&nfs_minor_lock);
	(void) zone_key_delete(nfsclnt_zone_key);
}
35650Sstevel@tonic-gate 
35660Sstevel@tonic-gate enum nfsstat
puterrno(int error)35670Sstevel@tonic-gate puterrno(int error)
35680Sstevel@tonic-gate {
35690Sstevel@tonic-gate 
35700Sstevel@tonic-gate 	switch (error) {
35710Sstevel@tonic-gate 	case EOPNOTSUPP:
35720Sstevel@tonic-gate 		return (NFSERR_OPNOTSUPP);
35730Sstevel@tonic-gate 	case ENAMETOOLONG:
35740Sstevel@tonic-gate 		return (NFSERR_NAMETOOLONG);
35750Sstevel@tonic-gate 	case ENOTEMPTY:
35760Sstevel@tonic-gate 		return (NFSERR_NOTEMPTY);
35770Sstevel@tonic-gate 	case EDQUOT:
35780Sstevel@tonic-gate 		return (NFSERR_DQUOT);
35790Sstevel@tonic-gate 	case ESTALE:
35800Sstevel@tonic-gate 		return (NFSERR_STALE);
35810Sstevel@tonic-gate 	case EREMOTE:
35820Sstevel@tonic-gate 		return (NFSERR_REMOTE);
35830Sstevel@tonic-gate 	case ENOSYS:
35840Sstevel@tonic-gate 		return (NFSERR_OPNOTSUPP);
35850Sstevel@tonic-gate 	case EOVERFLOW:
35860Sstevel@tonic-gate 		return (NFSERR_INVAL);
35870Sstevel@tonic-gate 	default:
35880Sstevel@tonic-gate 		return ((enum nfsstat)error);
35890Sstevel@tonic-gate 	}
35900Sstevel@tonic-gate 	/* NOTREACHED */
35910Sstevel@tonic-gate }
35920Sstevel@tonic-gate 
35930Sstevel@tonic-gate int
geterrno(enum nfsstat status)35940Sstevel@tonic-gate geterrno(enum nfsstat status)
35950Sstevel@tonic-gate {
35960Sstevel@tonic-gate 
35970Sstevel@tonic-gate 	switch (status) {
35980Sstevel@tonic-gate 	case NFSERR_OPNOTSUPP:
35990Sstevel@tonic-gate 		return (EOPNOTSUPP);
36000Sstevel@tonic-gate 	case NFSERR_NAMETOOLONG:
36010Sstevel@tonic-gate 		return (ENAMETOOLONG);
36020Sstevel@tonic-gate 	case NFSERR_NOTEMPTY:
36030Sstevel@tonic-gate 		return (ENOTEMPTY);
36040Sstevel@tonic-gate 	case NFSERR_DQUOT:
36050Sstevel@tonic-gate 		return (EDQUOT);
36060Sstevel@tonic-gate 	case NFSERR_STALE:
36070Sstevel@tonic-gate 		return (ESTALE);
36080Sstevel@tonic-gate 	case NFSERR_REMOTE:
36090Sstevel@tonic-gate 		return (EREMOTE);
36100Sstevel@tonic-gate 	case NFSERR_WFLUSH:
36110Sstevel@tonic-gate 		return (EIO);
36120Sstevel@tonic-gate 	default:
36130Sstevel@tonic-gate 		return ((int)status);
36140Sstevel@tonic-gate 	}
36150Sstevel@tonic-gate 	/* NOTREACHED */
36160Sstevel@tonic-gate }
36170Sstevel@tonic-gate 
/*
 * Map a local errno value to the equivalent NFSv3 protocol status.
 * On DEBUG kernels every expected errno is listed explicitly and an
 * unexpected value is logged before being passed through; on
 * non-DEBUG kernels only the errnos whose values differ from their
 * nfsstat3 counterparts are translated.
 */
enum nfsstat3
puterrno3(int error)
{

#ifdef DEBUG
	switch (error) {
	case 0:
		return (NFS3_OK);
	case EPERM:
		return (NFS3ERR_PERM);
	case ENOENT:
		return (NFS3ERR_NOENT);
	case EIO:
		return (NFS3ERR_IO);
	case ENXIO:
		return (NFS3ERR_NXIO);
	case EACCES:
		return (NFS3ERR_ACCES);
	case EEXIST:
		return (NFS3ERR_EXIST);
	case EXDEV:
		return (NFS3ERR_XDEV);
	case ENODEV:
		return (NFS3ERR_NODEV);
	case ENOTDIR:
		return (NFS3ERR_NOTDIR);
	case EISDIR:
		return (NFS3ERR_ISDIR);
	case EINVAL:
		return (NFS3ERR_INVAL);
	case EFBIG:
		return (NFS3ERR_FBIG);
	case ENOSPC:
		return (NFS3ERR_NOSPC);
	case EROFS:
		return (NFS3ERR_ROFS);
	case EMLINK:
		return (NFS3ERR_MLINK);
	case ENAMETOOLONG:
		return (NFS3ERR_NAMETOOLONG);
	case ENOTEMPTY:
		return (NFS3ERR_NOTEMPTY);
	case EDQUOT:
		return (NFS3ERR_DQUOT);
	case ESTALE:
		return (NFS3ERR_STALE);
	case EREMOTE:
		return (NFS3ERR_REMOTE);
	case ENOSYS:
	case EOPNOTSUPP:
		return (NFS3ERR_NOTSUPP);
	case EOVERFLOW:
		return (NFS3ERR_INVAL);
	default:
		/* Unexpected errno: log it so the mapping can be audited. */
		zcmn_err(getzoneid(), CE_WARN,
		    "puterrno3: got error %d", error);
		return ((enum nfsstat3)error);
	}
#else
	switch (error) {
	case ENAMETOOLONG:
		return (NFS3ERR_NAMETOOLONG);
	case ENOTEMPTY:
		return (NFS3ERR_NOTEMPTY);
	case EDQUOT:
		return (NFS3ERR_DQUOT);
	case ESTALE:
		return (NFS3ERR_STALE);
	case ENOSYS:
	case EOPNOTSUPP:
		return (NFS3ERR_NOTSUPP);
	case EREMOTE:
		return (NFS3ERR_REMOTE);
	case EOVERFLOW:
		return (NFS3ERR_INVAL);
	default:
		return ((enum nfsstat3)error);
	}
#endif
}
36980Sstevel@tonic-gate 
/*
 * Map an NFSv3 protocol status to the closest local errno.  On DEBUG
 * kernels every defined status is listed and unexpected values are
 * logged; on non-DEBUG kernels only statuses whose values differ
 * from a sensible errno are translated, the rest pass through
 * unchanged.
 */
int
geterrno3(enum nfsstat3 status)
{

#ifdef DEBUG
	switch (status) {
	case NFS3_OK:
		return (0);
	case NFS3ERR_PERM:
		return (EPERM);
	case NFS3ERR_NOENT:
		return (ENOENT);
	case NFS3ERR_IO:
		return (EIO);
	case NFS3ERR_NXIO:
		return (ENXIO);
	case NFS3ERR_ACCES:
		return (EACCES);
	case NFS3ERR_EXIST:
		return (EEXIST);
	case NFS3ERR_XDEV:
		return (EXDEV);
	case NFS3ERR_NODEV:
		return (ENODEV);
	case NFS3ERR_NOTDIR:
		return (ENOTDIR);
	case NFS3ERR_ISDIR:
		return (EISDIR);
	case NFS3ERR_INVAL:
		return (EINVAL);
	case NFS3ERR_FBIG:
		return (EFBIG);
	case NFS3ERR_NOSPC:
		return (ENOSPC);
	case NFS3ERR_ROFS:
		return (EROFS);
	case NFS3ERR_MLINK:
		return (EMLINK);
	case NFS3ERR_NAMETOOLONG:
		return (ENAMETOOLONG);
	case NFS3ERR_NOTEMPTY:
		return (ENOTEMPTY);
	case NFS3ERR_DQUOT:
		return (EDQUOT);
	case NFS3ERR_STALE:
		return (ESTALE);
	case NFS3ERR_REMOTE:
		return (EREMOTE);
	case NFS3ERR_BADHANDLE:
		/* A bad file handle is treated like a stale one. */
		return (ESTALE);
	case NFS3ERR_NOT_SYNC:
		return (EINVAL);
	case NFS3ERR_BAD_COOKIE:
		return (ENOENT);
	case NFS3ERR_NOTSUPP:
		return (EOPNOTSUPP);
	case NFS3ERR_TOOSMALL:
		return (EINVAL);
	case NFS3ERR_SERVERFAULT:
		return (EIO);
	case NFS3ERR_BADTYPE:
		return (EINVAL);
	case NFS3ERR_JUKEBOX:
		return (ENXIO);
	default:
		/* Unexpected status: log it so the mapping can be audited. */
		zcmn_err(getzoneid(), CE_WARN,
		    "geterrno3: got status %d", status);
		return ((int)status);
	}
#else
	switch (status) {
	case NFS3ERR_NAMETOOLONG:
		return (ENAMETOOLONG);
	case NFS3ERR_NOTEMPTY:
		return (ENOTEMPTY);
	case NFS3ERR_DQUOT:
		return (EDQUOT);
	case NFS3ERR_STALE:
	case NFS3ERR_BADHANDLE:
		return (ESTALE);
	case NFS3ERR_NOTSUPP:
		return (EOPNOTSUPP);
	case NFS3ERR_REMOTE:
		return (EREMOTE);
	case NFS3ERR_NOT_SYNC:
	case NFS3ERR_TOOSMALL:
	case NFS3ERR_BADTYPE:
		return (EINVAL);
	case NFS3ERR_BAD_COOKIE:
		return (ENOENT);
	case NFS3ERR_SERVERFAULT:
		return (EIO);
	case NFS3ERR_JUKEBOX:
		return (ENXIO);
	default:
		return ((int)status);
	}
#endif
}
37980Sstevel@tonic-gate 
37990Sstevel@tonic-gate rddir_cache *
rddir_cache_alloc(int flags)38000Sstevel@tonic-gate rddir_cache_alloc(int flags)
38010Sstevel@tonic-gate {
38020Sstevel@tonic-gate 	rddir_cache *rc;
38030Sstevel@tonic-gate 
38040Sstevel@tonic-gate 	rc = kmem_alloc(sizeof (*rc), flags);
38050Sstevel@tonic-gate 	if (rc != NULL) {
38060Sstevel@tonic-gate 		rc->entries = NULL;
38070Sstevel@tonic-gate 		rc->flags = RDDIR;
38080Sstevel@tonic-gate 		cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
38090Sstevel@tonic-gate 		mutex_init(&rc->lock, NULL, MUTEX_DEFAULT, NULL);
38100Sstevel@tonic-gate 		rc->count = 1;
38110Sstevel@tonic-gate #ifdef DEBUG
38120Sstevel@tonic-gate 		atomic_add_64(&clstat_debug.dirent.value.ui64, 1);
38130Sstevel@tonic-gate #endif
38140Sstevel@tonic-gate 	}
38150Sstevel@tonic-gate 	return (rc);
38160Sstevel@tonic-gate }
38170Sstevel@tonic-gate 
/*
 * Destroy a readdir cache entry whose reference count has reached
 * zero: free the entry buffer (if any), tear down the cv and mutex,
 * and release the structure.  Called only from rddir_cache_rele().
 */
static void
rddir_cache_free(rddir_cache *rc)
{

#ifdef DEBUG
	atomic_add_64(&clstat_debug.dirent.value.ui64, -1);
#endif
	if (rc->entries != NULL) {
#ifdef DEBUG
		/* DEBUG buffers are byte-accounted; use the tracking free. */
		rddir_cache_buf_free(rc->entries, rc->buflen);
#else
		kmem_free(rc->entries, rc->buflen);
#endif
	}
	cv_destroy(&rc->cv);
	mutex_destroy(&rc->lock);
	kmem_free(rc, sizeof (*rc));
}
38360Sstevel@tonic-gate 
/*
 * Take an additional reference on a readdir cache entry.
 */
void
rddir_cache_hold(rddir_cache *rc)
{

	mutex_enter(&rc->lock);
	rc->count++;
	mutex_exit(&rc->lock);
}
38450Sstevel@tonic-gate 
38460Sstevel@tonic-gate void
rddir_cache_rele(rddir_cache * rc)38470Sstevel@tonic-gate rddir_cache_rele(rddir_cache *rc)
38480Sstevel@tonic-gate {
38490Sstevel@tonic-gate 
38500Sstevel@tonic-gate 	mutex_enter(&rc->lock);
38510Sstevel@tonic-gate 	ASSERT(rc->count > 0);
38520Sstevel@tonic-gate 	if (--rc->count == 0) {
38530Sstevel@tonic-gate 		mutex_exit(&rc->lock);
38540Sstevel@tonic-gate 		rddir_cache_free(rc);
38550Sstevel@tonic-gate 	} else
38560Sstevel@tonic-gate 		mutex_exit(&rc->lock);
38570Sstevel@tonic-gate }
38580Sstevel@tonic-gate 
38590Sstevel@tonic-gate #ifdef DEBUG
38600Sstevel@tonic-gate char *
rddir_cache_buf_alloc(size_t size,int flags)38610Sstevel@tonic-gate rddir_cache_buf_alloc(size_t size, int flags)
38620Sstevel@tonic-gate {
38630Sstevel@tonic-gate 	char *rc;
38640Sstevel@tonic-gate 
38650Sstevel@tonic-gate 	rc = kmem_alloc(size, flags);
38660Sstevel@tonic-gate 	if (rc != NULL)
38670Sstevel@tonic-gate 		atomic_add_64(&clstat_debug.dirents.value.ui64, size);
38680Sstevel@tonic-gate 	return (rc);
38690Sstevel@tonic-gate }
38700Sstevel@tonic-gate 
/*
 * DEBUG wrapper around kmem_free() for readdir buffers; keeps the
 * clstat_debug.dirents byte count balanced with
 * rddir_cache_buf_alloc().
 */
void
rddir_cache_buf_free(void *addr, size_t size)
{

	atomic_add_64(&clstat_debug.dirents.value.ui64, -(int64_t)size);
	kmem_free(addr, size);
}
38780Sstevel@tonic-gate #endif
38790Sstevel@tonic-gate 
/*
 * Memory-reclaim helper: release the ancillary data cached on an
 * rnode (held credential, symlink contents, ACL, pathconf info,
 * access cache and readdir cache).  Returns non-zero if anything was
 * freed.
 */
static int
nfs_free_data_reclaim(rnode_t *rp)
{
	char *contents;
	int size;
	vsecattr_t *vsp;
	nfs3_pathconf_info *info;
	int freed;
	cred_t *cred;

	/*
	 * Free any held credentials and caches which
	 * may be associated with this rnode.
	 */
	mutex_enter(&rp->r_statelock);
	cred = rp->r_cred;
	rp->r_cred = NULL;
	contents = rp->r_symlink.contents;
	size = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;
	vsp = rp->r_secattr;
	rp->r_secattr = NULL;
	info = rp->r_pathconf;
	rp->r_pathconf = NULL;
	mutex_exit(&rp->r_statelock);

	if (cred != NULL)
		crfree(cred);

	/*
	 * Free the access cache entries.
	 */
	freed = nfs_access_purge_rp(rp);

	/*
	 * If nothing else is cached, report whether the access-cache
	 * purge freed anything.
	 */
	if (!HAVE_RDDIR_CACHE(rp) &&
	    contents == NULL &&
	    vsp == NULL &&
	    info == NULL)
		return (freed);

	/*
	 * Free the readdir cache entries
	 */
	if (HAVE_RDDIR_CACHE(rp))
		nfs_purge_rddir_cache(RTOV(rp));

	/*
	 * Free the symbolic link cache.
	 */
	if (contents != NULL) {

		kmem_free((void *)contents, size);
	}

	/*
	 * Free any cached ACL.
	 */
	if (vsp != NULL)
		nfs_acl_free(vsp);

	/*
	 * Free any cached pathconf information.
	 */
	if (info != NULL)
		kmem_free(info, sizeof (*info));

	return (1);
}
39480Sstevel@tonic-gate 
39490Sstevel@tonic-gate static int
nfs_active_data_reclaim(rnode_t * rp)39500Sstevel@tonic-gate nfs_active_data_reclaim(rnode_t *rp)
39510Sstevel@tonic-gate {
39520Sstevel@tonic-gate 	char *contents;
39530Sstevel@tonic-gate 	int size;
39540Sstevel@tonic-gate 	vsecattr_t *vsp;
39550Sstevel@tonic-gate 	nfs3_pathconf_info *info;
39560Sstevel@tonic-gate 	int freed;
39570Sstevel@tonic-gate 
39580Sstevel@tonic-gate 	/*
39590Sstevel@tonic-gate 	 * Free any held credentials and caches which
39600Sstevel@tonic-gate 	 * may be associated with this rnode.
39610Sstevel@tonic-gate 	 */
39620Sstevel@tonic-gate 	if (!mutex_tryenter(&rp->r_statelock))
39630Sstevel@tonic-gate 		return (0);
39640Sstevel@tonic-gate 	contents = rp->r_symlink.contents;
39650Sstevel@tonic-gate 	size = rp->r_symlink.size;
39660Sstevel@tonic-gate 	rp->r_symlink.contents = NULL;
39670Sstevel@tonic-gate 	vsp = rp->r_secattr;
39680Sstevel@tonic-gate 	rp->r_secattr = NULL;
39690Sstevel@tonic-gate 	info = rp->r_pathconf;
39700Sstevel@tonic-gate 	rp->r_pathconf = NULL;
39710Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
39720Sstevel@tonic-gate 
39730Sstevel@tonic-gate 	/*
39740Sstevel@tonic-gate 	 * Free the access cache entries.
39750Sstevel@tonic-gate 	 */
39760Sstevel@tonic-gate 	freed = nfs_access_purge_rp(rp);
39770Sstevel@tonic-gate 
39780Sstevel@tonic-gate 	if (!HAVE_RDDIR_CACHE(rp) &&
39790Sstevel@tonic-gate 	    contents == NULL &&
39800Sstevel@tonic-gate 	    vsp == NULL &&
39810Sstevel@tonic-gate 	    info == NULL)
39820Sstevel@tonic-gate 		return (freed);
39830Sstevel@tonic-gate 
39840Sstevel@tonic-gate 	/*
39850Sstevel@tonic-gate 	 * Free the readdir cache entries
39860Sstevel@tonic-gate 	 */
39870Sstevel@tonic-gate 	if (HAVE_RDDIR_CACHE(rp))
39880Sstevel@tonic-gate 		nfs_purge_rddir_cache(RTOV(rp));
39890Sstevel@tonic-gate 
39900Sstevel@tonic-gate 	/*
39910Sstevel@tonic-gate 	 * Free the symbolic link cache.
39920Sstevel@tonic-gate 	 */
39930Sstevel@tonic-gate 	if (contents != NULL) {
39940Sstevel@tonic-gate 
39950Sstevel@tonic-gate 		kmem_free((void *)contents, size);
39960Sstevel@tonic-gate 	}
39970Sstevel@tonic-gate 
39980Sstevel@tonic-gate 	/*
39990Sstevel@tonic-gate 	 * Free any cached ACL.
40000Sstevel@tonic-gate 	 */
40010Sstevel@tonic-gate 	if (vsp != NULL)
40020Sstevel@tonic-gate 		nfs_acl_free(vsp);
40030Sstevel@tonic-gate 
40040Sstevel@tonic-gate 	/*
40050Sstevel@tonic-gate 	 * Free any cached pathconf information.
40060Sstevel@tonic-gate 	 */
40070Sstevel@tonic-gate 	if (info != NULL)
40080Sstevel@tonic-gate 		kmem_free(info, sizeof (*info));
40090Sstevel@tonic-gate 
40100Sstevel@tonic-gate 	return (1);
40110Sstevel@tonic-gate }
40120Sstevel@tonic-gate 
40130Sstevel@tonic-gate static int
nfs_free_reclaim(void)40140Sstevel@tonic-gate nfs_free_reclaim(void)
40150Sstevel@tonic-gate {
40160Sstevel@tonic-gate 	int freed;
40170Sstevel@tonic-gate 	rnode_t *rp;
40180Sstevel@tonic-gate 
40190Sstevel@tonic-gate #ifdef DEBUG
40200Sstevel@tonic-gate 	clstat_debug.f_reclaim.value.ui64++;
40210Sstevel@tonic-gate #endif
40220Sstevel@tonic-gate 	freed = 0;
40230Sstevel@tonic-gate 	mutex_enter(&rpfreelist_lock);
40240Sstevel@tonic-gate 	rp = rpfreelist;
40250Sstevel@tonic-gate 	if (rp != NULL) {
40260Sstevel@tonic-gate 		do {
40270Sstevel@tonic-gate 			if (nfs_free_data_reclaim(rp))
40280Sstevel@tonic-gate 				freed = 1;
40290Sstevel@tonic-gate 		} while ((rp = rp->r_freef) != rpfreelist);
40300Sstevel@tonic-gate 	}
40310Sstevel@tonic-gate 	mutex_exit(&rpfreelist_lock);
40320Sstevel@tonic-gate 	return (freed);
40330Sstevel@tonic-gate }
40340Sstevel@tonic-gate 
40350Sstevel@tonic-gate static int
nfs_active_reclaim(void)40360Sstevel@tonic-gate nfs_active_reclaim(void)
40370Sstevel@tonic-gate {
40380Sstevel@tonic-gate 	int freed;
40390Sstevel@tonic-gate 	int index;
40400Sstevel@tonic-gate 	rnode_t *rp;
40410Sstevel@tonic-gate 
40420Sstevel@tonic-gate #ifdef DEBUG
40430Sstevel@tonic-gate 	clstat_debug.a_reclaim.value.ui64++;
40440Sstevel@tonic-gate #endif
40450Sstevel@tonic-gate 	freed = 0;
40460Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
40470Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_READER);
40480Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
40490Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
40500Sstevel@tonic-gate 		    rp = rp->r_hashf) {
40510Sstevel@tonic-gate 			if (nfs_active_data_reclaim(rp))
40520Sstevel@tonic-gate 				freed = 1;
40530Sstevel@tonic-gate 		}
40540Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
40550Sstevel@tonic-gate 	}
40560Sstevel@tonic-gate 	return (freed);
40570Sstevel@tonic-gate }
40580Sstevel@tonic-gate 
/*
 * Reclaim rnodes from the free list: each rnode whose vnode we are
 * the sole holder of is unhashed and handed to rp_addfree(), which
 * performs the actual (checked) destruction.
 *
 * NOTE(review): `freed' is initialized to 0 and never updated, so
 * this function always returns 0.  The only caller visible here,
 * nfs_reclaim(), discards the return value; confirm there is no
 * other caller before relying on it.
 */
static int
nfs_rnode_reclaim(void)
{
	int freed;
	rnode_t *rp;
	vnode_t *vp;

#ifdef DEBUG
	clstat_debug.r_reclaim.value.ui64++;
#endif
	freed = 0;
	mutex_enter(&rpfreelist_lock);
	while ((rp = rpfreelist) != NULL) {
		/*
		 * Pull the rnode off the free list, then drop the list
		 * lock before taking the hash-bucket and vnode locks.
		 */
		rp_rmfree(rp);
		mutex_exit(&rpfreelist_lock);
		if (rp->r_flags & RHASHED) {
			vp = RTOV(rp);
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * Someone else holds the vnode: just
				 * drop our reference, leave the rnode
				 * hashed, and move on.
				 */
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&rp->r_hashq->r_lock);
				mutex_enter(&rpfreelist_lock);
				continue;
			}
			mutex_exit(&vp->v_lock);
			rp_rmhash_locked(rp);
			rw_exit(&rp->r_hashq->r_lock);
		}
		/*
		 * This call to rp_addfree will end up destroying the
		 * rnode, but in a safe way with the appropriate set
		 * of checks done.
		 */
		rp_addfree(rp, CRED());
		mutex_enter(&rpfreelist_lock);
	}
	mutex_exit(&rpfreelist_lock);
	return (freed);
}
41000Sstevel@tonic-gate 
/*ARGSUSED*/
static void
nfs_reclaim(void *cdrarg)
{
#ifdef DEBUG
	clstat_debug.reclaim.value.ui64++;
#endif
	/*
	 * Kmem reclaim callback.  Run the successively more aggressive
	 * passes, stopping as soon as one frees something: first the
	 * data cached on free-list rnodes, then the data cached on
	 * active rnodes, and finally the free-list rnodes themselves.
	 */
	if (!nfs_free_reclaim() && !nfs_active_reclaim())
		(void) nfs_rnode_reclaim();
}
41170Sstevel@tonic-gate 
41180Sstevel@tonic-gate /*
41190Sstevel@tonic-gate  * NFS client failover support
41200Sstevel@tonic-gate  *
41210Sstevel@tonic-gate  * Routines to copy filehandles
41220Sstevel@tonic-gate  */
41230Sstevel@tonic-gate void
nfscopyfh(caddr_t fhp,vnode_t * vp)41240Sstevel@tonic-gate nfscopyfh(caddr_t fhp, vnode_t *vp)
41250Sstevel@tonic-gate {
41260Sstevel@tonic-gate 	fhandle_t *dest = (fhandle_t *)fhp;
41270Sstevel@tonic-gate 
41280Sstevel@tonic-gate 	if (dest != NULL)
41290Sstevel@tonic-gate 		*dest = *VTOFH(vp);
41300Sstevel@tonic-gate }
41310Sstevel@tonic-gate 
41320Sstevel@tonic-gate void
nfs3copyfh(caddr_t fhp,vnode_t * vp)41330Sstevel@tonic-gate nfs3copyfh(caddr_t fhp, vnode_t *vp)
41340Sstevel@tonic-gate {
41350Sstevel@tonic-gate 	nfs_fh3 *dest = (nfs_fh3 *)fhp;
41360Sstevel@tonic-gate 
41370Sstevel@tonic-gate 	if (dest != NULL)
41380Sstevel@tonic-gate 		*dest = *VTOFH3(vp);
41390Sstevel@tonic-gate }
41400Sstevel@tonic-gate 
41410Sstevel@tonic-gate /*
41420Sstevel@tonic-gate  * NFS client failover support
41430Sstevel@tonic-gate  *
41440Sstevel@tonic-gate  * failover_safe() will test various conditions to ensure that
41450Sstevel@tonic-gate  * failover is permitted for this vnode.  It will be denied
41460Sstevel@tonic-gate  * if:
41470Sstevel@tonic-gate  *	1) the operation in progress does not support failover (NULL fi)
41480Sstevel@tonic-gate  *	2) there are no available replicas (NULL mi_servers->sv_next)
41490Sstevel@tonic-gate  *	3) any locks are outstanding on this file
41500Sstevel@tonic-gate  */
41510Sstevel@tonic-gate static int
failover_safe(failinfo_t * fi)41520Sstevel@tonic-gate failover_safe(failinfo_t *fi)
41530Sstevel@tonic-gate {
41540Sstevel@tonic-gate 
41550Sstevel@tonic-gate 	/*
41560Sstevel@tonic-gate 	 * Does this op permit failover?
41570Sstevel@tonic-gate 	 */
41580Sstevel@tonic-gate 	if (fi == NULL || fi->vp == NULL)
41590Sstevel@tonic-gate 		return (0);
41600Sstevel@tonic-gate 
41610Sstevel@tonic-gate 	/*
41620Sstevel@tonic-gate 	 * Are there any alternates to failover to?
41630Sstevel@tonic-gate 	 */
41640Sstevel@tonic-gate 	if (VTOMI(fi->vp)->mi_servers->sv_next == NULL)
41650Sstevel@tonic-gate 		return (0);
41660Sstevel@tonic-gate 
41670Sstevel@tonic-gate 	/*
41680Sstevel@tonic-gate 	 * Disable check; we've forced local locking
41690Sstevel@tonic-gate 	 *
41700Sstevel@tonic-gate 	 * if (flk_has_remote_locks(fi->vp))
41710Sstevel@tonic-gate 	 *	return (0);
41720Sstevel@tonic-gate 	 */
41730Sstevel@tonic-gate 
41740Sstevel@tonic-gate 	/*
41750Sstevel@tonic-gate 	 * If we have no partial path, we can't do anything
41760Sstevel@tonic-gate 	 */
41770Sstevel@tonic-gate 	if (VTOR(fi->vp)->r_path == NULL)
41780Sstevel@tonic-gate 		return (0);
41790Sstevel@tonic-gate 
41800Sstevel@tonic-gate 	return (1);
41810Sstevel@tonic-gate }
41820Sstevel@tonic-gate 
41830Sstevel@tonic-gate #include <sys/thread.h>
41840Sstevel@tonic-gate 
41850Sstevel@tonic-gate /*
41860Sstevel@tonic-gate  * NFS client failover support
41870Sstevel@tonic-gate  *
41880Sstevel@tonic-gate  * failover_newserver() will start a search for a new server,
41890Sstevel@tonic-gate  * preferably by starting an async thread to do the work.  If
41900Sstevel@tonic-gate  * someone is already doing this (recognizable by MI_BINDINPROG
41910Sstevel@tonic-gate  * being set), it will simply return and the calling thread
41920Sstevel@tonic-gate  * will queue on the mi_failover_cv condition variable.
41930Sstevel@tonic-gate  */
static void
failover_newserver(mntinfo_t *mi)
{
	/*
	 * Check if someone else is doing this already.  MI_BINDINPROG
	 * marks a failover search in progress; failover_thread()
	 * clears it when the search completes.
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_flags & MI_BINDINPROG) {
		mutex_exit(&mi->mi_lock);
		return;
	}
	mi->mi_flags |= MI_BINDINPROG;

	/*
	 * Need to hold the vfs struct so that it can't be released
	 * while the failover thread is selecting a new server.
	 * failover_thread() drops this hold via VFS_RELE() on exit.
	 */
	VFS_HOLD(mi->mi_vfsp);

	/*
	 * Start a thread to do the real searching.  mi_lock is still
	 * held here; failover_thread() re-acquires it before touching
	 * MI_BINDINPROG, so the flag cannot be cleared until we drop
	 * the lock below.
	 */
	(void) zthread_create(NULL, 0, failover_thread, mi, 0, minclsyspri);

	mutex_exit(&mi->mi_lock);
}
42200Sstevel@tonic-gate 
42210Sstevel@tonic-gate /*
42220Sstevel@tonic-gate  * NFS client failover support
42230Sstevel@tonic-gate  *
42240Sstevel@tonic-gate  * failover_thread() will find a new server to replace the one
42250Sstevel@tonic-gate  * currently in use, wake up other threads waiting on this mount
42260Sstevel@tonic-gate  * point, and die.  It will start at the head of the server list
42270Sstevel@tonic-gate  * and poll servers until it finds one with an NFS server which is
42280Sstevel@tonic-gate  * registered and responds to a NULL procedure ping.
42290Sstevel@tonic-gate  *
42300Sstevel@tonic-gate  * XXX failover_thread is unsafe within the scope of the
42310Sstevel@tonic-gate  * present model defined for cpr to suspend the system.
42320Sstevel@tonic-gate  * Specifically, over-the-wire calls made by the thread
42330Sstevel@tonic-gate  * are unsafe. The thread needs to be reevaluated in case of
42340Sstevel@tonic-gate  * future updates to the cpr suspend model.
42350Sstevel@tonic-gate  */
static void
failover_thread(mntinfo_t *mi)
{
	servinfo_t *svp = NULL;
	CLIENT *cl;
	enum clnt_stat status;
	struct timeval tv;
	int error;
	int oncethru = 0;
	callb_cpr_t cprinfo;
	rnode_t *rp;
	int index;
	char *srvnames;
	size_t srvnames_len;
	struct nfs_clnt *nfscl = NULL;
	zoneid_t zoneid = getzoneid();

#ifdef DEBUG
	/*
	 * This is currently only needed to access counters which exist on
	 * DEBUG kernels, hence we don't want to pay the penalty of the lookup
	 * on non-DEBUG kernels.
	 */
	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
	ASSERT(nfscl != NULL);
#endif

	/*
	 * Its safe to piggyback on the mi_lock since failover_newserver()
	 * code guarantees that there will be only one failover thread
	 * per mountinfo at any instance.
	 */
	CALLB_CPR_INIT(&cprinfo, &mi->mi_lock, callb_generic_cpr,
	    "failover_thread");

	/*
	 * Wait until mi_readers drops to zero before probing servers.
	 * NOTE(review): mi_readers appears to count threads still using
	 * the current binding — confirm against the RPC request path.
	 */
	mutex_enter(&mi->mi_lock);
	while (mi->mi_readers) {
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&mi->mi_failover_cv, &mi->mi_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock);
	}
	mutex_exit(&mi->mi_lock);

	/* Two-second timeout for each NULL-procedure ping below. */
	tv.tv_sec = 2;
	tv.tv_usec = 0;

	/*
	 * Ping the null NFS procedure of every server in
	 * the list until one responds.  We always start
	 * at the head of the list and always skip the one
	 * that is current, since it's caused us a problem.
	 */
	while (svp == NULL) {
		for (svp = mi->mi_servers; svp; svp = svp->sv_next) {
			if (!oncethru && svp == mi->mi_curr_serv)
				continue;

			/*
			 * If the file system was forcibly umounted
			 * while trying to do a failover, then just
			 * give up on the failover.  It won't matter
			 * what the server is.
			 */
			if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
				svp = NULL;
				goto done;
			}

			error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr,
			    NFS_PROGRAM, NFS_VERSION, 0, 1, CRED(), &cl);
			if (error)
				continue;

			/*
			 * For "nointr" mounts, keep signals from
			 * interrupting the ping.
			 */
			if (!(mi->mi_flags & MI_INT))
				cl->cl_nosignal = TRUE;
			status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL,
			    xdr_void, NULL, tv);
			if (!(mi->mi_flags & MI_INT))
				cl->cl_nosignal = FALSE;
			AUTH_DESTROY(cl->cl_auth);
			CLNT_DESTROY(cl);
			if (status == RPC_SUCCESS) {
				if (svp == mi->mi_curr_serv) {
#ifdef DEBUG
					zcmn_err(zoneid, CE_NOTE,
			"NFS%d: failing over: selecting original server %s",
					    mi->mi_vers, svp->sv_hostname);
#else
					zcmn_err(zoneid, CE_NOTE,
			"NFS: failing over: selecting original server %s",
					    svp->sv_hostname);
#endif
				} else {
#ifdef DEBUG
					zcmn_err(zoneid, CE_NOTE,
				    "NFS%d: failing over from %s to %s",
					    mi->mi_vers,
					    mi->mi_curr_serv->sv_hostname,
					    svp->sv_hostname);
#else
					zcmn_err(zoneid, CE_NOTE,
				    "NFS: failing over from %s to %s",
					    mi->mi_curr_serv->sv_hostname,
					    svp->sv_hostname);
#endif
				}
				break;
			}
		}

		if (svp == NULL) {
			/*
			 * No server answered this pass.  Print the
			 * "not responding" message once, then sleep a
			 * second (CPR-safe) and retry.
			 */
			if (!oncethru) {
				srvnames = nfs_getsrvnames(mi, &srvnames_len);
#ifdef DEBUG
				zprintf(zoneid,
				    "NFS%d servers %s not responding "
				    "still trying\n", mi->mi_vers, srvnames);
#else
				zprintf(zoneid, "NFS servers %s not responding "
				    "still trying\n", srvnames);
#endif
				oncethru = 1;
			}
			mutex_enter(&mi->mi_lock);
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			mutex_exit(&mi->mi_lock);
			delay(hz);
			mutex_enter(&mi->mi_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock);
			mutex_exit(&mi->mi_lock);
		}
	}

	if (oncethru) {
#ifdef DEBUG
		zprintf(zoneid, "NFS%d servers %s ok\n", mi->mi_vers, srvnames);
#else
		zprintf(zoneid, "NFS servers %s ok\n", srvnames);
#endif
	}

	/*
	 * We bound to a different server: purge the DNLC for this vfs
	 * and rehash the rnode for the old server's root filehandle
	 * (if any) under the new server's filehandle.
	 */
	if (svp != mi->mi_curr_serv) {
		(void) dnlc_purge_vfsp(mi->mi_vfsp, 0);
		index = rtablehash(&mi->mi_curr_serv->sv_fhandle);
		rw_enter(&rtable[index].r_lock, RW_WRITER);
		rp = rfind(&rtable[index], &mi->mi_curr_serv->sv_fhandle,
		    mi->mi_vfsp);
		if (rp != NULL) {
			if (rp->r_flags & RHASHED)
				rp_rmhash_locked(rp);
			rw_exit(&rtable[index].r_lock);
			rp->r_server = svp;
			rp->r_fh = svp->sv_fhandle;
			(void) nfs_free_data_reclaim(rp);
			index = rtablehash(&rp->r_fh);
			rp->r_hashq = &rtable[index];
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			vn_exists(RTOV(rp));
			rp_addhash(rp);
			rw_exit(&rp->r_hashq->r_lock);
			VN_RELE(RTOV(rp));
		} else
			rw_exit(&rtable[index].r_lock);
	}

done:
	/*
	 * srvnames was allocated only if the "not responding" message
	 * was printed (oncethru != 0).
	 */
	if (oncethru)
		kmem_free(srvnames, srvnames_len);
	mutex_enter(&mi->mi_lock);
	mi->mi_flags &= ~MI_BINDINPROG;
	if (svp != NULL) {
		mi->mi_curr_serv = svp;
		mi->mi_failover++;
#ifdef DEBUG
	nfscl->nfscl_stat.failover.value.ui64++;
#endif
	}
	cv_broadcast(&mi->mi_failover_cv);
	CALLB_CPR_EXIT(&cprinfo);
	VFS_RELE(mi->mi_vfsp);
	zthread_exit();
	/* NOTREACHED */
}
44190Sstevel@tonic-gate 
44200Sstevel@tonic-gate /*
44210Sstevel@tonic-gate  * NFS client failover support
44220Sstevel@tonic-gate  *
44230Sstevel@tonic-gate  * failover_wait() will put the thread to sleep until MI_BINDINPROG
44240Sstevel@tonic-gate  * is cleared, meaning that failover is complete.  Called with
44250Sstevel@tonic-gate  * mi_lock mutex held.
44260Sstevel@tonic-gate  */
44270Sstevel@tonic-gate static int
failover_wait(mntinfo_t * mi)44280Sstevel@tonic-gate failover_wait(mntinfo_t *mi)
44290Sstevel@tonic-gate {
44300Sstevel@tonic-gate 	k_sigset_t smask;
44310Sstevel@tonic-gate 
44320Sstevel@tonic-gate 	/*
44330Sstevel@tonic-gate 	 * If someone else is hunting for a living server,
44340Sstevel@tonic-gate 	 * sleep until it's done.  After our sleep, we may
44350Sstevel@tonic-gate 	 * be bound to the right server and get off cheaply.
44360Sstevel@tonic-gate 	 */
44370Sstevel@tonic-gate 	while (mi->mi_flags & MI_BINDINPROG) {
44380Sstevel@tonic-gate 		/*
44390Sstevel@tonic-gate 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
44400Sstevel@tonic-gate 		 * and SIGTERM. (Preserving the existing masks).
44410Sstevel@tonic-gate 		 * Mask out SIGINT if mount option nointr is specified.
44420Sstevel@tonic-gate 		 */
44430Sstevel@tonic-gate 		sigintr(&smask, (int)mi->mi_flags & MI_INT);
44440Sstevel@tonic-gate 		if (!cv_wait_sig(&mi->mi_failover_cv, &mi->mi_lock)) {
44450Sstevel@tonic-gate 			/*
44460Sstevel@tonic-gate 			 * restore original signal mask
44470Sstevel@tonic-gate 			 */
44480Sstevel@tonic-gate 			sigunintr(&smask);
44490Sstevel@tonic-gate 			return (EINTR);
44500Sstevel@tonic-gate 		}
44510Sstevel@tonic-gate 		/*
44520Sstevel@tonic-gate 		 * restore original signal mask
44530Sstevel@tonic-gate 		 */
44540Sstevel@tonic-gate 		sigunintr(&smask);
44550Sstevel@tonic-gate 	}
44560Sstevel@tonic-gate 	return (0);
44570Sstevel@tonic-gate }
44580Sstevel@tonic-gate 
44590Sstevel@tonic-gate /*
44600Sstevel@tonic-gate  * NFS client failover support
44610Sstevel@tonic-gate  *
44620Sstevel@tonic-gate  * failover_remap() will do a partial pathname lookup and find the
44630Sstevel@tonic-gate  * desired vnode on the current server.  The interim vnode will be
44640Sstevel@tonic-gate  * discarded after we pilfer the new filehandle.
44650Sstevel@tonic-gate  *
44660Sstevel@tonic-gate  * Side effects:
44670Sstevel@tonic-gate  * - This routine will also update the filehandle in the args structure
44680Sstevel@tonic-gate  *    pointed to by the fi->fhp pointer if it is non-NULL.
44690Sstevel@tonic-gate  */
44700Sstevel@tonic-gate 
static int
failover_remap(failinfo_t *fi)
{
	vnode_t *vp, *nvp, *rootvp;
	rnode_t *rp, *nrp;
	mntinfo_t *mi;
	int error;
#ifdef DEBUG
	struct nfs_clnt *nfscl;

	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
	ASSERT(nfscl != NULL);
#endif
	/*
	 * Sanity check
	 */
	if (fi == NULL || fi->vp == NULL || fi->lookupproc == NULL)
		return (EINVAL);
	vp = fi->vp;
	rp = VTOR(vp);
	mi = VTOMI(vp);

	/*
	 * For the root vnode the path-based lookup below is skipped
	 * entirely; only the remap counters are bumped and the
	 * caller's copied filehandle (fi->fhp) is refreshed at the
	 * bottom.
	 */
	if (!(vp->v_flag & VROOT)) {
		/*
		 * Given the root fh, use the path stored in
		 * the rnode to find the fh for the new server.
		 */
		error = VFS_ROOT(mi->mi_vfsp, &rootvp);
		if (error)
			return (error);

		error = failover_lookup(rp->r_path, rootvp,
		    fi->lookupproc, fi->xattrdirproc, &nvp);

		VN_RELE(rootvp);

		if (error)
			return (error);

		/*
		 * If we found the same rnode, we're done now
		 */
		if (nvp == vp) {
			/*
			 * Failed and the new server may physically be same
			 * OR may share a same disk subsystem. In this case
			 * file handle for a particular file path is not going
			 * to change, given the same filehandle lookup will
			 * always locate the same rnode as the existing one.
			 * All we might need to do is to update the r_server
			 * with the current servinfo.
			 */
			if (!VALID_FH(fi)) {
				rp->r_server = mi->mi_curr_serv;
			}
			VN_RELE(nvp);
			return (0);
		}

		/*
		 * Try to make it so that no one else will find this
		 * vnode because it is just a temporary to hold the
		 * new file handle until that file handle can be
		 * copied to the original vnode/rnode.
		 */
		nrp = VTOR(nvp);
		mutex_enter(&mi->mi_remap_lock);
		/*
		 * Some other thread could have raced in here and could
		 * have done the remap for this particular rnode before
		 * this thread here. Check for rp->r_server and
		 * mi->mi_curr_serv and return if they are same.
		 */
		if (VALID_FH(fi)) {
			mutex_exit(&mi->mi_remap_lock);
			VN_RELE(nvp);
			return (0);
		}

		if (nrp->r_flags & RHASHED)
			rp_rmhash(nrp);

		/*
		 * As a heuristic check on the validity of the new
		 * file, check that the size and type match against
		 * that we remember from the old version.
		 */
		if (rp->r_size != nrp->r_size || vp->v_type != nvp->v_type) {
			mutex_exit(&mi->mi_remap_lock);
			zcmn_err(mi->mi_zone->zone_id, CE_WARN,
			    "NFS replicas %s and %s: file %s not same.",
			    rp->r_server->sv_hostname,
			    nrp->r_server->sv_hostname, rp->r_path);
			VN_RELE(nvp);
			return (EINVAL);
		}

		/*
		 * snarf the filehandle from the new rnode
		 * then release it, again while updating the
		 * hash queues for the rnode.  All of this is done
		 * under mi_remap_lock so concurrent remaps of the
		 * same rnode serialize on the VALID_FH check above.
		 */
		if (rp->r_flags & RHASHED)
			rp_rmhash(rp);
		rp->r_server = mi->mi_curr_serv;
		rp->r_fh = nrp->r_fh;
		rp->r_hashq = nrp->r_hashq;
		/*
		 * Copy the attributes from the new rnode to the old
		 * rnode.  This will help to reduce unnecessary page
		 * cache flushes.
		 */
		rp->r_attr = nrp->r_attr;
		rp->r_attrtime = nrp->r_attrtime;
		rp->r_mtime = nrp->r_mtime;
		(void) nfs_free_data_reclaim(rp);
		nfs_setswaplike(vp, &rp->r_attr);
		rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
		rp_addhash(rp);
		rw_exit(&rp->r_hashq->r_lock);
		mutex_exit(&mi->mi_remap_lock);
		VN_RELE(nvp);
	}

	/*
	 * Update successful failover remap count
	 */
	mutex_enter(&mi->mi_lock);
	mi->mi_remap++;
	mutex_exit(&mi->mi_lock);
#ifdef DEBUG
	nfscl->nfscl_stat.remap.value.ui64++;
#endif

	/*
	 * If we have a copied filehandle to update, do it now.
	 */
	if (fi->fhp != NULL && fi->copyproc != NULL)
		(*fi->copyproc)(fi->fhp, vp);

	return (0);
}
46130Sstevel@tonic-gate 
46140Sstevel@tonic-gate /*
46150Sstevel@tonic-gate  * NFS client failover support
46160Sstevel@tonic-gate  *
46170Sstevel@tonic-gate  * We want a simple pathname lookup routine to parse the pieces
 * of path in rp->r_path.  We know that the path was created
46190Sstevel@tonic-gate  * as rnodes were made, so we know we have only to deal with
46200Sstevel@tonic-gate  * paths that look like:
46210Sstevel@tonic-gate  *	dir1/dir2/dir3/file
46220Sstevel@tonic-gate  * Any evidence of anything like .., symlinks, and ENOTDIR
46230Sstevel@tonic-gate  * are hard errors, because they mean something in this filesystem
46240Sstevel@tonic-gate  * is different from the one we came from, or has changed under
46250Sstevel@tonic-gate  * us in some way.  If this is true, we want the failure.
46260Sstevel@tonic-gate  *
46270Sstevel@tonic-gate  * Extended attributes: if the filesystem is mounted with extended
46280Sstevel@tonic-gate  * attributes enabled (-o xattr), the attribute directory will be
46290Sstevel@tonic-gate  * represented in the r_path as the magic name XATTR_RPATH. So if
 * we see that name in the pathname, it must be because this node
46310Sstevel@tonic-gate  * is an extended attribute.  Therefore, look it up that way.
46320Sstevel@tonic-gate  */
/*
 * Walk 'path' component by component starting at 'root', using
 * 'lookupproc' for ordinary components and 'xattrdirproc' when the
 * component is the magic extended-attribute name XATTR_RPATH.
 *
 * path         - slash-separated relative path built from rnode r_path
 * root         - held vnode of the filesystem root to start from
 * lookupproc   - per-protocol lookup routine
 * xattrdirproc - per-protocol xattr-directory lookup routine
 * new          - where to return the held vnode of the final component
 *
 * Returns 0 on success, or the errno from a failing lookup; on error
 * every vnode hold taken here has already been released.
 *
 * NOTE(review): on success the final vnode is returned held via *new;
 * if 'new' were NULL that last hold would not be released here, so
 * callers are presumably required to pass a non-NULL 'new' -- confirm.
 */
static int
failover_lookup(char *path, vnode_t *root,
    int (*lookupproc)(vnode_t *, char *, vnode_t **, struct pathname *, int,
	vnode_t *, cred_t *, int),
    int (*xattrdirproc)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
    vnode_t **new)
{
	vnode_t *dvp, *nvp;
	int error = EINVAL;
	char *s, *p, *tmppath;
	size_t len;
	mntinfo_t *mi;
	bool_t xattr;

	/* Make local copy of path; we modify it in place while parsing */
	len = strlen(path) + 1;
	tmppath = kmem_alloc(len, KM_SLEEP);
	(void) strcpy(tmppath, path);
	s = tmppath;

	dvp = root;
	VN_HOLD(dvp);
	mi = VTOMI(root);
	xattr = mi->mi_flags & MI_EXTATTR;

	do {
		/* Temporarily terminate the current component at the '/' */
		p = strchr(s, '/');
		if (p != NULL)
			*p = '\0';
		if (xattr && strcmp(s, XATTR_RPATH) == 0) {
			error = (*xattrdirproc)(dvp, &nvp, FALSE, CRED(),
			    RFSCALL_SOFT);
		} else {
			error = (*lookupproc)(dvp, s, &nvp, NULL, 0, NULL,
			    CRED(), RFSCALL_SOFT);
		}
		/* Restore the '/' and advance past it */
		if (p != NULL)
			*p++ = '/';
		if (error) {
			VN_RELE(dvp);
			kmem_free(tmppath, len);
			return (error);
		}
		s = p;
		/* Drop the parent's hold; carry the child's into next pass */
		VN_RELE(dvp);
		dvp = nvp;
	} while (p != NULL);

	if (nvp != NULL && new != NULL)
		*new = nvp;
	kmem_free(tmppath, len);
	return (0);
}
46860Sstevel@tonic-gate 
46870Sstevel@tonic-gate /*
46880Sstevel@tonic-gate  * NFS client failover support
46890Sstevel@tonic-gate  *
46900Sstevel@tonic-gate  * sv_free() frees the malloc'd portion of a "servinfo_t".
46910Sstevel@tonic-gate  */
46920Sstevel@tonic-gate void
sv_free(servinfo_t * svp)46930Sstevel@tonic-gate sv_free(servinfo_t *svp)
46940Sstevel@tonic-gate {
46950Sstevel@tonic-gate 	servinfo_t *next;
46960Sstevel@tonic-gate 	struct knetconfig *knconf;
46970Sstevel@tonic-gate 
46980Sstevel@tonic-gate 	while (svp != NULL) {
46990Sstevel@tonic-gate 		next = svp->sv_next;
47000Sstevel@tonic-gate 		if (svp->sv_secdata)
47010Sstevel@tonic-gate 			sec_clnt_freeinfo(svp->sv_secdata);
47020Sstevel@tonic-gate 		if (svp->sv_hostname && svp->sv_hostnamelen > 0)
47030Sstevel@tonic-gate 			kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
47040Sstevel@tonic-gate 		knconf = svp->sv_knconf;
47050Sstevel@tonic-gate 		if (knconf != NULL) {
47060Sstevel@tonic-gate 			if (knconf->knc_protofmly != NULL)
47070Sstevel@tonic-gate 				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
47080Sstevel@tonic-gate 			if (knconf->knc_proto != NULL)
47090Sstevel@tonic-gate 				kmem_free(knconf->knc_proto, KNC_STRSIZE);
47100Sstevel@tonic-gate 			kmem_free(knconf, sizeof (*knconf));
47110Sstevel@tonic-gate 		}
47120Sstevel@tonic-gate 		knconf = svp->sv_origknconf;
47130Sstevel@tonic-gate 		if (knconf != NULL) {
47140Sstevel@tonic-gate 			if (knconf->knc_protofmly != NULL)
47150Sstevel@tonic-gate 				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
47160Sstevel@tonic-gate 			if (knconf->knc_proto != NULL)
47170Sstevel@tonic-gate 				kmem_free(knconf->knc_proto, KNC_STRSIZE);
47180Sstevel@tonic-gate 			kmem_free(knconf, sizeof (*knconf));
47190Sstevel@tonic-gate 		}
47200Sstevel@tonic-gate 		if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
47210Sstevel@tonic-gate 			kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
47220Sstevel@tonic-gate 		mutex_destroy(&svp->sv_lock);
47230Sstevel@tonic-gate 		kmem_free(svp, sizeof (*svp));
47240Sstevel@tonic-gate 		svp = next;
47250Sstevel@tonic-gate 	}
47260Sstevel@tonic-gate }
47270Sstevel@tonic-gate 
47280Sstevel@tonic-gate /*
47290Sstevel@tonic-gate  * Only can return non-zero if intr != 0.
47300Sstevel@tonic-gate  */
47310Sstevel@tonic-gate int
nfs_rw_enter_sig(nfs_rwlock_t * l,krw_t rw,int intr)47320Sstevel@tonic-gate nfs_rw_enter_sig(nfs_rwlock_t *l, krw_t rw, int intr)
47330Sstevel@tonic-gate {
47340Sstevel@tonic-gate 
47350Sstevel@tonic-gate 	mutex_enter(&l->lock);
47360Sstevel@tonic-gate 
47370Sstevel@tonic-gate 	/*
47380Sstevel@tonic-gate 	 * If this is a nested enter, then allow it.  There
47390Sstevel@tonic-gate 	 * must be as many exits as enters through.
47400Sstevel@tonic-gate 	 */
47410Sstevel@tonic-gate 	if (l->owner == curthread) {
47420Sstevel@tonic-gate 		/* lock is held for writing by current thread */
47430Sstevel@tonic-gate 		ASSERT(rw == RW_READER || rw == RW_WRITER);
47440Sstevel@tonic-gate 		l->count--;
47450Sstevel@tonic-gate 	} else if (rw == RW_READER) {
47460Sstevel@tonic-gate 		/*
47470Sstevel@tonic-gate 		 * While there is a writer active or writers waiting,
47480Sstevel@tonic-gate 		 * then wait for them to finish up and move on.  Then,
47490Sstevel@tonic-gate 		 * increment the count to indicate that a reader is
47500Sstevel@tonic-gate 		 * active.
47510Sstevel@tonic-gate 		 */
47520Sstevel@tonic-gate 		while (l->count < 0 || l->waiters > 0) {
47530Sstevel@tonic-gate 			if (intr) {
47540Sstevel@tonic-gate 				klwp_t *lwp = ttolwp(curthread);
47550Sstevel@tonic-gate 
47560Sstevel@tonic-gate 				if (lwp != NULL)
47570Sstevel@tonic-gate 					lwp->lwp_nostop++;
47580Sstevel@tonic-gate 				if (!cv_wait_sig(&l->cv, &l->lock)) {
47590Sstevel@tonic-gate 					if (lwp != NULL)
47600Sstevel@tonic-gate 						lwp->lwp_nostop--;
47610Sstevel@tonic-gate 					mutex_exit(&l->lock);
47620Sstevel@tonic-gate 					return (EINTR);
47630Sstevel@tonic-gate 				}
47640Sstevel@tonic-gate 				if (lwp != NULL)
47650Sstevel@tonic-gate 					lwp->lwp_nostop--;
47660Sstevel@tonic-gate 			} else
47670Sstevel@tonic-gate 				cv_wait(&l->cv, &l->lock);
47680Sstevel@tonic-gate 		}
47690Sstevel@tonic-gate 		ASSERT(l->count < INT_MAX);
47700Sstevel@tonic-gate #ifdef	DEBUG
47710Sstevel@tonic-gate 		if ((l->count % 10000) == 9999)
47720Sstevel@tonic-gate 			cmn_err(CE_WARN, "nfs_rw_enter_sig: count %d on"
47734300Smarks 			    "rwlock @ %p\n", l->count, (void *)&l);
47740Sstevel@tonic-gate #endif
47750Sstevel@tonic-gate 		l->count++;
47760Sstevel@tonic-gate 	} else {
47770Sstevel@tonic-gate 		ASSERT(rw == RW_WRITER);
47780Sstevel@tonic-gate 		/*
47790Sstevel@tonic-gate 		 * While there are readers active or a writer
47800Sstevel@tonic-gate 		 * active, then wait for all of the readers
47810Sstevel@tonic-gate 		 * to finish or for the writer to finish.
47820Sstevel@tonic-gate 		 * Then, set the owner field to curthread and
47830Sstevel@tonic-gate 		 * decrement count to indicate that a writer
47840Sstevel@tonic-gate 		 * is active.
47850Sstevel@tonic-gate 		 */
47860Sstevel@tonic-gate 		while (l->count > 0 || l->owner != NULL) {
47870Sstevel@tonic-gate 			l->waiters++;
47880Sstevel@tonic-gate 			if (intr) {
47890Sstevel@tonic-gate 				klwp_t *lwp = ttolwp(curthread);
47900Sstevel@tonic-gate 
47910Sstevel@tonic-gate 				if (lwp != NULL)
47920Sstevel@tonic-gate 					lwp->lwp_nostop++;
47930Sstevel@tonic-gate 				if (!cv_wait_sig(&l->cv, &l->lock)) {
47940Sstevel@tonic-gate 					if (lwp != NULL)
47950Sstevel@tonic-gate 						lwp->lwp_nostop--;
47960Sstevel@tonic-gate 					l->waiters--;
47970Sstevel@tonic-gate 					cv_broadcast(&l->cv);
47980Sstevel@tonic-gate 					mutex_exit(&l->lock);
47990Sstevel@tonic-gate 					return (EINTR);
48000Sstevel@tonic-gate 				}
48010Sstevel@tonic-gate 				if (lwp != NULL)
48020Sstevel@tonic-gate 					lwp->lwp_nostop--;
48030Sstevel@tonic-gate 			} else
48040Sstevel@tonic-gate 				cv_wait(&l->cv, &l->lock);
48050Sstevel@tonic-gate 			l->waiters--;
48060Sstevel@tonic-gate 		}
48070Sstevel@tonic-gate 		l->owner = curthread;
48080Sstevel@tonic-gate 		l->count--;
48090Sstevel@tonic-gate 	}
48100Sstevel@tonic-gate 
48110Sstevel@tonic-gate 	mutex_exit(&l->lock);
48120Sstevel@tonic-gate 
48130Sstevel@tonic-gate 	return (0);
48140Sstevel@tonic-gate }
48150Sstevel@tonic-gate 
48160Sstevel@tonic-gate /*
48170Sstevel@tonic-gate  * If the lock is available, obtain it and return non-zero.  If there is
48180Sstevel@tonic-gate  * already a conflicting lock, return 0 immediately.
48190Sstevel@tonic-gate  */
48200Sstevel@tonic-gate 
48210Sstevel@tonic-gate int
nfs_rw_tryenter(nfs_rwlock_t * l,krw_t rw)48220Sstevel@tonic-gate nfs_rw_tryenter(nfs_rwlock_t *l, krw_t rw)
48230Sstevel@tonic-gate {
48240Sstevel@tonic-gate 	mutex_enter(&l->lock);
48250Sstevel@tonic-gate 
48260Sstevel@tonic-gate 	/*
48270Sstevel@tonic-gate 	 * If this is a nested enter, then allow it.  There
48280Sstevel@tonic-gate 	 * must be as many exits as enters through.
48290Sstevel@tonic-gate 	 */
48300Sstevel@tonic-gate 	if (l->owner == curthread) {
48310Sstevel@tonic-gate 		/* lock is held for writing by current thread */
48320Sstevel@tonic-gate 		ASSERT(rw == RW_READER || rw == RW_WRITER);
48330Sstevel@tonic-gate 		l->count--;
48340Sstevel@tonic-gate 	} else if (rw == RW_READER) {
48350Sstevel@tonic-gate 		/*
48360Sstevel@tonic-gate 		 * If there is a writer active or writers waiting, deny the
48370Sstevel@tonic-gate 		 * lock.  Otherwise, bump the count of readers.
48380Sstevel@tonic-gate 		 */
48390Sstevel@tonic-gate 		if (l->count < 0 || l->waiters > 0) {
48400Sstevel@tonic-gate 			mutex_exit(&l->lock);
48410Sstevel@tonic-gate 			return (0);
48420Sstevel@tonic-gate 		}
48430Sstevel@tonic-gate 		l->count++;
48440Sstevel@tonic-gate 	} else {
48450Sstevel@tonic-gate 		ASSERT(rw == RW_WRITER);
48460Sstevel@tonic-gate 		/*
48470Sstevel@tonic-gate 		 * If there are readers active or a writer active, deny the
48480Sstevel@tonic-gate 		 * lock.  Otherwise, set the owner field to curthread and
48490Sstevel@tonic-gate 		 * decrement count to indicate that a writer is active.
48500Sstevel@tonic-gate 		 */
48510Sstevel@tonic-gate 		if (l->count > 0 || l->owner != NULL) {
48520Sstevel@tonic-gate 			mutex_exit(&l->lock);
48530Sstevel@tonic-gate 			return (0);
48540Sstevel@tonic-gate 		}
48550Sstevel@tonic-gate 		l->owner = curthread;
48560Sstevel@tonic-gate 		l->count--;
48570Sstevel@tonic-gate 	}
48580Sstevel@tonic-gate 
48590Sstevel@tonic-gate 	mutex_exit(&l->lock);
48600Sstevel@tonic-gate 
48610Sstevel@tonic-gate 	return (1);
48620Sstevel@tonic-gate }
48630Sstevel@tonic-gate 
/*
 * Release one hold on an nfs_rwlock_t, waking waiters when the lock
 * becomes free.
 */
void
nfs_rw_exit(nfs_rwlock_t *l)
{

	mutex_enter(&l->lock);
	/*
	 * If this is releasing a writer lock, then increment count to
	 * indicate that there is one less writer active.  If this was
	 * the last of possibly nested writer locks, then clear the owner
	 * field as well to indicate that there is no writer active
	 * and wakeup any possible waiting writers or readers.
	 *
	 * If releasing a reader lock, then just decrement count to
	 * indicate that there is one less reader active.  If this was
	 * the last active reader and there are writer(s) waiting,
	 * then wake up the first.
	 */
	if (l->owner != NULL) {
		/* write-held: count is negative, one per nested enter */
		ASSERT(l->owner == curthread);
		l->count++;
		if (l->count == 0) {
			l->owner = NULL;
			cv_broadcast(&l->cv);
		}
	} else {
		/* read-held: count is the number of active readers */
		ASSERT(l->count > 0);
		l->count--;
		if (l->count == 0 && l->waiters > 0)
			cv_broadcast(&l->cv);
	}
	mutex_exit(&l->lock);
}
48960Sstevel@tonic-gate 
48970Sstevel@tonic-gate int
nfs_rw_lock_held(nfs_rwlock_t * l,krw_t rw)48980Sstevel@tonic-gate nfs_rw_lock_held(nfs_rwlock_t *l, krw_t rw)
48990Sstevel@tonic-gate {
49000Sstevel@tonic-gate 
49010Sstevel@tonic-gate 	if (rw == RW_READER)
49020Sstevel@tonic-gate 		return (l->count > 0);
49030Sstevel@tonic-gate 	ASSERT(rw == RW_WRITER);
49040Sstevel@tonic-gate 	return (l->count < 0);
49050Sstevel@tonic-gate }
49060Sstevel@tonic-gate 
49070Sstevel@tonic-gate /* ARGSUSED */
49080Sstevel@tonic-gate void
nfs_rw_init(nfs_rwlock_t * l,char * name,krw_type_t type,void * arg)49090Sstevel@tonic-gate nfs_rw_init(nfs_rwlock_t *l, char *name, krw_type_t type, void *arg)
49100Sstevel@tonic-gate {
49110Sstevel@tonic-gate 
49120Sstevel@tonic-gate 	l->count = 0;
49130Sstevel@tonic-gate 	l->waiters = 0;
49140Sstevel@tonic-gate 	l->owner = NULL;
49150Sstevel@tonic-gate 	mutex_init(&l->lock, NULL, MUTEX_DEFAULT, NULL);
49160Sstevel@tonic-gate 	cv_init(&l->cv, NULL, CV_DEFAULT, NULL);
49170Sstevel@tonic-gate }
49180Sstevel@tonic-gate 
49190Sstevel@tonic-gate void
nfs_rw_destroy(nfs_rwlock_t * l)49200Sstevel@tonic-gate nfs_rw_destroy(nfs_rwlock_t *l)
49210Sstevel@tonic-gate {
49220Sstevel@tonic-gate 
49230Sstevel@tonic-gate 	mutex_destroy(&l->lock);
49240Sstevel@tonic-gate 	cv_destroy(&l->cv);
49250Sstevel@tonic-gate }
49260Sstevel@tonic-gate 
49270Sstevel@tonic-gate int
nfs3_rddir_compar(const void * x,const void * y)49280Sstevel@tonic-gate nfs3_rddir_compar(const void *x, const void *y)
49290Sstevel@tonic-gate {
49300Sstevel@tonic-gate 	rddir_cache *a = (rddir_cache *)x;
49310Sstevel@tonic-gate 	rddir_cache *b = (rddir_cache *)y;
49320Sstevel@tonic-gate 
49330Sstevel@tonic-gate 	if (a->nfs3_cookie == b->nfs3_cookie) {
49340Sstevel@tonic-gate 		if (a->buflen == b->buflen)
49350Sstevel@tonic-gate 			return (0);
49360Sstevel@tonic-gate 		if (a->buflen < b->buflen)
49370Sstevel@tonic-gate 			return (-1);
49380Sstevel@tonic-gate 		return (1);
49390Sstevel@tonic-gate 	}
49400Sstevel@tonic-gate 
49410Sstevel@tonic-gate 	if (a->nfs3_cookie < b->nfs3_cookie)
49420Sstevel@tonic-gate 		return (-1);
49430Sstevel@tonic-gate 
49440Sstevel@tonic-gate 	return (1);
49450Sstevel@tonic-gate }
49460Sstevel@tonic-gate 
49470Sstevel@tonic-gate int
nfs_rddir_compar(const void * x,const void * y)49480Sstevel@tonic-gate nfs_rddir_compar(const void *x, const void *y)
49490Sstevel@tonic-gate {
49500Sstevel@tonic-gate 	rddir_cache *a = (rddir_cache *)x;
49510Sstevel@tonic-gate 	rddir_cache *b = (rddir_cache *)y;
49520Sstevel@tonic-gate 
49530Sstevel@tonic-gate 	if (a->nfs_cookie == b->nfs_cookie) {
49540Sstevel@tonic-gate 		if (a->buflen == b->buflen)
49550Sstevel@tonic-gate 			return (0);
49560Sstevel@tonic-gate 		if (a->buflen < b->buflen)
49570Sstevel@tonic-gate 			return (-1);
49580Sstevel@tonic-gate 		return (1);
49590Sstevel@tonic-gate 	}
49600Sstevel@tonic-gate 
49610Sstevel@tonic-gate 	if (a->nfs_cookie < b->nfs_cookie)
49620Sstevel@tonic-gate 		return (-1);
49630Sstevel@tonic-gate 
49640Sstevel@tonic-gate 	return (1);
49650Sstevel@tonic-gate }
49660Sstevel@tonic-gate 
/*
 * Build a single comma-separated string of all server hostnames in
 * mi->mi_servers.  The buffer is kmem_alloc'd; its size is returned
 * through 'len' and the caller must kmem_free(buf, *len) it.
 *
 * sv_hostnamelen includes the terminating NUL of each name, so the
 * summed lengths leave exactly one byte per name for either the ','
 * separator or the final '\0'.
 *
 * NOTE(review): assumes mi->mi_servers is non-empty; with an empty
 * list 'length' would be 0 and the final "*--namep" would write
 * before the start of the buffer -- confirm callers guarantee this.
 */
static char *
nfs_getsrvnames(mntinfo_t *mi, size_t *len)
{
	servinfo_t *s;
	char *srvnames;
	char *namep;
	size_t length;

	/*
	 * Calculate the length of the string required to hold all
	 * of the server names plus either a comma or a null
	 * character following each individual one.
	 */
	length = 0;
	for (s = mi->mi_servers; s != NULL; s = s->sv_next)
		length += s->sv_hostnamelen;

	srvnames = kmem_alloc(length, KM_SLEEP);

	namep = srvnames;
	for (s = mi->mi_servers; s != NULL; s = s->sv_next) {
		(void) strcpy(namep, s->sv_hostname);
		namep += s->sv_hostnamelen - 1;
		*namep++ = ',';
	}
	/* replace the trailing ',' with the string terminator */
	*--namep = '\0';

	*len = length;

	return (srvnames);
}
4998766Scarlsonj 
4999766Scarlsonj /*
5000766Scarlsonj  * These two functions are temporary and designed for the upgrade-workaround
5001766Scarlsonj  * only.  They cannot be used for general zone-crossing NFS client support, and
5002766Scarlsonj  * will be removed shortly.
5003766Scarlsonj  *
5004766Scarlsonj  * When the workaround is enabled, all NFS traffic is forced into the global
5005766Scarlsonj  * zone.  These functions are called when the code needs to refer to the state
5006766Scarlsonj  * of the underlying network connection.  They're not called when the function
5007766Scarlsonj  * needs to refer to the state of the process that invoked the system call.
5008766Scarlsonj  * (E.g., when checking whether the zone is shutting down during the mount()
5009766Scarlsonj  * call.)
5010766Scarlsonj  */
5011766Scarlsonj 
5012766Scarlsonj struct zone *
nfs_zone(void)5013766Scarlsonj nfs_zone(void)
5014766Scarlsonj {
5015766Scarlsonj 	return (nfs_global_client_only != 0 ? global_zone : curproc->p_zone);
5016766Scarlsonj }
5017766Scarlsonj 
5018766Scarlsonj zoneid_t
nfs_zoneid(void)5019766Scarlsonj nfs_zoneid(void)
5020766Scarlsonj {
5021766Scarlsonj 	return (nfs_global_client_only != 0 ? GLOBAL_ZONEID : getzoneid());
5022766Scarlsonj }
50231676Sjpk 
50241676Sjpk /*
50251676Sjpk  * nfs_mount_label_policy:
50261676Sjpk  *	Determine whether the mount is allowed according to MAC check,
50271676Sjpk  *	by comparing (where appropriate) label of the remote server
50281676Sjpk  *	against the label of the zone being mounted into.
50291676Sjpk  *
50301676Sjpk  *	Returns:
50311676Sjpk  *		 0 :	access allowed
50321676Sjpk  *		-1 :	read-only access allowed (i.e., read-down)
50331676Sjpk  *		>0 :	error code, such as EACCES
50341676Sjpk  */
int
nfs_mount_label_policy(vfs_t *vfsp, struct netbuf *addr,
    struct knetconfig *knconf, cred_t *cr)
{
	int		addr_type;
	void		*ipaddr;
	bslabel_t	*server_sl, *mntlabel;
	zone_t		*mntzone = NULL;
	ts_label_t	*zlabel;
	tsol_tpc_t	*tp;
	ts_label_t	*tsl = NULL;
	int		retv;

	/*
	 * Get the zone's label.  Each zone on a labeled system has a label.
	 * zone_find_by_any_path() returns the zone held; released at 'out'.
	 */
	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
	zlabel = mntzone->zone_slabel;
	ASSERT(zlabel != NULL);
	label_hold(zlabel);

	/* Extract the server's IP address; only INET/INET6 are checked */
	if (strcmp(knconf->knc_protofmly, NC_INET) == 0) {
		addr_type = IPV4_VERSION;
		ipaddr = &((struct sockaddr_in *)addr->buf)->sin_addr;
	} else if (strcmp(knconf->knc_protofmly, NC_INET6) == 0) {
		addr_type = IPV6_VERSION;
		ipaddr = &((struct sockaddr_in6 *)addr->buf)->sin6_addr;
	} else {
		/* non-IP transport: no label check applies, allow access */
		retv = 0;
		goto out;
	}

	retv = EACCES;				/* assume the worst */

	/*
	 * Next, get the assigned label of the remote server.
	 */
	tp = find_tpc(ipaddr, addr_type, B_FALSE);
	if (tp == NULL)
		goto out;			/* error getting host entry */

	if (tp->tpc_tp.tp_doi != zlabel->tsl_doi)
		goto rel_tpc;			/* invalid domain */
	if ((tp->tpc_tp.host_type != SUN_CIPSO) &&
	    (tp->tpc_tp.host_type != UNLABELED))
		goto rel_tpc;			/* invalid hosttype */

	if (tp->tpc_tp.host_type == SUN_CIPSO) {
		/* labeled peer: ask it for its label (tsl released below) */
		tsl = getflabel_cipso(vfsp);
		if (tsl == NULL)
			goto rel_tpc;		/* error getting server lbl */

		server_sl = label2bslabel(tsl);
	} else {	/* UNLABELED */
		/* unlabeled peer: use its template's default label */
		server_sl = &tp->tpc_tp.tp_def_label;
	}

	mntlabel = label2bslabel(zlabel);

	/*
	 * Now compare labels to complete the MAC check.  If the labels
	 * are equal or if the requestor is in the global zone and has
	 * NET_MAC_AWARE, then allow read-write access.   (Except for
	 * mounts into the global zone itself; restrict these to
	 * read-only.)
	 *
	 * If the requestor is in some other zone, but his label
	 * dominates the server, then allow read-down.
	 *
	 * Otherwise, access is denied.
	 */
	if (blequal(mntlabel, server_sl) ||
	    (crgetzoneid(cr) == GLOBAL_ZONEID &&
	    getpflags(NET_MAC_AWARE, cr) != 0)) {
		if ((mntzone == global_zone) ||
		    !blequal(mntlabel, server_sl))
			retv = -1;		/* read-only */
		else
			retv = 0;		/* access OK */
	} else if (bldominates(mntlabel, server_sl)) {
		retv = -1;			/* read-only */
	} else {
		retv = EACCES;
	}

	if (tsl != NULL)
		label_rele(tsl);

rel_tpc:
	TPC_RELE(tp);
out:
	if (mntzone)
		zone_rele(mntzone);
	label_rele(zlabel);
	return (retv);
}
51312712Snn35248 
51322712Snn35248 boolean_t
nfs_has_ctty(void)51332712Snn35248 nfs_has_ctty(void)
51342712Snn35248 {
51352712Snn35248 	boolean_t rv;
51362712Snn35248 	mutex_enter(&curproc->p_splock);
51372712Snn35248 	rv = (curproc->p_sessp->s_vp != NULL);
51382712Snn35248 	mutex_exit(&curproc->p_splock);
51392712Snn35248 	return (rv);
51402712Snn35248 }
51414971Sjarrett 
51424971Sjarrett /*
 * Look in the xattr directory to see if it has any generic user attributes
51447067Smarks  */
/*
 * Scan the extended-attribute directory 'vp' and set *valp to 1 if it
 * contains any entry other than ".", ".." and the system views
 * (VIEW_READWRITE/VIEW_READONLY) -- i.e. any generic user attribute.
 * *valp is left 0 otherwise.
 *
 * Returns 0 on success (including "no entries read"), or the error
 * from VOP_READDIR.
 *
 * NOTE(review): only the first 8KB READDIR buffer is examined ('eof'
 * is set but never used to loop); presumably any user attribute shows
 * up in the first buffer -- confirm if directories can be larger.
 */
int
do_xattr_exists_check(vnode_t *vp, ulong_t *valp, cred_t *cr)
{
	struct uio uio;
	struct iovec iov;
	char *dbuf;
	struct dirent64 *dp;
	size_t dlen = 8 * 1024;
	size_t dbuflen;
	int eof = 0;
	int error;

	*valp = 0;
	dbuf = kmem_alloc(dlen, KM_SLEEP);
	/* set up a kernel-space uio over dbuf for one READDIR call */
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_fmode = 0;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = 0;
	uio.uio_resid = dlen;
	iov.iov_base = dbuf;
	iov.iov_len = dlen;
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
	error = VOP_READDIR(vp, &uio, cr, &eof, NULL, 0);
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

	/* bytes actually filled in by READDIR */
	dbuflen = dlen - uio.uio_resid;

	if (error || dbuflen == 0) {
		kmem_free(dbuf, dlen);
		return (error);
	}

	dp = (dirent64_t *)dbuf;

	/* walk the dirent64 records; skip the always-present entries */
	while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
		if (strcmp(dp->d_name, ".") == 0 ||
		    strcmp(dp->d_name, "..") == 0 || strcmp(dp->d_name,
		    VIEW_READWRITE) == 0 || strcmp(dp->d_name,
		    VIEW_READONLY) == 0) {
			dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
			continue;
		}

		/* found a generic user attribute */
		*valp = 1;
		break;
	}
	kmem_free(dbuf, dlen);
	return (0);
}
5196