1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate /*
28*0Sstevel@tonic-gate  * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
29*0Sstevel@tonic-gate  *		All Rights Reserved
30*0Sstevel@tonic-gate  */
31*0Sstevel@tonic-gate 
32*0Sstevel@tonic-gate /*
33*0Sstevel@tonic-gate  * Portions of this source code were derived from Berkeley 4.3 BSD
34*0Sstevel@tonic-gate  * under license from the Regents of the University of California.
35*0Sstevel@tonic-gate  */
36*0Sstevel@tonic-gate 
37*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
38*0Sstevel@tonic-gate 
39*0Sstevel@tonic-gate /*
40*0Sstevel@tonic-gate  * Implements a kernel based, client side RPC over Connection Oriented
41*0Sstevel@tonic-gate  * Transports (COTS).
42*0Sstevel@tonic-gate  */
43*0Sstevel@tonic-gate 
44*0Sstevel@tonic-gate /*
45*0Sstevel@tonic-gate  * Much of this file has been re-written to let NFS work better over slow
46*0Sstevel@tonic-gate  * transports. A description follows.
47*0Sstevel@tonic-gate  *
48*0Sstevel@tonic-gate  * One of the annoying things about kRPC/COTS is that it will temporarily
49*0Sstevel@tonic-gate  * create more than one connection between a client and server. This
50*0Sstevel@tonic-gate  * happens because when a connection is made, the end-points entry in the
51*0Sstevel@tonic-gate  * linked list of connections (headed by cm_hd), is removed so that other
52*0Sstevel@tonic-gate  * threads don't mess with it. Went ahead and bit the bullet by keeping
53*0Sstevel@tonic-gate  * the endpoint on the connection list and introducing state bits,
54*0Sstevel@tonic-gate  * condition variables etc. to the connection entry data structure (struct
55*0Sstevel@tonic-gate  * cm_xprt).
56*0Sstevel@tonic-gate  *
57*0Sstevel@tonic-gate  * Here is a summary of the changes to cm-xprt:
58*0Sstevel@tonic-gate  *
59*0Sstevel@tonic-gate  *	x_ctime is the timestamp of when the endpoint was last
60*0Sstevel@tonic-gate  *	connected or disconnected. If an end-point is ever disconnected
61*0Sstevel@tonic-gate  *	or re-connected, then any outstanding RPC request is presumed
62*0Sstevel@tonic-gate  *	lost, telling clnt_cots_kcallit that it needs to re-send the
63*0Sstevel@tonic-gate  *	request, not just wait for the original request's reply to
64*0Sstevel@tonic-gate  *	arrive.
65*0Sstevel@tonic-gate  *
66*0Sstevel@tonic-gate  *	x_thread flag which tells us if a thread is doing a connection attempt.
67*0Sstevel@tonic-gate  *
68*0Sstevel@tonic-gate  *	x_waitdis flag which tells us we are waiting a disconnect ACK.
69*0Sstevel@tonic-gate  *
70*0Sstevel@tonic-gate  *	x_needdis flag which tells us we need to send a T_DISCONN_REQ
71*0Sstevel@tonic-gate  *	to kill the connection.
72*0Sstevel@tonic-gate  *
73*0Sstevel@tonic-gate  *	x_needrel flag which tells us we need to send a T_ORDREL_REQ to
74*0Sstevel@tonic-gate  *	gracefully close the connection.
75*0Sstevel@tonic-gate  *
76*0Sstevel@tonic-gate  *	#defined bitmasks for all the b_* bits so that more
77*0Sstevel@tonic-gate  *	efficient (and at times less clumsy) masks can be used to
78*0Sstevel@tonic-gate  *	manipulate state in cases where multiple bits have to be
79*0Sstevel@tonic-gate  *	set/cleared/checked in the same critical section.
80*0Sstevel@tonic-gate  *
81*0Sstevel@tonic-gate  *	x_conn_cv and x_dis_cv are new condition variables to let
82*0Sstevel@tonic-gate  *	threads know when the connection attempt is done, and to let
83*0Sstevel@tonic-gate  *	the connecting thread know when the disconnect handshake is
84*0Sstevel@tonic-gate  *	done.
85*0Sstevel@tonic-gate  *
86*0Sstevel@tonic-gate  * Added the CONN_HOLD() macro so that all reference holds have the same
87*0Sstevel@tonic-gate  * look and feel.
88*0Sstevel@tonic-gate  *
89*0Sstevel@tonic-gate  * In the private (cku_private) portion of the client handle,
90*0Sstevel@tonic-gate  *
91*0Sstevel@tonic-gate  *	cku_flags replaces cku_sent, a boolean. cku_flags keeps
92*0Sstevel@tonic-gate  *	track of whether a request has been sent, and whether the
93*0Sstevel@tonic-gate  *	client's handles call record is on the dispatch list (so that
94*0Sstevel@tonic-gate  *	the reply can be matched by XID to the right client handle).
95*0Sstevel@tonic-gate  *	The idea of CKU_ONQUEUE is that we can exit clnt_cots_kcallit()
96*0Sstevel@tonic-gate  *	and still have the response find the right client handle so
97*0Sstevel@tonic-gate  *	that the retry of CLNT_CALL() gets the result. Testing, found
98*0Sstevel@tonic-gate  *	situations where if the timeout was increased, performance
99*0Sstevel@tonic-gate  *	degraded. This was due to us hitting a window where the thread
100*0Sstevel@tonic-gate  *	was back in rfscall() (probably printing server not responding)
101*0Sstevel@tonic-gate  *	while the response came back but no place to put it.
102*0Sstevel@tonic-gate  *
103*0Sstevel@tonic-gate  *	cku_ctime is just a cache of x_ctime. If they match,
104*0Sstevel@tonic-gate  *	clnt_cots_kcallit() won't send a retry (unless the maximum
105*0Sstevel@tonic-gate  *	receive count limit has been reached). If they don't match, then
106*0Sstevel@tonic-gate  *	we assume the request has been lost, and a retry of the request
107*0Sstevel@tonic-gate  *	is needed.
108*0Sstevel@tonic-gate  *
109*0Sstevel@tonic-gate  *	cku_recv_attempts counts the number of receive count attempts
110*0Sstevel@tonic-gate  *	after one try is sent on the wire.
111*0Sstevel@tonic-gate  *
112*0Sstevel@tonic-gate  * Added the clnt_delay() routine so that interruptible and
113*0Sstevel@tonic-gate  * noninterruptible delays are possible.
114*0Sstevel@tonic-gate  *
115*0Sstevel@tonic-gate  * CLNT_MIN_TIMEOUT has been bumped to 10 seconds from 3. This is used to
116*0Sstevel@tonic-gate  * control how long the client delays before returning after getting
117*0Sstevel@tonic-gate  * ECONNREFUSED. At 3 seconds, 8 client threads per mount really does bash
118*0Sstevel@tonic-gate  * a server that may be booting and not yet started nfsd.
119*0Sstevel@tonic-gate  *
120*0Sstevel@tonic-gate  * CLNT_MAXRECV_WITHOUT_RETRY is a new macro (value of 3) (with a tunable)
121*0Sstevel@tonic-gate  * Why don't we just wait forever (receive an infinite # of times)?
122*0Sstevel@tonic-gate  * Because the server may have rebooted. More insidious is that some
123*0Sstevel@tonic-gate  * servers (ours) will drop NFS/TCP requests in some cases. This is bad,
124*0Sstevel@tonic-gate  * but it is a reality.
125*0Sstevel@tonic-gate  *
126*0Sstevel@tonic-gate  * The case of a server doing orderly release really messes up the
127*0Sstevel@tonic-gate  * client's recovery, especially if the server's TCP implementation is
128*0Sstevel@tonic-gate  * buggy.  It was found that the kRPC/COTS client was breaking some
129*0Sstevel@tonic-gate  * TPI rules, such as not waiting for the acknowledgement of a
130*0Sstevel@tonic-gate  * T_DISCON_REQ (hence the added case statements T_ERROR_ACK, T_OK_ACK and
131*0Sstevel@tonic-gate  * T_DISCON_REQ in clnt_dispatch_notifyall()).
132*0Sstevel@tonic-gate  *
133*0Sstevel@tonic-gate  * One of the things that we've seen is that a kRPC TCP endpoint goes into
134*0Sstevel@tonic-gate  * TIMEWAIT and thus a reconnect takes a long time to satisfy because
135*0Sstevel@tonic-gate  * the TIMEWAIT state takes a while to finish.  If a server sends a
136*0Sstevel@tonic-gate  * T_ORDREL_IND, there is little point in an RPC client doing a
137*0Sstevel@tonic-gate  * T_ORDREL_REQ, because the RPC request isn't going to make it (the
138*0Sstevel@tonic-gate  * server is saying that it won't accept any more data). So kRPC was
139*0Sstevel@tonic-gate  * changed to send a T_DISCON_REQ when we get a T_ORDREL_IND. So now the
140*0Sstevel@tonic-gate  * connection skips the TIMEWAIT state and goes straight to a bound state
141*0Sstevel@tonic-gate  * that kRPC can quickly switch to connected.
142*0Sstevel@tonic-gate  *
143*0Sstevel@tonic-gate  * Code that issues TPI request must use waitforack() to wait for the
144*0Sstevel@tonic-gate  * corresponding ack (assuming there is one) in any future modifications.
145*0Sstevel@tonic-gate  * This works around problems that may be introduced by breaking TPI rules
146*0Sstevel@tonic-gate  * (by submitting new calls before earlier requests have been acked) in the
147*0Sstevel@tonic-gate  * case of a signal or other early return.  waitforack() depends on
148*0Sstevel@tonic-gate  * clnt_dispatch_notifyconn() to issue the wakeup when the ack
149*0Sstevel@tonic-gate  * arrives, so adding new TPI calls may require corresponding changes
150*0Sstevel@tonic-gate  * to clnt_dispatch_notifyconn(). Presently, the timeout period is based on
151*0Sstevel@tonic-gate  * CLNT_MIN_TIMEOUT which is 10 seconds. If you modify this value, be sure
152*0Sstevel@tonic-gate  * not to set it too low or TPI ACKS will be lost.
153*0Sstevel@tonic-gate  */
154*0Sstevel@tonic-gate 
155*0Sstevel@tonic-gate #include <sys/param.h>
156*0Sstevel@tonic-gate #include <sys/types.h>
157*0Sstevel@tonic-gate #include <sys/user.h>
158*0Sstevel@tonic-gate #include <sys/systm.h>
159*0Sstevel@tonic-gate #include <sys/sysmacros.h>
160*0Sstevel@tonic-gate #include <sys/proc.h>
161*0Sstevel@tonic-gate #include <sys/socket.h>
162*0Sstevel@tonic-gate #include <sys/file.h>
163*0Sstevel@tonic-gate #include <sys/stream.h>
164*0Sstevel@tonic-gate #include <sys/strsubr.h>
165*0Sstevel@tonic-gate #include <sys/stropts.h>
166*0Sstevel@tonic-gate #include <sys/strsun.h>
167*0Sstevel@tonic-gate #include <sys/timod.h>
168*0Sstevel@tonic-gate #include <sys/tiuser.h>
169*0Sstevel@tonic-gate #include <sys/tihdr.h>
170*0Sstevel@tonic-gate #include <sys/t_kuser.h>
171*0Sstevel@tonic-gate #include <sys/fcntl.h>
172*0Sstevel@tonic-gate #include <sys/errno.h>
173*0Sstevel@tonic-gate #include <sys/kmem.h>
174*0Sstevel@tonic-gate #include <sys/debug.h>
175*0Sstevel@tonic-gate #include <sys/systm.h>
176*0Sstevel@tonic-gate #include <sys/kstat.h>
177*0Sstevel@tonic-gate #include <sys/t_lock.h>
178*0Sstevel@tonic-gate #include <sys/ddi.h>
179*0Sstevel@tonic-gate #include <sys/cmn_err.h>
180*0Sstevel@tonic-gate #include <sys/time.h>
181*0Sstevel@tonic-gate #include <sys/isa_defs.h>
182*0Sstevel@tonic-gate #include <sys/callb.h>
183*0Sstevel@tonic-gate #include <sys/sunddi.h>
184*0Sstevel@tonic-gate #include <sys/atomic.h>
185*0Sstevel@tonic-gate 
186*0Sstevel@tonic-gate #include <netinet/in.h>
187*0Sstevel@tonic-gate #include <netinet/tcp.h>
188*0Sstevel@tonic-gate 
189*0Sstevel@tonic-gate #include <rpc/types.h>
190*0Sstevel@tonic-gate #include <rpc/xdr.h>
191*0Sstevel@tonic-gate #include <rpc/auth.h>
192*0Sstevel@tonic-gate #include <rpc/clnt.h>
193*0Sstevel@tonic-gate #include <rpc/rpc_msg.h>
194*0Sstevel@tonic-gate 
195*0Sstevel@tonic-gate #define	COTS_DEFAULT_ALLOCSIZE	2048
196*0Sstevel@tonic-gate 
197*0Sstevel@tonic-gate #define	WIRE_HDR_SIZE	20	/* serialized call header, sans proc number */
198*0Sstevel@tonic-gate #define	MSG_OFFSET	128	/* offset of call into the mblk */
199*0Sstevel@tonic-gate 
200*0Sstevel@tonic-gate const char *kinet_ntop6(uchar_t *, char *, size_t);
201*0Sstevel@tonic-gate 
202*0Sstevel@tonic-gate static int	clnt_cots_ksettimers(CLIENT *, struct rpc_timers *,
203*0Sstevel@tonic-gate     struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
204*0Sstevel@tonic-gate static enum clnt_stat	clnt_cots_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
205*0Sstevel@tonic-gate     caddr_t, xdrproc_t, caddr_t, struct timeval);
206*0Sstevel@tonic-gate static void	clnt_cots_kabort(CLIENT *);
207*0Sstevel@tonic-gate static void	clnt_cots_kerror(CLIENT *, struct rpc_err *);
208*0Sstevel@tonic-gate static bool_t	clnt_cots_kfreeres(CLIENT *, xdrproc_t, caddr_t);
209*0Sstevel@tonic-gate static void	clnt_cots_kdestroy(CLIENT *);
210*0Sstevel@tonic-gate static bool_t	clnt_cots_kcontrol(CLIENT *, int, char *);
211*0Sstevel@tonic-gate 
212*0Sstevel@tonic-gate 
213*0Sstevel@tonic-gate /* List of transports managed by the connection manager. */
214*0Sstevel@tonic-gate struct cm_xprt {
215*0Sstevel@tonic-gate 	TIUSER		*x_tiptr;	/* transport handle */
216*0Sstevel@tonic-gate 	queue_t		*x_wq;		/* send queue */
217*0Sstevel@tonic-gate 	clock_t		x_time;		/* last time we handed this xprt out */
218*0Sstevel@tonic-gate 	clock_t		x_ctime;	/* time we went to CONNECTED */
219*0Sstevel@tonic-gate 	int		x_tidu_size;    /* TIDU size of this transport */
	/*
	 * Connection state, readable either one flag at a time through
	 * the x_* bitfield macros below, or as a whole word
	 * (x_state_flags) with the X_* masks.  The _BIT_FIELDS_HTOL and
	 * _BIT_FIELDS_LTOH declarations are mirror images of each other,
	 * so the X_* mask values match the bit positions in both layouts.
	 */
220*0Sstevel@tonic-gate 	union {
221*0Sstevel@tonic-gate 	    struct {
222*0Sstevel@tonic-gate 		unsigned int
223*0Sstevel@tonic-gate #ifdef	_BIT_FIELDS_HTOL
224*0Sstevel@tonic-gate 		b_closing:	1,	/* we've sent a ord rel on this conn */
225*0Sstevel@tonic-gate 		b_dead:		1,	/* transport is closed or disconn */
226*0Sstevel@tonic-gate 		b_doomed:	1,	/* too many conns, let this go idle */
227*0Sstevel@tonic-gate 		b_connected:	1,	/* this connection is connected */
228*0Sstevel@tonic-gate 
229*0Sstevel@tonic-gate 		b_ordrel:	1,	/* do an orderly release? */
230*0Sstevel@tonic-gate 		b_thread:	1,	/* thread doing connect */
231*0Sstevel@tonic-gate 		b_waitdis:	1,	/* waiting for disconnect ACK */
232*0Sstevel@tonic-gate 		b_needdis:	1,	/* need T_DISCON_REQ */
233*0Sstevel@tonic-gate 
234*0Sstevel@tonic-gate 		b_needrel:	1,	/* need T_ORDREL_REQ */
235*0Sstevel@tonic-gate 		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
236*0Sstevel@tonic-gate 					/* disconnect during connect */
237*0Sstevel@tonic-gate 
238*0Sstevel@tonic-gate 		b_pad:		22;
239*0Sstevel@tonic-gate 
240*0Sstevel@tonic-gate #endif
241*0Sstevel@tonic-gate 
242*0Sstevel@tonic-gate #ifdef	_BIT_FIELDS_LTOH
243*0Sstevel@tonic-gate 		b_pad:		22,
244*0Sstevel@tonic-gate 
245*0Sstevel@tonic-gate 		b_early_disc:	1,	/* got a T_ORDREL_IND or T_DISCON_IND */
246*0Sstevel@tonic-gate 					/* disconnect during connect */
247*0Sstevel@tonic-gate 		b_needrel:	1,	/* need T_ORDREL_REQ */
248*0Sstevel@tonic-gate 
249*0Sstevel@tonic-gate 		b_needdis:	1,	/* need T_DISCON_REQ */
250*0Sstevel@tonic-gate 		b_waitdis:	1,	/* waiting for disconnect ACK */
251*0Sstevel@tonic-gate 		b_thread:	1,	/* thread doing connect */
252*0Sstevel@tonic-gate 		b_ordrel:	1,	/* do an orderly release? */
253*0Sstevel@tonic-gate 
254*0Sstevel@tonic-gate 		b_connected:	1,	/* this connection is connected */
255*0Sstevel@tonic-gate 		b_doomed:	1,	/* too many conns, let this go idle */
256*0Sstevel@tonic-gate 		b_dead:		1,	/* transport is closed or disconn */
257*0Sstevel@tonic-gate 		b_closing:	1;	/* we've sent a ord rel on this conn */
258*0Sstevel@tonic-gate #endif
259*0Sstevel@tonic-gate 	    } bit;	    unsigned int word;
260*0Sstevel@tonic-gate 
	/* Single-flag accessors into the bitfield view of x_state. */
261*0Sstevel@tonic-gate #define	x_closing	x_state.bit.b_closing
262*0Sstevel@tonic-gate #define	x_dead		x_state.bit.b_dead
263*0Sstevel@tonic-gate #define	x_doomed	x_state.bit.b_doomed
264*0Sstevel@tonic-gate #define	x_connected	x_state.bit.b_connected
265*0Sstevel@tonic-gate 
266*0Sstevel@tonic-gate #define	x_ordrel	x_state.bit.b_ordrel
267*0Sstevel@tonic-gate #define	x_thread	x_state.bit.b_thread
268*0Sstevel@tonic-gate #define	x_waitdis	x_state.bit.b_waitdis
269*0Sstevel@tonic-gate #define	x_needdis	x_state.bit.b_needdis
270*0Sstevel@tonic-gate 
271*0Sstevel@tonic-gate #define	x_needrel	x_state.bit.b_needrel
272*0Sstevel@tonic-gate #define	x_early_disc    x_state.bit.b_early_disc
273*0Sstevel@tonic-gate 
	/* Whole-word view of the state, for multi-bit mask operations. */
274*0Sstevel@tonic-gate #define	x_state_flags	x_state.word
275*0Sstevel@tonic-gate 
276*0Sstevel@tonic-gate #define	X_CLOSING	0x80000000
277*0Sstevel@tonic-gate #define	X_DEAD		0x40000000
278*0Sstevel@tonic-gate #define	X_DOOMED	0x20000000
279*0Sstevel@tonic-gate #define	X_CONNECTED	0x10000000
280*0Sstevel@tonic-gate 
281*0Sstevel@tonic-gate #define	X_ORDREL	0x08000000
282*0Sstevel@tonic-gate #define	X_THREAD	0x04000000
283*0Sstevel@tonic-gate #define	X_WAITDIS	0x02000000
284*0Sstevel@tonic-gate #define	X_NEEDDIS	0x01000000
285*0Sstevel@tonic-gate 
286*0Sstevel@tonic-gate #define	X_NEEDREL	0x00800000
287*0Sstevel@tonic-gate #define	X_EARLYDISC	0x00400000
288*0Sstevel@tonic-gate 
	/* States in which this transport must not be handed out again. */
289*0Sstevel@tonic-gate #define	X_BADSTATES	(X_CLOSING | X_DEAD | X_DOOMED)
290*0Sstevel@tonic-gate 
291*0Sstevel@tonic-gate 	}		x_state;
292*0Sstevel@tonic-gate 	int		x_ref;		/* number of users of this xprt */
293*0Sstevel@tonic-gate 	int		x_family;	/* address family of transport */
294*0Sstevel@tonic-gate 	dev_t		x_rdev;		/* device number of transport */
295*0Sstevel@tonic-gate 	struct cm_xprt	*x_next;
296*0Sstevel@tonic-gate 
297*0Sstevel@tonic-gate 	struct netbuf	x_server;	/* destination address */
298*0Sstevel@tonic-gate 	struct netbuf	x_src;		/* src address (for retries) */
299*0Sstevel@tonic-gate 	kmutex_t	x_lock;		/* lock on this entry */
300*0Sstevel@tonic-gate 	kcondvar_t	x_cv;		/* to signal when can be closed */
301*0Sstevel@tonic-gate 	kcondvar_t	x_conn_cv;	/* to signal when connection attempt */
302*0Sstevel@tonic-gate 					/* is complete */
303*0Sstevel@tonic-gate 	kstat_t		*x_ksp;
304*0Sstevel@tonic-gate 
305*0Sstevel@tonic-gate 	kcondvar_t	x_dis_cv;	/* to signal when disconnect attempt */
306*0Sstevel@tonic-gate 					/* is complete */
307*0Sstevel@tonic-gate 	zoneid_t	x_zoneid;	/* zone this xprt belongs to */
308*0Sstevel@tonic-gate };
309*0Sstevel@tonic-gate 
/*
 * Per-connection kstat counters exported for each cm_xprt.  Field
 * order here must match the initializer order in cm_kstat_template.
 */
310*0Sstevel@tonic-gate typedef struct cm_kstat_xprt {
311*0Sstevel@tonic-gate 	kstat_named_t	x_wq;
312*0Sstevel@tonic-gate 	kstat_named_t	x_server;
313*0Sstevel@tonic-gate 	kstat_named_t	x_family;
314*0Sstevel@tonic-gate 	kstat_named_t	x_rdev;
315*0Sstevel@tonic-gate 	kstat_named_t	x_time;
316*0Sstevel@tonic-gate 	kstat_named_t	x_state;
317*0Sstevel@tonic-gate 	kstat_named_t	x_ref;
318*0Sstevel@tonic-gate 	kstat_named_t	x_port;
319*0Sstevel@tonic-gate } cm_kstat_xprt_t;
320*0Sstevel@tonic-gate 
/*
 * Template used to initialize each connection's kstat names and types;
 * entries are positional and must stay in the same order as the
 * cm_kstat_xprt_t fields.
 */
321*0Sstevel@tonic-gate static cm_kstat_xprt_t cm_kstat_template = {
322*0Sstevel@tonic-gate 	{ "write_queue", KSTAT_DATA_UINT32 },
323*0Sstevel@tonic-gate 	{ "server",	KSTAT_DATA_STRING },
324*0Sstevel@tonic-gate 	{ "addr_family", KSTAT_DATA_UINT32 },
325*0Sstevel@tonic-gate 	{ "device",	KSTAT_DATA_UINT32 },
326*0Sstevel@tonic-gate 	{ "time_stamp",	KSTAT_DATA_UINT32 },
327*0Sstevel@tonic-gate 	{ "status",	KSTAT_DATA_UINT32 },
328*0Sstevel@tonic-gate 	{ "ref_count",	KSTAT_DATA_INT32 },
329*0Sstevel@tonic-gate 	{ "port",	KSTAT_DATA_UINT32 },
330*0Sstevel@tonic-gate };
331*0Sstevel@tonic-gate 
332*0Sstevel@tonic-gate /*
333*0Sstevel@tonic-gate  * The inverse of this is connmgr_release().
334*0Sstevel@tonic-gate  */
335*0Sstevel@tonic-gate #define	CONN_HOLD(Cm_entry)	{\
336*0Sstevel@tonic-gate 	mutex_enter(&(Cm_entry)->x_lock);	\
337*0Sstevel@tonic-gate 	(Cm_entry)->x_ref++;	\
338*0Sstevel@tonic-gate 	mutex_exit(&(Cm_entry)->x_lock);	\
339*0Sstevel@tonic-gate }
340*0Sstevel@tonic-gate 
341*0Sstevel@tonic-gate 
342*0Sstevel@tonic-gate /*
343*0Sstevel@tonic-gate  * Private data per rpc handle.  This structure is allocated by
344*0Sstevel@tonic-gate  * clnt_cots_kcreate, and freed by clnt_cots_kdestroy.
345*0Sstevel@tonic-gate  */
346*0Sstevel@tonic-gate typedef struct cku_private_s {
347*0Sstevel@tonic-gate 	CLIENT			cku_client;	/* client handle */
348*0Sstevel@tonic-gate 	calllist_t		cku_call;	/* for dispatching calls */
349*0Sstevel@tonic-gate 	struct rpc_err		cku_err;	/* error status */
350*0Sstevel@tonic-gate 
351*0Sstevel@tonic-gate 	struct netbuf		cku_srcaddr;	/* source address for retries */
352*0Sstevel@tonic-gate 	int			cku_addrfmly;  /* for binding port */
353*0Sstevel@tonic-gate 	struct netbuf		cku_addr;	/* remote address */
354*0Sstevel@tonic-gate 	dev_t			cku_device;	/* device to use */
355*0Sstevel@tonic-gate 	uint_t			cku_flags;
	/* cku_flags bits (see the block comment at the top of this file) */
356*0Sstevel@tonic-gate #define	CKU_ONQUEUE		0x1	/* call record is on the dispatch list */
357*0Sstevel@tonic-gate #define	CKU_SENT		0x2	/* request has been sent */
358*0Sstevel@tonic-gate 
359*0Sstevel@tonic-gate 	bool_t			cku_progress;	/* for CLSET_PROGRESS */
360*0Sstevel@tonic-gate 	uint32_t		cku_xid;	/* current XID */
361*0Sstevel@tonic-gate 	clock_t			cku_ctime;	/* time stamp of when */
362*0Sstevel@tonic-gate 						/* connection was created */
363*0Sstevel@tonic-gate 	uint_t			cku_recv_attempts;
364*0Sstevel@tonic-gate 	XDR			cku_outxdr;	/* xdr routine for output */
365*0Sstevel@tonic-gate 	XDR			cku_inxdr;	/* xdr routine for input */
366*0Sstevel@tonic-gate 	char			cku_rpchdr[WIRE_HDR_SIZE + 4];
367*0Sstevel@tonic-gate 						/* pre-serialized rpc header */
368*0Sstevel@tonic-gate 
369*0Sstevel@tonic-gate 	uint_t			cku_outbuflen;	/* default output mblk length */
370*0Sstevel@tonic-gate 	struct cred		*cku_cred;	/* credentials */
371*0Sstevel@tonic-gate 	bool_t			cku_nodelayonerr;
372*0Sstevel@tonic-gate 						/* for CLSET_NODELAYONERR */
373*0Sstevel@tonic-gate 	int			cku_useresvport; /* Use reserved port */
374*0Sstevel@tonic-gate 	struct rpc_cots_client	*cku_stats;	/* stats for zone */
375*0Sstevel@tonic-gate } cku_private_t;
376*0Sstevel@tonic-gate 
377*0Sstevel@tonic-gate static struct cm_xprt *connmgr_wrapconnect(struct cm_xprt *,
378*0Sstevel@tonic-gate 	const struct timeval *, struct netbuf *, int, struct netbuf *,
379*0Sstevel@tonic-gate 	struct rpc_err *, bool_t, bool_t);
380*0Sstevel@tonic-gate 
381*0Sstevel@tonic-gate static bool_t	connmgr_connect(struct cm_xprt *, queue_t *, struct netbuf *,
382*0Sstevel@tonic-gate 				int, calllist_t *, int *, bool_t reconnect,
383*0Sstevel@tonic-gate 				const struct timeval *, bool_t);
384*0Sstevel@tonic-gate 
385*0Sstevel@tonic-gate static bool_t	connmgr_setopt(queue_t *, int, int, calllist_t *);
386*0Sstevel@tonic-gate static void	connmgr_sndrel(struct cm_xprt *);
387*0Sstevel@tonic-gate static void	connmgr_snddis(struct cm_xprt *);
388*0Sstevel@tonic-gate static void	connmgr_close(struct cm_xprt *);
389*0Sstevel@tonic-gate static void	connmgr_release(struct cm_xprt *);
390*0Sstevel@tonic-gate static struct cm_xprt *connmgr_wrapget(struct netbuf *, const struct timeval *,
391*0Sstevel@tonic-gate 	cku_private_t *);
392*0Sstevel@tonic-gate 
393*0Sstevel@tonic-gate static struct cm_xprt *connmgr_get(struct netbuf *, const struct timeval *,
394*0Sstevel@tonic-gate 	struct netbuf *, int, struct netbuf *, struct rpc_err *, dev_t,
395*0Sstevel@tonic-gate 	bool_t, int);
396*0Sstevel@tonic-gate 
397*0Sstevel@tonic-gate static void connmgr_cancelconn(struct cm_xprt *);
398*0Sstevel@tonic-gate static enum clnt_stat connmgr_cwait(struct cm_xprt *, const struct timeval *,
399*0Sstevel@tonic-gate 	bool_t);
400*0Sstevel@tonic-gate static void connmgr_dis_and_wait(struct cm_xprt *);
401*0Sstevel@tonic-gate 
402*0Sstevel@tonic-gate static void	clnt_dispatch_send(queue_t *, mblk_t *, calllist_t *, uint_t,
403*0Sstevel@tonic-gate 					uint_t);
404*0Sstevel@tonic-gate 
405*0Sstevel@tonic-gate static int clnt_delay(clock_t, bool_t);
406*0Sstevel@tonic-gate 
407*0Sstevel@tonic-gate static int waitforack(calllist_t *, t_scalar_t, const struct timeval *, bool_t);
408*0Sstevel@tonic-gate 
409*0Sstevel@tonic-gate /*
410*0Sstevel@tonic-gate  * Operations vector for TCP/IP based RPC
411*0Sstevel@tonic-gate  */
412*0Sstevel@tonic-gate static struct clnt_ops tcp_ops = {
	/* positional initializer: entry order must match struct clnt_ops */
413*0Sstevel@tonic-gate 	clnt_cots_kcallit,	/* do rpc call */
414*0Sstevel@tonic-gate 	clnt_cots_kabort,	/* abort call */
415*0Sstevel@tonic-gate 	clnt_cots_kerror,	/* return error status */
416*0Sstevel@tonic-gate 	clnt_cots_kfreeres,	/* free results */
417*0Sstevel@tonic-gate 	clnt_cots_kdestroy,	/* destroy rpc handle */
418*0Sstevel@tonic-gate 	clnt_cots_kcontrol,	/* the ioctl() of rpc */
419*0Sstevel@tonic-gate 	clnt_cots_ksettimers,	/* set retry timers */
420*0Sstevel@tonic-gate };
421*0Sstevel@tonic-gate 
422*0Sstevel@tonic-gate static int rpc_kstat_instance = 0;  /* keeps the current instance */
423*0Sstevel@tonic-gate 				/* number for the next kstat_create */
424*0Sstevel@tonic-gate 
425*0Sstevel@tonic-gate static struct cm_xprt *cm_hd = NULL;
426*0Sstevel@tonic-gate static kmutex_t connmgr_lock;	/* for connection mngr's list of transports */
427*0Sstevel@tonic-gate 
428*0Sstevel@tonic-gate extern kmutex_t clnt_max_msg_lock;
429*0Sstevel@tonic-gate 
430*0Sstevel@tonic-gate static calllist_t *clnt_pending = NULL;
431*0Sstevel@tonic-gate extern kmutex_t clnt_pending_lock;
432*0Sstevel@tonic-gate 
433*0Sstevel@tonic-gate static int clnt_cots_hash_size = DEFAULT_HASH_SIZE;
434*0Sstevel@tonic-gate 
435*0Sstevel@tonic-gate static call_table_t *cots_call_ht;
436*0Sstevel@tonic-gate 
/*
 * Per-zone COTS client RPC statistics.  The struct doubles as the
 * kstat_named layout; the template supplies names and types, and its
 * entries must stay in the same order as the struct fields.
 */
437*0Sstevel@tonic-gate static const struct rpc_cots_client {
438*0Sstevel@tonic-gate 	kstat_named_t	rccalls;
439*0Sstevel@tonic-gate 	kstat_named_t	rcbadcalls;
440*0Sstevel@tonic-gate 	kstat_named_t	rcbadxids;
441*0Sstevel@tonic-gate 	kstat_named_t	rctimeouts;
442*0Sstevel@tonic-gate 	kstat_named_t	rcnewcreds;
443*0Sstevel@tonic-gate 	kstat_named_t	rcbadverfs;
444*0Sstevel@tonic-gate 	kstat_named_t	rctimers;
445*0Sstevel@tonic-gate 	kstat_named_t	rccantconn;
446*0Sstevel@tonic-gate 	kstat_named_t	rcnomem;
447*0Sstevel@tonic-gate 	kstat_named_t	rcintrs;
448*0Sstevel@tonic-gate } cots_rcstat_tmpl = {
449*0Sstevel@tonic-gate 	{ "calls",	KSTAT_DATA_UINT64 },
450*0Sstevel@tonic-gate 	{ "badcalls",	KSTAT_DATA_UINT64 },
451*0Sstevel@tonic-gate 	{ "badxids",	KSTAT_DATA_UINT64 },
452*0Sstevel@tonic-gate 	{ "timeouts",	KSTAT_DATA_UINT64 },
453*0Sstevel@tonic-gate 	{ "newcreds",	KSTAT_DATA_UINT64 },
454*0Sstevel@tonic-gate 	{ "badverfs",	KSTAT_DATA_UINT64 },
455*0Sstevel@tonic-gate 	{ "timers",	KSTAT_DATA_UINT64 },
456*0Sstevel@tonic-gate 	{ "cantconn",	KSTAT_DATA_UINT64 },
457*0Sstevel@tonic-gate 	{ "nomem",	KSTAT_DATA_UINT64 },
458*0Sstevel@tonic-gate 	{ "interrupts", KSTAT_DATA_UINT64 }
459*0Sstevel@tonic-gate };
460*0Sstevel@tonic-gate 
461*0Sstevel@tonic-gate #define	COTSRCSTAT_INCR(p, x)	\
462*0Sstevel@tonic-gate 	atomic_add_64(&(p)->x.value.ui64, 1)
463*0Sstevel@tonic-gate 
464*0Sstevel@tonic-gate #define	CLNT_MAX_CONNS	1	/* concurrent connections between clnt/srvr */
465*0Sstevel@tonic-gate static int clnt_max_conns = CLNT_MAX_CONNS;
466*0Sstevel@tonic-gate 
467*0Sstevel@tonic-gate #define	CLNT_MIN_TIMEOUT	10	/* seconds to wait after we get a */
468*0Sstevel@tonic-gate 					/* connection reset */
469*0Sstevel@tonic-gate #define	CLNT_MIN_CONNTIMEOUT	5	/* seconds to wait for a connection */
470*0Sstevel@tonic-gate 
471*0Sstevel@tonic-gate 
472*0Sstevel@tonic-gate static int clnt_cots_min_tout = CLNT_MIN_TIMEOUT;
473*0Sstevel@tonic-gate static int clnt_cots_min_conntout = CLNT_MIN_CONNTIMEOUT;
474*0Sstevel@tonic-gate 
475*0Sstevel@tonic-gate /*
476*0Sstevel@tonic-gate  * Limit the number of times we will attempt to receive a reply without
477*0Sstevel@tonic-gate  * re-sending a response.
478*0Sstevel@tonic-gate  */
479*0Sstevel@tonic-gate #define	CLNT_MAXRECV_WITHOUT_RETRY	3
480*0Sstevel@tonic-gate static uint_t clnt_cots_maxrecv	= CLNT_MAXRECV_WITHOUT_RETRY;
481*0Sstevel@tonic-gate 
482*0Sstevel@tonic-gate uint_t *clnt_max_msg_sizep;
483*0Sstevel@tonic-gate void (*clnt_stop_idle)(queue_t *wq);
484*0Sstevel@tonic-gate 
485*0Sstevel@tonic-gate #define	ptoh(p)		(&((p)->cku_client))
486*0Sstevel@tonic-gate #define	htop(h)		((cku_private_t *)((h)->cl_private))
487*0Sstevel@tonic-gate 
488*0Sstevel@tonic-gate /*
489*0Sstevel@tonic-gate  * Times to retry
490*0Sstevel@tonic-gate  */
491*0Sstevel@tonic-gate #define	REFRESHES	2	/* authentication refreshes */
492*0Sstevel@tonic-gate 
493*0Sstevel@tonic-gate static int clnt_cots_do_bindresvport = 1; /* bind to reserved port */
494*0Sstevel@tonic-gate 
495*0Sstevel@tonic-gate static zone_key_t zone_cots_key;
496*0Sstevel@tonic-gate 
497*0Sstevel@tonic-gate /*
498*0Sstevel@tonic-gate  * We need to do this after all kernel threads in the zone have exited.
499*0Sstevel@tonic-gate  */
500*0Sstevel@tonic-gate /* ARGSUSED */
501*0Sstevel@tonic-gate static void
502*0Sstevel@tonic-gate clnt_zone_destroy(zoneid_t zoneid, void *unused)
503*0Sstevel@tonic-gate {
504*0Sstevel@tonic-gate 	struct cm_xprt **cmp;
505*0Sstevel@tonic-gate 	struct cm_xprt *cm_entry;
506*0Sstevel@tonic-gate 	struct cm_xprt *freelist = NULL;
507*0Sstevel@tonic-gate 
	/*
	 * First pass: unlink every transport belonging to this zone from
	 * the global cm_hd list onto a private freelist, all under
	 * connmgr_lock.
	 */
508*0Sstevel@tonic-gate 	mutex_enter(&connmgr_lock);
509*0Sstevel@tonic-gate 	cmp = &cm_hd;
510*0Sstevel@tonic-gate 	while ((cm_entry = *cmp) != NULL) {
511*0Sstevel@tonic-gate 		if (cm_entry->x_zoneid == zoneid) {
512*0Sstevel@tonic-gate 			*cmp = cm_entry->x_next;
513*0Sstevel@tonic-gate 			cm_entry->x_next = freelist;
514*0Sstevel@tonic-gate 			freelist = cm_entry;
515*0Sstevel@tonic-gate 		} else {
516*0Sstevel@tonic-gate 			cmp = &cm_entry->x_next;
517*0Sstevel@tonic-gate 		}
518*0Sstevel@tonic-gate 	}
519*0Sstevel@tonic-gate 	mutex_exit(&connmgr_lock);
	/*
	 * Second pass: close the unlinked transports with connmgr_lock
	 * dropped, so connmgr_close() is never called with the list lock
	 * held.
	 */
520*0Sstevel@tonic-gate 	while ((cm_entry = freelist) != NULL) {
521*0Sstevel@tonic-gate 		freelist = cm_entry->x_next;
522*0Sstevel@tonic-gate 		connmgr_close(cm_entry);
523*0Sstevel@tonic-gate 	}
524*0Sstevel@tonic-gate }
525*0Sstevel@tonic-gate 
526*0Sstevel@tonic-gate int
527*0Sstevel@tonic-gate clnt_cots_kcreate(dev_t dev, struct netbuf *addr, int family, rpcprog_t prog,
528*0Sstevel@tonic-gate 	rpcvers_t vers, uint_t max_msgsize, cred_t *cred, CLIENT **ncl)
529*0Sstevel@tonic-gate {
530*0Sstevel@tonic-gate 	CLIENT *h;
531*0Sstevel@tonic-gate 	cku_private_t *p;
532*0Sstevel@tonic-gate 	struct rpc_msg call_msg;
533*0Sstevel@tonic-gate 	struct rpcstat *rpcstat;
534*0Sstevel@tonic-gate 
535*0Sstevel@tonic-gate 	RPCLOG(8, "clnt_cots_kcreate: prog %u\n", prog);
536*0Sstevel@tonic-gate 
537*0Sstevel@tonic-gate 	rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone);
538*0Sstevel@tonic-gate 	ASSERT(rpcstat != NULL);
539*0Sstevel@tonic-gate 
540*0Sstevel@tonic-gate 	/* Allocate and initialize the client handle. */
541*0Sstevel@tonic-gate 	p = kmem_zalloc(sizeof (*p), KM_SLEEP);
542*0Sstevel@tonic-gate 
543*0Sstevel@tonic-gate 	h = ptoh(p);
544*0Sstevel@tonic-gate 
545*0Sstevel@tonic-gate 	h->cl_private = (caddr_t)p;
546*0Sstevel@tonic-gate 	h->cl_auth = authkern_create();
547*0Sstevel@tonic-gate 	h->cl_ops = &tcp_ops;
548*0Sstevel@tonic-gate 
549*0Sstevel@tonic-gate 	cv_init(&p->cku_call.call_cv, NULL, CV_DEFAULT, NULL);
550*0Sstevel@tonic-gate 	mutex_init(&p->cku_call.call_lock, NULL, MUTEX_DEFAULT, NULL);
551*0Sstevel@tonic-gate 
552*0Sstevel@tonic-gate 	/*
553*0Sstevel@tonic-gate 	 * If the current sanity check size in rpcmod is smaller
554*0Sstevel@tonic-gate 	 * than the size needed, then increase the sanity check.
555*0Sstevel@tonic-gate 	 */
556*0Sstevel@tonic-gate 	if (max_msgsize != 0 && clnt_max_msg_sizep != NULL &&
557*0Sstevel@tonic-gate 	    max_msgsize > *clnt_max_msg_sizep) {
558*0Sstevel@tonic-gate 		mutex_enter(&clnt_max_msg_lock);
559*0Sstevel@tonic-gate 		if (max_msgsize > *clnt_max_msg_sizep)
560*0Sstevel@tonic-gate 			*clnt_max_msg_sizep = max_msgsize;
561*0Sstevel@tonic-gate 		mutex_exit(&clnt_max_msg_lock);
562*0Sstevel@tonic-gate 	}
563*0Sstevel@tonic-gate 
564*0Sstevel@tonic-gate 	p->cku_outbuflen = COTS_DEFAULT_ALLOCSIZE;
565*0Sstevel@tonic-gate 
566*0Sstevel@tonic-gate 	/* Preserialize the call message header */
567*0Sstevel@tonic-gate 
568*0Sstevel@tonic-gate 	call_msg.rm_xid = 0;
569*0Sstevel@tonic-gate 	call_msg.rm_direction = CALL;
570*0Sstevel@tonic-gate 	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
571*0Sstevel@tonic-gate 	call_msg.rm_call.cb_prog = prog;
572*0Sstevel@tonic-gate 	call_msg.rm_call.cb_vers = vers;
573*0Sstevel@tonic-gate 
574*0Sstevel@tonic-gate 	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, WIRE_HDR_SIZE, XDR_ENCODE);
575*0Sstevel@tonic-gate 
576*0Sstevel@tonic-gate 	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
577*0Sstevel@tonic-gate 		RPCLOG0(1, "clnt_cots_kcreate - Fatal header serialization "
578*0Sstevel@tonic-gate 		    "error\n");
579*0Sstevel@tonic-gate 		auth_destroy(h->cl_auth);
580*0Sstevel@tonic-gate 		kmem_free(p, sizeof (cku_private_t));
581*0Sstevel@tonic-gate 		RPCLOG0(1, "clnt_cots_kcreate: create failed error EINVAL\n");
582*0Sstevel@tonic-gate 		return (EINVAL);		/* XXX */
583*0Sstevel@tonic-gate 	}
584*0Sstevel@tonic-gate 
585*0Sstevel@tonic-gate 	/*
586*0Sstevel@tonic-gate 	 * The zalloc initialized the fields below.
587*0Sstevel@tonic-gate 	 * p->cku_xid = 0;
588*0Sstevel@tonic-gate 	 * p->cku_flags = 0;
589*0Sstevel@tonic-gate 	 * p->cku_srcaddr.len = 0;
590*0Sstevel@tonic-gate 	 * p->cku_srcaddr.maxlen = 0;
591*0Sstevel@tonic-gate 	 */
592*0Sstevel@tonic-gate 
593*0Sstevel@tonic-gate 	p->cku_cred = cred;
594*0Sstevel@tonic-gate 	p->cku_device = dev;
595*0Sstevel@tonic-gate 	p->cku_addrfmly = family;
596*0Sstevel@tonic-gate 	p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
597*0Sstevel@tonic-gate 	p->cku_addr.maxlen = addr->maxlen;
598*0Sstevel@tonic-gate 	p->cku_addr.len = addr->len;
599*0Sstevel@tonic-gate 	bcopy(addr->buf, p->cku_addr.buf, addr->len);
600*0Sstevel@tonic-gate 	p->cku_stats = rpcstat->rpc_cots_client;
601*0Sstevel@tonic-gate 	p->cku_useresvport = -1; /* value is has not been set */
602*0Sstevel@tonic-gate 
603*0Sstevel@tonic-gate 	*ncl = h;
604*0Sstevel@tonic-gate 	return (0);
605*0Sstevel@tonic-gate }
606*0Sstevel@tonic-gate 
/*
 * Abort an in-progress call.  Deliberately a no-op for the COTS client:
 * this handle type provides no abort support, but the CLIENT ops vector
 * requires an entry (hence the lint ARGSUSED suppression).
 */
/*ARGSUSED*/
static void
clnt_cots_kabort(CLIENT *h)
{
}
612*0Sstevel@tonic-gate 
613*0Sstevel@tonic-gate /*
614*0Sstevel@tonic-gate  * Return error info on this handle.
615*0Sstevel@tonic-gate  */
616*0Sstevel@tonic-gate static void
617*0Sstevel@tonic-gate clnt_cots_kerror(CLIENT *h, struct rpc_err *err)
618*0Sstevel@tonic-gate {
619*0Sstevel@tonic-gate 	/* LINTED pointer alignment */
620*0Sstevel@tonic-gate 	cku_private_t *p = htop(h);
621*0Sstevel@tonic-gate 
622*0Sstevel@tonic-gate 	*err = p->cku_err;
623*0Sstevel@tonic-gate }
624*0Sstevel@tonic-gate 
625*0Sstevel@tonic-gate static bool_t
626*0Sstevel@tonic-gate clnt_cots_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
627*0Sstevel@tonic-gate {
628*0Sstevel@tonic-gate 	/* LINTED pointer alignment */
629*0Sstevel@tonic-gate 	cku_private_t *p = htop(h);
630*0Sstevel@tonic-gate 	XDR *xdrs;
631*0Sstevel@tonic-gate 
632*0Sstevel@tonic-gate 	xdrs = &(p->cku_outxdr);
633*0Sstevel@tonic-gate 	xdrs->x_op = XDR_FREE;
634*0Sstevel@tonic-gate 	return ((*xdr_res)(xdrs, res_ptr));
635*0Sstevel@tonic-gate }
636*0Sstevel@tonic-gate 
637*0Sstevel@tonic-gate static bool_t
638*0Sstevel@tonic-gate clnt_cots_kcontrol(CLIENT *h, int cmd, char *arg)
639*0Sstevel@tonic-gate {
640*0Sstevel@tonic-gate 	cku_private_t *p = htop(h);
641*0Sstevel@tonic-gate 
642*0Sstevel@tonic-gate 	switch (cmd) {
643*0Sstevel@tonic-gate 	case CLSET_PROGRESS:
644*0Sstevel@tonic-gate 		p->cku_progress = TRUE;
645*0Sstevel@tonic-gate 		return (TRUE);
646*0Sstevel@tonic-gate 
647*0Sstevel@tonic-gate 	case CLSET_XID:
648*0Sstevel@tonic-gate 		if (arg == NULL)
649*0Sstevel@tonic-gate 			return (FALSE);
650*0Sstevel@tonic-gate 
651*0Sstevel@tonic-gate 		p->cku_xid = *((uint32_t *)arg);
652*0Sstevel@tonic-gate 		return (TRUE);
653*0Sstevel@tonic-gate 
654*0Sstevel@tonic-gate 	case CLGET_XID:
655*0Sstevel@tonic-gate 		if (arg == NULL)
656*0Sstevel@tonic-gate 			return (FALSE);
657*0Sstevel@tonic-gate 
658*0Sstevel@tonic-gate 		*((uint32_t *)arg) = p->cku_xid;
659*0Sstevel@tonic-gate 		return (TRUE);
660*0Sstevel@tonic-gate 
661*0Sstevel@tonic-gate 	case CLSET_NODELAYONERR:
662*0Sstevel@tonic-gate 		if (arg == NULL)
663*0Sstevel@tonic-gate 			return (FALSE);
664*0Sstevel@tonic-gate 
665*0Sstevel@tonic-gate 		if (*((bool_t *)arg) == TRUE) {
666*0Sstevel@tonic-gate 			p->cku_nodelayonerr = TRUE;
667*0Sstevel@tonic-gate 			return (TRUE);
668*0Sstevel@tonic-gate 		}
669*0Sstevel@tonic-gate 		if (*((bool_t *)arg) == FALSE) {
670*0Sstevel@tonic-gate 			p->cku_nodelayonerr = FALSE;
671*0Sstevel@tonic-gate 			return (TRUE);
672*0Sstevel@tonic-gate 		}
673*0Sstevel@tonic-gate 		return (FALSE);
674*0Sstevel@tonic-gate 
675*0Sstevel@tonic-gate 	case CLGET_NODELAYONERR:
676*0Sstevel@tonic-gate 		if (arg == NULL)
677*0Sstevel@tonic-gate 			return (FALSE);
678*0Sstevel@tonic-gate 
679*0Sstevel@tonic-gate 		*((bool_t *)arg) = p->cku_nodelayonerr;
680*0Sstevel@tonic-gate 		return (TRUE);
681*0Sstevel@tonic-gate 
682*0Sstevel@tonic-gate 	case CLSET_BINDRESVPORT:
683*0Sstevel@tonic-gate 		if (arg == NULL)
684*0Sstevel@tonic-gate 			return (FALSE);
685*0Sstevel@tonic-gate 
686*0Sstevel@tonic-gate 		if (*(int *)arg != 1 && *(int *)arg != 0)
687*0Sstevel@tonic-gate 			return (FALSE);
688*0Sstevel@tonic-gate 
689*0Sstevel@tonic-gate 		p->cku_useresvport = *(int *)arg;
690*0Sstevel@tonic-gate 
691*0Sstevel@tonic-gate 		return (TRUE);
692*0Sstevel@tonic-gate 
693*0Sstevel@tonic-gate 	case CLGET_BINDRESVPORT:
694*0Sstevel@tonic-gate 		if (arg == NULL)
695*0Sstevel@tonic-gate 			return (FALSE);
696*0Sstevel@tonic-gate 
697*0Sstevel@tonic-gate 		*(int *)arg = p->cku_useresvport;
698*0Sstevel@tonic-gate 
699*0Sstevel@tonic-gate 		return (TRUE);
700*0Sstevel@tonic-gate 
701*0Sstevel@tonic-gate 	default:
702*0Sstevel@tonic-gate 		return (FALSE);
703*0Sstevel@tonic-gate 	}
704*0Sstevel@tonic-gate }
705*0Sstevel@tonic-gate 
706*0Sstevel@tonic-gate /*
707*0Sstevel@tonic-gate  * Destroy rpc handle.  Frees the space used for output buffer,
708*0Sstevel@tonic-gate  * private data, and handle structure.
709*0Sstevel@tonic-gate  */
710*0Sstevel@tonic-gate static void
711*0Sstevel@tonic-gate clnt_cots_kdestroy(CLIENT *h)
712*0Sstevel@tonic-gate {
713*0Sstevel@tonic-gate 	/* LINTED pointer alignment */
714*0Sstevel@tonic-gate 	cku_private_t *p = htop(h);
715*0Sstevel@tonic-gate 	calllist_t *call = &p->cku_call;
716*0Sstevel@tonic-gate 
717*0Sstevel@tonic-gate 	RPCLOG(8, "clnt_cots_kdestroy h: %p\n", (void *)h);
718*0Sstevel@tonic-gate 	RPCLOG(8, "clnt_cots_kdestroy h: xid=0x%x\n", p->cku_xid);
719*0Sstevel@tonic-gate 
720*0Sstevel@tonic-gate 	if (p->cku_flags & CKU_ONQUEUE) {
721*0Sstevel@tonic-gate 		RPCLOG(64, "clnt_cots_kdestroy h: removing call for xid 0x%x "
722*0Sstevel@tonic-gate 		    "from dispatch list\n", p->cku_xid);
723*0Sstevel@tonic-gate 		call_table_remove(call);
724*0Sstevel@tonic-gate 	}
725*0Sstevel@tonic-gate 
726*0Sstevel@tonic-gate 	if (call->call_reply)
727*0Sstevel@tonic-gate 		freemsg(call->call_reply);
728*0Sstevel@tonic-gate 	cv_destroy(&call->call_cv);
729*0Sstevel@tonic-gate 	mutex_destroy(&call->call_lock);
730*0Sstevel@tonic-gate 
731*0Sstevel@tonic-gate 	kmem_free(p->cku_srcaddr.buf, p->cku_srcaddr.maxlen);
732*0Sstevel@tonic-gate 	kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
733*0Sstevel@tonic-gate 	kmem_free(p, sizeof (*p));
734*0Sstevel@tonic-gate }
735*0Sstevel@tonic-gate 
/*
 * NOTE(review): debug/observability counter — presumably incremented
 * when a reply must be pulled up into a contiguous mblk; usage is
 * outside this window, confirm against the rest of the file.
 */
static int clnt_cots_pulls;
#define	RM_HDR_SIZE	4	/* record mark header size */
738*0Sstevel@tonic-gate 
739*0Sstevel@tonic-gate /*
740*0Sstevel@tonic-gate  * Call remote procedure.
741*0Sstevel@tonic-gate  */
742*0Sstevel@tonic-gate static enum clnt_stat
743*0Sstevel@tonic-gate clnt_cots_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
744*0Sstevel@tonic-gate     caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp, struct timeval wait)
745*0Sstevel@tonic-gate {
746*0Sstevel@tonic-gate 	/* LINTED pointer alignment */
747*0Sstevel@tonic-gate 	cku_private_t *p = htop(h);
748*0Sstevel@tonic-gate 	calllist_t *call = &p->cku_call;
749*0Sstevel@tonic-gate 	XDR *xdrs;
750*0Sstevel@tonic-gate 	struct rpc_msg reply_msg;
751*0Sstevel@tonic-gate 	mblk_t *mp;
752*0Sstevel@tonic-gate #ifdef	RPCDEBUG
753*0Sstevel@tonic-gate 	clock_t time_sent;
754*0Sstevel@tonic-gate #endif
755*0Sstevel@tonic-gate 	struct netbuf *retryaddr;
756*0Sstevel@tonic-gate 	struct cm_xprt *cm_entry = NULL;
757*0Sstevel@tonic-gate 	queue_t *wq;
758*0Sstevel@tonic-gate 	int len;
759*0Sstevel@tonic-gate 	int mpsize;
760*0Sstevel@tonic-gate 	int refreshes = REFRESHES;
761*0Sstevel@tonic-gate 	int interrupted;
762*0Sstevel@tonic-gate 	int tidu_size;
763*0Sstevel@tonic-gate 	enum clnt_stat status;
764*0Sstevel@tonic-gate 	struct timeval cwait;
765*0Sstevel@tonic-gate 	bool_t delay_first = FALSE;
766*0Sstevel@tonic-gate 	clock_t ticks;
767*0Sstevel@tonic-gate 
768*0Sstevel@tonic-gate 	RPCLOG(2, "clnt_cots_kcallit, procnum %u\n", procnum);
769*0Sstevel@tonic-gate 	COTSRCSTAT_INCR(p->cku_stats, rccalls);
770*0Sstevel@tonic-gate 
771*0Sstevel@tonic-gate 	RPCLOG(2, "clnt_cots_kcallit: wait.tv_sec: %ld\n", wait.tv_sec);
772*0Sstevel@tonic-gate 	RPCLOG(2, "clnt_cots_kcallit: wait.tv_usec: %ld\n", wait.tv_usec);
773*0Sstevel@tonic-gate 
774*0Sstevel@tonic-gate 	/*
775*0Sstevel@tonic-gate 	 * Bug ID 1240234:
776*0Sstevel@tonic-gate 	 * Look out for zero length timeouts. We don't want to
777*0Sstevel@tonic-gate 	 * wait zero seconds for a connection to be established.
778*0Sstevel@tonic-gate 	 */
779*0Sstevel@tonic-gate 	if (wait.tv_sec < clnt_cots_min_conntout) {
780*0Sstevel@tonic-gate 		cwait.tv_sec = clnt_cots_min_conntout;
781*0Sstevel@tonic-gate 		cwait.tv_usec = 0;
782*0Sstevel@tonic-gate 		RPCLOG(8, "clnt_cots_kcallit: wait.tv_sec (%ld) too low,",
783*0Sstevel@tonic-gate 		    wait.tv_sec);
784*0Sstevel@tonic-gate 		RPCLOG(8, " setting to: %d\n", clnt_cots_min_conntout);
785*0Sstevel@tonic-gate 	} else {
786*0Sstevel@tonic-gate 		cwait = wait;
787*0Sstevel@tonic-gate 	}
788*0Sstevel@tonic-gate 
789*0Sstevel@tonic-gate call_again:
790*0Sstevel@tonic-gate 	if (cm_entry) {
791*0Sstevel@tonic-gate 		connmgr_release(cm_entry);
792*0Sstevel@tonic-gate 		cm_entry = NULL;
793*0Sstevel@tonic-gate 	}
794*0Sstevel@tonic-gate 
795*0Sstevel@tonic-gate 	mp = NULL;
796*0Sstevel@tonic-gate 
797*0Sstevel@tonic-gate 	/*
798*0Sstevel@tonic-gate 	 * If the call is not a retry, allocate a new xid and cache it
799*0Sstevel@tonic-gate 	 * for future retries.
800*0Sstevel@tonic-gate 	 * Bug ID 1246045:
801*0Sstevel@tonic-gate 	 * Treat call as a retry for purposes of binding the source
802*0Sstevel@tonic-gate 	 * port only if we actually attempted to send anything on
803*0Sstevel@tonic-gate 	 * the previous call.
804*0Sstevel@tonic-gate 	 */
805*0Sstevel@tonic-gate 	if (p->cku_xid == 0) {
806*0Sstevel@tonic-gate 		p->cku_xid = alloc_xid();
807*0Sstevel@tonic-gate 		/*
808*0Sstevel@tonic-gate 		 * We need to ASSERT here that our xid != 0 because this
809*0Sstevel@tonic-gate 		 * determines whether or not our call record gets placed on
810*0Sstevel@tonic-gate 		 * the hash table or the linked list.  By design, we mandate
811*0Sstevel@tonic-gate 		 * that RPC calls over cots must have xid's != 0, so we can
812*0Sstevel@tonic-gate 		 * ensure proper management of the hash table.
813*0Sstevel@tonic-gate 		 */
814*0Sstevel@tonic-gate 		ASSERT(p->cku_xid != 0);
815*0Sstevel@tonic-gate 
816*0Sstevel@tonic-gate 		retryaddr = NULL;
817*0Sstevel@tonic-gate 		p->cku_flags &= ~CKU_SENT;
818*0Sstevel@tonic-gate 
819*0Sstevel@tonic-gate 		if (p->cku_flags & CKU_ONQUEUE) {
820*0Sstevel@tonic-gate 			RPCLOG(8, "clnt_cots_kcallit: new call, dequeuing old"
821*0Sstevel@tonic-gate 			    " one (%p)\n", (void *)call);
822*0Sstevel@tonic-gate 			call_table_remove(call);
823*0Sstevel@tonic-gate 			p->cku_flags &= ~CKU_ONQUEUE;
824*0Sstevel@tonic-gate 			RPCLOG(64, "clnt_cots_kcallit: removing call from "
825*0Sstevel@tonic-gate 			    "dispatch list because xid was zero (now 0x%x)\n",
826*0Sstevel@tonic-gate 			    p->cku_xid);
827*0Sstevel@tonic-gate 		}
828*0Sstevel@tonic-gate 
829*0Sstevel@tonic-gate 		if (call->call_reply != NULL) {
830*0Sstevel@tonic-gate 			freemsg(call->call_reply);
831*0Sstevel@tonic-gate 			call->call_reply = NULL;
832*0Sstevel@tonic-gate 		}
833*0Sstevel@tonic-gate 	} else if (p->cku_srcaddr.buf == NULL || p->cku_srcaddr.len == 0) {
834*0Sstevel@tonic-gate 		retryaddr = NULL;
835*0Sstevel@tonic-gate 
836*0Sstevel@tonic-gate 	} else if (p->cku_flags & CKU_SENT) {
837*0Sstevel@tonic-gate 		retryaddr = &p->cku_srcaddr;
838*0Sstevel@tonic-gate 
839*0Sstevel@tonic-gate 	} else {
840*0Sstevel@tonic-gate 		/*
841*0Sstevel@tonic-gate 		 * Bug ID 1246045: Nothing was sent, so set retryaddr to
842*0Sstevel@tonic-gate 		 * NULL and let connmgr_get() bind to any source port it
843*0Sstevel@tonic-gate 		 * can get.
844*0Sstevel@tonic-gate 		 */
845*0Sstevel@tonic-gate 		retryaddr = NULL;
846*0Sstevel@tonic-gate 	}
847*0Sstevel@tonic-gate 
848*0Sstevel@tonic-gate 	RPCLOG(64, "clnt_cots_kcallit: xid = 0x%x", p->cku_xid);
849*0Sstevel@tonic-gate 	RPCLOG(64, " flags = 0x%x\n", p->cku_flags);
850*0Sstevel@tonic-gate 
851*0Sstevel@tonic-gate 	p->cku_err.re_status = RPC_TIMEDOUT;
852*0Sstevel@tonic-gate 	p->cku_err.re_errno = p->cku_err.re_terrno = 0;
853*0Sstevel@tonic-gate 
854*0Sstevel@tonic-gate 	cm_entry = connmgr_wrapget(retryaddr, &cwait, p);
855*0Sstevel@tonic-gate 
856*0Sstevel@tonic-gate 	if (cm_entry == NULL) {
857*0Sstevel@tonic-gate 		RPCLOG(1, "clnt_cots_kcallit: can't connect status %s\n",
858*0Sstevel@tonic-gate 		    clnt_sperrno(p->cku_err.re_status));
859*0Sstevel@tonic-gate 
860*0Sstevel@tonic-gate 		/*
861*0Sstevel@tonic-gate 		 * The reasons why we fail to create a connection are
862*0Sstevel@tonic-gate 		 * varied. In most cases we don't want the caller to
863*0Sstevel@tonic-gate 		 * immediately retry. This could have one or more
864*0Sstevel@tonic-gate 		 * bad effects. This includes flooding the net with
865*0Sstevel@tonic-gate 		 * connect requests to ports with no listener; a hard
866*0Sstevel@tonic-gate 		 * kernel loop due to all the "reserved" TCP ports being
867*0Sstevel@tonic-gate 		 * in use.
868*0Sstevel@tonic-gate 		 */
869*0Sstevel@tonic-gate 		delay_first = TRUE;
870*0Sstevel@tonic-gate 
871*0Sstevel@tonic-gate 		/*
872*0Sstevel@tonic-gate 		 * Even if we end up returning EINTR, we still count a
873*0Sstevel@tonic-gate 		 * a "can't connect", because the connection manager
874*0Sstevel@tonic-gate 		 * might have been committed to waiting for or timing out on
875*0Sstevel@tonic-gate 		 * a connection.
876*0Sstevel@tonic-gate 		 */
877*0Sstevel@tonic-gate 		COTSRCSTAT_INCR(p->cku_stats, rccantconn);
878*0Sstevel@tonic-gate 		switch (p->cku_err.re_status) {
879*0Sstevel@tonic-gate 		case RPC_INTR:
880*0Sstevel@tonic-gate 			p->cku_err.re_errno = EINTR;
881*0Sstevel@tonic-gate 
882*0Sstevel@tonic-gate 			/*
883*0Sstevel@tonic-gate 			 * No need to delay because a UNIX signal(2)
884*0Sstevel@tonic-gate 			 * interrupted us. The caller likely won't
885*0Sstevel@tonic-gate 			 * retry the CLNT_CALL() and even if it does,
886*0Sstevel@tonic-gate 			 * we assume the caller knows what it is doing.
887*0Sstevel@tonic-gate 			 */
888*0Sstevel@tonic-gate 			delay_first = FALSE;
889*0Sstevel@tonic-gate 			break;
890*0Sstevel@tonic-gate 
891*0Sstevel@tonic-gate 		case RPC_TIMEDOUT:
892*0Sstevel@tonic-gate 			p->cku_err.re_errno = ETIMEDOUT;
893*0Sstevel@tonic-gate 
894*0Sstevel@tonic-gate 			/*
895*0Sstevel@tonic-gate 			 * No need to delay because timed out already
896*0Sstevel@tonic-gate 			 * on the connection request and assume that the
897*0Sstevel@tonic-gate 			 * transport time out is longer than our minimum
898*0Sstevel@tonic-gate 			 * timeout, or least not too much smaller.
899*0Sstevel@tonic-gate 			 */
900*0Sstevel@tonic-gate 			delay_first = FALSE;
901*0Sstevel@tonic-gate 			break;
902*0Sstevel@tonic-gate 
903*0Sstevel@tonic-gate 		case RPC_SYSTEMERROR:
904*0Sstevel@tonic-gate 		case RPC_TLIERROR:
905*0Sstevel@tonic-gate 			/*
906*0Sstevel@tonic-gate 			 * We want to delay here because a transient
907*0Sstevel@tonic-gate 			 * system error has a better chance of going away
908*0Sstevel@tonic-gate 			 * if we delay a bit. If it's not transient, then
909*0Sstevel@tonic-gate 			 * we don't want end up in a hard kernel loop
910*0Sstevel@tonic-gate 			 * due to retries.
911*0Sstevel@tonic-gate 			 */
912*0Sstevel@tonic-gate 			ASSERT(p->cku_err.re_errno != 0);
913*0Sstevel@tonic-gate 			break;
914*0Sstevel@tonic-gate 
915*0Sstevel@tonic-gate 
916*0Sstevel@tonic-gate 		case RPC_CANTCONNECT:
917*0Sstevel@tonic-gate 			/*
918*0Sstevel@tonic-gate 			 * RPC_CANTCONNECT is set on T_ERROR_ACK which
919*0Sstevel@tonic-gate 			 * implies some error down in the TCP layer or
920*0Sstevel@tonic-gate 			 * below. If cku_nodelayonerror is set then we
921*0Sstevel@tonic-gate 			 * assume the caller knows not to try too hard.
922*0Sstevel@tonic-gate 			 */
923*0Sstevel@tonic-gate 			RPCLOG0(8, "clnt_cots_kcallit: connection failed,");
924*0Sstevel@tonic-gate 			RPCLOG0(8, " re_status=RPC_CANTCONNECT,");
925*0Sstevel@tonic-gate 			RPCLOG(8, " re_errno=%d,", p->cku_err.re_errno);
926*0Sstevel@tonic-gate 			RPCLOG(8, " cku_nodelayonerr=%d", p->cku_nodelayonerr);
927*0Sstevel@tonic-gate 			if (p->cku_nodelayonerr == TRUE)
928*0Sstevel@tonic-gate 				delay_first = FALSE;
929*0Sstevel@tonic-gate 
930*0Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
931*0Sstevel@tonic-gate 
932*0Sstevel@tonic-gate 			break;
933*0Sstevel@tonic-gate 
934*0Sstevel@tonic-gate 		case RPC_XPRTFAILED:
935*0Sstevel@tonic-gate 			/*
936*0Sstevel@tonic-gate 			 * We want to delay here because we likely
937*0Sstevel@tonic-gate 			 * got a refused connection.
938*0Sstevel@tonic-gate 			 */
939*0Sstevel@tonic-gate 			if (p->cku_err.re_errno != 0)
940*0Sstevel@tonic-gate 				break;
941*0Sstevel@tonic-gate 
942*0Sstevel@tonic-gate 			/* fall thru */
943*0Sstevel@tonic-gate 
944*0Sstevel@tonic-gate 		default:
945*0Sstevel@tonic-gate 			/*
946*0Sstevel@tonic-gate 			 * We delay here because it is better to err
947*0Sstevel@tonic-gate 			 * on the side of caution. If we got here then
948*0Sstevel@tonic-gate 			 * status could have been RPC_SUCCESS, but we
949*0Sstevel@tonic-gate 			 * know that we did not get a connection, so
950*0Sstevel@tonic-gate 			 * force the rpc status to RPC_CANTCONNECT.
951*0Sstevel@tonic-gate 			 */
952*0Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTCONNECT;
953*0Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
954*0Sstevel@tonic-gate 			break;
955*0Sstevel@tonic-gate 		}
956*0Sstevel@tonic-gate 		if (delay_first == TRUE)
957*0Sstevel@tonic-gate 			ticks = clnt_cots_min_tout * drv_usectohz(1000000);
958*0Sstevel@tonic-gate 		goto cots_done;
959*0Sstevel@tonic-gate 	}
960*0Sstevel@tonic-gate 
961*0Sstevel@tonic-gate 	/*
962*0Sstevel@tonic-gate 	 * If we've never sent any request on this connection (send count
963*0Sstevel@tonic-gate 	 * is zero, or the connection has been reset), cache the
964*0Sstevel@tonic-gate 	 * the connection's create time and send a request (possibly a retry)
965*0Sstevel@tonic-gate 	 */
966*0Sstevel@tonic-gate 	if ((p->cku_flags & CKU_SENT) == 0 ||
967*0Sstevel@tonic-gate 	    p->cku_ctime != cm_entry->x_ctime) {
968*0Sstevel@tonic-gate 		p->cku_ctime = cm_entry->x_ctime;
969*0Sstevel@tonic-gate 
970*0Sstevel@tonic-gate 	} else if ((p->cku_flags & CKU_SENT) && (p->cku_flags & CKU_ONQUEUE) &&
971*0Sstevel@tonic-gate 	    (call->call_reply != NULL ||
972*0Sstevel@tonic-gate 	    p->cku_recv_attempts < clnt_cots_maxrecv)) {
973*0Sstevel@tonic-gate 
974*0Sstevel@tonic-gate 		/*
975*0Sstevel@tonic-gate 		 * If we've sent a request and our call is on the dispatch
976*0Sstevel@tonic-gate 		 * queue and we haven't made too many receive attempts, then
977*0Sstevel@tonic-gate 		 * don't re-send, just receive.
978*0Sstevel@tonic-gate 		 */
979*0Sstevel@tonic-gate 		p->cku_recv_attempts++;
980*0Sstevel@tonic-gate 		goto read_again;
981*0Sstevel@tonic-gate 	}
982*0Sstevel@tonic-gate 
983*0Sstevel@tonic-gate 	/*
984*0Sstevel@tonic-gate 	 * Now we create the RPC request in a STREAMS message.  We have to do
985*0Sstevel@tonic-gate 	 * this after the call to connmgr_get so that we have the correct
986*0Sstevel@tonic-gate 	 * TIDU size for the transport.
987*0Sstevel@tonic-gate 	 */
988*0Sstevel@tonic-gate 	tidu_size = cm_entry->x_tidu_size;
989*0Sstevel@tonic-gate 	len = MSG_OFFSET + MAX(tidu_size, RM_HDR_SIZE + WIRE_HDR_SIZE);
990*0Sstevel@tonic-gate 
991*0Sstevel@tonic-gate 	while ((mp = allocb(len, BPRI_MED)) == NULL) {
992*0Sstevel@tonic-gate 		if (strwaitbuf(len, BPRI_MED)) {
993*0Sstevel@tonic-gate 			p->cku_err.re_status = RPC_SYSTEMERROR;
994*0Sstevel@tonic-gate 			p->cku_err.re_errno = ENOSR;
995*0Sstevel@tonic-gate 			COTSRCSTAT_INCR(p->cku_stats, rcnomem);
996*0Sstevel@tonic-gate 			goto cots_done;
997*0Sstevel@tonic-gate 		}
998*0Sstevel@tonic-gate 	}
999*0Sstevel@tonic-gate 	xdrs = &p->cku_outxdr;
1000*0Sstevel@tonic-gate 	xdrmblk_init(xdrs, mp, XDR_ENCODE, tidu_size);
1001*0Sstevel@tonic-gate 	mpsize = MBLKSIZE(mp);
1002*0Sstevel@tonic-gate 	ASSERT(mpsize >= len);
1003*0Sstevel@tonic-gate 	ASSERT(mp->b_rptr == mp->b_datap->db_base);
1004*0Sstevel@tonic-gate 
1005*0Sstevel@tonic-gate 	/*
1006*0Sstevel@tonic-gate 	 * If the size of mblk is not appreciably larger than what we
1007*0Sstevel@tonic-gate 	 * asked, then resize the mblk to exactly len bytes. The reason for
1008*0Sstevel@tonic-gate 	 * this: suppose len is 1600 bytes, the tidu is 1460 bytes
1009*0Sstevel@tonic-gate 	 * (from TCP over ethernet), and the arguments to the RPC require
1010*0Sstevel@tonic-gate 	 * 2800 bytes. Ideally we want the protocol to render two
1011*0Sstevel@tonic-gate 	 * ~1400 byte segments over the wire. However if allocb() gives us a 2k
1012*0Sstevel@tonic-gate 	 * mblk, and we allocate a second mblk for the remainder, the protocol
1013*0Sstevel@tonic-gate 	 * module may generate 3 segments over the wire:
1014*0Sstevel@tonic-gate 	 * 1460 bytes for the first, 448 (2048 - 1600) for the second, and
1015*0Sstevel@tonic-gate 	 * 892 for the third. If we "waste" 448 bytes in the first mblk,
1016*0Sstevel@tonic-gate 	 * the XDR encoding will generate two ~1400 byte mblks, and the
1017*0Sstevel@tonic-gate 	 * protocol module is more likely to produce properly sized segments.
1018*0Sstevel@tonic-gate 	 */
1019*0Sstevel@tonic-gate 	if ((mpsize >> 1) <= len)
1020*0Sstevel@tonic-gate 		mp->b_rptr += (mpsize - len);
1021*0Sstevel@tonic-gate 
1022*0Sstevel@tonic-gate 	/*
1023*0Sstevel@tonic-gate 	 * Adjust b_rptr to reserve space for the non-data protocol headers
1024*0Sstevel@tonic-gate 	 * any downstream modules might like to add, and for the
1025*0Sstevel@tonic-gate 	 * record marking header.
1026*0Sstevel@tonic-gate 	 */
1027*0Sstevel@tonic-gate 	mp->b_rptr += (MSG_OFFSET + RM_HDR_SIZE);
1028*0Sstevel@tonic-gate 
1029*0Sstevel@tonic-gate 	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
1030*0Sstevel@tonic-gate 		/* Copy in the preserialized RPC header information. */
1031*0Sstevel@tonic-gate 		bcopy(p->cku_rpchdr, mp->b_rptr, WIRE_HDR_SIZE);
1032*0Sstevel@tonic-gate 
1033*0Sstevel@tonic-gate 		/* Use XDR_SETPOS() to set the b_wptr to past the RPC header. */
1034*0Sstevel@tonic-gate 		XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base +
1035*0Sstevel@tonic-gate 		    WIRE_HDR_SIZE));
1036*0Sstevel@tonic-gate 
1037*0Sstevel@tonic-gate 		ASSERT((mp->b_wptr - mp->b_rptr) == WIRE_HDR_SIZE);
1038*0Sstevel@tonic-gate 
1039*0Sstevel@tonic-gate 		/* Serialize the procedure number and the arguments. */
1040*0Sstevel@tonic-gate 		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
1041*0Sstevel@tonic-gate 		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
1042*0Sstevel@tonic-gate 		    (!(*xdr_args)(xdrs, argsp))) {
1043*0Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTENCODEARGS;
1044*0Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
1045*0Sstevel@tonic-gate 			goto cots_done;
1046*0Sstevel@tonic-gate 		}
1047*0Sstevel@tonic-gate 
1048*0Sstevel@tonic-gate 		(*(uint32_t *)(mp->b_rptr)) = p->cku_xid;
1049*0Sstevel@tonic-gate 	} else {
1050*0Sstevel@tonic-gate 		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[WIRE_HDR_SIZE];
1051*0Sstevel@tonic-gate 		IXDR_PUT_U_INT32(uproc, procnum);
1052*0Sstevel@tonic-gate 
1053*0Sstevel@tonic-gate 		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
1054*0Sstevel@tonic-gate 
1055*0Sstevel@tonic-gate 		/* Use XDR_SETPOS() to set the b_wptr. */
1056*0Sstevel@tonic-gate 		XDR_SETPOS(xdrs, (uint_t)(mp->b_rptr - mp->b_datap->db_base));
1057*0Sstevel@tonic-gate 
1058*0Sstevel@tonic-gate 		/* Serialize the procedure number and the arguments. */
1059*0Sstevel@tonic-gate 		if (!AUTH_WRAP(h->cl_auth, p->cku_rpchdr, WIRE_HDR_SIZE+4,
1060*0Sstevel@tonic-gate 		    xdrs, xdr_args, argsp)) {
1061*0Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTENCODEARGS;
1062*0Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
1063*0Sstevel@tonic-gate 			goto cots_done;
1064*0Sstevel@tonic-gate 		}
1065*0Sstevel@tonic-gate 	}
1066*0Sstevel@tonic-gate 
1067*0Sstevel@tonic-gate 	RPCLOG(2, "clnt_cots_kcallit: connected, sending call, tidu_size %d\n",
1068*0Sstevel@tonic-gate 	    tidu_size);
1069*0Sstevel@tonic-gate 
1070*0Sstevel@tonic-gate 	wq = cm_entry->x_wq;
1071*0Sstevel@tonic-gate 	clnt_dispatch_send(wq, mp, call, p->cku_xid,
1072*0Sstevel@tonic-gate 				(p->cku_flags & CKU_ONQUEUE));
1073*0Sstevel@tonic-gate 
1074*0Sstevel@tonic-gate 	RPCLOG(64, "clnt_cots_kcallit: sent call for xid 0x%x\n",
1075*0Sstevel@tonic-gate 		(uint_t)p->cku_xid);
1076*0Sstevel@tonic-gate 	p->cku_flags = (CKU_ONQUEUE|CKU_SENT);
1077*0Sstevel@tonic-gate 	p->cku_recv_attempts = 1;
1078*0Sstevel@tonic-gate 
1079*0Sstevel@tonic-gate #ifdef	RPCDEBUG
1080*0Sstevel@tonic-gate 	time_sent = lbolt;
1081*0Sstevel@tonic-gate #endif
1082*0Sstevel@tonic-gate 
1083*0Sstevel@tonic-gate 	/*
1084*0Sstevel@tonic-gate 	 * Wait for a reply or a timeout.  If there is no error or timeout,
1085*0Sstevel@tonic-gate 	 * (both indicated by call_status), call->call_reply will contain
1086*0Sstevel@tonic-gate 	 * the RPC reply message.
1087*0Sstevel@tonic-gate 	 */
1088*0Sstevel@tonic-gate read_again:
1089*0Sstevel@tonic-gate 	mutex_enter(&call->call_lock);
1090*0Sstevel@tonic-gate 	interrupted = 0;
1091*0Sstevel@tonic-gate 	if (call->call_status == RPC_TIMEDOUT) {
1092*0Sstevel@tonic-gate 		/*
1093*0Sstevel@tonic-gate 		 * Indicate that the lwp is not to be stopped while waiting
1094*0Sstevel@tonic-gate 		 * for this network traffic.  This is to avoid deadlock while
1095*0Sstevel@tonic-gate 		 * debugging a process via /proc and also to avoid recursive
1096*0Sstevel@tonic-gate 		 * mutex_enter()s due to NFS page faults while stopping
1097*0Sstevel@tonic-gate 		 * (NFS holds locks when it calls here).
1098*0Sstevel@tonic-gate 		 */
1099*0Sstevel@tonic-gate 		clock_t cv_wait_ret;
1100*0Sstevel@tonic-gate 		clock_t timout;
1101*0Sstevel@tonic-gate 		clock_t oldlbolt;
1102*0Sstevel@tonic-gate 
1103*0Sstevel@tonic-gate 		klwp_t *lwp = ttolwp(curthread);
1104*0Sstevel@tonic-gate 
1105*0Sstevel@tonic-gate 		if (lwp != NULL)
1106*0Sstevel@tonic-gate 			lwp->lwp_nostop++;
1107*0Sstevel@tonic-gate 
1108*0Sstevel@tonic-gate 		oldlbolt = lbolt;
1109*0Sstevel@tonic-gate 		timout = wait.tv_sec * drv_usectohz(1000000) +
1110*0Sstevel@tonic-gate 		    drv_usectohz(wait.tv_usec) + oldlbolt;
1111*0Sstevel@tonic-gate 		/*
1112*0Sstevel@tonic-gate 		 * Iterate until the call_status is changed to something
1113*0Sstevel@tonic-gate 		 * other that RPC_TIMEDOUT, or if cv_timedwait_sig() returns
1114*0Sstevel@tonic-gate 		 * something <=0 zero. The latter means that we timed
1115*0Sstevel@tonic-gate 		 * out.
1116*0Sstevel@tonic-gate 		 */
1117*0Sstevel@tonic-gate 		if (h->cl_nosignal)
1118*0Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait(&call->call_cv,
1119*0Sstevel@tonic-gate 			    &call->call_lock, timout)) > 0 &&
1120*0Sstevel@tonic-gate 			    call->call_status == RPC_TIMEDOUT);
1121*0Sstevel@tonic-gate 		else
1122*0Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(
1123*0Sstevel@tonic-gate 			    &call->call_cv,
1124*0Sstevel@tonic-gate 			    &call->call_lock, timout)) > 0 &&
1125*0Sstevel@tonic-gate 			    call->call_status == RPC_TIMEDOUT);
1126*0Sstevel@tonic-gate 
1127*0Sstevel@tonic-gate 		switch (cv_wait_ret) {
1128*0Sstevel@tonic-gate 		case 0:
1129*0Sstevel@tonic-gate 			/*
1130*0Sstevel@tonic-gate 			 * If we got out of the above loop with
1131*0Sstevel@tonic-gate 			 * cv_timedwait_sig() returning 0, then we were
1132*0Sstevel@tonic-gate 			 * interrupted regardless what call_status is.
1133*0Sstevel@tonic-gate 			 */
1134*0Sstevel@tonic-gate 			interrupted = 1;
1135*0Sstevel@tonic-gate 			break;
1136*0Sstevel@tonic-gate 		case -1:
1137*0Sstevel@tonic-gate 			/* cv_timedwait_sig() timed out */
1138*0Sstevel@tonic-gate 			break;
1139*0Sstevel@tonic-gate 		default:
1140*0Sstevel@tonic-gate 
1141*0Sstevel@tonic-gate 			/*
1142*0Sstevel@tonic-gate 			 * We were cv_signaled(). If we didn't
1143*0Sstevel@tonic-gate 			 * get a successful call_status and returned
1144*0Sstevel@tonic-gate 			 * before time expired, delay up to clnt_cots_min_tout
1145*0Sstevel@tonic-gate 			 * seconds so that the caller doesn't immediately
1146*0Sstevel@tonic-gate 			 * try to call us again and thus force the
1147*0Sstevel@tonic-gate 			 * same condition that got us here (such
1148*0Sstevel@tonic-gate 			 * as a RPC_XPRTFAILED due to the server not
1149*0Sstevel@tonic-gate 			 * listening on the end-point.
1150*0Sstevel@tonic-gate 			 */
1151*0Sstevel@tonic-gate 			if (call->call_status != RPC_SUCCESS) {
1152*0Sstevel@tonic-gate 				clock_t curlbolt;
1153*0Sstevel@tonic-gate 				clock_t diff;
1154*0Sstevel@tonic-gate 
1155*0Sstevel@tonic-gate 				curlbolt = ddi_get_lbolt();
1156*0Sstevel@tonic-gate 				ticks = clnt_cots_min_tout *
1157*0Sstevel@tonic-gate 				    drv_usectohz(1000000);
1158*0Sstevel@tonic-gate 				diff = curlbolt - oldlbolt;
1159*0Sstevel@tonic-gate 				if (diff < ticks) {
1160*0Sstevel@tonic-gate 					delay_first = TRUE;
1161*0Sstevel@tonic-gate 					if (diff > 0)
1162*0Sstevel@tonic-gate 						ticks -= diff;
1163*0Sstevel@tonic-gate 				}
1164*0Sstevel@tonic-gate 			}
1165*0Sstevel@tonic-gate 			break;
1166*0Sstevel@tonic-gate 		}
1167*0Sstevel@tonic-gate 
1168*0Sstevel@tonic-gate 		if (lwp != NULL)
1169*0Sstevel@tonic-gate 			lwp->lwp_nostop--;
1170*0Sstevel@tonic-gate 	}
1171*0Sstevel@tonic-gate 	/*
1172*0Sstevel@tonic-gate 	 * Get the reply message, if any.  This will be freed at the end
1173*0Sstevel@tonic-gate 	 * whether or not an error occurred.
1174*0Sstevel@tonic-gate 	 */
1175*0Sstevel@tonic-gate 	mp = call->call_reply;
1176*0Sstevel@tonic-gate 	call->call_reply = NULL;
1177*0Sstevel@tonic-gate 
1178*0Sstevel@tonic-gate 	/*
1179*0Sstevel@tonic-gate 	 * call_err is the error info when the call is on dispatch queue.
1180*0Sstevel@tonic-gate 	 * cku_err is the error info returned to the caller.
1181*0Sstevel@tonic-gate 	 * Sync cku_err with call_err for local message processing.
1182*0Sstevel@tonic-gate 	 */
1183*0Sstevel@tonic-gate 
1184*0Sstevel@tonic-gate 	status = call->call_status;
1185*0Sstevel@tonic-gate 	p->cku_err = call->call_err;
1186*0Sstevel@tonic-gate 	mutex_exit(&call->call_lock);
1187*0Sstevel@tonic-gate 
1188*0Sstevel@tonic-gate 	if (status != RPC_SUCCESS) {
1189*0Sstevel@tonic-gate 		switch (status) {
1190*0Sstevel@tonic-gate 		case RPC_TIMEDOUT:
1191*0Sstevel@tonic-gate 			if (interrupted) {
1192*0Sstevel@tonic-gate 				COTSRCSTAT_INCR(p->cku_stats, rcintrs);
1193*0Sstevel@tonic-gate 				p->cku_err.re_status = RPC_INTR;
1194*0Sstevel@tonic-gate 				p->cku_err.re_errno = EINTR;
1195*0Sstevel@tonic-gate 				RPCLOG(1, "clnt_cots_kcallit: xid 0x%x",
1196*0Sstevel@tonic-gate 				    p->cku_xid);
1197*0Sstevel@tonic-gate 				RPCLOG(1, "signal interrupted at %ld", lbolt);
1198*0Sstevel@tonic-gate 				RPCLOG(1, ", was sent at %ld\n", time_sent);
1199*0Sstevel@tonic-gate 			} else {
1200*0Sstevel@tonic-gate 				COTSRCSTAT_INCR(p->cku_stats, rctimeouts);
1201*0Sstevel@tonic-gate 				p->cku_err.re_errno = ETIMEDOUT;
1202*0Sstevel@tonic-gate 				RPCLOG(1, "clnt_cots_kcallit: timed out at %ld",
1203*0Sstevel@tonic-gate 				    lbolt);
1204*0Sstevel@tonic-gate 				RPCLOG(1, ", was sent at %ld\n", time_sent);
1205*0Sstevel@tonic-gate 			}
1206*0Sstevel@tonic-gate 			break;
1207*0Sstevel@tonic-gate 
1208*0Sstevel@tonic-gate 		case RPC_XPRTFAILED:
1209*0Sstevel@tonic-gate 			if (p->cku_err.re_errno == 0)
1210*0Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
1211*0Sstevel@tonic-gate 
1212*0Sstevel@tonic-gate 			RPCLOG(1, "clnt_cots_kcallit: transport failed: %d\n",
1213*0Sstevel@tonic-gate 			    p->cku_err.re_errno);
1214*0Sstevel@tonic-gate 			break;
1215*0Sstevel@tonic-gate 
1216*0Sstevel@tonic-gate 		case RPC_SYSTEMERROR:
1217*0Sstevel@tonic-gate 			ASSERT(p->cku_err.re_errno);
1218*0Sstevel@tonic-gate 			RPCLOG(1, "clnt_cots_kcallit: system error: %d\n",
1219*0Sstevel@tonic-gate 			    p->cku_err.re_errno);
1220*0Sstevel@tonic-gate 			break;
1221*0Sstevel@tonic-gate 
1222*0Sstevel@tonic-gate 		default:
1223*0Sstevel@tonic-gate 			p->cku_err.re_status = RPC_SYSTEMERROR;
1224*0Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
1225*0Sstevel@tonic-gate 			RPCLOG(1, "clnt_cots_kcallit: error: %s\n",
1226*0Sstevel@tonic-gate 			    clnt_sperrno(status));
1227*0Sstevel@tonic-gate 			break;
1228*0Sstevel@tonic-gate 		}
1229*0Sstevel@tonic-gate 		if (p->cku_err.re_status != RPC_TIMEDOUT) {
1230*0Sstevel@tonic-gate 
1231*0Sstevel@tonic-gate 			if (p->cku_flags & CKU_ONQUEUE) {
1232*0Sstevel@tonic-gate 				call_table_remove(call);
1233*0Sstevel@tonic-gate 				p->cku_flags &= ~CKU_ONQUEUE;
1234*0Sstevel@tonic-gate 			}
1235*0Sstevel@tonic-gate 
1236*0Sstevel@tonic-gate 			RPCLOG(64, "clnt_cots_kcallit: non TIMEOUT so xid 0x%x "
1237*0Sstevel@tonic-gate 			    "taken off dispatch list\n", p->cku_xid);
1238*0Sstevel@tonic-gate 			if (call->call_reply) {
1239*0Sstevel@tonic-gate 				freemsg(call->call_reply);
1240*0Sstevel@tonic-gate 				call->call_reply = NULL;
1241*0Sstevel@tonic-gate 			}
1242*0Sstevel@tonic-gate 		} else if (wait.tv_sec != 0) {
1243*0Sstevel@tonic-gate 			/*
1244*0Sstevel@tonic-gate 			 * We've sent the request over TCP and so we have
1245*0Sstevel@tonic-gate 			 * every reason to believe it will get
1246*0Sstevel@tonic-gate 			 * delivered. In which case returning a timeout is not
1247*0Sstevel@tonic-gate 			 * appropriate.
1248*0Sstevel@tonic-gate 			 */
1249*0Sstevel@tonic-gate 			if (p->cku_progress == TRUE &&
1250*0Sstevel@tonic-gate 			    p->cku_recv_attempts < clnt_cots_maxrecv) {
1251*0Sstevel@tonic-gate 				p->cku_err.re_status = RPC_INPROGRESS;
1252*0Sstevel@tonic-gate 			}
1253*0Sstevel@tonic-gate 		}
1254*0Sstevel@tonic-gate 		goto cots_done;
1255*0Sstevel@tonic-gate 	}
1256*0Sstevel@tonic-gate 
1257*0Sstevel@tonic-gate 	xdrs = &p->cku_inxdr;
1258*0Sstevel@tonic-gate 	xdrmblk_init(xdrs, mp, XDR_DECODE, 0);
1259*0Sstevel@tonic-gate 
1260*0Sstevel@tonic-gate 	reply_msg.rm_direction = REPLY;
1261*0Sstevel@tonic-gate 	reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
1262*0Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_stat = SUCCESS;
1263*0Sstevel@tonic-gate 
1264*0Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_verf = _null_auth;
1265*0Sstevel@tonic-gate 	/*
1266*0Sstevel@tonic-gate 	 *  xdr_results will be done in AUTH_UNWRAP.
1267*0Sstevel@tonic-gate 	 */
1268*0Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_results.where = NULL;
1269*0Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_results.proc = xdr_void;
1270*0Sstevel@tonic-gate 
1271*0Sstevel@tonic-gate 	if (xdr_replymsg(xdrs, &reply_msg)) {
1272*0Sstevel@tonic-gate 		enum clnt_stat re_status;
1273*0Sstevel@tonic-gate 
1274*0Sstevel@tonic-gate 		_seterr_reply(&reply_msg, &p->cku_err);
1275*0Sstevel@tonic-gate 
1276*0Sstevel@tonic-gate 		re_status = p->cku_err.re_status;
1277*0Sstevel@tonic-gate 		if (re_status == RPC_SUCCESS) {
1278*0Sstevel@tonic-gate 			/*
1279*0Sstevel@tonic-gate 			 * Reply is good, check auth.
1280*0Sstevel@tonic-gate 			 */
1281*0Sstevel@tonic-gate 			if (!AUTH_VALIDATE(h->cl_auth,
1282*0Sstevel@tonic-gate 			    &reply_msg.acpted_rply.ar_verf)) {
1283*0Sstevel@tonic-gate 				COTSRCSTAT_INCR(p->cku_stats, rcbadverfs);
1284*0Sstevel@tonic-gate 				RPCLOG0(1, "clnt_cots_kcallit: validation "
1285*0Sstevel@tonic-gate 				    "failure\n");
1286*0Sstevel@tonic-gate 				freemsg(mp);
1287*0Sstevel@tonic-gate 				(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
1288*0Sstevel@tonic-gate 				mutex_enter(&call->call_lock);
1289*0Sstevel@tonic-gate 				if (call->call_reply == NULL)
1290*0Sstevel@tonic-gate 					call->call_status = RPC_TIMEDOUT;
1291*0Sstevel@tonic-gate 				mutex_exit(&call->call_lock);
1292*0Sstevel@tonic-gate 				goto read_again;
1293*0Sstevel@tonic-gate 			} else if (!AUTH_UNWRAP(h->cl_auth, xdrs,
1294*0Sstevel@tonic-gate 			    xdr_results, resultsp)) {
1295*0Sstevel@tonic-gate 				RPCLOG0(1, "clnt_cots_kcallit: validation "
1296*0Sstevel@tonic-gate 				    "failure (unwrap)\n");
1297*0Sstevel@tonic-gate 				p->cku_err.re_status = RPC_CANTDECODERES;
1298*0Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
1299*0Sstevel@tonic-gate 			}
1300*0Sstevel@tonic-gate 		} else {
1301*0Sstevel@tonic-gate 			/* set errno in case we can't recover */
1302*0Sstevel@tonic-gate 			if (re_status != RPC_VERSMISMATCH &&
1303*0Sstevel@tonic-gate 			    re_status != RPC_AUTHERROR &&
1304*0Sstevel@tonic-gate 			    re_status != RPC_PROGVERSMISMATCH)
1305*0Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
1306*0Sstevel@tonic-gate 
1307*0Sstevel@tonic-gate 			if (re_status == RPC_AUTHERROR) {
1308*0Sstevel@tonic-gate 				/*
1309*0Sstevel@tonic-gate 				 * Maybe our credential need to be
1310*0Sstevel@tonic-gate 				 * refreshed
1311*0Sstevel@tonic-gate 				 */
1312*0Sstevel@tonic-gate 			    if ((refreshes > 0) &&
1313*0Sstevel@tonic-gate 				AUTH_REFRESH(h->cl_auth, &reply_msg,
1314*0Sstevel@tonic-gate 						p->cku_cred)) {
1315*0Sstevel@tonic-gate 				refreshes--;
1316*0Sstevel@tonic-gate 				(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
1317*0Sstevel@tonic-gate 				freemsg(mp);
1318*0Sstevel@tonic-gate 				mp = NULL;
1319*0Sstevel@tonic-gate 
1320*0Sstevel@tonic-gate 				if (p->cku_flags & CKU_ONQUEUE) {
1321*0Sstevel@tonic-gate 					call_table_remove(call);
1322*0Sstevel@tonic-gate 					p->cku_flags &= ~CKU_ONQUEUE;
1323*0Sstevel@tonic-gate 				}
1324*0Sstevel@tonic-gate 
1325*0Sstevel@tonic-gate 				RPCLOG(64, "clnt_cots_kcallit: AUTH_ERROR, so "
1326*0Sstevel@tonic-gate 				    "xid 0x%x taken off dispatch list\n",
1327*0Sstevel@tonic-gate 				    p->cku_xid);
1328*0Sstevel@tonic-gate 				if (call->call_reply) {
1329*0Sstevel@tonic-gate 					freemsg(call->call_reply);
1330*0Sstevel@tonic-gate 					call->call_reply = NULL;
1331*0Sstevel@tonic-gate 				}
1332*0Sstevel@tonic-gate 				COTSRCSTAT_INCR(p->cku_stats, rcbadcalls);
1333*0Sstevel@tonic-gate 				COTSRCSTAT_INCR(p->cku_stats, rcnewcreds);
1334*0Sstevel@tonic-gate 				goto call_again;
1335*0Sstevel@tonic-gate 			    } else {
1336*0Sstevel@tonic-gate 				/*
1337*0Sstevel@tonic-gate 				 * We have used the client handle to
1338*0Sstevel@tonic-gate 				 * do an AUTH_REFRESH and the RPC status may
1339*0Sstevel@tonic-gate 				 * be set to RPC_SUCCESS; Let's make sure to
1340*0Sstevel@tonic-gate 				 * set it to RPC_AUTHERROR.
1341*0Sstevel@tonic-gate 				 */
1342*0Sstevel@tonic-gate 				p->cku_err.re_status = RPC_AUTHERROR;
1343*0Sstevel@tonic-gate 				/*
1344*0Sstevel@tonic-gate 				 * Map recoverable and unrecoverable
1345*0Sstevel@tonic-gate 				 * authentication errors to appropriate errno
1346*0Sstevel@tonic-gate 				 */
1347*0Sstevel@tonic-gate 				switch (p->cku_err.re_why) {
1348*0Sstevel@tonic-gate 				case AUTH_BADCRED:
1349*0Sstevel@tonic-gate 				case AUTH_BADVERF:
1350*0Sstevel@tonic-gate 				case AUTH_INVALIDRESP:
1351*0Sstevel@tonic-gate 				case AUTH_TOOWEAK:
1352*0Sstevel@tonic-gate 				case AUTH_FAILED:
1353*0Sstevel@tonic-gate 				case RPCSEC_GSS_NOCRED:
1354*0Sstevel@tonic-gate 				case RPCSEC_GSS_FAILED:
1355*0Sstevel@tonic-gate 						p->cku_err.re_errno = EACCES;
1356*0Sstevel@tonic-gate 						break;
1357*0Sstevel@tonic-gate 				case AUTH_REJECTEDCRED:
1358*0Sstevel@tonic-gate 				case AUTH_REJECTEDVERF:
1359*0Sstevel@tonic-gate 				default:	p->cku_err.re_errno = EIO;
1360*0Sstevel@tonic-gate 						break;
1361*0Sstevel@tonic-gate 				}
1362*0Sstevel@tonic-gate 				RPCLOG(1, "clnt_cots_kcallit : authentication"
1363*0Sstevel@tonic-gate 				    " failed with RPC_AUTHERROR of type %d\n",
1364*0Sstevel@tonic-gate 				    (int)p->cku_err.re_why);
1365*0Sstevel@tonic-gate 			    }
1366*0Sstevel@tonic-gate 			}
1367*0Sstevel@tonic-gate 		}
1368*0Sstevel@tonic-gate 	} else {
1369*0Sstevel@tonic-gate 		/* reply didn't decode properly. */
1370*0Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTDECODERES;
1371*0Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
1372*0Sstevel@tonic-gate 		RPCLOG0(1, "clnt_cots_kcallit: decode failure\n");
1373*0Sstevel@tonic-gate 	}
1374*0Sstevel@tonic-gate 
1375*0Sstevel@tonic-gate 	(void) xdr_rpc_free_verifier(xdrs, &reply_msg);
1376*0Sstevel@tonic-gate 
1377*0Sstevel@tonic-gate 	if (p->cku_flags & CKU_ONQUEUE) {
1378*0Sstevel@tonic-gate 		call_table_remove(call);
1379*0Sstevel@tonic-gate 		p->cku_flags &= ~CKU_ONQUEUE;
1380*0Sstevel@tonic-gate 	}
1381*0Sstevel@tonic-gate 
1382*0Sstevel@tonic-gate 	RPCLOG(64, "clnt_cots_kcallit: xid 0x%x taken off dispatch list",
1383*0Sstevel@tonic-gate 	    p->cku_xid);
1384*0Sstevel@tonic-gate 	RPCLOG(64, " status is %s\n", clnt_sperrno(p->cku_err.re_status));
1385*0Sstevel@tonic-gate cots_done:
1386*0Sstevel@tonic-gate 	if (cm_entry)
1387*0Sstevel@tonic-gate 		connmgr_release(cm_entry);
1388*0Sstevel@tonic-gate 
1389*0Sstevel@tonic-gate 	if (mp != NULL)
1390*0Sstevel@tonic-gate 		freemsg(mp);
1391*0Sstevel@tonic-gate 	if ((p->cku_flags & CKU_ONQUEUE) == 0 && call->call_reply) {
1392*0Sstevel@tonic-gate 		freemsg(call->call_reply);
1393*0Sstevel@tonic-gate 		call->call_reply = NULL;
1394*0Sstevel@tonic-gate 	}
1395*0Sstevel@tonic-gate 	if (p->cku_err.re_status != RPC_SUCCESS) {
1396*0Sstevel@tonic-gate 		RPCLOG0(1, "clnt_cots_kcallit: tail-end failure\n");
1397*0Sstevel@tonic-gate 		COTSRCSTAT_INCR(p->cku_stats, rcbadcalls);
1398*0Sstevel@tonic-gate 	}
1399*0Sstevel@tonic-gate 
1400*0Sstevel@tonic-gate 	/*
1401*0Sstevel@tonic-gate 	 * No point in delaying if the zone is going away.
1402*0Sstevel@tonic-gate 	 */
1403*0Sstevel@tonic-gate 	if (delay_first == TRUE &&
1404*0Sstevel@tonic-gate 	    !(zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)) {
1405*0Sstevel@tonic-gate 		if (clnt_delay(ticks, h->cl_nosignal) == EINTR) {
1406*0Sstevel@tonic-gate 			p->cku_err.re_errno = EINTR;
1407*0Sstevel@tonic-gate 			p->cku_err.re_status = RPC_INTR;
1408*0Sstevel@tonic-gate 		}
1409*0Sstevel@tonic-gate 	}
1410*0Sstevel@tonic-gate 	return (p->cku_err.re_status);
1411*0Sstevel@tonic-gate }
1412*0Sstevel@tonic-gate 
1413*0Sstevel@tonic-gate /*
1414*0Sstevel@tonic-gate  * Kinit routine for cots.  This sets up the correct operations in
1415*0Sstevel@tonic-gate  * the client handle, as the handle may have previously been a clts
1416*0Sstevel@tonic-gate  * handle, and clears the xid field so there is no way a new call
1417*0Sstevel@tonic-gate  * could be mistaken for a retry.  It also sets in the handle the
1418*0Sstevel@tonic-gate  * information that is passed at create/kinit time but needed at
1419*0Sstevel@tonic-gate  * call time, as cots creates the transport at call time - device,
1420*0Sstevel@tonic-gate  * address of the server, protocol family.
1421*0Sstevel@tonic-gate  */
1422*0Sstevel@tonic-gate void
1423*0Sstevel@tonic-gate clnt_cots_kinit(CLIENT *h, dev_t dev, int family, struct netbuf *addr,
1424*0Sstevel@tonic-gate 	int max_msgsize, cred_t *cred)
1425*0Sstevel@tonic-gate {
1426*0Sstevel@tonic-gate 	/* LINTED pointer alignment */
1427*0Sstevel@tonic-gate 	cku_private_t *p = htop(h);
1428*0Sstevel@tonic-gate 	calllist_t *call = &p->cku_call;
1429*0Sstevel@tonic-gate 
1430*0Sstevel@tonic-gate 	h->cl_ops = &tcp_ops;
1431*0Sstevel@tonic-gate 	if (p->cku_flags & CKU_ONQUEUE) {
1432*0Sstevel@tonic-gate 		call_table_remove(call);
1433*0Sstevel@tonic-gate 		p->cku_flags &= ~CKU_ONQUEUE;
1434*0Sstevel@tonic-gate 		RPCLOG(64, "clnt_cots_kinit: removing call for xid 0x%x from"
1435*0Sstevel@tonic-gate 		    " dispatch list\n", p->cku_xid);
1436*0Sstevel@tonic-gate 	}
1437*0Sstevel@tonic-gate 
1438*0Sstevel@tonic-gate 	if (call->call_reply != NULL) {
1439*0Sstevel@tonic-gate 		freemsg(call->call_reply);
1440*0Sstevel@tonic-gate 		call->call_reply = NULL;
1441*0Sstevel@tonic-gate 	}
1442*0Sstevel@tonic-gate 
1443*0Sstevel@tonic-gate 	call->call_bucket = NULL;
1444*0Sstevel@tonic-gate 	call->call_hash = 0;
1445*0Sstevel@tonic-gate 
1446*0Sstevel@tonic-gate 	/*
1447*0Sstevel@tonic-gate 	 * We don't clear cku_flags here, because clnt_cots_kcallit()
1448*0Sstevel@tonic-gate 	 * takes care of handling the cku_flags reset.
1449*0Sstevel@tonic-gate 	 */
1450*0Sstevel@tonic-gate 	p->cku_xid = 0;
1451*0Sstevel@tonic-gate 	p->cku_device = dev;
1452*0Sstevel@tonic-gate 	p->cku_addrfmly = family;
1453*0Sstevel@tonic-gate 	p->cku_cred = cred;
1454*0Sstevel@tonic-gate 
1455*0Sstevel@tonic-gate 	if (p->cku_addr.maxlen < addr->len) {
1456*0Sstevel@tonic-gate 		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
1457*0Sstevel@tonic-gate 			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
1458*0Sstevel@tonic-gate 		p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
1459*0Sstevel@tonic-gate 		p->cku_addr.maxlen = addr->maxlen;
1460*0Sstevel@tonic-gate 	}
1461*0Sstevel@tonic-gate 
1462*0Sstevel@tonic-gate 	p->cku_addr.len = addr->len;
1463*0Sstevel@tonic-gate 	bcopy(addr->buf, p->cku_addr.buf, addr->len);
1464*0Sstevel@tonic-gate 
1465*0Sstevel@tonic-gate 	/*
1466*0Sstevel@tonic-gate 	 * If the current sanity check size in rpcmod is smaller
1467*0Sstevel@tonic-gate 	 * than the size needed, then increase the sanity check.
1468*0Sstevel@tonic-gate 	 */
1469*0Sstevel@tonic-gate 	if (max_msgsize != 0 && clnt_max_msg_sizep != NULL &&
1470*0Sstevel@tonic-gate 	    max_msgsize > *clnt_max_msg_sizep) {
1471*0Sstevel@tonic-gate 		mutex_enter(&clnt_max_msg_lock);
1472*0Sstevel@tonic-gate 		if (max_msgsize > *clnt_max_msg_sizep)
1473*0Sstevel@tonic-gate 			*clnt_max_msg_sizep = max_msgsize;
1474*0Sstevel@tonic-gate 		mutex_exit(&clnt_max_msg_lock);
1475*0Sstevel@tonic-gate 	}
1476*0Sstevel@tonic-gate }
1477*0Sstevel@tonic-gate 
1478*0Sstevel@tonic-gate /*
1479*0Sstevel@tonic-gate  * ksettimers is a no-op for cots, with the exception of setting the xid.
1480*0Sstevel@tonic-gate  */
1481*0Sstevel@tonic-gate /* ARGSUSED */
1482*0Sstevel@tonic-gate static int
1483*0Sstevel@tonic-gate clnt_cots_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
1484*0Sstevel@tonic-gate 	int minimum, void (*feedback)(int, int, caddr_t), caddr_t arg,
1485*0Sstevel@tonic-gate 	uint32_t xid)
1486*0Sstevel@tonic-gate {
1487*0Sstevel@tonic-gate 	/* LINTED pointer alignment */
1488*0Sstevel@tonic-gate 	cku_private_t *p = htop(h);
1489*0Sstevel@tonic-gate 
1490*0Sstevel@tonic-gate 	if (xid)
1491*0Sstevel@tonic-gate 		p->cku_xid = xid;
1492*0Sstevel@tonic-gate 	COTSRCSTAT_INCR(p->cku_stats, rctimers);
1493*0Sstevel@tonic-gate 	return (0);
1494*0Sstevel@tonic-gate }
1495*0Sstevel@tonic-gate 
1496*0Sstevel@tonic-gate extern void rpc_poptimod(struct vnode *);
1497*0Sstevel@tonic-gate extern int kstr_push(struct vnode *, char *);
1498*0Sstevel@tonic-gate 
1499*0Sstevel@tonic-gate int
1500*0Sstevel@tonic-gate conn_kstat_update(kstat_t *ksp, int rw)
1501*0Sstevel@tonic-gate {
1502*0Sstevel@tonic-gate 	struct cm_xprt *cm_entry;
1503*0Sstevel@tonic-gate 	struct cm_kstat_xprt *cm_ksp_data;
1504*0Sstevel@tonic-gate 	uchar_t *b;
1505*0Sstevel@tonic-gate 	char *fbuf;
1506*0Sstevel@tonic-gate 
1507*0Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
1508*0Sstevel@tonic-gate 		return (EACCES);
1509*0Sstevel@tonic-gate 	if (ksp == NULL || ksp->ks_private == NULL)
1510*0Sstevel@tonic-gate 		return (EIO);
1511*0Sstevel@tonic-gate 	cm_entry  = (struct cm_xprt *)ksp->ks_private;
1512*0Sstevel@tonic-gate 	cm_ksp_data = (struct cm_kstat_xprt *)ksp->ks_data;
1513*0Sstevel@tonic-gate 
1514*0Sstevel@tonic-gate 	cm_ksp_data->x_wq.value.ui32 = (uint32_t)(uintptr_t)cm_entry->x_wq;
1515*0Sstevel@tonic-gate 	cm_ksp_data->x_family.value.ui32 = cm_entry->x_family;
1516*0Sstevel@tonic-gate 	cm_ksp_data->x_rdev.value.ui32 = (uint32_t)cm_entry->x_rdev;
1517*0Sstevel@tonic-gate 	cm_ksp_data->x_time.value.ui32 = cm_entry->x_time;
1518*0Sstevel@tonic-gate 	cm_ksp_data->x_ref.value.ui32 = cm_entry->x_ref;
1519*0Sstevel@tonic-gate 	cm_ksp_data->x_state.value.ui32 = cm_entry->x_state_flags;
1520*0Sstevel@tonic-gate 
1521*0Sstevel@tonic-gate 	if (cm_entry->x_server.buf) {
1522*0Sstevel@tonic-gate 		fbuf = cm_ksp_data->x_server.value.string.addr.ptr;
1523*0Sstevel@tonic-gate 		if (cm_entry->x_family == AF_INET &&
1524*0Sstevel@tonic-gate 		    cm_entry->x_server.len ==
1525*0Sstevel@tonic-gate 		    sizeof (struct sockaddr_in)) {
1526*0Sstevel@tonic-gate 			struct sockaddr_in  *sa;
1527*0Sstevel@tonic-gate 			sa = (struct sockaddr_in *)
1528*0Sstevel@tonic-gate 				cm_entry->x_server.buf;
1529*0Sstevel@tonic-gate 			b = (uchar_t *)&sa->sin_addr;
1530*0Sstevel@tonic-gate 			(void) sprintf(fbuf,
1531*0Sstevel@tonic-gate 			    "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF,
1532*0Sstevel@tonic-gate 			    b[2] & 0xFF, b[3] & 0xFF);
1533*0Sstevel@tonic-gate 			cm_ksp_data->x_port.value.ui32 =
1534*0Sstevel@tonic-gate 				(uint32_t)sa->sin_port;
1535*0Sstevel@tonic-gate 		} else if (cm_entry->x_family == AF_INET6 &&
1536*0Sstevel@tonic-gate 				cm_entry->x_server.len >=
1537*0Sstevel@tonic-gate 				sizeof (struct sockaddr_in6)) {
1538*0Sstevel@tonic-gate 			/* extract server IP address & port */
1539*0Sstevel@tonic-gate 			struct sockaddr_in6 *sin6;
1540*0Sstevel@tonic-gate 			sin6 = (struct sockaddr_in6 *)cm_entry->x_server.buf;
1541*0Sstevel@tonic-gate 			(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, fbuf,
1542*0Sstevel@tonic-gate 				INET6_ADDRSTRLEN);
1543*0Sstevel@tonic-gate 			cm_ksp_data->x_port.value.ui32 = sin6->sin6_port;
1544*0Sstevel@tonic-gate 		} else {
1545*0Sstevel@tonic-gate 			struct sockaddr_in  *sa;
1546*0Sstevel@tonic-gate 
1547*0Sstevel@tonic-gate 			sa = (struct sockaddr_in *)cm_entry->x_server.buf;
1548*0Sstevel@tonic-gate 			b = (uchar_t *)&sa->sin_addr;
1549*0Sstevel@tonic-gate 			(void) sprintf(fbuf,
1550*0Sstevel@tonic-gate 			    "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF,
1551*0Sstevel@tonic-gate 			    b[2] & 0xFF, b[3] & 0xFF);
1552*0Sstevel@tonic-gate 		}
1553*0Sstevel@tonic-gate 		KSTAT_NAMED_STR_BUFLEN(&cm_ksp_data->x_server) =
1554*0Sstevel@tonic-gate 			strlen(fbuf) + 1;
1555*0Sstevel@tonic-gate 	}
1556*0Sstevel@tonic-gate 
1557*0Sstevel@tonic-gate 	return (0);
1558*0Sstevel@tonic-gate }
1559*0Sstevel@tonic-gate 
1560*0Sstevel@tonic-gate 
1561*0Sstevel@tonic-gate /*
1562*0Sstevel@tonic-gate  * We want a version of delay which is interruptible by a UNIX signal
1563*0Sstevel@tonic-gate  * Return EINTR if an interrupt occured.
1564*0Sstevel@tonic-gate  */
1565*0Sstevel@tonic-gate static int
1566*0Sstevel@tonic-gate clnt_delay(clock_t ticks, bool_t nosignal)
1567*0Sstevel@tonic-gate {
1568*0Sstevel@tonic-gate 	if (nosignal == TRUE) {
1569*0Sstevel@tonic-gate 		delay(ticks);
1570*0Sstevel@tonic-gate 		return (0);
1571*0Sstevel@tonic-gate 	}
1572*0Sstevel@tonic-gate 	return (delay_sig(ticks));
1573*0Sstevel@tonic-gate }
1574*0Sstevel@tonic-gate 
1575*0Sstevel@tonic-gate /*
1576*0Sstevel@tonic-gate  * Wait for a connection until a timeout, or until we are
1577*0Sstevel@tonic-gate  * signalled that there has been a connection state change.
1578*0Sstevel@tonic-gate  */
1579*0Sstevel@tonic-gate static enum clnt_stat
1580*0Sstevel@tonic-gate connmgr_cwait(struct cm_xprt *cm_entry, const struct timeval *waitp,
1581*0Sstevel@tonic-gate 	bool_t nosignal)
1582*0Sstevel@tonic-gate {
1583*0Sstevel@tonic-gate 	bool_t interrupted;
1584*0Sstevel@tonic-gate 	clock_t timout, cv_stat;
1585*0Sstevel@tonic-gate 	enum clnt_stat clstat;
1586*0Sstevel@tonic-gate 	unsigned int old_state;
1587*0Sstevel@tonic-gate 
1588*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connmgr_lock));
1589*0Sstevel@tonic-gate 	/*
1590*0Sstevel@tonic-gate 	 * We wait for the transport connection to be made, or an
1591*0Sstevel@tonic-gate 	 * indication that it could not be made.
1592*0Sstevel@tonic-gate 	 */
1593*0Sstevel@tonic-gate 	clstat = RPC_TIMEDOUT;
1594*0Sstevel@tonic-gate 	interrupted = FALSE;
1595*0Sstevel@tonic-gate 
1596*0Sstevel@tonic-gate 	old_state = cm_entry->x_state_flags;
1597*0Sstevel@tonic-gate 	/*
1598*0Sstevel@tonic-gate 	 * Now loop until cv_timedwait{_sig} returns because of
1599*0Sstevel@tonic-gate 	 * a signal(0) or timeout(-1) or cv_signal(>0). But it may be
1600*0Sstevel@tonic-gate 	 * cv_signalled for various other reasons too. So loop
1601*0Sstevel@tonic-gate 	 * until there is a state change on the connection.
1602*0Sstevel@tonic-gate 	 */
1603*0Sstevel@tonic-gate 
1604*0Sstevel@tonic-gate 	timout = waitp->tv_sec * drv_usectohz(1000000) +
1605*0Sstevel@tonic-gate 	    drv_usectohz(waitp->tv_usec) + lbolt;
1606*0Sstevel@tonic-gate 
1607*0Sstevel@tonic-gate 	if (nosignal) {
1608*0Sstevel@tonic-gate 		while ((cv_stat = cv_timedwait(&cm_entry->x_conn_cv,
1609*0Sstevel@tonic-gate 		    &connmgr_lock, timout)) > 0 &&
1610*0Sstevel@tonic-gate 		    cm_entry->x_state_flags == old_state)
1611*0Sstevel@tonic-gate 			;
1612*0Sstevel@tonic-gate 	} else {
1613*0Sstevel@tonic-gate 		while ((cv_stat = cv_timedwait_sig(&cm_entry->x_conn_cv,
1614*0Sstevel@tonic-gate 		    &connmgr_lock, timout)) > 0 &&
1615*0Sstevel@tonic-gate 		    cm_entry->x_state_flags == old_state)
1616*0Sstevel@tonic-gate 			;
1617*0Sstevel@tonic-gate 
1618*0Sstevel@tonic-gate 		if (cv_stat == 0) /* got intr signal? */
1619*0Sstevel@tonic-gate 			interrupted = TRUE;
1620*0Sstevel@tonic-gate 	}
1621*0Sstevel@tonic-gate 
1622*0Sstevel@tonic-gate 	if ((cm_entry->x_state_flags & (X_BADSTATES|X_CONNECTED)) ==
1623*0Sstevel@tonic-gate 	    X_CONNECTED) {
1624*0Sstevel@tonic-gate 		clstat = RPC_SUCCESS;
1625*0Sstevel@tonic-gate 	} else {
1626*0Sstevel@tonic-gate 		if (interrupted == TRUE)
1627*0Sstevel@tonic-gate 			clstat = RPC_INTR;
1628*0Sstevel@tonic-gate 		RPCLOG(1, "connmgr_cwait: can't connect, error: %s\n",
1629*0Sstevel@tonic-gate 		    clnt_sperrno(clstat));
1630*0Sstevel@tonic-gate 	}
1631*0Sstevel@tonic-gate 
1632*0Sstevel@tonic-gate 	return (clstat);
1633*0Sstevel@tonic-gate }
1634*0Sstevel@tonic-gate 
1635*0Sstevel@tonic-gate /*
1636*0Sstevel@tonic-gate  * Primary interface for how RPC grabs a connection.
1637*0Sstevel@tonic-gate  */
1638*0Sstevel@tonic-gate static struct cm_xprt *
1639*0Sstevel@tonic-gate connmgr_wrapget(
1640*0Sstevel@tonic-gate 	struct netbuf *retryaddr,
1641*0Sstevel@tonic-gate 	const struct timeval *waitp,
1642*0Sstevel@tonic-gate 	cku_private_t *p)
1643*0Sstevel@tonic-gate {
1644*0Sstevel@tonic-gate 	struct cm_xprt *cm_entry;
1645*0Sstevel@tonic-gate 
1646*0Sstevel@tonic-gate 	cm_entry = connmgr_get(retryaddr, waitp, &p->cku_addr, p->cku_addrfmly,
1647*0Sstevel@tonic-gate 	    &p->cku_srcaddr, &p->cku_err, p->cku_device,
1648*0Sstevel@tonic-gate 	    p->cku_client.cl_nosignal, p->cku_useresvport);
1649*0Sstevel@tonic-gate 
1650*0Sstevel@tonic-gate 	if (cm_entry == NULL) {
1651*0Sstevel@tonic-gate 		/*
1652*0Sstevel@tonic-gate 		 * Re-map the call status to RPC_INTR if the err code is
1653*0Sstevel@tonic-gate 		 * EINTR. This can happen if calls status is RPC_TLIERROR.
1654*0Sstevel@tonic-gate 		 * However, don't re-map if signalling has been turned off.
1655*0Sstevel@tonic-gate 		 * XXX Really need to create a separate thread whenever
1656*0Sstevel@tonic-gate 		 * there isn't an existing connection.
1657*0Sstevel@tonic-gate 		 */
1658*0Sstevel@tonic-gate 		if (p->cku_err.re_errno == EINTR) {
1659*0Sstevel@tonic-gate 			if (p->cku_client.cl_nosignal == TRUE)
1660*0Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
1661*0Sstevel@tonic-gate 			else
1662*0Sstevel@tonic-gate 				p->cku_err.re_status = RPC_INTR;
1663*0Sstevel@tonic-gate 		}
1664*0Sstevel@tonic-gate 	}
1665*0Sstevel@tonic-gate 
1666*0Sstevel@tonic-gate 	return (cm_entry);
1667*0Sstevel@tonic-gate }
1668*0Sstevel@tonic-gate 
1669*0Sstevel@tonic-gate /*
1670*0Sstevel@tonic-gate  * Obtains a transport to the server specified in addr.  If a suitable transport
1671*0Sstevel@tonic-gate  * does not already exist in the list of cached transports, a new connection
1672*0Sstevel@tonic-gate  * is created, connected, and added to the list. The connection is for sending
1673*0Sstevel@tonic-gate  * only - the reply message may come back on another transport connection.
1674*0Sstevel@tonic-gate  */
1675*0Sstevel@tonic-gate static struct cm_xprt *
1676*0Sstevel@tonic-gate connmgr_get(
1677*0Sstevel@tonic-gate 	struct netbuf	*retryaddr,
1678*0Sstevel@tonic-gate 	const struct timeval	*waitp,	/* changed to a ptr to converse stack */
1679*0Sstevel@tonic-gate 	struct netbuf	*destaddr,
1680*0Sstevel@tonic-gate 	int		addrfmly,
1681*0Sstevel@tonic-gate 	struct netbuf	*srcaddr,
1682*0Sstevel@tonic-gate 	struct rpc_err	*rpcerr,
1683*0Sstevel@tonic-gate 	dev_t		device,
1684*0Sstevel@tonic-gate 	bool_t		nosignal,
1685*0Sstevel@tonic-gate 	int		useresvport)
1686*0Sstevel@tonic-gate {
1687*0Sstevel@tonic-gate 	struct cm_xprt *cm_entry;
1688*0Sstevel@tonic-gate 	struct cm_xprt *lru_entry;
1689*0Sstevel@tonic-gate 	struct cm_xprt **cmp;
1690*0Sstevel@tonic-gate 	queue_t *wq;
1691*0Sstevel@tonic-gate 	TIUSER *tiptr;
1692*0Sstevel@tonic-gate 	int i;
1693*0Sstevel@tonic-gate 	int retval;
1694*0Sstevel@tonic-gate 	clock_t prev_time;
1695*0Sstevel@tonic-gate 	int tidu_size;
1696*0Sstevel@tonic-gate 	bool_t	connected;
1697*0Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
1698*0Sstevel@tonic-gate 
1699*0Sstevel@tonic-gate 	/*
1700*0Sstevel@tonic-gate 	 * If the call is not a retry, look for a transport entry that
1701*0Sstevel@tonic-gate 	 * goes to the server of interest.
1702*0Sstevel@tonic-gate 	 */
1703*0Sstevel@tonic-gate 	mutex_enter(&connmgr_lock);
1704*0Sstevel@tonic-gate 
1705*0Sstevel@tonic-gate 	if (retryaddr == NULL) {
1706*0Sstevel@tonic-gate use_new_conn:
1707*0Sstevel@tonic-gate 		i = 0;
1708*0Sstevel@tonic-gate 		cm_entry = lru_entry = NULL;
1709*0Sstevel@tonic-gate 		prev_time = lbolt;
1710*0Sstevel@tonic-gate 
1711*0Sstevel@tonic-gate 		cmp = &cm_hd;
1712*0Sstevel@tonic-gate 		while ((cm_entry = *cmp) != NULL) {
1713*0Sstevel@tonic-gate 			ASSERT(cm_entry != cm_entry->x_next);
1714*0Sstevel@tonic-gate 			/*
1715*0Sstevel@tonic-gate 			 * Garbage collect conections that are marked
1716*0Sstevel@tonic-gate 			 * for needs disconnect.
1717*0Sstevel@tonic-gate 			 */
1718*0Sstevel@tonic-gate 			if (cm_entry->x_needdis) {
1719*0Sstevel@tonic-gate 				connmgr_dis_and_wait(cm_entry);
1720*0Sstevel@tonic-gate 				/*
1721*0Sstevel@tonic-gate 				 * connmgr_lock could have been
1722*0Sstevel@tonic-gate 				 * dropped for the disconnect
1723*0Sstevel@tonic-gate 				 * processing so start over.
1724*0Sstevel@tonic-gate 				 */
1725*0Sstevel@tonic-gate 				goto use_new_conn;
1726*0Sstevel@tonic-gate 			}
1727*0Sstevel@tonic-gate 
1728*0Sstevel@tonic-gate 			/*
1729*0Sstevel@tonic-gate 			 * Garbage collect the dead connections that have
1730*0Sstevel@tonic-gate 			 * no threads working on them.
1731*0Sstevel@tonic-gate 			 */
1732*0Sstevel@tonic-gate 			if ((cm_entry->x_state_flags & (X_DEAD|X_THREAD)) ==
1733*0Sstevel@tonic-gate 			    X_DEAD) {
1734*0Sstevel@tonic-gate 				*cmp = cm_entry->x_next;
1735*0Sstevel@tonic-gate 				mutex_exit(&connmgr_lock);
1736*0Sstevel@tonic-gate 				connmgr_close(cm_entry);
1737*0Sstevel@tonic-gate 				mutex_enter(&connmgr_lock);
1738*0Sstevel@tonic-gate 				goto use_new_conn;
1739*0Sstevel@tonic-gate 			}
1740*0Sstevel@tonic-gate 
1741*0Sstevel@tonic-gate 
1742*0Sstevel@tonic-gate 			if ((cm_entry->x_state_flags & X_BADSTATES) == 0 &&
1743*0Sstevel@tonic-gate 			    cm_entry->x_zoneid == zoneid &&
1744*0Sstevel@tonic-gate 			    cm_entry->x_rdev == device &&
1745*0Sstevel@tonic-gate 			    destaddr->len == cm_entry->x_server.len &&
1746*0Sstevel@tonic-gate 			    bcmp(destaddr->buf, cm_entry->x_server.buf,
1747*0Sstevel@tonic-gate 			    destaddr->len) == 0) {
1748*0Sstevel@tonic-gate 				/*
1749*0Sstevel@tonic-gate 				 * If the matching entry isn't connected,
1750*0Sstevel@tonic-gate 				 * attempt to reconnect it.
1751*0Sstevel@tonic-gate 				 */
1752*0Sstevel@tonic-gate 				if (cm_entry->x_connected == FALSE) {
1753*0Sstevel@tonic-gate 					/*
1754*0Sstevel@tonic-gate 					 * We don't go through trying
1755*0Sstevel@tonic-gate 					 * to find the least recently
1756*0Sstevel@tonic-gate 					 * used connected because
1757*0Sstevel@tonic-gate 					 * connmgr_reconnect() briefly
1758*0Sstevel@tonic-gate 					 * dropped the connmgr_lock,
1759*0Sstevel@tonic-gate 					 * allowing a window for our
1760*0Sstevel@tonic-gate 					 * accounting to be messed up.
1761*0Sstevel@tonic-gate 					 * In any case, a re-connected
1762*0Sstevel@tonic-gate 					 * connection is as good as
1763*0Sstevel@tonic-gate 					 * a LRU connection.
1764*0Sstevel@tonic-gate 					 */
1765*0Sstevel@tonic-gate 					return (connmgr_wrapconnect(cm_entry,
1766*0Sstevel@tonic-gate 					    waitp, destaddr, addrfmly, srcaddr,
1767*0Sstevel@tonic-gate 					    rpcerr, TRUE, nosignal));
1768*0Sstevel@tonic-gate 				}
1769*0Sstevel@tonic-gate 				i++;
1770*0Sstevel@tonic-gate 				if (cm_entry->x_time - prev_time <= 0 ||
1771*0Sstevel@tonic-gate 				    lru_entry == NULL) {
1772*0Sstevel@tonic-gate 					prev_time = cm_entry->x_time;
1773*0Sstevel@tonic-gate 					lru_entry = cm_entry;
1774*0Sstevel@tonic-gate 				}
1775*0Sstevel@tonic-gate 			}
1776*0Sstevel@tonic-gate 			cmp = &cm_entry->x_next;
1777*0Sstevel@tonic-gate 		}
1778*0Sstevel@tonic-gate 
1779*0Sstevel@tonic-gate 		if (i > clnt_max_conns) {
1780*0Sstevel@tonic-gate 			RPCLOG(8, "connmgr_get: too many conns, dooming entry"
1781*0Sstevel@tonic-gate 			    " %p\n", (void *)lru_entry->x_tiptr);
1782*0Sstevel@tonic-gate 			lru_entry->x_doomed = TRUE;
1783*0Sstevel@tonic-gate 			goto use_new_conn;
1784*0Sstevel@tonic-gate 		}
1785*0Sstevel@tonic-gate 
1786*0Sstevel@tonic-gate 		/*
1787*0Sstevel@tonic-gate 		 * If we are at the maximum number of connections to
1788*0Sstevel@tonic-gate 		 * the server, hand back the least recently used one.
1789*0Sstevel@tonic-gate 		 */
1790*0Sstevel@tonic-gate 		if (i == clnt_max_conns) {
1791*0Sstevel@tonic-gate 			/*
1792*0Sstevel@tonic-gate 			 * Copy into the handle the source address of
1793*0Sstevel@tonic-gate 			 * the connection, which we will use in case of
1794*0Sstevel@tonic-gate 			 * a later retry.
1795*0Sstevel@tonic-gate 			 */
1796*0Sstevel@tonic-gate 			if (srcaddr->len != lru_entry->x_src.len) {
1797*0Sstevel@tonic-gate 				if (srcaddr->len > 0)
1798*0Sstevel@tonic-gate 					kmem_free(srcaddr->buf,
1799*0Sstevel@tonic-gate 					    srcaddr->maxlen);
1800*0Sstevel@tonic-gate 				srcaddr->buf = kmem_zalloc(
1801*0Sstevel@tonic-gate 				    lru_entry->x_src.len, KM_SLEEP);
1802*0Sstevel@tonic-gate 				srcaddr->maxlen = srcaddr->len =
1803*0Sstevel@tonic-gate 				    lru_entry->x_src.len;
1804*0Sstevel@tonic-gate 			}
1805*0Sstevel@tonic-gate 			bcopy(lru_entry->x_src.buf, srcaddr->buf, srcaddr->len);
1806*0Sstevel@tonic-gate 			RPCLOG(2, "connmgr_get: call going out on %p\n",
1807*0Sstevel@tonic-gate 			    (void *)lru_entry);
1808*0Sstevel@tonic-gate 			lru_entry->x_time = lbolt;
1809*0Sstevel@tonic-gate 			CONN_HOLD(lru_entry);
1810*0Sstevel@tonic-gate 			mutex_exit(&connmgr_lock);
1811*0Sstevel@tonic-gate 			return (lru_entry);
1812*0Sstevel@tonic-gate 		}
1813*0Sstevel@tonic-gate 
1814*0Sstevel@tonic-gate 	} else {
1815*0Sstevel@tonic-gate 		/*
1816*0Sstevel@tonic-gate 		 * This is the retry case (retryaddr != NULL).  Retries must
1817*0Sstevel@tonic-gate 		 * be sent on the same source port as the original call.
1818*0Sstevel@tonic-gate 		 */
1819*0Sstevel@tonic-gate 
1820*0Sstevel@tonic-gate 		/*
1821*0Sstevel@tonic-gate 		 * Walk the list looking for a connection with a source address
1822*0Sstevel@tonic-gate 		 * that matches the retry address.
1823*0Sstevel@tonic-gate 		 */
1824*0Sstevel@tonic-gate 		cmp = &cm_hd;
1825*0Sstevel@tonic-gate 		while ((cm_entry = *cmp) != NULL) {
1826*0Sstevel@tonic-gate 			ASSERT(cm_entry != cm_entry->x_next);
1827*0Sstevel@tonic-gate 			if (zoneid != cm_entry->x_zoneid ||
1828*0Sstevel@tonic-gate 			    device != cm_entry->x_rdev ||
1829*0Sstevel@tonic-gate 			    retryaddr->len != cm_entry->x_src.len ||
1830*0Sstevel@tonic-gate 			    bcmp(retryaddr->buf, cm_entry->x_src.buf,
1831*0Sstevel@tonic-gate 				    retryaddr->len) != 0) {
1832*0Sstevel@tonic-gate 				cmp = &cm_entry->x_next;
1833*0Sstevel@tonic-gate 				continue;
1834*0Sstevel@tonic-gate 			}
1835*0Sstevel@tonic-gate 
1836*0Sstevel@tonic-gate 			/*
1837*0Sstevel@tonic-gate 			 * Sanity check: if the connection with our source
1838*0Sstevel@tonic-gate 			 * port is going to some other server, something went
1839*0Sstevel@tonic-gate 			 * wrong, as we never delete connections (i.e. release
1840*0Sstevel@tonic-gate 			 * ports) unless they have been idle.  In this case,
1841*0Sstevel@tonic-gate 			 * it is probably better to send the call out using
1842*0Sstevel@tonic-gate 			 * a new source address than to fail it altogether,
1843*0Sstevel@tonic-gate 			 * since that port may never be released.
1844*0Sstevel@tonic-gate 			 */
1845*0Sstevel@tonic-gate 			if (destaddr->len != cm_entry->x_server.len ||
1846*0Sstevel@tonic-gate 				bcmp(destaddr->buf, cm_entry->x_server.buf,
1847*0Sstevel@tonic-gate 					destaddr->len) != 0) {
1848*0Sstevel@tonic-gate 				RPCLOG(1, "connmgr_get: tiptr %p"
1849*0Sstevel@tonic-gate 				    " is going to a different server"
1850*0Sstevel@tonic-gate 				    " with the port that belongs"
1851*0Sstevel@tonic-gate 				    " to us!\n", (void *)cm_entry->x_tiptr);
1852*0Sstevel@tonic-gate 				retryaddr = NULL;
1853*0Sstevel@tonic-gate 				goto use_new_conn;
1854*0Sstevel@tonic-gate 			}
1855*0Sstevel@tonic-gate 
1856*0Sstevel@tonic-gate 			/*
1857*0Sstevel@tonic-gate 			 * If the connection of interest is not connected and we
1858*0Sstevel@tonic-gate 			 * can't reconnect it, then the server is probably
1859*0Sstevel@tonic-gate 			 * still down.  Return NULL to the caller and let it
1860*0Sstevel@tonic-gate 			 * retry later if it wants to.  We have a delay so the
1861*0Sstevel@tonic-gate 			 * machine doesn't go into a tight retry loop.  If the
1862*0Sstevel@tonic-gate 			 * entry was already connected, or the reconnected was
1863*0Sstevel@tonic-gate 			 * successful, return this entry.
1864*0Sstevel@tonic-gate 			 */
1865*0Sstevel@tonic-gate 			if (cm_entry->x_connected == FALSE) {
1866*0Sstevel@tonic-gate 				return (connmgr_wrapconnect(cm_entry,
1867*0Sstevel@tonic-gate 				    waitp, destaddr, addrfmly, NULL,
1868*0Sstevel@tonic-gate 				    rpcerr, TRUE, nosignal));
1869*0Sstevel@tonic-gate 			} else {
1870*0Sstevel@tonic-gate 				CONN_HOLD(cm_entry);
1871*0Sstevel@tonic-gate 
1872*0Sstevel@tonic-gate 				cm_entry->x_time = lbolt;
1873*0Sstevel@tonic-gate 				mutex_exit(&connmgr_lock);
1874*0Sstevel@tonic-gate 				RPCLOG(2, "connmgr_get: found old "
1875*0Sstevel@tonic-gate 				    "transport %p for retry\n",
1876*0Sstevel@tonic-gate 				    (void *)cm_entry);
1877*0Sstevel@tonic-gate 				return (cm_entry);
1878*0Sstevel@tonic-gate 			}
1879*0Sstevel@tonic-gate 		}
1880*0Sstevel@tonic-gate 
1881*0Sstevel@tonic-gate 		/*
1882*0Sstevel@tonic-gate 		 * We cannot find an entry in the list for this retry.
1883*0Sstevel@tonic-gate 		 * Either the entry has been removed temporarily to be
1884*0Sstevel@tonic-gate 		 * reconnected by another thread, or the original call
1885*0Sstevel@tonic-gate 		 * got a port but never got connected,
1886*0Sstevel@tonic-gate 		 * and hence the transport never got put in the
1887*0Sstevel@tonic-gate 		 * list.  Fall through to the "create new connection" code -
1888*0Sstevel@tonic-gate 		 * the former case will fail there trying to rebind the port,
1889*0Sstevel@tonic-gate 		 * and the later case (and any other pathological cases) will
1890*0Sstevel@tonic-gate 		 * rebind and reconnect and not hang the client machine.
1891*0Sstevel@tonic-gate 		 */
1892*0Sstevel@tonic-gate 		RPCLOG0(8, "connmgr_get: no entry in list for retry\n");
1893*0Sstevel@tonic-gate 	}
1894*0Sstevel@tonic-gate 	/*
1895*0Sstevel@tonic-gate 	 * Set up a transport entry in the connection manager's list.
1896*0Sstevel@tonic-gate 	 */
1897*0Sstevel@tonic-gate 	cm_entry = (struct cm_xprt *)
1898*0Sstevel@tonic-gate 	    kmem_zalloc(sizeof (struct cm_xprt), KM_SLEEP);
1899*0Sstevel@tonic-gate 
1900*0Sstevel@tonic-gate 	cm_entry->x_server.buf = kmem_zalloc(destaddr->len, KM_SLEEP);
1901*0Sstevel@tonic-gate 	bcopy(destaddr->buf, cm_entry->x_server.buf, destaddr->len);
1902*0Sstevel@tonic-gate 	cm_entry->x_server.len = cm_entry->x_server.maxlen = destaddr->len;
1903*0Sstevel@tonic-gate 
1904*0Sstevel@tonic-gate 	cm_entry->x_state_flags = X_THREAD;
1905*0Sstevel@tonic-gate 	cm_entry->x_ref = 1;
1906*0Sstevel@tonic-gate 	cm_entry->x_family = addrfmly;
1907*0Sstevel@tonic-gate 	cm_entry->x_rdev = device;
1908*0Sstevel@tonic-gate 	cm_entry->x_zoneid = zoneid;
1909*0Sstevel@tonic-gate 	mutex_init(&cm_entry->x_lock, NULL, MUTEX_DEFAULT, NULL);
1910*0Sstevel@tonic-gate 	cv_init(&cm_entry->x_cv, NULL, CV_DEFAULT, NULL);
1911*0Sstevel@tonic-gate 	cv_init(&cm_entry->x_conn_cv, NULL, CV_DEFAULT, NULL);
1912*0Sstevel@tonic-gate 	cv_init(&cm_entry->x_dis_cv, NULL, CV_DEFAULT, NULL);
1913*0Sstevel@tonic-gate 
1914*0Sstevel@tonic-gate 	/*
1915*0Sstevel@tonic-gate 	 * Note that we add this partially initialized entry to the
1916*0Sstevel@tonic-gate 	 * connection list. This is so that we don't have connections to
1917*0Sstevel@tonic-gate 	 * the same server.
1918*0Sstevel@tonic-gate 	 *
1919*0Sstevel@tonic-gate 	 * Note that x_src is not initialized at this point. This is because
1920*0Sstevel@tonic-gate 	 * retryaddr might be NULL in which case x_src is whatever
1921*0Sstevel@tonic-gate 	 * t_kbind/bindresvport gives us. If another thread wants a
1922*0Sstevel@tonic-gate 	 * connection to the same server, seemingly we have an issue, but we
1923*0Sstevel@tonic-gate 	 * don't. If the other thread comes in with retryaddr == NULL, then it
1924*0Sstevel@tonic-gate 	 * will never look at x_src, and it will end up waiting in
1925*0Sstevel@tonic-gate 	 * connmgr_cwait() for the first thread to finish the connection
1926*0Sstevel@tonic-gate 	 * attempt. If the other thread comes in with retryaddr != NULL, then
1927*0Sstevel@tonic-gate 	 * that means there was a request sent on a connection, in which case
1928*0Sstevel@tonic-gate 	 * the the connection should already exist. Thus the first thread
1929*0Sstevel@tonic-gate 	 * never gets here ... it finds the connection it its server in the
1930*0Sstevel@tonic-gate 	 * connection list.
1931*0Sstevel@tonic-gate 	 *
1932*0Sstevel@tonic-gate 	 * But even if theory is wrong, in the retryaddr != NULL case, the 2nd
1933*0Sstevel@tonic-gate 	 * thread will skip us because x_src.len == 0.
1934*0Sstevel@tonic-gate 	 */
1935*0Sstevel@tonic-gate 	cm_entry->x_next = cm_hd;
1936*0Sstevel@tonic-gate 	cm_hd = cm_entry;
1937*0Sstevel@tonic-gate 	mutex_exit(&connmgr_lock);
1938*0Sstevel@tonic-gate 
1939*0Sstevel@tonic-gate 	/*
1940*0Sstevel@tonic-gate 	 * Either we didn't find an entry to the server of interest, or we
1941*0Sstevel@tonic-gate 	 * don't have the maximum number of connections to that server -
1942*0Sstevel@tonic-gate 	 * create a new connection.
1943*0Sstevel@tonic-gate 	 */
1944*0Sstevel@tonic-gate 	RPCLOG0(8, "connmgr_get: creating new connection\n");
1945*0Sstevel@tonic-gate 	rpcerr->re_status = RPC_TLIERROR;
1946*0Sstevel@tonic-gate 
1947*0Sstevel@tonic-gate 	i = t_kopen(NULL, device, FREAD|FWRITE|FNDELAY, &tiptr, kcred);
1948*0Sstevel@tonic-gate 	if (i) {
1949*0Sstevel@tonic-gate 		RPCLOG(1, "connmgr_get: can't open cots device, error %d\n", i);
1950*0Sstevel@tonic-gate 		rpcerr->re_errno = i;
1951*0Sstevel@tonic-gate 		connmgr_cancelconn(cm_entry);
1952*0Sstevel@tonic-gate 		return (NULL);
1953*0Sstevel@tonic-gate 	}
1954*0Sstevel@tonic-gate 	rpc_poptimod(tiptr->fp->f_vnode);
1955*0Sstevel@tonic-gate 
1956*0Sstevel@tonic-gate 	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"rpcmod", 0,
1957*0Sstevel@tonic-gate 			K_TO_K, kcred, &retval)) {
1958*0Sstevel@tonic-gate 		RPCLOG(1, "connmgr_get: can't push cots module, %d\n", i);
1959*0Sstevel@tonic-gate 		(void) t_kclose(tiptr, 1);
1960*0Sstevel@tonic-gate 		rpcerr->re_errno = i;
1961*0Sstevel@tonic-gate 		connmgr_cancelconn(cm_entry);
1962*0Sstevel@tonic-gate 		return (NULL);
1963*0Sstevel@tonic-gate 	}
1964*0Sstevel@tonic-gate 
1965*0Sstevel@tonic-gate 	if (i = strioctl(tiptr->fp->f_vnode, RPC_CLIENT, 0, 0, K_TO_K,
1966*0Sstevel@tonic-gate 		kcred, &retval)) {
1967*0Sstevel@tonic-gate 		RPCLOG(1, "connmgr_get: can't set client status with cots "
1968*0Sstevel@tonic-gate 		    "module, %d\n", i);
1969*0Sstevel@tonic-gate 		(void) t_kclose(tiptr, 1);
1970*0Sstevel@tonic-gate 		rpcerr->re_errno = i;
1971*0Sstevel@tonic-gate 		connmgr_cancelconn(cm_entry);
1972*0Sstevel@tonic-gate 		return (NULL);
1973*0Sstevel@tonic-gate 	}
1974*0Sstevel@tonic-gate 
1975*0Sstevel@tonic-gate 	mutex_enter(&connmgr_lock);
1976*0Sstevel@tonic-gate 
1977*0Sstevel@tonic-gate 	wq = tiptr->fp->f_vnode->v_stream->sd_wrq->q_next;
1978*0Sstevel@tonic-gate 	cm_entry->x_wq = wq;
1979*0Sstevel@tonic-gate 
1980*0Sstevel@tonic-gate 	mutex_exit(&connmgr_lock);
1981*0Sstevel@tonic-gate 
1982*0Sstevel@tonic-gate 	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"timod", 0,
1983*0Sstevel@tonic-gate 			K_TO_K, kcred, &retval)) {
1984*0Sstevel@tonic-gate 		RPCLOG(1, "connmgr_get: can't push timod, %d\n", i);
1985*0Sstevel@tonic-gate 		(void) t_kclose(tiptr, 1);
1986*0Sstevel@tonic-gate 		rpcerr->re_errno = i;
1987*0Sstevel@tonic-gate 		connmgr_cancelconn(cm_entry);
1988*0Sstevel@tonic-gate 		return (NULL);
1989*0Sstevel@tonic-gate 	}
1990*0Sstevel@tonic-gate 
1991*0Sstevel@tonic-gate 	/*
1992*0Sstevel@tonic-gate 	 * If the caller has not specified reserved port usage then
1993*0Sstevel@tonic-gate 	 * take the system default.
1994*0Sstevel@tonic-gate 	 */
1995*0Sstevel@tonic-gate 	if (useresvport == -1)
1996*0Sstevel@tonic-gate 		useresvport = clnt_cots_do_bindresvport;
1997*0Sstevel@tonic-gate 
1998*0Sstevel@tonic-gate 	if ((useresvport || retryaddr != NULL) &&
1999*0Sstevel@tonic-gate 	    (addrfmly == AF_INET || addrfmly == AF_INET6)) {
2000*0Sstevel@tonic-gate 		bool_t alloc_src = FALSE;
2001*0Sstevel@tonic-gate 
2002*0Sstevel@tonic-gate 		if (srcaddr->len != destaddr->len) {
2003*0Sstevel@tonic-gate 			kmem_free(srcaddr->buf, srcaddr->maxlen);
2004*0Sstevel@tonic-gate 			srcaddr->buf = kmem_zalloc(destaddr->len, KM_SLEEP);
2005*0Sstevel@tonic-gate 			srcaddr->maxlen = destaddr->len;
2006*0Sstevel@tonic-gate 			srcaddr->len = destaddr->len;
2007*0Sstevel@tonic-gate 			alloc_src = TRUE;
2008*0Sstevel@tonic-gate 		}
2009*0Sstevel@tonic-gate 
2010*0Sstevel@tonic-gate 		if ((i = bindresvport(tiptr, retryaddr, srcaddr, TRUE)) != 0) {
2011*0Sstevel@tonic-gate 			(void) t_kclose(tiptr, 1);
2012*0Sstevel@tonic-gate 			RPCLOG(1, "connmgr_get: couldn't bind, retryaddr: "
2013*0Sstevel@tonic-gate 				"%p\n", (void *)retryaddr);
2014*0Sstevel@tonic-gate 
2015*0Sstevel@tonic-gate 			/*
2016*0Sstevel@tonic-gate 			 * 1225408: If we allocated a source address, then it
2017*0Sstevel@tonic-gate 			 * is either garbage or all zeroes. In that case
2018*0Sstevel@tonic-gate 			 * we need to clear srcaddr.
2019*0Sstevel@tonic-gate 			 */
2020*0Sstevel@tonic-gate 			if (alloc_src == TRUE) {
2021*0Sstevel@tonic-gate 				kmem_free(srcaddr->buf, srcaddr->maxlen);
2022*0Sstevel@tonic-gate 				srcaddr->maxlen = srcaddr->len = 0;
2023*0Sstevel@tonic-gate 				srcaddr->buf = NULL;
2024*0Sstevel@tonic-gate 			}
2025*0Sstevel@tonic-gate 			rpcerr->re_errno = i;
2026*0Sstevel@tonic-gate 			connmgr_cancelconn(cm_entry);
2027*0Sstevel@tonic-gate 			return (NULL);
2028*0Sstevel@tonic-gate 		}
2029*0Sstevel@tonic-gate 	} else {
2030*0Sstevel@tonic-gate 		if ((i = t_kbind(tiptr, NULL, NULL)) != 0) {
2031*0Sstevel@tonic-gate 			RPCLOG(1, "clnt_cots_kcreate: t_kbind: %d\n", i);
2032*0Sstevel@tonic-gate 			(void) t_kclose(tiptr, 1);
2033*0Sstevel@tonic-gate 			rpcerr->re_errno = i;
2034*0Sstevel@tonic-gate 			connmgr_cancelconn(cm_entry);
2035*0Sstevel@tonic-gate 			return (NULL);
2036*0Sstevel@tonic-gate 		}
2037*0Sstevel@tonic-gate 	}
2038*0Sstevel@tonic-gate 
2039*0Sstevel@tonic-gate 	{
2040*0Sstevel@tonic-gate 		/*
2041*0Sstevel@tonic-gate 		 * Keep the kernel stack lean. Don't move this call
2042*0Sstevel@tonic-gate 		 * declaration to the top of this function because a
2043*0Sstevel@tonic-gate 		 * call is declared in connmgr_wrapconnect()
2044*0Sstevel@tonic-gate 		 */
2045*0Sstevel@tonic-gate 		calllist_t call;
2046*0Sstevel@tonic-gate 
2047*0Sstevel@tonic-gate 		bzero(&call, sizeof (call));
2048*0Sstevel@tonic-gate 		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);
2049*0Sstevel@tonic-gate 
2050*0Sstevel@tonic-gate 		/*
2051*0Sstevel@tonic-gate 		 * This is a bound end-point so don't close it's stream.
2052*0Sstevel@tonic-gate 		 */
2053*0Sstevel@tonic-gate 		connected = connmgr_connect(cm_entry, wq, destaddr, addrfmly,
2054*0Sstevel@tonic-gate 						&call, &tidu_size, FALSE, waitp,
2055*0Sstevel@tonic-gate 						nosignal);
2056*0Sstevel@tonic-gate 		*rpcerr = call.call_err;
2057*0Sstevel@tonic-gate 		cv_destroy(&call.call_cv);
2058*0Sstevel@tonic-gate 
2059*0Sstevel@tonic-gate 	}
2060*0Sstevel@tonic-gate 
2061*0Sstevel@tonic-gate 	mutex_enter(&connmgr_lock);
2062*0Sstevel@tonic-gate 
2063*0Sstevel@tonic-gate 	/*
2064*0Sstevel@tonic-gate 	 * Set up a transport entry in the connection manager's list.
2065*0Sstevel@tonic-gate 	 */
2066*0Sstevel@tonic-gate 	cm_entry->x_src.buf = kmem_zalloc(srcaddr->len, KM_SLEEP);
2067*0Sstevel@tonic-gate 	bcopy(srcaddr->buf, cm_entry->x_src.buf, srcaddr->len);
2068*0Sstevel@tonic-gate 	cm_entry->x_src.len = cm_entry->x_src.maxlen = srcaddr->len;
2069*0Sstevel@tonic-gate 
2070*0Sstevel@tonic-gate 	cm_entry->x_tiptr = tiptr;
2071*0Sstevel@tonic-gate 	cm_entry->x_time = lbolt;
2072*0Sstevel@tonic-gate 
2073*0Sstevel@tonic-gate 	if (tiptr->tp_info.servtype == T_COTS_ORD)
2074*0Sstevel@tonic-gate 		cm_entry->x_ordrel = TRUE;
2075*0Sstevel@tonic-gate 	else
2076*0Sstevel@tonic-gate 		cm_entry->x_ordrel = FALSE;
2077*0Sstevel@tonic-gate 
2078*0Sstevel@tonic-gate 	cm_entry->x_tidu_size = tidu_size;
2079*0Sstevel@tonic-gate 
2080*0Sstevel@tonic-gate 	if (cm_entry->x_early_disc)
2081*0Sstevel@tonic-gate 		cm_entry->x_connected = FALSE;
2082*0Sstevel@tonic-gate 	else
2083*0Sstevel@tonic-gate 		cm_entry->x_connected = connected;
2084*0Sstevel@tonic-gate 
2085*0Sstevel@tonic-gate 	/*
2086*0Sstevel@tonic-gate 	 * There could be a discrepancy here such that
2087*0Sstevel@tonic-gate 	 * x_early_disc is TRUE yet connected is TRUE as well
2088*0Sstevel@tonic-gate 	 * and the connection is actually connected. In that case
2089*0Sstevel@tonic-gate 	 * lets be conservative and declare the connection as not
2090*0Sstevel@tonic-gate 	 * connected.
2091*0Sstevel@tonic-gate 	 */
2092*0Sstevel@tonic-gate 	cm_entry->x_early_disc = FALSE;
2093*0Sstevel@tonic-gate 	cm_entry->x_needdis = (cm_entry->x_connected == FALSE);
2094*0Sstevel@tonic-gate 	cm_entry->x_ctime = lbolt;
2095*0Sstevel@tonic-gate 
2096*0Sstevel@tonic-gate 	/*
2097*0Sstevel@tonic-gate 	 * Notify any threads waiting that the connection attempt is done.
2098*0Sstevel@tonic-gate 	 */
2099*0Sstevel@tonic-gate 	cm_entry->x_thread = FALSE;
2100*0Sstevel@tonic-gate 	cv_broadcast(&cm_entry->x_conn_cv);
2101*0Sstevel@tonic-gate 
2102*0Sstevel@tonic-gate 	mutex_exit(&connmgr_lock);
2103*0Sstevel@tonic-gate 
2104*0Sstevel@tonic-gate 	if (cm_entry->x_connected == FALSE) {
2105*0Sstevel@tonic-gate 		connmgr_release(cm_entry);
2106*0Sstevel@tonic-gate 		return (NULL);
2107*0Sstevel@tonic-gate 	}
2108*0Sstevel@tonic-gate 	return (cm_entry);
2109*0Sstevel@tonic-gate }
2110*0Sstevel@tonic-gate 
2111*0Sstevel@tonic-gate /*
 * Keep the cm_xprt entry on the connection list when making a connection. This
2113*0Sstevel@tonic-gate  * is to prevent multiple connections to a slow server from appearing.
2114*0Sstevel@tonic-gate  * We use the bit field x_thread to tell if a thread is doing a connection
2115*0Sstevel@tonic-gate  * which keeps other interested threads from messing with connection.
2116*0Sstevel@tonic-gate  * Those other threads just wait if x_thread is set.
2117*0Sstevel@tonic-gate  *
2118*0Sstevel@tonic-gate  * If x_thread is not set, then we do the actual work of connecting via
2119*0Sstevel@tonic-gate  * connmgr_connect().
2120*0Sstevel@tonic-gate  *
2121*0Sstevel@tonic-gate  * mutex convention: called with connmgr_lock held, returns with it released.
2122*0Sstevel@tonic-gate  */
static struct cm_xprt *
connmgr_wrapconnect(
	struct cm_xprt	*cm_entry,	/* entry to (re)connect; held on return */
	const struct timeval	*waitp,	/* connect timeout */
	struct netbuf	*destaddr,	/* server address */
	int		addrfmly,	/* address family (AF_INET/AF_INET6/...) */
	struct netbuf	*srcaddr,	/* out: source addr of conn, may be NULL */
	struct rpc_err	*rpcerr,	/* out: RPC error status */
	bool_t		reconnect,	/* TRUE if reconnecting existing endpoint */
	bool_t		nosignal)	/* TRUE to ignore signals while waiting */
{
	ASSERT(MUTEX_HELD(&connmgr_lock));
	/*
	 * Hold this entry as we are about to drop connmgr_lock.
	 */
	CONN_HOLD(cm_entry);

	/*
	 * If there is a thread already making a connection for us, then
	 * wait for it to complete the connection.
	 */
	if (cm_entry->x_thread == TRUE) {
		rpcerr->re_status = connmgr_cwait(cm_entry, waitp, nosignal);

		if (rpcerr->re_status != RPC_SUCCESS) {
			/* Wait failed; drop the lock and our reference. */
			mutex_exit(&connmgr_lock);
			connmgr_release(cm_entry);
			return (NULL);
		}
	} else {
		bool_t connected;
		calllist_t call;

		/* We become the connecting thread for this entry. */
		cm_entry->x_thread = TRUE;

		/*
		 * Send any pending orderly release and pause before
		 * retrying.  NOTE(review): connmgr_lock is re-acquired at
		 * the bottom of the loop, so it must be dropped inside
		 * connmgr_sndrel().
		 */
		while (cm_entry->x_needrel == TRUE) {
			cm_entry->x_needrel = FALSE;

			connmgr_sndrel(cm_entry);
			delay(drv_usectohz(1000000));

			mutex_enter(&connmgr_lock);
		}

		/*
		 * If we need to send a T_DISCON_REQ, send one.
		 */
		connmgr_dis_and_wait(cm_entry);

		mutex_exit(&connmgr_lock);

		/*
		 * The calllist lives on this stack frame only for the
		 * duration of the connect attempt (keeps the kernel
		 * stack lean; see the matching pattern in connmgr_get()).
		 */
		bzero(&call, sizeof (call));
		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);

		connected = connmgr_connect(cm_entry, cm_entry->x_wq,
					    destaddr, addrfmly, &call,
					    &cm_entry->x_tidu_size,
					    reconnect, waitp, nosignal);

		*rpcerr = call.call_err;
		cv_destroy(&call.call_cv);

		mutex_enter(&connmgr_lock);


		if (cm_entry->x_early_disc)
			cm_entry->x_connected = FALSE;
		else
			cm_entry->x_connected = connected;

		/*
		 * There could be a discrepancy here such that
		 * x_early_disc is TRUE yet connected is TRUE as well
		 * and the connection is actually connected. In that case
		 * lets be conservative and declare the connection as not
		 * connected.
		 */

		cm_entry->x_early_disc = FALSE;
		cm_entry->x_needdis = (cm_entry->x_connected == FALSE);


		/*
		 * connmgr_connect() may have given up before the connection
		 * actually timed out. So ensure that before the next
		 * connection attempt we do a disconnect.
		 */
		cm_entry->x_ctime = lbolt;
		cm_entry->x_thread = FALSE;

		/* Wake any threads parked in connmgr_cwait() on this entry. */
		cv_broadcast(&cm_entry->x_conn_cv);

		if (cm_entry->x_connected == FALSE) {
			mutex_exit(&connmgr_lock);
			connmgr_release(cm_entry);
			return (NULL);
		}
	}

	if (srcaddr != NULL) {
		/*
		 * Copy into the handle the
		 * source address of the
		 * connection, which we will use
		 * in case of a later retry.
		 */
		if (srcaddr->len != cm_entry->x_src.len) {
			if (srcaddr->maxlen > 0)
				kmem_free(srcaddr->buf, srcaddr->maxlen);
			srcaddr->buf = kmem_zalloc(cm_entry->x_src.len,
			    KM_SLEEP);
			srcaddr->maxlen = srcaddr->len =
			    cm_entry->x_src.len;
		}
		bcopy(cm_entry->x_src.buf, srcaddr->buf, srcaddr->len);
	}
	/* Refresh the LRU timestamp; entry was just used. */
	cm_entry->x_time = lbolt;
	mutex_exit(&connmgr_lock);
	return (cm_entry);
}
2243*0Sstevel@tonic-gate 
2244*0Sstevel@tonic-gate /*
2245*0Sstevel@tonic-gate  * If we need to send a T_DISCON_REQ, send one.
2246*0Sstevel@tonic-gate  */
2247*0Sstevel@tonic-gate static void
2248*0Sstevel@tonic-gate connmgr_dis_and_wait(struct cm_xprt *cm_entry)
2249*0Sstevel@tonic-gate {
2250*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connmgr_lock));
2251*0Sstevel@tonic-gate 	for (;;) {
2252*0Sstevel@tonic-gate 		while (cm_entry->x_needdis == TRUE) {
2253*0Sstevel@tonic-gate 			RPCLOG(8, "connmgr_dis_and_wait: need "
2254*0Sstevel@tonic-gate 				"T_DISCON_REQ for connection 0x%p\n",
2255*0Sstevel@tonic-gate 				(void *)cm_entry);
2256*0Sstevel@tonic-gate 			cm_entry->x_needdis = FALSE;
2257*0Sstevel@tonic-gate 			cm_entry->x_waitdis = TRUE;
2258*0Sstevel@tonic-gate 
2259*0Sstevel@tonic-gate 			connmgr_snddis(cm_entry);
2260*0Sstevel@tonic-gate 
2261*0Sstevel@tonic-gate 			mutex_enter(&connmgr_lock);
2262*0Sstevel@tonic-gate 		}
2263*0Sstevel@tonic-gate 
2264*0Sstevel@tonic-gate 		if (cm_entry->x_waitdis == TRUE) {
2265*0Sstevel@tonic-gate 			clock_t curlbolt;
2266*0Sstevel@tonic-gate 			clock_t timout;
2267*0Sstevel@tonic-gate 
2268*0Sstevel@tonic-gate 			RPCLOG(8, "connmgr_dis_and_wait waiting for "
2269*0Sstevel@tonic-gate 				"T_DISCON_REQ's ACK for connection %p\n",
2270*0Sstevel@tonic-gate 				(void *)cm_entry);
2271*0Sstevel@tonic-gate 			curlbolt = ddi_get_lbolt();
2272*0Sstevel@tonic-gate 
2273*0Sstevel@tonic-gate 			timout = clnt_cots_min_conntout *
2274*0Sstevel@tonic-gate 				drv_usectohz(1000000) + curlbolt;
2275*0Sstevel@tonic-gate 
2276*0Sstevel@tonic-gate 			/*
2277*0Sstevel@tonic-gate 			 * The TPI spec says that the T_DISCON_REQ
2278*0Sstevel@tonic-gate 			 * will get acknowledged, but in practice
2279*0Sstevel@tonic-gate 			 * the ACK may never get sent. So don't
2280*0Sstevel@tonic-gate 			 * block forever.
2281*0Sstevel@tonic-gate 			 */
2282*0Sstevel@tonic-gate 			(void) cv_timedwait(&cm_entry->x_dis_cv,
2283*0Sstevel@tonic-gate 					    &connmgr_lock, timout);
2284*0Sstevel@tonic-gate 		}
2285*0Sstevel@tonic-gate 		/*
2286*0Sstevel@tonic-gate 		 * If we got the ACK, break. If we didn't,
2287*0Sstevel@tonic-gate 		 * then send another T_DISCON_REQ.
2288*0Sstevel@tonic-gate 		 */
2289*0Sstevel@tonic-gate 		if (cm_entry->x_waitdis == FALSE) {
2290*0Sstevel@tonic-gate 			break;
2291*0Sstevel@tonic-gate 		} else {
2292*0Sstevel@tonic-gate 			RPCLOG(8, "connmgr_dis_and_wait: did"
2293*0Sstevel@tonic-gate 				"not get T_DISCON_REQ's ACK for "
2294*0Sstevel@tonic-gate 				"connection  %p\n", (void *)cm_entry);
2295*0Sstevel@tonic-gate 			cm_entry->x_needdis = TRUE;
2296*0Sstevel@tonic-gate 		}
2297*0Sstevel@tonic-gate 	}
2298*0Sstevel@tonic-gate }
2299*0Sstevel@tonic-gate 
2300*0Sstevel@tonic-gate static void
2301*0Sstevel@tonic-gate connmgr_cancelconn(struct cm_xprt *cm_entry)
2302*0Sstevel@tonic-gate {
2303*0Sstevel@tonic-gate 	/*
2304*0Sstevel@tonic-gate 	 * Mark the connection table entry as dead; the next thread that
2305*0Sstevel@tonic-gate 	 * goes through connmgr_release() will notice this and deal with it.
2306*0Sstevel@tonic-gate 	 */
2307*0Sstevel@tonic-gate 	mutex_enter(&connmgr_lock);
2308*0Sstevel@tonic-gate 	cm_entry->x_dead = TRUE;
2309*0Sstevel@tonic-gate 
2310*0Sstevel@tonic-gate 	/*
2311*0Sstevel@tonic-gate 	 * Notify any threads waiting for the connection that it isn't
2312*0Sstevel@tonic-gate 	 * going to happen.
2313*0Sstevel@tonic-gate 	 */
2314*0Sstevel@tonic-gate 	cm_entry->x_thread = FALSE;
2315*0Sstevel@tonic-gate 	cv_broadcast(&cm_entry->x_conn_cv);
2316*0Sstevel@tonic-gate 	mutex_exit(&connmgr_lock);
2317*0Sstevel@tonic-gate 
2318*0Sstevel@tonic-gate 	connmgr_release(cm_entry);
2319*0Sstevel@tonic-gate }
2320*0Sstevel@tonic-gate 
2321*0Sstevel@tonic-gate static void
2322*0Sstevel@tonic-gate connmgr_close(struct cm_xprt *cm_entry)
2323*0Sstevel@tonic-gate {
2324*0Sstevel@tonic-gate 	mutex_enter(&cm_entry->x_lock);
2325*0Sstevel@tonic-gate 	while (cm_entry->x_ref != 0) {
2326*0Sstevel@tonic-gate 		/*
2327*0Sstevel@tonic-gate 		 * Must be a noninterruptible wait.
2328*0Sstevel@tonic-gate 		 */
2329*0Sstevel@tonic-gate 		cv_wait(&cm_entry->x_cv, &cm_entry->x_lock);
2330*0Sstevel@tonic-gate 	}
2331*0Sstevel@tonic-gate 
2332*0Sstevel@tonic-gate 	if (cm_entry->x_tiptr != NULL)
2333*0Sstevel@tonic-gate 		(void) t_kclose(cm_entry->x_tiptr, 1);
2334*0Sstevel@tonic-gate 
2335*0Sstevel@tonic-gate 	mutex_exit(&cm_entry->x_lock);
2336*0Sstevel@tonic-gate 	if (cm_entry->x_ksp != NULL) {
2337*0Sstevel@tonic-gate 		mutex_enter(&connmgr_lock);
2338*0Sstevel@tonic-gate 		cm_entry->x_ksp->ks_private = NULL;
2339*0Sstevel@tonic-gate 		mutex_exit(&connmgr_lock);
2340*0Sstevel@tonic-gate 
2341*0Sstevel@tonic-gate 		/*
2342*0Sstevel@tonic-gate 		 * Must free the buffer we allocated for the
2343*0Sstevel@tonic-gate 		 * server address in the update function
2344*0Sstevel@tonic-gate 		 */
2345*0Sstevel@tonic-gate 		if (((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
2346*0Sstevel@tonic-gate 		    x_server.value.string.addr.ptr != NULL)
2347*0Sstevel@tonic-gate 			kmem_free(((struct cm_kstat_xprt *)(cm_entry->x_ksp->
2348*0Sstevel@tonic-gate 			    ks_data))->x_server.value.string.addr.ptr,
2349*0Sstevel@tonic-gate 				    INET6_ADDRSTRLEN);
2350*0Sstevel@tonic-gate 		kmem_free(cm_entry->x_ksp->ks_data,
2351*0Sstevel@tonic-gate 			    cm_entry->x_ksp->ks_data_size);
2352*0Sstevel@tonic-gate 		kstat_delete(cm_entry->x_ksp);
2353*0Sstevel@tonic-gate 	}
2354*0Sstevel@tonic-gate 
2355*0Sstevel@tonic-gate 	mutex_destroy(&cm_entry->x_lock);
2356*0Sstevel@tonic-gate 	cv_destroy(&cm_entry->x_cv);
2357*0Sstevel@tonic-gate 	cv_destroy(&cm_entry->x_conn_cv);
2358*0Sstevel@tonic-gate 	cv_destroy(&cm_entry->x_dis_cv);
2359*0Sstevel@tonic-gate 
2360*0Sstevel@tonic-gate 	if (cm_entry->x_server.buf != NULL)
2361*0Sstevel@tonic-gate 		kmem_free(cm_entry->x_server.buf, cm_entry->x_server.maxlen);
2362*0Sstevel@tonic-gate 	if (cm_entry->x_src.buf != NULL)
2363*0Sstevel@tonic-gate 		kmem_free(cm_entry->x_src.buf, cm_entry->x_src.maxlen);
2364*0Sstevel@tonic-gate 	kmem_free(cm_entry, sizeof (struct cm_xprt));
2365*0Sstevel@tonic-gate }
2366*0Sstevel@tonic-gate 
2367*0Sstevel@tonic-gate /*
2368*0Sstevel@tonic-gate  * Called by KRPC after sending the call message to release the connection
2369*0Sstevel@tonic-gate  * it was using.
2370*0Sstevel@tonic-gate  */
2371*0Sstevel@tonic-gate static void
2372*0Sstevel@tonic-gate connmgr_release(struct cm_xprt *cm_entry)
2373*0Sstevel@tonic-gate {
2374*0Sstevel@tonic-gate 	mutex_enter(&cm_entry->x_lock);
2375*0Sstevel@tonic-gate 	cm_entry->x_ref--;
2376*0Sstevel@tonic-gate 	if (cm_entry->x_ref == 0)
2377*0Sstevel@tonic-gate 		cv_signal(&cm_entry->x_cv);
2378*0Sstevel@tonic-gate 	mutex_exit(&cm_entry->x_lock);
2379*0Sstevel@tonic-gate }
2380*0Sstevel@tonic-gate 
/*
 * Given an open stream, connect to the remote.  Returns true if connected,
 * false otherwise.
 *
 * Sends a T_CONN_REQ carrying 'addr' down 'wq' and waits on the calllist
 * 'e' (the entry normally used for RPC replies) for the connection to
 * complete.  On success the negotiated TIDU size is returned through
 * 'tidu_ptr' and, for a first-time connection, a per-connection kstat is
 * created on 'cm_entry'.  On failure, e->call_status and e->call_reason
 * describe the error.  'waitp'/'nosignal' bound and qualify the wait;
 * 'reconnect' flushes stale data from a previously-used stream first.
 */
static bool_t
connmgr_connect(
	struct cm_xprt		*cm_entry,
	queue_t			*wq,
	struct netbuf		*addr,
	int			addrfmly,
	calllist_t		*e,
	int			*tidu_ptr,
	bool_t			reconnect,
	const struct timeval	*waitp,
	bool_t			nosignal)
{
	mblk_t *mp;
	struct T_conn_req *tcr;
	struct T_info_ack *tinfo;
	int interrupted, error;
	int tidu_size, kstat_instance;

	/* if it's a reconnect, flush any lingering data messages */
	if (reconnect)
		(void) putctl1(wq, M_FLUSH, FLUSHRW);

	mp = allocb(sizeof (*tcr) + addr->len, BPRI_LO);
	if (mp == NULL) {
		/*
		 * This is unfortunate, but we need to look up the stats for
		 * this zone to increment the "memory allocation failed"
		 * counter.  curproc->p_zone is safe since we're initiating a
		 * connection and not in some strange streams context.
		 */
		struct rpcstat *rpcstat;

		rpcstat = zone_getspecific(rpcstat_zone_key, curproc->p_zone);
		ASSERT(rpcstat != NULL);

		RPCLOG0(1, "connmgr_connect: cannot alloc mp for "
		    "sending conn request\n");
		COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcnomem);
		e->call_status = RPC_SYSTEMERROR;
		e->call_reason = ENOSR;
		return (FALSE);
	}

	/* Build the T_CONN_REQ with the destination address appended. */
	mp->b_datap->db_type = M_PROTO;
	tcr = (struct T_conn_req *)mp->b_rptr;
	bzero(tcr, sizeof (*tcr));
	tcr->PRIM_type = T_CONN_REQ;
	tcr->DEST_length = addr->len;
	tcr->DEST_offset = sizeof (struct T_conn_req);
	mp->b_wptr = mp->b_rptr + sizeof (*tcr);

	bcopy(addr->buf, mp->b_wptr, tcr->DEST_length);
	mp->b_wptr += tcr->DEST_length;

	RPCLOG(8, "connmgr_connect: sending conn request on queue "
	    "%p", (void *)wq);
	RPCLOG(8, " call %p\n", (void *)wq);
	/*
	 * We use the entry in the handle that is normally used for
	 * waiting for RPC replies to wait for the connection accept.
	 * xid 0 places 'e' on the clnt_pending list rather than the
	 * RPC dispatch hash.
	 */
	clnt_dispatch_send(wq, mp, e, 0, 0);

	mutex_enter(&clnt_pending_lock);

	/*
	 * We wait for the transport connection to be made, or an
	 * indication that it could not be made.
	 */
	interrupted = 0;

	/*
	 * waitforack should have been called with T_OK_ACK, but the
	 * present implementation needs to be passed T_INFO_ACK to
	 * work correctly.
	 */
	error = waitforack(e, T_INFO_ACK, waitp, nosignal);
	if (error == EINTR)
		interrupted = 1;
	if (zone_status_get(curproc->p_zone) >= ZONE_IS_EMPTY) {
		/*
		 * No time to lose; we essentially have been signaled to
		 * quit.
		 */
		interrupted = 1;
	}
#ifdef RPCDEBUG
	if (error == ETIME)
		RPCLOG0(8, "connmgr_connect: giving up "
		    "on connection attempt; "
		    "clnt_dispatch notifyconn "
		    "diagnostic 'no one waiting for "
		    "connection' should not be "
		    "unexpected\n");
#endif
	/*
	 * Unlink 'e' from the clnt_pending list while clnt_pending_lock
	 * is still held (it was taken above, before waitforack).
	 */
	if (e->call_prev)
		e->call_prev->call_next = e->call_next;
	else
		clnt_pending = e->call_next;
	if (e->call_next)
		e->call_next->call_prev = e->call_prev;
	mutex_exit(&clnt_pending_lock);

	if (e->call_status != RPC_SUCCESS || error != 0) {
		/* Map the local wait outcome onto an RPC error status. */
		if (interrupted)
			e->call_status = RPC_INTR;
		else if (error == ETIME)
			e->call_status = RPC_TIMEDOUT;
		else if (error == EPROTO)
			e->call_status = RPC_SYSTEMERROR;

		RPCLOG(8, "connmgr_connect: can't connect, status: "
		    "%s\n", clnt_sperrno(e->call_status));

		/* Discard any reply that raced in before the failure. */
		if (e->call_reply) {
			freemsg(e->call_reply);
			e->call_reply = NULL;
		}

		return (FALSE);
	}
	/*
	 * The result of the "connection accept" is a T_info_ack
	 * in the call_reply field.
	 */
	ASSERT(e->call_reply != NULL);
	mp = e->call_reply;
	e->call_reply = NULL;
	tinfo = (struct T_info_ack *)mp->b_rptr;

	/*
	 * Round the advertised TIDU size down to a multiple of the XDR
	 * unit, and clamp nonsensical or oversized values to the default.
	 */
	tidu_size = tinfo->TIDU_size;
	tidu_size -= (tidu_size % BYTES_PER_XDR_UNIT);
	if (tidu_size > COTS_DEFAULT_ALLOCSIZE || (tidu_size <= 0))
		tidu_size = COTS_DEFAULT_ALLOCSIZE;
	*tidu_ptr = tidu_size;

	freemsg(mp);

	/*
	 * Set up the pertinent options.  NODELAY is so the transport doesn't
	 * buffer up RPC messages on either end.  This may not be valid for
	 * all transports.  Failure to set this option is not cause to
	 * bail out so we return success anyway.  Note that lack of NODELAY
	 * or some other way to flush the message on both ends will cause
	 * lots of retries and terrible performance.
	 */
	if (addrfmly == AF_INET || addrfmly == AF_INET6) {
		(void) connmgr_setopt(wq, IPPROTO_TCP, TCP_NODELAY, e);
		if (e->call_status == RPC_XPRTFAILED)
			return (FALSE);
	}

	/*
	 * Since we have a connection, we now need to figure out if
	 * we need to create a kstat. If x_ksp is not NULL then we
	 * are reusing a connection and so we do not need to create
	 * another kstat -- lets just return.
	 */
	if (cm_entry->x_ksp != NULL)
		return (TRUE);

	/*
	 * We need to increment rpc_kstat_instance atomically to prevent
	 * two kstats being created with the same instance.
	 */
	kstat_instance = atomic_add_32_nv((uint32_t *)&rpc_kstat_instance, 1);

	if ((cm_entry->x_ksp = kstat_create_zone("unix", kstat_instance,
	    "rpc_cots_connections", "rpc", KSTAT_TYPE_NAMED,
	    (uint_t)(sizeof (cm_kstat_xprt_t) / sizeof (kstat_named_t)),
	    KSTAT_FLAG_VIRTUAL, cm_entry->x_zoneid)) == NULL) {
		/* Losing the kstat is not fatal; the connection is up. */
		return (TRUE);
	}

	/*
	 * Virtual kstat: we supply the data area ourselves -- a copy of
	 * the template plus room for the server's address string.
	 */
	cm_entry->x_ksp->ks_lock = &connmgr_lock;
	cm_entry->x_ksp->ks_private = cm_entry;
	cm_entry->x_ksp->ks_data_size = ((INET6_ADDRSTRLEN * sizeof (char))
					    + sizeof (cm_kstat_template));
	cm_entry->x_ksp->ks_data = kmem_alloc(cm_entry->x_ksp->ks_data_size,
					    KM_SLEEP);
	bcopy(&cm_kstat_template, cm_entry->x_ksp->ks_data,
	    cm_entry->x_ksp->ks_data_size);
	((struct cm_kstat_xprt *)(cm_entry->x_ksp->ks_data))->
		    x_server.value.string.addr.ptr =
		    kmem_alloc(INET6_ADDRSTRLEN, KM_SLEEP);

	cm_entry->x_ksp->ks_update = conn_kstat_update;
	kstat_install(cm_entry->x_ksp);
	return (TRUE);
}
2575*0Sstevel@tonic-gate 
/*
 * Called by connmgr_connect to set an option on the new stream.
 *
 * Builds a T_SVR4_OPTMGMT_REQ negotiating <level, name> with an int
 * value of 1 (i.e. "enable the option"), sends it down 'wq', and waits
 * up to clnt_cots_min_conntout seconds for the T_OPTMGMT_ACK.  Returns
 * TRUE if the option was acknowledged, FALSE otherwise; the caller
 * decides whether a failure matters.
 */
static bool_t
connmgr_setopt(queue_t *wq, int level, int name, calllist_t *e)
{
	mblk_t *mp;
	struct opthdr *opt;
	struct T_optmgmt_req *tor;
	struct timeval waitp;
	int error;

	mp = allocb(sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
	    sizeof (int), BPRI_LO);
	if (mp == NULL) {
		RPCLOG0(1, "connmgr_setopt: cannot alloc mp for option "
		    "request\n");
		return (FALSE);
	}

	/* T_optmgmt_req header, followed by one opthdr and its int value. */
	mp->b_datap->db_type = M_PROTO;
	tor = (struct T_optmgmt_req *)(mp->b_rptr);
	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
	tor->MGMT_flags = T_NEGOTIATE;
	tor->OPT_length = sizeof (struct opthdr) + sizeof (int);
	tor->OPT_offset = sizeof (struct T_optmgmt_req);

	opt = (struct opthdr *)(mp->b_rptr + sizeof (struct T_optmgmt_req));
	opt->level = level;
	opt->name = name;
	opt->len = sizeof (int);
	*(int *)((char *)opt + sizeof (*opt)) = 1;	/* enable the option */
	mp->b_wptr += sizeof (struct T_optmgmt_req) + sizeof (struct opthdr) +
	    sizeof (int);

	/*
	 * We will use this connection regardless
	 * of whether or not the option is settable.
	 */
	clnt_dispatch_send(wq, mp, e, 0, 0);
	mutex_enter(&clnt_pending_lock);

	waitp.tv_sec = clnt_cots_min_conntout;
	waitp.tv_usec = 0;
	/* Final argument (nosignal) is 1: do not let signals break the wait. */
	error = waitforack(e, T_OPTMGMT_ACK, &waitp, 1);

	/* Unlink 'e' from the clnt_pending list while the lock is held. */
	if (e->call_prev)
		e->call_prev->call_next = e->call_next;
	else
		clnt_pending = e->call_next;
	if (e->call_next)
		e->call_next->call_prev = e->call_prev;
	mutex_exit(&clnt_pending_lock);

	/* The ack contents are not interesting; discard them. */
	if (e->call_reply != NULL) {
		freemsg(e->call_reply);
		e->call_reply = NULL;
	}

	if (e->call_status != RPC_SUCCESS || error != 0) {
		RPCLOG(1, "connmgr_setopt: can't set option: %d\n", name);
		return (FALSE);
	}
	RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name);
	return (TRUE);
}
2642*0Sstevel@tonic-gate 
#ifdef	DEBUG

/*
 * This is a knob to let us force code coverage in allocation failure
 * case.  Setting connmgr_failsnd to N makes the next N CONN_SND_ALLOC
 * calls return NULL instead of a freshly allocated mblk.
 */
static int	connmgr_failsnd;
#define	CONN_SND_ALLOC(Size, Pri)	\
	((connmgr_failsnd-- > 0) ? NULL : allocb(Size, Pri))

#else

/* Non-DEBUG build: a plain allocb with no failure injection. */
#define	CONN_SND_ALLOC(Size, Pri)	allocb(Size, Pri)

#endif
2658*0Sstevel@tonic-gate 
2659*0Sstevel@tonic-gate /*
2660*0Sstevel@tonic-gate  * Sends an orderly release on the specified queue.
2661*0Sstevel@tonic-gate  * Entered with connmgr_lock. Exited without connmgr_lock
2662*0Sstevel@tonic-gate  */
2663*0Sstevel@tonic-gate static void
2664*0Sstevel@tonic-gate connmgr_sndrel(struct cm_xprt *cm_entry)
2665*0Sstevel@tonic-gate {
2666*0Sstevel@tonic-gate 	struct T_ordrel_req *torr;
2667*0Sstevel@tonic-gate 	mblk_t *mp;
2668*0Sstevel@tonic-gate 	queue_t *q = cm_entry->x_wq;
2669*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connmgr_lock));
2670*0Sstevel@tonic-gate 	mp = CONN_SND_ALLOC(sizeof (struct T_ordrel_req), BPRI_LO);
2671*0Sstevel@tonic-gate 	if (mp == NULL) {
2672*0Sstevel@tonic-gate 		cm_entry->x_needrel = TRUE;
2673*0Sstevel@tonic-gate 		mutex_exit(&connmgr_lock);
2674*0Sstevel@tonic-gate 		RPCLOG(1, "connmgr_sndrel: cannot alloc mp for sending ordrel "
2675*0Sstevel@tonic-gate 			"to queue %p\n", (void *)q);
2676*0Sstevel@tonic-gate 		return;
2677*0Sstevel@tonic-gate 	}
2678*0Sstevel@tonic-gate 	mutex_exit(&connmgr_lock);
2679*0Sstevel@tonic-gate 
2680*0Sstevel@tonic-gate 	mp->b_datap->db_type = M_PROTO;
2681*0Sstevel@tonic-gate 	torr = (struct T_ordrel_req *)(mp->b_rptr);
2682*0Sstevel@tonic-gate 	torr->PRIM_type = T_ORDREL_REQ;
2683*0Sstevel@tonic-gate 	mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_req);
2684*0Sstevel@tonic-gate 
2685*0Sstevel@tonic-gate 	RPCLOG(8, "connmgr_sndrel: sending ordrel to queue %p\n", (void *)q);
2686*0Sstevel@tonic-gate 	put(q, mp);
2687*0Sstevel@tonic-gate }
2688*0Sstevel@tonic-gate 
2689*0Sstevel@tonic-gate /*
2690*0Sstevel@tonic-gate  * Sends an disconnect on the specified queue.
2691*0Sstevel@tonic-gate  * Entered with connmgr_lock. Exited without connmgr_lock
2692*0Sstevel@tonic-gate  */
2693*0Sstevel@tonic-gate static void
2694*0Sstevel@tonic-gate connmgr_snddis(struct cm_xprt *cm_entry)
2695*0Sstevel@tonic-gate {
2696*0Sstevel@tonic-gate 	struct T_discon_req *tdis;
2697*0Sstevel@tonic-gate 	mblk_t *mp;
2698*0Sstevel@tonic-gate 	queue_t *q = cm_entry->x_wq;
2699*0Sstevel@tonic-gate 
2700*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connmgr_lock));
2701*0Sstevel@tonic-gate 	mp = CONN_SND_ALLOC(sizeof (*tdis), BPRI_LO);
2702*0Sstevel@tonic-gate 	if (mp == NULL) {
2703*0Sstevel@tonic-gate 		cm_entry->x_needdis = TRUE;
2704*0Sstevel@tonic-gate 		mutex_exit(&connmgr_lock);
2705*0Sstevel@tonic-gate 		RPCLOG(1, "connmgr_snddis: cannot alloc mp for sending discon "
2706*0Sstevel@tonic-gate 		    "to queue %p\n", (void *)q);
2707*0Sstevel@tonic-gate 		return;
2708*0Sstevel@tonic-gate 	}
2709*0Sstevel@tonic-gate 	mutex_exit(&connmgr_lock);
2710*0Sstevel@tonic-gate 
2711*0Sstevel@tonic-gate 	mp->b_datap->db_type = M_PROTO;
2712*0Sstevel@tonic-gate 	tdis = (struct T_discon_req *)mp->b_rptr;
2713*0Sstevel@tonic-gate 	tdis->PRIM_type = T_DISCON_REQ;
2714*0Sstevel@tonic-gate 	mp->b_wptr = mp->b_rptr + sizeof (*tdis);
2715*0Sstevel@tonic-gate 
2716*0Sstevel@tonic-gate 	RPCLOG(8, "connmgr_snddis: sending discon to queue %p\n", (void *)q);
2717*0Sstevel@tonic-gate 	put(q, mp);
2718*0Sstevel@tonic-gate }
2719*0Sstevel@tonic-gate 
2720*0Sstevel@tonic-gate /*
2721*0Sstevel@tonic-gate  * Sets up the entry for receiving replies, and calls rpcmod's write put proc
2722*0Sstevel@tonic-gate  * (through put) to send the call.
2723*0Sstevel@tonic-gate  */
2724*0Sstevel@tonic-gate static void
2725*0Sstevel@tonic-gate clnt_dispatch_send(queue_t *q, mblk_t *mp, calllist_t *e, uint_t xid,
2726*0Sstevel@tonic-gate 			uint_t queue_flag)
2727*0Sstevel@tonic-gate {
2728*0Sstevel@tonic-gate 	ASSERT(e != NULL);
2729*0Sstevel@tonic-gate 
2730*0Sstevel@tonic-gate 	e->call_status = RPC_TIMEDOUT;	/* optimistic, eh? */
2731*0Sstevel@tonic-gate 	e->call_reason = 0;
2732*0Sstevel@tonic-gate 	e->call_wq = q;
2733*0Sstevel@tonic-gate 	e->call_xid = xid;
2734*0Sstevel@tonic-gate 	e->call_notified = FALSE;
2735*0Sstevel@tonic-gate 
2736*0Sstevel@tonic-gate 	/*
2737*0Sstevel@tonic-gate 	 * If queue_flag is set then the calllist_t is already on the hash
2738*0Sstevel@tonic-gate 	 * queue.  In this case just send the message and return.
2739*0Sstevel@tonic-gate 	 */
2740*0Sstevel@tonic-gate 	if (queue_flag) {
2741*0Sstevel@tonic-gate 		put(q, mp);
2742*0Sstevel@tonic-gate 		return;
2743*0Sstevel@tonic-gate 	}
2744*0Sstevel@tonic-gate 
2745*0Sstevel@tonic-gate 	/*
2746*0Sstevel@tonic-gate 	 * Set up calls for RPC requests (with XID != 0) on the hash
2747*0Sstevel@tonic-gate 	 * queue for fast lookups and place other calls (i.e.
2748*0Sstevel@tonic-gate 	 * connection management) on the linked list.
2749*0Sstevel@tonic-gate 	 */
2750*0Sstevel@tonic-gate 	if (xid != 0) {
2751*0Sstevel@tonic-gate 		RPCLOG(64, "clnt_dispatch_send: putting xid 0x%x on "
2752*0Sstevel@tonic-gate 			"dispatch list\n", xid);
2753*0Sstevel@tonic-gate 		e->call_hash = call_hash(xid, clnt_cots_hash_size);
2754*0Sstevel@tonic-gate 		e->call_bucket = &cots_call_ht[e->call_hash];
2755*0Sstevel@tonic-gate 		call_table_enter(e);
2756*0Sstevel@tonic-gate 	} else {
2757*0Sstevel@tonic-gate 		mutex_enter(&clnt_pending_lock);
2758*0Sstevel@tonic-gate 		if (clnt_pending)
2759*0Sstevel@tonic-gate 			clnt_pending->call_prev = e;
2760*0Sstevel@tonic-gate 		e->call_next = clnt_pending;
2761*0Sstevel@tonic-gate 		e->call_prev = NULL;
2762*0Sstevel@tonic-gate 		clnt_pending = e;
2763*0Sstevel@tonic-gate 		mutex_exit(&clnt_pending_lock);
2764*0Sstevel@tonic-gate 	}
2765*0Sstevel@tonic-gate 
2766*0Sstevel@tonic-gate 	put(q, mp);
2767*0Sstevel@tonic-gate }
2768*0Sstevel@tonic-gate 
/*
 * Called by rpcmod to notify a client with a clnt_pending call that its reply
 * has arrived.  If we can't find a client waiting for this reply, we log
 * the error and return.
 *
 * Returns TRUE when a waiter was found and handed the reply (ownership of
 * mp transfers to the waiter); FALSE otherwise, in which case the caller
 * still owns mp.
 */
bool_t
clnt_dispatch_notify(mblk_t *mp, zoneid_t zoneid)
{
	calllist_t *e = NULL;
	call_table_t *chtp;
	uint32_t xid;
	uint_t hash;

	/*
	 * The xid is the first 32 bits of the reply.  Read it in place if
	 * it is suitably aligned and wholly contained in the first mblk;
	 * otherwise assemble it byte-by-byte from the mblk chain below.
	 */
	if ((IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) &&
	    (mp->b_wptr - mp->b_rptr) >= sizeof (xid))
		xid = *((uint32_t *)mp->b_rptr);
	else {
		int i = 0;
		unsigned char *p = (unsigned char *)&xid;
		unsigned char *rptr;
		mblk_t *tmp = mp;

		/*
		 * Copy the xid, byte-by-byte into xid.
		 */
		while (tmp) {
			rptr = tmp->b_rptr;
			while (rptr < tmp->b_wptr) {
				*p++ = *rptr++;
				if (++i >= sizeof (xid))
					goto done_xid_copy;
			}
			tmp = tmp->b_cont;
		}

		/*
		 * If we got here, we ran out of mblk space before the
		 * xid could be copied.
		 */
		ASSERT(tmp == NULL && i < sizeof (xid));

		RPCLOG0(1,
		    "clnt_dispatch_notify: message less than size of xid\n");
		return (FALSE);

	}
done_xid_copy:

	hash = call_hash(xid, clnt_cots_hash_size);
	chtp = &cots_call_ht[hash];
	/* call_table_find returns with the hash bucket locked */
	call_table_find(chtp, xid, e);

	if (e != NULL) {
		/*
		 * Found thread waiting for this reply
		 */
		mutex_enter(&e->call_lock);
		if (e->call_reply)
			/*
			 * This can happen under the following scenario:
			 * clnt_cots_kcallit() times out on the response,
			 * rfscall() repeats the CLNT_CALL() with
			 * the same xid, clnt_cots_kcallit() sends the retry,
			 * thereby putting the clnt handle on the pending list,
			 * the first response arrives, signalling the thread
			 * in clnt_cots_kcallit(). Before that thread is
			 * dispatched, the second response arrives as well,
			 * and clnt_dispatch_notify still finds the handle on
			 * the pending list, with call_reply set. So free the
			 * old reply now.
			 *
			 * It is also possible for a response intended for
			 * an RPC call with a different xid to reside here.
			 * This can happen if the thread that owned this
			 * client handle prior to the current owner bailed
			 * out and left its call record on the dispatch
			 * queue.  A window exists where the response can
			 * arrive before the current owner dispatches its
			 * RPC call.
			 *
			 * In any case, this is the very last point where we
			 * can safely check the call_reply field before
			 * placing the new response there.
			 */
			freemsg(e->call_reply);
		/* Hand off the reply and wake the waiter. */
		e->call_reply = mp;
		e->call_status = RPC_SUCCESS;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		mutex_exit(&e->call_lock);
		mutex_exit(&chtp->ct_lock);
		return (TRUE);
	} else {
		zone_t *zone;
		struct rpcstat *rpcstat;

		mutex_exit(&chtp->ct_lock);
		RPCLOG(65, "clnt_dispatch_notify: no caller for reply 0x%x\n",
		    xid);
		/*
		 * This is unfortunate, but we need to lookup the zone so we
		 * can increment its "rcbadxids" counter.
		 */
		zone = zone_find_by_id(zoneid);
		if (zone == NULL) {
			/*
			 * The zone went away...
			 */
			return (FALSE);
		}
		rpcstat = zone_getspecific(rpcstat_zone_key, zone);
		if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN) {
			/*
			 * Not interested
			 */
			zone_rele(zone);
			return (FALSE);
		}
		COTSRCSTAT_INCR(rpcstat->rpc_cots_client, rcbadxids);
		zone_rele(zone);
	}
	return (FALSE);
}
2893*0Sstevel@tonic-gate 
/*
 * Called by rpcmod when a non-data indication arrives.  The ones in which we
 * are interested are connection indications and options acks.  We dispatch
 * based on the queue the indication came in on.  If we are not interested in
 * what came in, we return false to rpcmod, who will then pass it upstream.
 *
 * Returns TRUE when the primitive was consumed here (mp ownership taken),
 * FALSE when rpcmod should pass it upstream.
 */
bool_t
clnt_dispatch_notifyconn(queue_t *q, mblk_t *mp)
{
	calllist_t *e;
	int type;

	ASSERT((q->q_flag & QREADR) == 0);

	type = ((union T_primitives *)mp->b_rptr)->type;
	RPCLOG(8, "clnt_dispatch_notifyconn: prim type: [%s]\n",
	    rpc_tpiprim2name(type));
	/* Find the calllist entry waiting on this write queue. */
	mutex_enter(&clnt_pending_lock);
	for (e = clnt_pending; /* NO CONDITION */; e = e->call_next) {
		if (e == NULL) {
			mutex_exit(&clnt_pending_lock);
			RPCLOG(1, "clnt_dispatch_notifyconn: no one waiting "
			    "for connection on queue 0x%p\n", (void *)q);
			return (FALSE);
		}
		if (e->call_wq == q)
			break;
	}

	switch (type) {
	case T_CONN_CON:
		/*
		 * The transport is now connected, send a T_INFO_REQ to get
		 * the tidu size.  The incoming mblk is rewritten in place
		 * into the request (the ASSERT checks there is room).
		 */
		mutex_exit(&clnt_pending_lock);
		ASSERT(mp->b_datap->db_lim - mp->b_datap->db_base >=
			sizeof (struct T_info_req));
		mp->b_rptr = mp->b_datap->db_base;
		((union T_primitives *)mp->b_rptr)->type = T_INFO_REQ;
		mp->b_wptr = mp->b_rptr + sizeof (struct T_info_req);
		mp->b_datap->db_type = M_PCPROTO;
		put(q, mp);
		return (TRUE);
	case T_INFO_ACK:
	case T_OPTMGMT_ACK:
		/* Hand the ack to the waiter and wake it. */
		e->call_status = RPC_SUCCESS;
		e->call_reply = mp;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		break;
	case T_ERROR_ACK:
		/* The connection attempt failed; deliver the error ack. */
		e->call_status = RPC_CANTCONNECT;
		e->call_reply = mp;
		e->call_notified = TRUE;
		cv_signal(&e->call_cv);
		break;
	case T_OK_ACK:
		/*
		 * Great, but we are really waiting for a T_CONN_CON
		 */
		freemsg(mp);
		break;
	default:
		mutex_exit(&clnt_pending_lock);
		RPCLOG(1, "clnt_dispatch_notifyconn: bad type %d\n", type);
		return (FALSE);
	}

	mutex_exit(&clnt_pending_lock);
	return (TRUE);
}
2966*0Sstevel@tonic-gate 
2967*0Sstevel@tonic-gate /*
2968*0Sstevel@tonic-gate  * Called by rpcmod when the transport is (or should be) going away.  Informs
2969*0Sstevel@tonic-gate  * all callers waiting for replies and marks the entry in the connection
2970*0Sstevel@tonic-gate  * manager's list as unconnected, and either closing (close handshake in
2971*0Sstevel@tonic-gate  * progress) or dead.
2972*0Sstevel@tonic-gate  */
2973*0Sstevel@tonic-gate void
2974*0Sstevel@tonic-gate clnt_dispatch_notifyall(queue_t *q, int32_t msg_type, int32_t reason)
2975*0Sstevel@tonic-gate {
2976*0Sstevel@tonic-gate 	calllist_t *e;
2977*0Sstevel@tonic-gate 	call_table_t *ctp;
2978*0Sstevel@tonic-gate 	struct cm_xprt *cm_entry;
2979*0Sstevel@tonic-gate 	int have_connmgr_lock;
2980*0Sstevel@tonic-gate 	int i;
2981*0Sstevel@tonic-gate 
2982*0Sstevel@tonic-gate 	ASSERT((q->q_flag & QREADR) == 0);
2983*0Sstevel@tonic-gate 
2984*0Sstevel@tonic-gate 	RPCLOG(1, "clnt_dispatch_notifyall on queue %p", (void *)q);
2985*0Sstevel@tonic-gate 	RPCLOG(1, " received a notifcation prim type [%s]",
2986*0Sstevel@tonic-gate 	    rpc_tpiprim2name(msg_type));
2987*0Sstevel@tonic-gate 	RPCLOG(1, " and reason %d\n", reason);
2988*0Sstevel@tonic-gate 
2989*0Sstevel@tonic-gate 	/*
2990*0Sstevel@tonic-gate 	 * Find the transport entry in the connection manager's list, close
2991*0Sstevel@tonic-gate 	 * the transport and delete the entry.  In the case where rpcmod's
2992*0Sstevel@tonic-gate 	 * idle timer goes off, it sends us a T_ORDREL_REQ, indicating we
2993*0Sstevel@tonic-gate 	 * should gracefully close the connection.
2994*0Sstevel@tonic-gate 	 */
2995*0Sstevel@tonic-gate 	have_connmgr_lock = 1;
2996*0Sstevel@tonic-gate 	mutex_enter(&connmgr_lock);
2997*0Sstevel@tonic-gate 	for (cm_entry = cm_hd; cm_entry; cm_entry = cm_entry->x_next) {
2998*0Sstevel@tonic-gate 		ASSERT(cm_entry != cm_entry->x_next);
2999*0Sstevel@tonic-gate 		if (cm_entry->x_wq == q) {
3000*0Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(&connmgr_lock));
3001*0Sstevel@tonic-gate 			ASSERT(have_connmgr_lock == 1);
3002*0Sstevel@tonic-gate 			switch (msg_type) {
3003*0Sstevel@tonic-gate 			case T_ORDREL_REQ:
3004*0Sstevel@tonic-gate 
3005*0Sstevel@tonic-gate 				if (cm_entry->x_dead) {
3006*0Sstevel@tonic-gate 					RPCLOG(1, "idle timeout on dead "
3007*0Sstevel@tonic-gate 					    "connection: %p\n",
3008*0Sstevel@tonic-gate 					    (void *)cm_entry);
3009*0Sstevel@tonic-gate 					if (clnt_stop_idle != NULL)
3010*0Sstevel@tonic-gate 						(*clnt_stop_idle)(q);
3011*0Sstevel@tonic-gate 					break;
3012*0Sstevel@tonic-gate 				}
3013*0Sstevel@tonic-gate 
3014*0Sstevel@tonic-gate 				/*
3015*0Sstevel@tonic-gate 				 * Only mark the connection as dead if it is
3016*0Sstevel@tonic-gate 				 * connected and idle.
3017*0Sstevel@tonic-gate 				 * An unconnected connection has probably
3018*0Sstevel@tonic-gate 				 * gone idle because the server is down,
3019*0Sstevel@tonic-gate 				 * and when it comes back up there will be
3020*0Sstevel@tonic-gate 				 * retries that need to use that connection.
3021*0Sstevel@tonic-gate 				 */
3022*0Sstevel@tonic-gate 				if (cm_entry->x_connected ||
3023*0Sstevel@tonic-gate 				    cm_entry->x_doomed) {
3024*0Sstevel@tonic-gate 				    if (cm_entry->x_ordrel) {
3025*0Sstevel@tonic-gate 					if (cm_entry->x_closing == TRUE) {
3026*0Sstevel@tonic-gate 					/*
3027*0Sstevel@tonic-gate 					 * The connection is obviously
3028*0Sstevel@tonic-gate 					 * wedged due to a bug or problem
3029*0Sstevel@tonic-gate 					 * with the transport. Mark it
3030*0Sstevel@tonic-gate 					 * as dead. Otherwise we can leak
3031*0Sstevel@tonic-gate 					 * connections.
3032*0Sstevel@tonic-gate 					 */
3033*0Sstevel@tonic-gate 					    cm_entry->x_dead = TRUE;
3034*0Sstevel@tonic-gate 					    mutex_exit(&connmgr_lock);
3035*0Sstevel@tonic-gate 					    have_connmgr_lock = 0;
3036*0Sstevel@tonic-gate 					    if (clnt_stop_idle != NULL)
3037*0Sstevel@tonic-gate 						(*clnt_stop_idle)(q);
3038*0Sstevel@tonic-gate 					    break;
3039*0Sstevel@tonic-gate 					}
3040*0Sstevel@tonic-gate 					cm_entry->x_closing = TRUE;
3041*0Sstevel@tonic-gate 					connmgr_sndrel(cm_entry);
3042*0Sstevel@tonic-gate 					have_connmgr_lock = 0;
3043*0Sstevel@tonic-gate 				    } else {
3044*0Sstevel@tonic-gate 					cm_entry->x_dead = TRUE;
3045*0Sstevel@tonic-gate 					mutex_exit(&connmgr_lock);
3046*0Sstevel@tonic-gate 					have_connmgr_lock = 0;
3047*0Sstevel@tonic-gate 					if (clnt_stop_idle != NULL)
3048*0Sstevel@tonic-gate 						(*clnt_stop_idle)(q);
3049*0Sstevel@tonic-gate 				    }
3050*0Sstevel@tonic-gate 				} else {
3051*0Sstevel@tonic-gate 					/*
3052*0Sstevel@tonic-gate 					 * We don't mark the connection
3053*0Sstevel@tonic-gate 					 * as dead, but we turn off the
3054*0Sstevel@tonic-gate 					 * idle timer.
3055*0Sstevel@tonic-gate 					 */
3056*0Sstevel@tonic-gate 					mutex_exit(&connmgr_lock);
3057*0Sstevel@tonic-gate 					have_connmgr_lock = 0;
3058*0Sstevel@tonic-gate 					if (clnt_stop_idle != NULL)
3059*0Sstevel@tonic-gate 						(*clnt_stop_idle)(q);
3060*0Sstevel@tonic-gate 					RPCLOG(1, "clnt_dispatch_notifyall:"
3061*0Sstevel@tonic-gate 					    " ignoring timeout from rpcmod"
3062*0Sstevel@tonic-gate 					    " (q %p) because we are not "
3063*0Sstevel@tonic-gate 					    " connected\n", (void *)q);
3064*0Sstevel@tonic-gate 				}
3065*0Sstevel@tonic-gate 				break;
3066*0Sstevel@tonic-gate 			case T_ORDREL_IND:
3067*0Sstevel@tonic-gate 				/*
3068*0Sstevel@tonic-gate 				 * If this entry is marked closing, then we are
3069*0Sstevel@tonic-gate 				 * completing a close handshake, and the
3070*0Sstevel@tonic-gate 				 * connection is dead.  Otherwise, the server is
3071*0Sstevel@tonic-gate 				 * trying to close. Since the server will not
3072*0Sstevel@tonic-gate 				 * be sending any more RPC replies, we abort
3073*0Sstevel@tonic-gate 				 * the connection, including flushing
3074*0Sstevel@tonic-gate 				 * any RPC requests that are in-transit.
3075*0Sstevel@tonic-gate 				 */
3076*0Sstevel@tonic-gate 				if (cm_entry->x_closing) {
3077*0Sstevel@tonic-gate 					cm_entry->x_dead = TRUE;
3078*0Sstevel@tonic-gate 					mutex_exit(&connmgr_lock);
3079*0Sstevel@tonic-gate 					have_connmgr_lock = 0;
3080*0Sstevel@tonic-gate 					if (clnt_stop_idle != NULL)
3081*0Sstevel@tonic-gate 						(*clnt_stop_idle)(q);
3082*0Sstevel@tonic-gate 				} else {
3083*0Sstevel@tonic-gate 					/*
3084*0Sstevel@tonic-gate 					 * if we're getting a disconnect
3085*0Sstevel@tonic-gate 					 * before we've finished our
3086*0Sstevel@tonic-gate 					 * connect attempt, mark it for
3087*0Sstevel@tonic-gate 					 * later processing
3088*0Sstevel@tonic-gate 					 */
3089*0Sstevel@tonic-gate 					if (cm_entry->x_thread)
3090*0Sstevel@tonic-gate 						cm_entry->x_early_disc = TRUE;
3091*0Sstevel@tonic-gate 					else
3092*0Sstevel@tonic-gate 						cm_entry->x_connected = FALSE;
3093*0Sstevel@tonic-gate 					cm_entry->x_waitdis = TRUE;
3094*0Sstevel@tonic-gate 					connmgr_snddis(cm_entry);
3095*0Sstevel@tonic-gate 					have_connmgr_lock = 0;
3096*0Sstevel@tonic-gate 				}
3097*0Sstevel@tonic-gate 				break;
3098*0Sstevel@tonic-gate 
3099*0Sstevel@tonic-gate 			case T_ERROR_ACK:
3100*0Sstevel@tonic-gate 			case T_OK_ACK:
3101*0Sstevel@tonic-gate 				cm_entry->x_waitdis = FALSE;
3102*0Sstevel@tonic-gate 				cv_signal(&cm_entry->x_dis_cv);
3103*0Sstevel@tonic-gate 				mutex_exit(&connmgr_lock);
3104*0Sstevel@tonic-gate 				return;
3105*0Sstevel@tonic-gate 
3106*0Sstevel@tonic-gate 			case T_DISCON_REQ:
3107*0Sstevel@tonic-gate 				if (cm_entry->x_thread)
3108*0Sstevel@tonic-gate 					cm_entry->x_early_disc = TRUE;
3109*0Sstevel@tonic-gate 				else
3110*0Sstevel@tonic-gate 					cm_entry->x_connected = FALSE;
3111*0Sstevel@tonic-gate 				cm_entry->x_waitdis = TRUE;
3112*0Sstevel@tonic-gate 
3113*0Sstevel@tonic-gate 				connmgr_snddis(cm_entry);
3114*0Sstevel@tonic-gate 				have_connmgr_lock = 0;
3115*0Sstevel@tonic-gate 				break;
3116*0Sstevel@tonic-gate 
3117*0Sstevel@tonic-gate 			case T_DISCON_IND:
3118*0Sstevel@tonic-gate 			default:
3119*0Sstevel@tonic-gate 				/*
3120*0Sstevel@tonic-gate 				 * if we're getting a disconnect before
3121*0Sstevel@tonic-gate 				 * we've finished our connect attempt,
3122*0Sstevel@tonic-gate 				 * mark it for later processing
3123*0Sstevel@tonic-gate 				 */
3124*0Sstevel@tonic-gate 				if (cm_entry->x_closing) {
3125*0Sstevel@tonic-gate 					cm_entry->x_dead = TRUE;
3126*0Sstevel@tonic-gate 					mutex_exit(&connmgr_lock);
3127*0Sstevel@tonic-gate 					have_connmgr_lock = 0;
3128*0Sstevel@tonic-gate 					if (clnt_stop_idle != NULL)
3129*0Sstevel@tonic-gate 						(*clnt_stop_idle)(q);
3130*0Sstevel@tonic-gate 				} else {
3131*0Sstevel@tonic-gate 					if (cm_entry->x_thread) {
3132*0Sstevel@tonic-gate 						cm_entry->x_early_disc = TRUE;
3133*0Sstevel@tonic-gate 					} else {
3134*0Sstevel@tonic-gate 						cm_entry->x_dead = TRUE;
3135*0Sstevel@tonic-gate 						cm_entry->x_connected = FALSE;
3136*0Sstevel@tonic-gate 					}
3137*0Sstevel@tonic-gate 				}
3138*0Sstevel@tonic-gate 				break;
3139*0Sstevel@tonic-gate 			}
3140*0Sstevel@tonic-gate 			break;
3141*0Sstevel@tonic-gate 		}
3142*0Sstevel@tonic-gate 	}
3143*0Sstevel@tonic-gate 
3144*0Sstevel@tonic-gate 	if (have_connmgr_lock)
3145*0Sstevel@tonic-gate 		mutex_exit(&connmgr_lock);
3146*0Sstevel@tonic-gate 
3147*0Sstevel@tonic-gate 	if (msg_type == T_ERROR_ACK || msg_type == T_OK_ACK) {
3148*0Sstevel@tonic-gate 		RPCLOG(1, "clnt_dispatch_notifyall: (wq %p) could not find "
3149*0Sstevel@tonic-gate 		    "connmgr entry for discon ack\n", (void *)q);
3150*0Sstevel@tonic-gate 		return;
3151*0Sstevel@tonic-gate 	}
3152*0Sstevel@tonic-gate 
3153*0Sstevel@tonic-gate 	/*
3154*0Sstevel@tonic-gate 	 * Then kick all the clnt_pending calls out of their wait.  There
3155*0Sstevel@tonic-gate 	 * should be no clnt_pending calls in the case of rpcmod's idle
3156*0Sstevel@tonic-gate 	 * timer firing.
3157*0Sstevel@tonic-gate 	 */
3158*0Sstevel@tonic-gate 	for (i = 0; i < clnt_cots_hash_size; i++) {
3159*0Sstevel@tonic-gate 		ctp = &cots_call_ht[i];
3160*0Sstevel@tonic-gate 		mutex_enter(&ctp->ct_lock);
3161*0Sstevel@tonic-gate 		for (e = ctp->ct_call_next;
3162*0Sstevel@tonic-gate 			e != (calllist_t *)ctp;
3163*0Sstevel@tonic-gate 			e = e->call_next) {
3164*0Sstevel@tonic-gate 			if (e->call_wq == q && e->call_notified == FALSE) {
3165*0Sstevel@tonic-gate 				RPCLOG(1,
3166*0Sstevel@tonic-gate 				"clnt_dispatch_notifyall for queue %p ",
3167*0Sstevel@tonic-gate 					(void *)q);
3168*0Sstevel@tonic-gate 				RPCLOG(1, "aborting clnt_pending call %p\n",
3169*0Sstevel@tonic-gate 					(void *)e);
3170*0Sstevel@tonic-gate 
3171*0Sstevel@tonic-gate 				if (msg_type == T_DISCON_IND)
3172*0Sstevel@tonic-gate 					e->call_reason = reason;
3173*0Sstevel@tonic-gate 				e->call_notified = TRUE;
3174*0Sstevel@tonic-gate 				e->call_status = RPC_XPRTFAILED;
3175*0Sstevel@tonic-gate 				cv_signal(&e->call_cv);
3176*0Sstevel@tonic-gate 			}
3177*0Sstevel@tonic-gate 		}
3178*0Sstevel@tonic-gate 		mutex_exit(&ctp->ct_lock);
3179*0Sstevel@tonic-gate 	}
3180*0Sstevel@tonic-gate 
3181*0Sstevel@tonic-gate 	mutex_enter(&clnt_pending_lock);
3182*0Sstevel@tonic-gate 	for (e = clnt_pending; e; e = e->call_next) {
3183*0Sstevel@tonic-gate 		/*
3184*0Sstevel@tonic-gate 		 * Only signal those RPC handles that haven't been
3185*0Sstevel@tonic-gate 		 * signalled yet. Otherwise we can get a bogus call_reason.
3186*0Sstevel@tonic-gate 		 * This can happen if thread A is making a call over a
3187*0Sstevel@tonic-gate 		 * connection. If the server is killed, it will cause
3188*0Sstevel@tonic-gate 		 * reset, and reason will default to EIO as a result of
3189*0Sstevel@tonic-gate 		 * a T_ORDREL_IND. Thread B then attempts to recreate
3190*0Sstevel@tonic-gate 		 * the connection but gets a T_DISCON_IND. If we set the
3191*0Sstevel@tonic-gate 		 * call_reason code for all threads, then if thread A
3192*0Sstevel@tonic-gate 		 * hasn't been dispatched yet, it will get the wrong
3193*0Sstevel@tonic-gate 		 * reason. The bogus call_reason can make it harder to
3194*0Sstevel@tonic-gate 		 * discriminate between calls that fail because the
3195*0Sstevel@tonic-gate 		 * connection attempt failed versus those where the call
3196*0Sstevel@tonic-gate 		 * may have been executed on the server.
3197*0Sstevel@tonic-gate 		 */
3198*0Sstevel@tonic-gate 		if (e->call_wq == q && e->call_notified == FALSE) {
3199*0Sstevel@tonic-gate 			RPCLOG(1, "clnt_dispatch_notifyall for queue %p ",
3200*0Sstevel@tonic-gate 			    (void *)q);
3201*0Sstevel@tonic-gate 			RPCLOG(1, " aborting clnt_pending call %p\n",
3202*0Sstevel@tonic-gate 			    (void *)e);
3203*0Sstevel@tonic-gate 
3204*0Sstevel@tonic-gate 			if (msg_type == T_DISCON_IND)
3205*0Sstevel@tonic-gate 				e->call_reason = reason;
3206*0Sstevel@tonic-gate 			e->call_notified = TRUE;
3207*0Sstevel@tonic-gate 			/*
3208*0Sstevel@tonic-gate 			 * Let the caller timeout, else he will retry
3209*0Sstevel@tonic-gate 			 * immediately.
3210*0Sstevel@tonic-gate 			 */
3211*0Sstevel@tonic-gate 			e->call_status = RPC_XPRTFAILED;
3212*0Sstevel@tonic-gate 
3213*0Sstevel@tonic-gate 			/*
3214*0Sstevel@tonic-gate 			 * We used to just signal those threads
3215*0Sstevel@tonic-gate 			 * waiting for a connection, (call_xid = 0).
3216*0Sstevel@tonic-gate 			 * That meant that threads waiting for a response
3217*0Sstevel@tonic-gate 			 * waited till their timeout expired. This
3218*0Sstevel@tonic-gate 			 * could be a long time if they've specified a
3219*0Sstevel@tonic-gate 			 * maximum timeout. (2^31 - 1). So we
3220*0Sstevel@tonic-gate 			 * Signal all threads now.
3221*0Sstevel@tonic-gate 			 */
3222*0Sstevel@tonic-gate 			cv_signal(&e->call_cv);
3223*0Sstevel@tonic-gate 		}
3224*0Sstevel@tonic-gate 	}
3225*0Sstevel@tonic-gate 	mutex_exit(&clnt_pending_lock);
3226*0Sstevel@tonic-gate }
3227*0Sstevel@tonic-gate 
3228*0Sstevel@tonic-gate 
3229*0Sstevel@tonic-gate /*ARGSUSED*/
3230*0Sstevel@tonic-gate /*
3231*0Sstevel@tonic-gate  * after resuming a system that's been suspended for longer than the
3232*0Sstevel@tonic-gate  * NFS server's idle timeout (svc_idle_timeout for Solaris 2), rfscall()
3233*0Sstevel@tonic-gate  * generates "NFS server X not responding" and "NFS server X ok" messages;
3234*0Sstevel@tonic-gate  * here we reset inet connections to cause a re-connect and avoid those
3235*0Sstevel@tonic-gate  * NFS messages.  see 4045054
3236*0Sstevel@tonic-gate  */
3237*0Sstevel@tonic-gate boolean_t
3238*0Sstevel@tonic-gate connmgr_cpr_reset(void *arg, int code)
3239*0Sstevel@tonic-gate {
3240*0Sstevel@tonic-gate 	struct cm_xprt *cxp;
3241*0Sstevel@tonic-gate 
3242*0Sstevel@tonic-gate 	if (code == CB_CODE_CPR_CHKPT)
3243*0Sstevel@tonic-gate 		return (B_TRUE);
3244*0Sstevel@tonic-gate 
3245*0Sstevel@tonic-gate 	if (mutex_tryenter(&connmgr_lock) == 0)
3246*0Sstevel@tonic-gate 		return (B_FALSE);
3247*0Sstevel@tonic-gate 	for (cxp = cm_hd; cxp; cxp = cxp->x_next) {
3248*0Sstevel@tonic-gate 		if ((cxp->x_family == AF_INET || cxp->x_family == AF_INET6) &&
3249*0Sstevel@tonic-gate 			cxp->x_connected == TRUE) {
3250*0Sstevel@tonic-gate 			if (cxp->x_thread)
3251*0Sstevel@tonic-gate 				cxp->x_early_disc = TRUE;
3252*0Sstevel@tonic-gate 			else
3253*0Sstevel@tonic-gate 				cxp->x_connected = FALSE;
3254*0Sstevel@tonic-gate 			cxp->x_needdis = TRUE;
3255*0Sstevel@tonic-gate 		}
3256*0Sstevel@tonic-gate 	}
3257*0Sstevel@tonic-gate 	mutex_exit(&connmgr_lock);
3258*0Sstevel@tonic-gate 	return (B_TRUE);
3259*0Sstevel@tonic-gate }
3260*0Sstevel@tonic-gate 
3261*0Sstevel@tonic-gate void
3262*0Sstevel@tonic-gate clnt_cots_stats_init(zoneid_t zoneid, struct rpc_cots_client **statsp)
3263*0Sstevel@tonic-gate {
3264*0Sstevel@tonic-gate 
3265*0Sstevel@tonic-gate 	*statsp = (struct rpc_cots_client *)rpcstat_zone_init_common(zoneid,
3266*0Sstevel@tonic-gate 	    "unix", "rpc_cots_client", (const kstat_named_t *)&cots_rcstat_tmpl,
3267*0Sstevel@tonic-gate 	    sizeof (cots_rcstat_tmpl));
3268*0Sstevel@tonic-gate }
3269*0Sstevel@tonic-gate 
3270*0Sstevel@tonic-gate void
3271*0Sstevel@tonic-gate clnt_cots_stats_fini(zoneid_t zoneid, struct rpc_cots_client **statsp)
3272*0Sstevel@tonic-gate {
3273*0Sstevel@tonic-gate 	rpcstat_zone_fini_common(zoneid, "unix", "rpc_cots_client");
3274*0Sstevel@tonic-gate 	kmem_free(*statsp, sizeof (cots_rcstat_tmpl));
3275*0Sstevel@tonic-gate }
3276*0Sstevel@tonic-gate 
3277*0Sstevel@tonic-gate void
3278*0Sstevel@tonic-gate clnt_cots_init(void)
3279*0Sstevel@tonic-gate {
3280*0Sstevel@tonic-gate 	mutex_init(&connmgr_lock, NULL, MUTEX_DEFAULT, NULL);
3281*0Sstevel@tonic-gate 	mutex_init(&clnt_pending_lock, NULL, MUTEX_DEFAULT, NULL);
3282*0Sstevel@tonic-gate 
3283*0Sstevel@tonic-gate 	if (clnt_cots_hash_size < DEFAULT_MIN_HASH_SIZE)
3284*0Sstevel@tonic-gate 		clnt_cots_hash_size = DEFAULT_MIN_HASH_SIZE;
3285*0Sstevel@tonic-gate 
3286*0Sstevel@tonic-gate 	cots_call_ht = call_table_init(clnt_cots_hash_size);
3287*0Sstevel@tonic-gate 	zone_key_create(&zone_cots_key, NULL, NULL, clnt_zone_destroy);
3288*0Sstevel@tonic-gate }
3289*0Sstevel@tonic-gate 
3290*0Sstevel@tonic-gate void
3291*0Sstevel@tonic-gate clnt_cots_fini(void)
3292*0Sstevel@tonic-gate {
3293*0Sstevel@tonic-gate 	(void) zone_key_delete(zone_cots_key);
3294*0Sstevel@tonic-gate }
3295*0Sstevel@tonic-gate 
3296*0Sstevel@tonic-gate /*
3297*0Sstevel@tonic-gate  * Wait for TPI ack, returns success only if expected ack is received
3298*0Sstevel@tonic-gate  * within timeout period.
3299*0Sstevel@tonic-gate  */
3300*0Sstevel@tonic-gate 
3301*0Sstevel@tonic-gate static int
3302*0Sstevel@tonic-gate waitforack(calllist_t *e, t_scalar_t ack_prim, const struct timeval *waitp,
3303*0Sstevel@tonic-gate     bool_t nosignal)
3304*0Sstevel@tonic-gate {
3305*0Sstevel@tonic-gate 	union T_primitives *tpr;
3306*0Sstevel@tonic-gate 	clock_t timout;
3307*0Sstevel@tonic-gate 	int cv_stat = 1;
3308*0Sstevel@tonic-gate 
3309*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&clnt_pending_lock));
3310*0Sstevel@tonic-gate 	while (e->call_reply == NULL) {
3311*0Sstevel@tonic-gate 		if (waitp != NULL) {
3312*0Sstevel@tonic-gate 			timout = waitp->tv_sec * drv_usectohz(MICROSEC) +
3313*0Sstevel@tonic-gate 			    drv_usectohz(waitp->tv_usec) + lbolt;
3314*0Sstevel@tonic-gate 			if (nosignal)
3315*0Sstevel@tonic-gate 				cv_stat = cv_timedwait(&e->call_cv,
3316*0Sstevel@tonic-gate 				    &clnt_pending_lock, timout);
3317*0Sstevel@tonic-gate 			else
3318*0Sstevel@tonic-gate 				cv_stat = cv_timedwait_sig(&e->call_cv,
3319*0Sstevel@tonic-gate 				    &clnt_pending_lock, timout);
3320*0Sstevel@tonic-gate 		} else {
3321*0Sstevel@tonic-gate 			if (nosignal)
3322*0Sstevel@tonic-gate 				cv_wait(&e->call_cv, &clnt_pending_lock);
3323*0Sstevel@tonic-gate 			else
3324*0Sstevel@tonic-gate 				cv_stat = cv_wait_sig(&e->call_cv,
3325*0Sstevel@tonic-gate 				    &clnt_pending_lock);
3326*0Sstevel@tonic-gate 		}
3327*0Sstevel@tonic-gate 		if (cv_stat == -1)
3328*0Sstevel@tonic-gate 			return (ETIME);
3329*0Sstevel@tonic-gate 		if (cv_stat == 0)
3330*0Sstevel@tonic-gate 			return (EINTR);
3331*0Sstevel@tonic-gate 	}
3332*0Sstevel@tonic-gate 	tpr = (union T_primitives *)e->call_reply->b_rptr;
3333*0Sstevel@tonic-gate 	if (tpr->type == ack_prim)
3334*0Sstevel@tonic-gate 		return (0); /* Success */
3335*0Sstevel@tonic-gate 
3336*0Sstevel@tonic-gate 	if (tpr->type == T_ERROR_ACK) {
3337*0Sstevel@tonic-gate 		if (tpr->error_ack.TLI_error == TSYSERR)
3338*0Sstevel@tonic-gate 			return (tpr->error_ack.UNIX_error);
3339*0Sstevel@tonic-gate 		else
3340*0Sstevel@tonic-gate 			return (t_tlitosyserr(tpr->error_ack.TLI_error));
3341*0Sstevel@tonic-gate 	}
3342*0Sstevel@tonic-gate 
3343*0Sstevel@tonic-gate 	return (EPROTO); /* unknown or unexpected primitive */
3344*0Sstevel@tonic-gate }
3345