xref: /onnv-gate/usr/src/uts/common/rpc/clnt_rdma.c (revision 7387:0b3a92e31fd8)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7387SRobert.Gordon@Sun.COM  * Common Development and Distribution License (the "License").
6*7387SRobert.Gordon@Sun.COM  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*7387SRobert.Gordon@Sun.COM  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
260Sstevel@tonic-gate /* All Rights Reserved */
270Sstevel@tonic-gate /*
280Sstevel@tonic-gate  * Portions of this source code were derived from Berkeley
290Sstevel@tonic-gate  * 4.3 BSD under license from the Regents of the University of
300Sstevel@tonic-gate  * California.
310Sstevel@tonic-gate  */
320Sstevel@tonic-gate 
330Sstevel@tonic-gate #include <sys/param.h>
340Sstevel@tonic-gate #include <sys/types.h>
350Sstevel@tonic-gate #include <sys/user.h>
360Sstevel@tonic-gate #include <sys/systm.h>
370Sstevel@tonic-gate #include <sys/sysmacros.h>
380Sstevel@tonic-gate #include <sys/errno.h>
390Sstevel@tonic-gate #include <sys/kmem.h>
400Sstevel@tonic-gate #include <sys/debug.h>
410Sstevel@tonic-gate #include <sys/systm.h>
420Sstevel@tonic-gate #include <sys/kstat.h>
430Sstevel@tonic-gate #include <sys/t_lock.h>
440Sstevel@tonic-gate #include <sys/ddi.h>
450Sstevel@tonic-gate #include <sys/cmn_err.h>
460Sstevel@tonic-gate #include <sys/time.h>
470Sstevel@tonic-gate #include <sys/isa_defs.h>
480Sstevel@tonic-gate #include <sys/zone.h>
49*7387SRobert.Gordon@Sun.COM #include <sys/sdt.h>
500Sstevel@tonic-gate 
510Sstevel@tonic-gate #include <rpc/types.h>
520Sstevel@tonic-gate #include <rpc/xdr.h>
530Sstevel@tonic-gate #include <rpc/auth.h>
540Sstevel@tonic-gate #include <rpc/clnt.h>
550Sstevel@tonic-gate #include <rpc/rpc_msg.h>
560Sstevel@tonic-gate #include <rpc/rpc_rdma.h>
57*7387SRobert.Gordon@Sun.COM #include <nfs/nfs.h>
58*7387SRobert.Gordon@Sun.COM #include <nfs/nfs4_kprot.h>
590Sstevel@tonic-gate 
/*
 * Value placed in the credit field of each outgoing RPC/RDMA header
 * (see clnt_compose_rdma_header()).
 */
static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST;

/*
 * Helpers used while building an outgoing call: the RPC message itself,
 * the RDMA transport header, and the read/write/long-reply chunk lists.
 */
static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *,
			    XDR *, xdrproc_t, caddr_t);
static int  clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *,
		    XDR **, uint_t *);
static int clnt_setup_rlist(CONN *, XDR *, XDR *);
static int clnt_setup_wlist(CONN *, XDR *, XDR *);
static int clnt_setup_long_reply(CONN *, struct clist **, uint_t);
/* Forward declarations for the remaining static helpers. */
static void clnt_check_credit(CONN *);
static void clnt_return_credit(CONN *);
static void clnt_decode_long_reply(CONN *, struct clist *,
		struct clist *, XDR *, XDR **, struct clist *,
		struct clist *, uint_t, uint_t);

static void clnt_update_credit(CONN *, uint32_t);
static void check_dereg_wlist(CONN *, struct clist *);

/* Entry points collected in the rdma_clnt_ops vector. */
static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
    caddr_t, xdrproc_t, caddr_t, struct timeval);
static void	clnt_rdma_kabort(CLIENT *);
static void	clnt_rdma_kerror(CLIENT *, struct rpc_err *);
static bool_t	clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t);
static void	clnt_rdma_kdestroy(CLIENT *);
static bool_t	clnt_rdma_kcontrol(CLIENT *, int, char *);
static int	clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *,
    struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
870Sstevel@tonic-gate 
/*
 * Operations vector for RDMA based RPC.
 *
 * Installed as cl_ops on a client handle by clnt_rdma_kcreate() and
 * clnt_rdma_kinit().  The initializer is positional, so the entries
 * must stay in the order of the members of struct clnt_ops.
 */
static struct clnt_ops rdma_clnt_ops = {
	clnt_rdma_kcallit,	/* do rpc call */
	clnt_rdma_kabort,	/* abort call */
	clnt_rdma_kerror,	/* return error status */
	clnt_rdma_kfreeres,	/* free results */
	clnt_rdma_kdestroy,	/* destroy rpc handle */
	clnt_rdma_kcontrol,	/* the ioctl() of rpc */
	clnt_rdma_ksettimers,	/* set retry timers */
};
1000Sstevel@tonic-gate 
/*
 * The size of the preserialized RPC header information.
 * clnt_rdma_kcreate() encodes the call header into a buffer of exactly
 * this many bytes with xdr_callhdr().
 */
#define	CKU_HDRSIZE	20
/* Return codes of the static clnt_compose_*() and clnt_setup_*() helpers. */
#define	CLNT_RDMA_SUCCESS 0
#define	CLNT_RDMA_FAIL (-1)

/* Initial value of the auth refresh budget in clnt_rdma_kcallit(). */
#define	AUTH_REFRESH_COUNT 2
109*7387SRobert.Gordon@Sun.COM 
/*
 * Evaluates to non-zero when the auth flavor of the client handle is
 * RPCSEC_GSS.  "authh" is any expression yielding a pointer to the
 * client handle; it is parenthesized so that expressions such as
 * &client expand correctly.
 */
#define	IS_RPCSEC_GSS(authh)			\
	((authh)->cl_auth->ah_cred.oa_flavor == RPCSEC_GSS)
1120Sstevel@tonic-gate 
/*
 * Per RPC RDMA endpoint details.
 *
 * The CLIENT handle is embedded as the first member; ptoh() and htop()
 * convert between the handle and this structure.
 */
typedef struct cku_private {
	CLIENT			cku_client;	/* client handle */
	rdma_mod_t		*cku_rd_mod;	/* underlying RDMA mod */
	void			*cku_rd_handle;	/* underlying RDMA device */
	struct netbuf		cku_addr;	/* remote netbuf address */
	int			cku_addrfmly;	/* for finding addr_type */
	struct rpc_err		cku_err;	/* error status */
	struct cred		*cku_cred;	/* credentials */
	XDR			cku_outxdr;	/* xdr stream for output */
	/* XDR position recorded once the call message has been encoded */
	uint32_t		cku_outsz;
	XDR			cku_inxdr;	/* xdr stream for input */
	/*
	 * Preserialized call header (CKU_HDRSIZE bytes) followed by four
	 * bytes that receive the procedure number on the RPCSEC_GSS path
	 * of clnt_compose_rpcmsg().
	 */
	char			cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */
	uint32_t		cku_xid;	/* current XID */
} cku_private_t;
1300Sstevel@tonic-gate 
/*
 * clnt_rdma_kcallit() converts clnt_rdma_min_delay from seconds to clock
 * ticks and sleeps that long when a connection attempt fails for a reason
 * other than a timeout or a signal.
 */
#define	CLNT_RDMA_DELAY	10	/* secs to delay after a connection failure */
static int clnt_rdma_min_delay = CLNT_RDMA_DELAY;
1330Sstevel@tonic-gate 
/*
 * Client side RPC/RDMA counters.  Each member is a named 64-bit kstat
 * and is incremented through RCSTAT_INCR().  The initializer is
 * positional, so the name strings must stay in member order.
 */
struct {
	kstat_named_t	rccalls;
	kstat_named_t	rcbadcalls;
	kstat_named_t	rcbadxids;
	kstat_named_t	rctimeouts;
	kstat_named_t	rcnewcreds;
	kstat_named_t	rcbadverfs;
	kstat_named_t	rctimers;
	kstat_named_t	rccantconn;
	kstat_named_t	rcnomem;
	kstat_named_t	rcintrs;
	kstat_named_t	rclongrpcs;
} rdmarcstat = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "badxids",	KSTAT_DATA_UINT64 },
	{ "timeouts",	KSTAT_DATA_UINT64 },
	{ "newcreds",	KSTAT_DATA_UINT64 },
	{ "badverfs",	KSTAT_DATA_UINT64 },
	{ "timers",	KSTAT_DATA_UINT64 },
	{ "cantconn",	KSTAT_DATA_UINT64 },
	{ "nomem",	KSTAT_DATA_UINT64 },
	{ "interrupts", KSTAT_DATA_UINT64 },
	{ "longrpc", 	KSTAT_DATA_UINT64 }
};

/*
 * The same table viewed as a flat array of kstat_named_t, together with
 * the number of entries it contains.
 */
kstat_named_t *rdmarcstat_ptr = (kstat_named_t *)&rdmarcstat;
uint_t rdmarcstat_ndata = sizeof (rdmarcstat) / sizeof (kstat_named_t);
1620Sstevel@tonic-gate 
/* Debug switch; only present in DEBUG builds and off by default. */
#ifdef DEBUG
int rdma_clnt_debug = 0;
#endif
1660Sstevel@tonic-gate 
/*
 * RCSTAT_INCR(x) increments the counter named x in rdmarcstat.  When
 * accurate_stats is defined the update is made while holding
 * rdmarcstat_lock; otherwise it is a plain increment.
 *
 * Both variants expand to exactly one statement and are written with a
 * trailing semicolon at the point of use, e.g. RCSTAT_INCR(rccalls);
 * The do/while wrapper keeps the whole expansion under an unbraced
 * if/else instead of only its first statement.
 */
#ifdef accurate_stats
extern kmutex_t rdmarcstat_lock;    /* mutex for rcstat updates */

#define	RCSTAT_INCR(x)					\
	do {						\
		mutex_enter(&rdmarcstat_lock);		\
		rdmarcstat.x.value.ui64++;		\
		mutex_exit(&rdmarcstat_lock);		\
	} while (/* CONSTCOND */ 0)
#else
#define	RCSTAT_INCR(x)					\
	do {						\
		rdmarcstat.x.value.ui64++;		\
	} while (/* CONSTCOND */ 0)
#endif
1780Sstevel@tonic-gate 
/*
 * ptoh: private structure -> the CLIENT handle embedded in it.
 * htop: CLIENT handle -> private structure, through cl_private (set by
 * clnt_rdma_kcreate()).
 */
#define	ptoh(p)		(&((p)->cku_client))
#define	htop(h)		((cku_private_t *)((h)->cl_private))
1810Sstevel@tonic-gate 
182*7387SRobert.Gordon@Sun.COM uint_t
183*7387SRobert.Gordon@Sun.COM calc_length(uint_t len)
184*7387SRobert.Gordon@Sun.COM {
185*7387SRobert.Gordon@Sun.COM 	len = RNDUP(len);
186*7387SRobert.Gordon@Sun.COM 
187*7387SRobert.Gordon@Sun.COM 	if (len <= 64 * 1024) {
188*7387SRobert.Gordon@Sun.COM 		if (len > 32 * 1024) {
189*7387SRobert.Gordon@Sun.COM 			len = 64 * 1024;
190*7387SRobert.Gordon@Sun.COM 		} else {
191*7387SRobert.Gordon@Sun.COM 			if (len > 16 * 1024) {
192*7387SRobert.Gordon@Sun.COM 				len = 32 * 1024;
193*7387SRobert.Gordon@Sun.COM 			} else {
194*7387SRobert.Gordon@Sun.COM 				if (len > 8 * 1024) {
195*7387SRobert.Gordon@Sun.COM 					len = 16 * 1024;
196*7387SRobert.Gordon@Sun.COM 				} else {
197*7387SRobert.Gordon@Sun.COM 					len = 8 * 1024;
198*7387SRobert.Gordon@Sun.COM 				}
199*7387SRobert.Gordon@Sun.COM 			}
200*7387SRobert.Gordon@Sun.COM 		}
201*7387SRobert.Gordon@Sun.COM 	}
202*7387SRobert.Gordon@Sun.COM 	return (len);
203*7387SRobert.Gordon@Sun.COM }
2040Sstevel@tonic-gate int
2050Sstevel@tonic-gate clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family,
2060Sstevel@tonic-gate     rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl)
2070Sstevel@tonic-gate {
2080Sstevel@tonic-gate 	CLIENT *h;
2090Sstevel@tonic-gate 	struct cku_private *p;
2100Sstevel@tonic-gate 	struct rpc_msg call_msg;
2110Sstevel@tonic-gate 	rdma_registry_t *rp;
2120Sstevel@tonic-gate 
2130Sstevel@tonic-gate 	ASSERT(INGLOBALZONE(curproc));
2140Sstevel@tonic-gate 
2150Sstevel@tonic-gate 	if (cl == NULL)
2160Sstevel@tonic-gate 		return (EINVAL);
2170Sstevel@tonic-gate 	*cl = NULL;
2180Sstevel@tonic-gate 
2190Sstevel@tonic-gate 	p = kmem_zalloc(sizeof (*p), KM_SLEEP);
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate 	/*
2220Sstevel@tonic-gate 	 * Find underlying RDMATF plugin
2230Sstevel@tonic-gate 	 */
2240Sstevel@tonic-gate 	rw_enter(&rdma_lock, RW_READER);
2250Sstevel@tonic-gate 	rp = rdma_mod_head;
2260Sstevel@tonic-gate 	while (rp != NULL) {
2270Sstevel@tonic-gate 		if (strcmp(rp->r_mod->rdma_api, proto))
2280Sstevel@tonic-gate 			rp = rp->r_next;
2290Sstevel@tonic-gate 		else {
2300Sstevel@tonic-gate 			p->cku_rd_mod = rp->r_mod;
2310Sstevel@tonic-gate 			p->cku_rd_handle = handle;
2320Sstevel@tonic-gate 			break;
2330Sstevel@tonic-gate 		}
2340Sstevel@tonic-gate 	}
2350Sstevel@tonic-gate 	rw_exit(&rdma_lock);
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	if (p->cku_rd_mod == NULL) {
2380Sstevel@tonic-gate 		/*
2390Sstevel@tonic-gate 		 * Should not happen.
2400Sstevel@tonic-gate 		 * No matching RDMATF plugin.
2410Sstevel@tonic-gate 		 */
2420Sstevel@tonic-gate 		kmem_free(p, sizeof (struct cku_private));
2430Sstevel@tonic-gate 		return (EINVAL);
2440Sstevel@tonic-gate 	}
2450Sstevel@tonic-gate 
2460Sstevel@tonic-gate 	h = ptoh(p);
2470Sstevel@tonic-gate 	h->cl_ops = &rdma_clnt_ops;
2480Sstevel@tonic-gate 	h->cl_private = (caddr_t)p;
2490Sstevel@tonic-gate 	h->cl_auth = authkern_create();
2500Sstevel@tonic-gate 
2510Sstevel@tonic-gate 	/* call message, just used to pre-serialize below */
2520Sstevel@tonic-gate 	call_msg.rm_xid = 0;
2530Sstevel@tonic-gate 	call_msg.rm_direction = CALL;
2540Sstevel@tonic-gate 	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
2550Sstevel@tonic-gate 	call_msg.rm_call.cb_prog = pgm;
2560Sstevel@tonic-gate 	call_msg.rm_call.cb_vers = vers;
2570Sstevel@tonic-gate 
2580Sstevel@tonic-gate 	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE);
2590Sstevel@tonic-gate 	/* pre-serialize call message header */
2600Sstevel@tonic-gate 	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
2610Sstevel@tonic-gate 		XDR_DESTROY(&p->cku_outxdr);
2620Sstevel@tonic-gate 		auth_destroy(h->cl_auth);
2630Sstevel@tonic-gate 		kmem_free(p, sizeof (struct cku_private));
2640Sstevel@tonic-gate 		return (EINVAL);
2650Sstevel@tonic-gate 	}
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate 	/*
2680Sstevel@tonic-gate 	 * Set up the rpc information
2690Sstevel@tonic-gate 	 */
2700Sstevel@tonic-gate 	p->cku_cred = cred;
2710Sstevel@tonic-gate 	p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
2720Sstevel@tonic-gate 	p->cku_addr.maxlen = raddr->maxlen;
2730Sstevel@tonic-gate 	p->cku_addr.len = raddr->len;
2740Sstevel@tonic-gate 	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
2750Sstevel@tonic-gate 	p->cku_addrfmly = family;
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 	*cl = h;
2780Sstevel@tonic-gate 	return (0);
2790Sstevel@tonic-gate }
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate static void
2820Sstevel@tonic-gate clnt_rdma_kdestroy(CLIENT *h)
2830Sstevel@tonic-gate {
2840Sstevel@tonic-gate 	struct cku_private *p = htop(h);
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 	kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
2870Sstevel@tonic-gate 	kmem_free(p, sizeof (*p));
2880Sstevel@tonic-gate }
2890Sstevel@tonic-gate 
2900Sstevel@tonic-gate void
2910Sstevel@tonic-gate clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr,
2920Sstevel@tonic-gate     struct cred *cred)
2930Sstevel@tonic-gate {
2940Sstevel@tonic-gate 	struct cku_private *p = htop(h);
2950Sstevel@tonic-gate 	rdma_registry_t *rp;
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate 	ASSERT(INGLOBALZONE(curproc));
2980Sstevel@tonic-gate 	/*
2990Sstevel@tonic-gate 	 * Find underlying RDMATF plugin
3000Sstevel@tonic-gate 	 */
3010Sstevel@tonic-gate 	p->cku_rd_mod = NULL;
3020Sstevel@tonic-gate 	rw_enter(&rdma_lock, RW_READER);
3030Sstevel@tonic-gate 	rp = rdma_mod_head;
3040Sstevel@tonic-gate 	while (rp != NULL) {
3050Sstevel@tonic-gate 		if (strcmp(rp->r_mod->rdma_api, proto))
3060Sstevel@tonic-gate 			rp = rp->r_next;
3070Sstevel@tonic-gate 		else {
3080Sstevel@tonic-gate 			p->cku_rd_mod = rp->r_mod;
3090Sstevel@tonic-gate 			p->cku_rd_handle = handle;
3100Sstevel@tonic-gate 			break;
3110Sstevel@tonic-gate 		}
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	}
3140Sstevel@tonic-gate 	rw_exit(&rdma_lock);
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	/*
3170Sstevel@tonic-gate 	 * Set up the rpc information
3180Sstevel@tonic-gate 	 */
3190Sstevel@tonic-gate 	p->cku_cred = cred;
3200Sstevel@tonic-gate 	p->cku_xid = 0;
3210Sstevel@tonic-gate 
3220Sstevel@tonic-gate 	if (p->cku_addr.maxlen < raddr->len) {
3230Sstevel@tonic-gate 		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
3240Sstevel@tonic-gate 			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
3250Sstevel@tonic-gate 		p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
3260Sstevel@tonic-gate 		p->cku_addr.maxlen = raddr->maxlen;
3270Sstevel@tonic-gate 	}
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	p->cku_addr.len = raddr->len;
3300Sstevel@tonic-gate 	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
3310Sstevel@tonic-gate 	h->cl_ops = &rdma_clnt_ops;
3320Sstevel@tonic-gate }
3330Sstevel@tonic-gate 
334*7387SRobert.Gordon@Sun.COM static int
335*7387SRobert.Gordon@Sun.COM clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum,
336*7387SRobert.Gordon@Sun.COM     rdma_buf_t *rpcmsg, XDR *xdrs,
337*7387SRobert.Gordon@Sun.COM     xdrproc_t xdr_args, caddr_t argsp)
338*7387SRobert.Gordon@Sun.COM {
339*7387SRobert.Gordon@Sun.COM 	cku_private_t *p = htop(h);
340*7387SRobert.Gordon@Sun.COM 
341*7387SRobert.Gordon@Sun.COM 	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
342*7387SRobert.Gordon@Sun.COM 		/*
343*7387SRobert.Gordon@Sun.COM 		 * Copy in the preserialized RPC header
344*7387SRobert.Gordon@Sun.COM 		 * information.
345*7387SRobert.Gordon@Sun.COM 		 */
346*7387SRobert.Gordon@Sun.COM 		bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE);
347*7387SRobert.Gordon@Sun.COM 
348*7387SRobert.Gordon@Sun.COM 		/*
349*7387SRobert.Gordon@Sun.COM 		 * transaction id is the 1st thing in the output
350*7387SRobert.Gordon@Sun.COM 		 * buffer.
351*7387SRobert.Gordon@Sun.COM 		 */
352*7387SRobert.Gordon@Sun.COM 		/* LINTED pointer alignment */
353*7387SRobert.Gordon@Sun.COM 		(*(uint32_t *)(rpcmsg->addr)) = p->cku_xid;
354*7387SRobert.Gordon@Sun.COM 
355*7387SRobert.Gordon@Sun.COM 		/* Skip the preserialized stuff. */
356*7387SRobert.Gordon@Sun.COM 		XDR_SETPOS(xdrs, CKU_HDRSIZE);
357*7387SRobert.Gordon@Sun.COM 
358*7387SRobert.Gordon@Sun.COM 		/* Serialize dynamic stuff into the output buffer. */
359*7387SRobert.Gordon@Sun.COM 		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
360*7387SRobert.Gordon@Sun.COM 		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
361*7387SRobert.Gordon@Sun.COM 		    (!(*xdr_args)(xdrs, argsp))) {
362*7387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__dynargs);
363*7387SRobert.Gordon@Sun.COM 			return (CLNT_RDMA_FAIL);
364*7387SRobert.Gordon@Sun.COM 		}
365*7387SRobert.Gordon@Sun.COM 		p->cku_outsz = XDR_GETPOS(xdrs);
366*7387SRobert.Gordon@Sun.COM 	} else {
367*7387SRobert.Gordon@Sun.COM 		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE];
368*7387SRobert.Gordon@Sun.COM 		IXDR_PUT_U_INT32(uproc, procnum);
369*7387SRobert.Gordon@Sun.COM 		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
370*7387SRobert.Gordon@Sun.COM 		XDR_SETPOS(xdrs, 0);
371*7387SRobert.Gordon@Sun.COM 
372*7387SRobert.Gordon@Sun.COM 		/* Serialize the procedure number and the arguments. */
373*7387SRobert.Gordon@Sun.COM 		if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
374*7387SRobert.Gordon@Sun.COM 		    CKU_HDRSIZE+4, xdrs, xdr_args, argsp)) {
375*7387SRobert.Gordon@Sun.COM 			if (rpcmsg->addr != xdrs->x_base) {
376*7387SRobert.Gordon@Sun.COM 				rpcmsg->addr = xdrs->x_base;
377*7387SRobert.Gordon@Sun.COM 				rpcmsg->len = xdr_getbufsize(xdrs);
378*7387SRobert.Gordon@Sun.COM 			}
379*7387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__procnum);
380*7387SRobert.Gordon@Sun.COM 			return (CLNT_RDMA_FAIL);
381*7387SRobert.Gordon@Sun.COM 		}
382*7387SRobert.Gordon@Sun.COM 		/*
383*7387SRobert.Gordon@Sun.COM 		 * If we had to allocate a new buffer while encoding
384*7387SRobert.Gordon@Sun.COM 		 * then update the addr and len.
385*7387SRobert.Gordon@Sun.COM 		 */
386*7387SRobert.Gordon@Sun.COM 		if (rpcmsg->addr != xdrs->x_base) {
387*7387SRobert.Gordon@Sun.COM 			rpcmsg->addr = xdrs->x_base;
388*7387SRobert.Gordon@Sun.COM 			rpcmsg->len = xdr_getbufsize(xdrs);
389*7387SRobert.Gordon@Sun.COM 		}
390*7387SRobert.Gordon@Sun.COM 
391*7387SRobert.Gordon@Sun.COM 		p->cku_outsz = XDR_GETPOS(xdrs);
392*7387SRobert.Gordon@Sun.COM 		DTRACE_PROBE1(krpc__i__compose__size__sec, int, p->cku_outsz)
393*7387SRobert.Gordon@Sun.COM 	}
394*7387SRobert.Gordon@Sun.COM 
395*7387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
396*7387SRobert.Gordon@Sun.COM }
397*7387SRobert.Gordon@Sun.COM 
398*7387SRobert.Gordon@Sun.COM static int
399*7387SRobert.Gordon@Sun.COM clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg,
400*7387SRobert.Gordon@Sun.COM     XDR **xdrs, uint_t *op)
401*7387SRobert.Gordon@Sun.COM {
402*7387SRobert.Gordon@Sun.COM 	cku_private_t *p = htop(h);
403*7387SRobert.Gordon@Sun.COM 	uint_t vers;
404*7387SRobert.Gordon@Sun.COM 	uint32_t rdma_credit = rdma_bufs_rqst;
405*7387SRobert.Gordon@Sun.COM 
406*7387SRobert.Gordon@Sun.COM 	vers = RPCRDMA_VERS;
407*7387SRobert.Gordon@Sun.COM 	clmsg->type = SEND_BUFFER;
408*7387SRobert.Gordon@Sun.COM 
409*7387SRobert.Gordon@Sun.COM 	if (rdma_buf_alloc(conn, clmsg)) {
410*7387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_FAIL);
411*7387SRobert.Gordon@Sun.COM 	}
412*7387SRobert.Gordon@Sun.COM 
413*7387SRobert.Gordon@Sun.COM 	*xdrs = &p->cku_outxdr;
414*7387SRobert.Gordon@Sun.COM 	xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE);
415*7387SRobert.Gordon@Sun.COM 
416*7387SRobert.Gordon@Sun.COM 	(*(uint32_t *)clmsg->addr) = p->cku_xid;
417*7387SRobert.Gordon@Sun.COM 	XDR_SETPOS(*xdrs, sizeof (uint32_t));
418*7387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(*xdrs, &vers);
419*7387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(*xdrs, &rdma_credit);
420*7387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(*xdrs, op);
421*7387SRobert.Gordon@Sun.COM 
422*7387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
423*7387SRobert.Gordon@Sun.COM }
424*7387SRobert.Gordon@Sun.COM 
425*7387SRobert.Gordon@Sun.COM /*
426*7387SRobert.Gordon@Sun.COM  * If xp_cl is NULL value, then the RPC payload will NOT carry
427*7387SRobert.Gordon@Sun.COM  * an RDMA READ chunk list, in this case we insert FALSE into
428*7387SRobert.Gordon@Sun.COM  * the XDR stream. Otherwise we use the clist and RDMA register
429*7387SRobert.Gordon@Sun.COM  * the memory and encode the clist into the outbound XDR stream.
430*7387SRobert.Gordon@Sun.COM  */
431*7387SRobert.Gordon@Sun.COM static int
432*7387SRobert.Gordon@Sun.COM clnt_setup_rlist(CONN *conn, XDR *xdrs, XDR *call_xdrp)
433*7387SRobert.Gordon@Sun.COM {
434*7387SRobert.Gordon@Sun.COM 	int status;
435*7387SRobert.Gordon@Sun.COM 	struct clist *rclp;
436*7387SRobert.Gordon@Sun.COM 	int32_t xdr_flag = XDR_RDMA_RLIST_REG;
437*7387SRobert.Gordon@Sun.COM 
438*7387SRobert.Gordon@Sun.COM 	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &rclp);
439*7387SRobert.Gordon@Sun.COM 
440*7387SRobert.Gordon@Sun.COM 	if (rclp != NULL) {
441*7387SRobert.Gordon@Sun.COM 		status = clist_register(conn, rclp, CLIST_REG_SOURCE);
442*7387SRobert.Gordon@Sun.COM 		if (status != RDMA_SUCCESS) {
443*7387SRobert.Gordon@Sun.COM 			return (CLNT_RDMA_FAIL);
444*7387SRobert.Gordon@Sun.COM 		}
445*7387SRobert.Gordon@Sun.COM 		XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag);
446*7387SRobert.Gordon@Sun.COM 	}
447*7387SRobert.Gordon@Sun.COM 	(void) xdr_do_clist(xdrs, &rclp);
448*7387SRobert.Gordon@Sun.COM 
449*7387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
450*7387SRobert.Gordon@Sun.COM }
451*7387SRobert.Gordon@Sun.COM 
452*7387SRobert.Gordon@Sun.COM /*
453*7387SRobert.Gordon@Sun.COM  * If xp_wcl is NULL value, then the RPC payload will NOT carry
454*7387SRobert.Gordon@Sun.COM  * an RDMA WRITE chunk list, in this case we insert FALSE into
455*7387SRobert.Gordon@Sun.COM  * the XDR stream. Otherwise we use the clist and  RDMA register
456*7387SRobert.Gordon@Sun.COM  * the memory and encode the clist into the outbound XDR stream.
457*7387SRobert.Gordon@Sun.COM  */
458*7387SRobert.Gordon@Sun.COM static int
459*7387SRobert.Gordon@Sun.COM clnt_setup_wlist(CONN *conn, XDR *xdrs, XDR *call_xdrp)
460*7387SRobert.Gordon@Sun.COM {
461*7387SRobert.Gordon@Sun.COM 	int status;
462*7387SRobert.Gordon@Sun.COM 	struct clist *wlist;
463*7387SRobert.Gordon@Sun.COM 	int32_t xdr_flag = XDR_RDMA_WLIST_REG;
464*7387SRobert.Gordon@Sun.COM 
465*7387SRobert.Gordon@Sun.COM 	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_WLIST, &wlist);
466*7387SRobert.Gordon@Sun.COM 
467*7387SRobert.Gordon@Sun.COM 	if (wlist != NULL) {
468*7387SRobert.Gordon@Sun.COM 		status = clist_register(conn, wlist, CLIST_REG_DST);
469*7387SRobert.Gordon@Sun.COM 		if (status != RDMA_SUCCESS) {
470*7387SRobert.Gordon@Sun.COM 			return (CLNT_RDMA_FAIL);
471*7387SRobert.Gordon@Sun.COM 		}
472*7387SRobert.Gordon@Sun.COM 		XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag);
473*7387SRobert.Gordon@Sun.COM 	}
474*7387SRobert.Gordon@Sun.COM 
475*7387SRobert.Gordon@Sun.COM 	if (!xdr_encode_wlist(xdrs, wlist))
476*7387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_FAIL);
477*7387SRobert.Gordon@Sun.COM 
478*7387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
479*7387SRobert.Gordon@Sun.COM }
480*7387SRobert.Gordon@Sun.COM 
481*7387SRobert.Gordon@Sun.COM static int
482*7387SRobert.Gordon@Sun.COM clnt_setup_long_reply(CONN *conn, struct clist **clpp, uint_t length)
483*7387SRobert.Gordon@Sun.COM {
484*7387SRobert.Gordon@Sun.COM 	if (length == 0) {
485*7387SRobert.Gordon@Sun.COM 		*clpp = NULL;
486*7387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_SUCCESS);
487*7387SRobert.Gordon@Sun.COM 	}
488*7387SRobert.Gordon@Sun.COM 
489*7387SRobert.Gordon@Sun.COM 	*clpp = clist_alloc();
490*7387SRobert.Gordon@Sun.COM 
491*7387SRobert.Gordon@Sun.COM 	(*clpp)->rb_longbuf.len = calc_length(length);
492*7387SRobert.Gordon@Sun.COM 	(*clpp)->rb_longbuf.type = RDMA_LONG_BUFFER;
493*7387SRobert.Gordon@Sun.COM 
494*7387SRobert.Gordon@Sun.COM 	if (rdma_buf_alloc(conn, &((*clpp)->rb_longbuf))) {
495*7387SRobert.Gordon@Sun.COM 		clist_free(*clpp);
496*7387SRobert.Gordon@Sun.COM 		*clpp = NULL;
497*7387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_FAIL);
498*7387SRobert.Gordon@Sun.COM 	}
499*7387SRobert.Gordon@Sun.COM 
500*7387SRobert.Gordon@Sun.COM 	(*clpp)->u.c_daddr3 = (*clpp)->rb_longbuf.addr;
501*7387SRobert.Gordon@Sun.COM 	(*clpp)->c_len = (*clpp)->rb_longbuf.len;
502*7387SRobert.Gordon@Sun.COM 	(*clpp)->c_next = NULL;
503*7387SRobert.Gordon@Sun.COM 	(*clpp)->c_dmemhandle = (*clpp)->rb_longbuf.handle;
504*7387SRobert.Gordon@Sun.COM 
505*7387SRobert.Gordon@Sun.COM 	if (clist_register(conn, *clpp, CLIST_REG_DST)) {
506*7387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__longrep_regbuf);
507*7387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &((*clpp)->rb_longbuf));
508*7387SRobert.Gordon@Sun.COM 		clist_free(*clpp);
509*7387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_FAIL);
510*7387SRobert.Gordon@Sun.COM 	}
511*7387SRobert.Gordon@Sun.COM 
512*7387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
513*7387SRobert.Gordon@Sun.COM }
514*7387SRobert.Gordon@Sun.COM 
5150Sstevel@tonic-gate /* ARGSUSED */
5160Sstevel@tonic-gate static enum clnt_stat
5170Sstevel@tonic-gate clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
518*7387SRobert.Gordon@Sun.COM     caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp,
519*7387SRobert.Gordon@Sun.COM     struct timeval wait)
5200Sstevel@tonic-gate {
5210Sstevel@tonic-gate 	cku_private_t *p = htop(h);
522*7387SRobert.Gordon@Sun.COM 
523*7387SRobert.Gordon@Sun.COM 	int 	try_call_again;
524*7387SRobert.Gordon@Sun.COM 	int	refresh_attempt = AUTH_REFRESH_COUNT;
5250Sstevel@tonic-gate 	int 	status;
526*7387SRobert.Gordon@Sun.COM 	int 	msglen;
527*7387SRobert.Gordon@Sun.COM 
528*7387SRobert.Gordon@Sun.COM 	XDR	*call_xdrp, callxdr; /* for xdrrdma encoding the RPC call */
529*7387SRobert.Gordon@Sun.COM 	XDR	*reply_xdrp, replyxdr; /* for xdrrdma decoding the RPC reply */
530*7387SRobert.Gordon@Sun.COM 	XDR 	*rdmahdr_o_xdrs, *rdmahdr_i_xdrs;
531*7387SRobert.Gordon@Sun.COM 
5320Sstevel@tonic-gate 	struct rpc_msg 	reply_msg;
533*7387SRobert.Gordon@Sun.COM 
534*7387SRobert.Gordon@Sun.COM 	struct clist *cl_sendlist;
535*7387SRobert.Gordon@Sun.COM 	struct clist *cl_recvlist;
536*7387SRobert.Gordon@Sun.COM 	struct clist *cl;
537*7387SRobert.Gordon@Sun.COM 	struct clist *cl_rpcmsg;
538*7387SRobert.Gordon@Sun.COM 	struct clist *cl_rdma_reply;
539*7387SRobert.Gordon@Sun.COM 	struct clist *cl_rpcreply_wlist;
540*7387SRobert.Gordon@Sun.COM 	struct clist *cl_long_reply;
541*7387SRobert.Gordon@Sun.COM 
542*7387SRobert.Gordon@Sun.COM 	uint_t vers;
543*7387SRobert.Gordon@Sun.COM 	uint_t op;
5440Sstevel@tonic-gate 	uint_t off;
545*7387SRobert.Gordon@Sun.COM 	uint32_t seg_array_len;
546*7387SRobert.Gordon@Sun.COM 	uint_t long_reply_len;
547*7387SRobert.Gordon@Sun.COM 	uint_t rpcsec_gss;
548*7387SRobert.Gordon@Sun.COM 	uint_t gss_i_or_p;
549*7387SRobert.Gordon@Sun.COM 
5500Sstevel@tonic-gate 	CONN *conn = NULL;
551*7387SRobert.Gordon@Sun.COM 	rdma_buf_t clmsg;
552*7387SRobert.Gordon@Sun.COM 	rdma_buf_t rpcmsg;
553*7387SRobert.Gordon@Sun.COM 	rdma_chunkinfo_lengths_t rcil;
554*7387SRobert.Gordon@Sun.COM 
5550Sstevel@tonic-gate 	clock_t	ticks;
556*7387SRobert.Gordon@Sun.COM 	bool_t wlist_exists_reply;
557*7387SRobert.Gordon@Sun.COM 
558*7387SRobert.Gordon@Sun.COM 	uint32_t rdma_credit = rdma_bufs_rqst;
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate 	RCSTAT_INCR(rccalls);
561*7387SRobert.Gordon@Sun.COM 
562*7387SRobert.Gordon@Sun.COM call_again:
563*7387SRobert.Gordon@Sun.COM 
564*7387SRobert.Gordon@Sun.COM 	bzero(&clmsg, sizeof (clmsg));
565*7387SRobert.Gordon@Sun.COM 	bzero(&rpcmsg, sizeof (rpcmsg));
566*7387SRobert.Gordon@Sun.COM 	try_call_again = 0;
567*7387SRobert.Gordon@Sun.COM 	cl_sendlist = NULL;
568*7387SRobert.Gordon@Sun.COM 	cl_recvlist = NULL;
569*7387SRobert.Gordon@Sun.COM 	cl = NULL;
570*7387SRobert.Gordon@Sun.COM 	cl_rpcmsg = NULL;
571*7387SRobert.Gordon@Sun.COM 	cl_rdma_reply = NULL;
572*7387SRobert.Gordon@Sun.COM 	call_xdrp = NULL;
573*7387SRobert.Gordon@Sun.COM 	reply_xdrp = NULL;
574*7387SRobert.Gordon@Sun.COM 	wlist_exists_reply  = FALSE;
575*7387SRobert.Gordon@Sun.COM 	cl_rpcreply_wlist = NULL;
576*7387SRobert.Gordon@Sun.COM 	cl_long_reply = NULL;
577*7387SRobert.Gordon@Sun.COM 	rcil.rcil_len = 0;
578*7387SRobert.Gordon@Sun.COM 	rcil.rcil_len_alt = 0;
579*7387SRobert.Gordon@Sun.COM 	long_reply_len = 0;
580*7387SRobert.Gordon@Sun.COM 
5810Sstevel@tonic-gate 	/*
5820Sstevel@tonic-gate 	 * Get unique xid
5830Sstevel@tonic-gate 	 */
5840Sstevel@tonic-gate 	if (p->cku_xid == 0)
5850Sstevel@tonic-gate 		p->cku_xid = alloc_xid();
5860Sstevel@tonic-gate 
5870Sstevel@tonic-gate 	status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr,
5880Sstevel@tonic-gate 	    p->cku_addrfmly, p->cku_rd_handle, &conn);
5890Sstevel@tonic-gate 
590*7387SRobert.Gordon@Sun.COM 	/*
591*7387SRobert.Gordon@Sun.COM 	 * If there is a problem with the connection reflect the issue
592*7387SRobert.Gordon@Sun.COM 	 * back to the higher level to address, we MAY delay for a short
593*7387SRobert.Gordon@Sun.COM 	 * period so that we are kind to the transport.
594*7387SRobert.Gordon@Sun.COM 	 */
5950Sstevel@tonic-gate 	if (conn == NULL) {
5960Sstevel@tonic-gate 		/*
5970Sstevel@tonic-gate 		 * Connect failed to server. Could be because of one
5980Sstevel@tonic-gate 		 * of several things. In some cases we don't want
5990Sstevel@tonic-gate 		 * the caller to retry immediately - delay before
6000Sstevel@tonic-gate 		 * returning to caller.
6010Sstevel@tonic-gate 		 */
6020Sstevel@tonic-gate 		switch (status) {
6030Sstevel@tonic-gate 		case RDMA_TIMEDOUT:
6040Sstevel@tonic-gate 			/*
6050Sstevel@tonic-gate 			 * Already timed out. No need to delay
6060Sstevel@tonic-gate 			 * some more.
6070Sstevel@tonic-gate 			 */
6080Sstevel@tonic-gate 			p->cku_err.re_status = RPC_TIMEDOUT;
6090Sstevel@tonic-gate 			p->cku_err.re_errno = ETIMEDOUT;
6100Sstevel@tonic-gate 			break;
6110Sstevel@tonic-gate 		case RDMA_INTR:
6120Sstevel@tonic-gate 			/*
6130Sstevel@tonic-gate 			 * Failed because of an signal. Very likely
6140Sstevel@tonic-gate 			 * the caller will not retry.
6150Sstevel@tonic-gate 			 */
6160Sstevel@tonic-gate 			p->cku_err.re_status = RPC_INTR;
6170Sstevel@tonic-gate 			p->cku_err.re_errno = EINTR;
6180Sstevel@tonic-gate 			break;
6190Sstevel@tonic-gate 		default:
6200Sstevel@tonic-gate 			/*
6210Sstevel@tonic-gate 			 * All other failures - server down or service
6220Sstevel@tonic-gate 			 * down or temporary resource failure. Delay before
6230Sstevel@tonic-gate 			 * returning to caller.
6240Sstevel@tonic-gate 			 */
6250Sstevel@tonic-gate 			ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
6260Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTCONNECT;
6270Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
6280Sstevel@tonic-gate 
6290Sstevel@tonic-gate 			if (h->cl_nosignal == TRUE) {
6300Sstevel@tonic-gate 				delay(ticks);
6310Sstevel@tonic-gate 			} else {
6320Sstevel@tonic-gate 				if (delay_sig(ticks) == EINTR) {
6330Sstevel@tonic-gate 					p->cku_err.re_status = RPC_INTR;
6340Sstevel@tonic-gate 					p->cku_err.re_errno = EINTR;
6350Sstevel@tonic-gate 				}
6360Sstevel@tonic-gate 			}
6370Sstevel@tonic-gate 			break;
6380Sstevel@tonic-gate 		}
6390Sstevel@tonic-gate 
6400Sstevel@tonic-gate 		return (p->cku_err.re_status);
6410Sstevel@tonic-gate 	}
642*7387SRobert.Gordon@Sun.COM 
643*7387SRobert.Gordon@Sun.COM 	clnt_check_credit(conn);
6440Sstevel@tonic-gate 
645*7387SRobert.Gordon@Sun.COM 	status = CLNT_RDMA_FAIL;
646*7387SRobert.Gordon@Sun.COM 
647*7387SRobert.Gordon@Sun.COM 	rpcsec_gss = gss_i_or_p = FALSE;
6480Sstevel@tonic-gate 
649*7387SRobert.Gordon@Sun.COM 	if (IS_RPCSEC_GSS(h)) {
650*7387SRobert.Gordon@Sun.COM 		rpcsec_gss = TRUE;
651*7387SRobert.Gordon@Sun.COM 		if (rpc_gss_get_service_type(h->cl_auth) ==
652*7387SRobert.Gordon@Sun.COM 		    rpc_gss_svc_integrity ||
653*7387SRobert.Gordon@Sun.COM 		    rpc_gss_get_service_type(h->cl_auth) ==
654*7387SRobert.Gordon@Sun.COM 		    rpc_gss_svc_privacy)
655*7387SRobert.Gordon@Sun.COM 			gss_i_or_p = TRUE;
6560Sstevel@tonic-gate 	}
6570Sstevel@tonic-gate 
658*7387SRobert.Gordon@Sun.COM 	/*
659*7387SRobert.Gordon@Sun.COM 	 * Try a regular RDMA message if RPCSEC_GSS is not being used
660*7387SRobert.Gordon@Sun.COM 	 * or if RPCSEC_GSS is being used for authentication only.
661*7387SRobert.Gordon@Sun.COM 	 */
662*7387SRobert.Gordon@Sun.COM 	if (rpcsec_gss == FALSE ||
663*7387SRobert.Gordon@Sun.COM 	    (rpcsec_gss == TRUE && gss_i_or_p == FALSE)) {
6640Sstevel@tonic-gate 		/*
665*7387SRobert.Gordon@Sun.COM 		 * Grab a send buffer for the request.  Try to
666*7387SRobert.Gordon@Sun.COM 		 * encode it to see if it fits. If not, then it
667*7387SRobert.Gordon@Sun.COM 		 * needs to be sent in a chunk.
6680Sstevel@tonic-gate 		 */
669*7387SRobert.Gordon@Sun.COM 		rpcmsg.type = SEND_BUFFER;
670*7387SRobert.Gordon@Sun.COM 		if (rdma_buf_alloc(conn, &rpcmsg)) {
671*7387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(krpc__e__clntrdma__callit_nobufs);
6720Sstevel@tonic-gate 			goto done;
6730Sstevel@tonic-gate 		}
674*7387SRobert.Gordon@Sun.COM 
675*7387SRobert.Gordon@Sun.COM 		/* First try to encode into regular send buffer */
676*7387SRobert.Gordon@Sun.COM 		op = RDMA_MSG;
6770Sstevel@tonic-gate 
678*7387SRobert.Gordon@Sun.COM 		call_xdrp = &callxdr;
679*7387SRobert.Gordon@Sun.COM 
680*7387SRobert.Gordon@Sun.COM 		xdrrdma_create(call_xdrp, rpcmsg.addr, rpcmsg.len,
681*7387SRobert.Gordon@Sun.COM 		    rdma_minchunk, NULL, XDR_ENCODE, conn);
6820Sstevel@tonic-gate 
683*7387SRobert.Gordon@Sun.COM 		status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp,
684*7387SRobert.Gordon@Sun.COM 		    xdr_args, argsp);
685*7387SRobert.Gordon@Sun.COM 
686*7387SRobert.Gordon@Sun.COM 		if (status != CLNT_RDMA_SUCCESS) {
687*7387SRobert.Gordon@Sun.COM 			/* Clean up from previous encode attempt */
688*7387SRobert.Gordon@Sun.COM 			rdma_buf_free(conn, &rpcmsg);
689*7387SRobert.Gordon@Sun.COM 			XDR_DESTROY(call_xdrp);
6900Sstevel@tonic-gate 		} else {
691*7387SRobert.Gordon@Sun.COM 			XDR_CONTROL(call_xdrp, XDR_RDMA_GET_CHUNK_LEN, &rcil);
6920Sstevel@tonic-gate 		}
6930Sstevel@tonic-gate 	}
6940Sstevel@tonic-gate 
695*7387SRobert.Gordon@Sun.COM 	/* If the encode didn't work, then try a NOMSG */
696*7387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
697*7387SRobert.Gordon@Sun.COM 
698*7387SRobert.Gordon@Sun.COM 		msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT + MAX_AUTH_BYTES +
699*7387SRobert.Gordon@Sun.COM 		    xdr_sizeof(xdr_args, argsp);
700*7387SRobert.Gordon@Sun.COM 
701*7387SRobert.Gordon@Sun.COM 		msglen = calc_length(msglen);
702*7387SRobert.Gordon@Sun.COM 
703*7387SRobert.Gordon@Sun.COM 		/* pick up the lengths for the reply buffer needed */
704*7387SRobert.Gordon@Sun.COM 		(void) xdrrdma_sizeof(xdr_args, argsp, 0,
705*7387SRobert.Gordon@Sun.COM 		    &rcil.rcil_len, &rcil.rcil_len_alt);
706*7387SRobert.Gordon@Sun.COM 
707*7387SRobert.Gordon@Sun.COM 		/*
708*7387SRobert.Gordon@Sun.COM 		 * Construct a clist to describe the CHUNK_BUFFER
709*7387SRobert.Gordon@Sun.COM 		 * for the rpcmsg.
710*7387SRobert.Gordon@Sun.COM 		 */
711*7387SRobert.Gordon@Sun.COM 		cl_rpcmsg = clist_alloc();
712*7387SRobert.Gordon@Sun.COM 		cl_rpcmsg->c_len = msglen;
713*7387SRobert.Gordon@Sun.COM 		cl_rpcmsg->rb_longbuf.type = RDMA_LONG_BUFFER;
714*7387SRobert.Gordon@Sun.COM 		cl_rpcmsg->rb_longbuf.len = msglen;
715*7387SRobert.Gordon@Sun.COM 		if (rdma_buf_alloc(conn, &cl_rpcmsg->rb_longbuf)) {
716*7387SRobert.Gordon@Sun.COM 			clist_free(cl_rpcmsg);
717*7387SRobert.Gordon@Sun.COM 			goto done;
718*7387SRobert.Gordon@Sun.COM 		}
719*7387SRobert.Gordon@Sun.COM 		cl_rpcmsg->w.c_saddr3 = cl_rpcmsg->rb_longbuf.addr;
720*7387SRobert.Gordon@Sun.COM 
721*7387SRobert.Gordon@Sun.COM 		op = RDMA_NOMSG;
722*7387SRobert.Gordon@Sun.COM 		call_xdrp = &callxdr;
723*7387SRobert.Gordon@Sun.COM 
724*7387SRobert.Gordon@Sun.COM 		xdrrdma_create(call_xdrp, cl_rpcmsg->rb_longbuf.addr,
725*7387SRobert.Gordon@Sun.COM 		    cl_rpcmsg->rb_longbuf.len, 0,
726*7387SRobert.Gordon@Sun.COM 		    cl_rpcmsg, XDR_ENCODE, conn);
727*7387SRobert.Gordon@Sun.COM 
728*7387SRobert.Gordon@Sun.COM 		status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp,
729*7387SRobert.Gordon@Sun.COM 		    xdr_args, argsp);
730*7387SRobert.Gordon@Sun.COM 
731*7387SRobert.Gordon@Sun.COM 		if (status != CLNT_RDMA_SUCCESS) {
732*7387SRobert.Gordon@Sun.COM 			p->cku_err.re_status = RPC_CANTENCODEARGS;
733*7387SRobert.Gordon@Sun.COM 			p->cku_err.re_errno = EIO;
734*7387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(krpc__e__clntrdma__callit__composemsg);
735*7387SRobert.Gordon@Sun.COM 			goto done;
736*7387SRobert.Gordon@Sun.COM 		}
737*7387SRobert.Gordon@Sun.COM 	}
738*7387SRobert.Gordon@Sun.COM 
739*7387SRobert.Gordon@Sun.COM 	/*
740*7387SRobert.Gordon@Sun.COM 	 * During the XDR_ENCODE we may have "allocated" an RDMA READ or
741*7387SRobert.Gordon@Sun.COM 	 * RDMA WRITE clist.
742*7387SRobert.Gordon@Sun.COM 	 *
743*7387SRobert.Gordon@Sun.COM 	 * First pull the RDMA READ chunk list from the XDR private
744*7387SRobert.Gordon@Sun.COM 	 * area to keep it handy.
745*7387SRobert.Gordon@Sun.COM 	 */
746*7387SRobert.Gordon@Sun.COM 	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &cl);
747*7387SRobert.Gordon@Sun.COM 
748*7387SRobert.Gordon@Sun.COM 	if (gss_i_or_p) {
749*7387SRobert.Gordon@Sun.COM 		long_reply_len = rcil.rcil_len + rcil.rcil_len_alt;
750*7387SRobert.Gordon@Sun.COM 		long_reply_len += MAX_AUTH_BYTES;
751*7387SRobert.Gordon@Sun.COM 	} else {
752*7387SRobert.Gordon@Sun.COM 		long_reply_len = rcil.rcil_len;
753*7387SRobert.Gordon@Sun.COM 	}
7540Sstevel@tonic-gate 
7550Sstevel@tonic-gate 	/*
7560Sstevel@tonic-gate 	 * Update the chunk size information for the Long RPC msg.
7570Sstevel@tonic-gate 	 */
7580Sstevel@tonic-gate 	if (cl && op == RDMA_NOMSG)
7590Sstevel@tonic-gate 		cl->c_len = p->cku_outsz;
7600Sstevel@tonic-gate 
7610Sstevel@tonic-gate 	/*
762*7387SRobert.Gordon@Sun.COM 	 * Prepare the RDMA header. On success xdrs will hold the result
763*7387SRobert.Gordon@Sun.COM 	 * of xdrmem_create() for a SEND_BUFFER.
7640Sstevel@tonic-gate 	 */
765*7387SRobert.Gordon@Sun.COM 	status = clnt_compose_rdma_header(conn, h, &clmsg,
766*7387SRobert.Gordon@Sun.COM 	    &rdmahdr_o_xdrs, &op);
767*7387SRobert.Gordon@Sun.COM 
768*7387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
7690Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTSEND;
7700Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
7710Sstevel@tonic-gate 		RCSTAT_INCR(rcnomem);
772*7387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__callit__nobufs2);
7730Sstevel@tonic-gate 		goto done;
7740Sstevel@tonic-gate 	}
775*7387SRobert.Gordon@Sun.COM 
7760Sstevel@tonic-gate 	/*
777*7387SRobert.Gordon@Sun.COM 	 * Now insert the RDMA READ list iff present
7780Sstevel@tonic-gate 	 */
779*7387SRobert.Gordon@Sun.COM 	status = clnt_setup_rlist(conn, rdmahdr_o_xdrs, call_xdrp);
780*7387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
781*7387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__callit__clistreg);
782*7387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &clmsg);
783*7387SRobert.Gordon@Sun.COM 		p->cku_err.re_status = RPC_CANTSEND;
784*7387SRobert.Gordon@Sun.COM 		p->cku_err.re_errno = EIO;
785*7387SRobert.Gordon@Sun.COM 		goto done;
786*7387SRobert.Gordon@Sun.COM 	}
787*7387SRobert.Gordon@Sun.COM 
788*7387SRobert.Gordon@Sun.COM 	/*
789*7387SRobert.Gordon@Sun.COM 	 * Setup RDMA WRITE chunk list for nfs read operation
790*7387SRobert.Gordon@Sun.COM 	 * other operations will have a NULL which will result
791*7387SRobert.Gordon@Sun.COM 	 * as a NULL list in the XDR stream.
792*7387SRobert.Gordon@Sun.COM 	 */
793*7387SRobert.Gordon@Sun.COM 	status = clnt_setup_wlist(conn, rdmahdr_o_xdrs, call_xdrp);
794*7387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
795*7387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &clmsg);
796*7387SRobert.Gordon@Sun.COM 		p->cku_err.re_status = RPC_CANTSEND;
797*7387SRobert.Gordon@Sun.COM 		p->cku_err.re_errno = EIO;
798*7387SRobert.Gordon@Sun.COM 		goto done;
799*7387SRobert.Gordon@Sun.COM 	}
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate 	/*
802*7387SRobert.Gordon@Sun.COM 	 * If NULL call and RPCSEC_GSS, provide a chunk such that
803*7387SRobert.Gordon@Sun.COM 	 * large responses can flow back to the client.
804*7387SRobert.Gordon@Sun.COM 	 * If RPCSEC_GSS with integrity or privacy is in use, get chunk.
8050Sstevel@tonic-gate 	 */
806*7387SRobert.Gordon@Sun.COM 	if ((procnum == 0 && rpcsec_gss == TRUE) ||
807*7387SRobert.Gordon@Sun.COM 	    (rpcsec_gss == TRUE && gss_i_or_p == TRUE))
808*7387SRobert.Gordon@Sun.COM 		long_reply_len += 1024;
8090Sstevel@tonic-gate 
810*7387SRobert.Gordon@Sun.COM 	status = clnt_setup_long_reply(conn, &cl_long_reply, long_reply_len);
8110Sstevel@tonic-gate 
812*7387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
813*7387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &clmsg);
814*7387SRobert.Gordon@Sun.COM 		p->cku_err.re_status = RPC_CANTSEND;
815*7387SRobert.Gordon@Sun.COM 		p->cku_err.re_errno = EIO;
816*7387SRobert.Gordon@Sun.COM 		goto done;
8170Sstevel@tonic-gate 	}
8180Sstevel@tonic-gate 
8190Sstevel@tonic-gate 	/*
820*7387SRobert.Gordon@Sun.COM 	 * XDR encode the RDMA_REPLY write chunk
821*7387SRobert.Gordon@Sun.COM 	 */
822*7387SRobert.Gordon@Sun.COM 	seg_array_len = (cl_long_reply ? 1 : 0);
823*7387SRobert.Gordon@Sun.COM 	(void) xdr_encode_reply_wchunk(rdmahdr_o_xdrs, cl_long_reply,
824*7387SRobert.Gordon@Sun.COM 	    seg_array_len);
825*7387SRobert.Gordon@Sun.COM 
826*7387SRobert.Gordon@Sun.COM 	/*
827*7387SRobert.Gordon@Sun.COM 	 * Construct a clist in "sendlist" that represents what we
828*7387SRobert.Gordon@Sun.COM 	 * will push over the wire.
829*7387SRobert.Gordon@Sun.COM 	 *
8300Sstevel@tonic-gate 	 * Start with the RDMA header and clist (if any)
8310Sstevel@tonic-gate 	 */
832*7387SRobert.Gordon@Sun.COM 	clist_add(&cl_sendlist, 0, XDR_GETPOS(rdmahdr_o_xdrs), &clmsg.handle,
833*7387SRobert.Gordon@Sun.COM 	    clmsg.addr, NULL, NULL);
8340Sstevel@tonic-gate 
8350Sstevel@tonic-gate 	/*
836*7387SRobert.Gordon@Sun.COM 	 * Put the RPC call message in  sendlist if small RPC
8370Sstevel@tonic-gate 	 */
8380Sstevel@tonic-gate 	if (op == RDMA_MSG) {
839*7387SRobert.Gordon@Sun.COM 		clist_add(&cl_sendlist, 0, p->cku_outsz, &rpcmsg.handle,
840*7387SRobert.Gordon@Sun.COM 		    rpcmsg.addr, NULL, NULL);
8410Sstevel@tonic-gate 	} else {
8420Sstevel@tonic-gate 		/* Long RPC already in chunk list */
8430Sstevel@tonic-gate 		RCSTAT_INCR(rclongrpcs);
8440Sstevel@tonic-gate 	}
8450Sstevel@tonic-gate 
8460Sstevel@tonic-gate 	/*
8470Sstevel@tonic-gate 	 * Set up a reply buffer ready for the reply
8480Sstevel@tonic-gate 	 */
8490Sstevel@tonic-gate 	status = rdma_clnt_postrecv(conn, p->cku_xid);
8500Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
8510Sstevel@tonic-gate 		rdma_buf_free(conn, &clmsg);
8520Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTSEND;
8530Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
8540Sstevel@tonic-gate 		goto done;
8550Sstevel@tonic-gate 	}
856*7387SRobert.Gordon@Sun.COM 
8570Sstevel@tonic-gate 	/*
8580Sstevel@tonic-gate 	 * sync the memory for dma
8590Sstevel@tonic-gate 	 */
8600Sstevel@tonic-gate 	if (cl != NULL) {
861*7387SRobert.Gordon@Sun.COM 		status = clist_syncmem(conn, cl, CLIST_REG_SOURCE);
8620Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
863*7387SRobert.Gordon@Sun.COM 			(void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
8640Sstevel@tonic-gate 			rdma_buf_free(conn, &clmsg);
8650Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTSEND;
8660Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
8670Sstevel@tonic-gate 			goto done;
8680Sstevel@tonic-gate 		}
8690Sstevel@tonic-gate 	}
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate 	/*
872*7387SRobert.Gordon@Sun.COM 	 * Send the RDMA Header and RPC call message to the server
8730Sstevel@tonic-gate 	 */
874*7387SRobert.Gordon@Sun.COM 	status = RDMA_SEND(conn, cl_sendlist, p->cku_xid);
8750Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
876*7387SRobert.Gordon@Sun.COM 		(void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
8770Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTSEND;
8780Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
8790Sstevel@tonic-gate 		goto done;
8800Sstevel@tonic-gate 	}
881*7387SRobert.Gordon@Sun.COM 
882*7387SRobert.Gordon@Sun.COM 	/*
883*7387SRobert.Gordon@Sun.COM 	 * RDMA plugin now owns the send msg buffers.
884*7387SRobert.Gordon@Sun.COM 	 * Clear them out and don't free them.
885*7387SRobert.Gordon@Sun.COM 	 */
886*7387SRobert.Gordon@Sun.COM 	clmsg.addr = NULL;
887*7387SRobert.Gordon@Sun.COM 	if (rpcmsg.type == SEND_BUFFER)
888*7387SRobert.Gordon@Sun.COM 		rpcmsg.addr = NULL;
8890Sstevel@tonic-gate 
8900Sstevel@tonic-gate 	/*
8910Sstevel@tonic-gate 	 * Recv rpc reply
8920Sstevel@tonic-gate 	 */
893*7387SRobert.Gordon@Sun.COM 	status = RDMA_RECV(conn, &cl_recvlist, p->cku_xid);
8940Sstevel@tonic-gate 
8950Sstevel@tonic-gate 	/*
8960Sstevel@tonic-gate 	 * Now check recv status
8970Sstevel@tonic-gate 	 */
8980Sstevel@tonic-gate 	if (status != 0) {
8990Sstevel@tonic-gate 		if (status == RDMA_INTR) {
9000Sstevel@tonic-gate 			p->cku_err.re_status = RPC_INTR;
9010Sstevel@tonic-gate 			p->cku_err.re_errno = EINTR;
9020Sstevel@tonic-gate 			RCSTAT_INCR(rcintrs);
9030Sstevel@tonic-gate 		} else if (status == RPC_TIMEDOUT) {
9040Sstevel@tonic-gate 			p->cku_err.re_status = RPC_TIMEDOUT;
9050Sstevel@tonic-gate 			p->cku_err.re_errno = ETIMEDOUT;
9060Sstevel@tonic-gate 			RCSTAT_INCR(rctimeouts);
9070Sstevel@tonic-gate 		} else {
9080Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTRECV;
9090Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
9100Sstevel@tonic-gate 		}
9110Sstevel@tonic-gate 		goto done;
9120Sstevel@tonic-gate 	}
913*7387SRobert.Gordon@Sun.COM 
9140Sstevel@tonic-gate 	/*
9150Sstevel@tonic-gate 	 * Process the reply message.
9160Sstevel@tonic-gate 	 *
9170Sstevel@tonic-gate 	 * First the chunk list (if any)
9180Sstevel@tonic-gate 	 */
919*7387SRobert.Gordon@Sun.COM 	rdmahdr_i_xdrs = &(p->cku_inxdr);
920*7387SRobert.Gordon@Sun.COM 	xdrmem_create(rdmahdr_i_xdrs,
921*7387SRobert.Gordon@Sun.COM 	    (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3,
922*7387SRobert.Gordon@Sun.COM 	    cl_recvlist->c_len, XDR_DECODE);
923*7387SRobert.Gordon@Sun.COM 
9240Sstevel@tonic-gate 	/*
9250Sstevel@tonic-gate 	 * Treat xid as opaque (xid is the first entity
9260Sstevel@tonic-gate 	 * in the rpc rdma message).
927*7387SRobert.Gordon@Sun.COM 	 * Skip xid and set the xdr position accordingly.
9280Sstevel@tonic-gate 	 */
929*7387SRobert.Gordon@Sun.COM 	XDR_SETPOS(rdmahdr_i_xdrs, sizeof (uint32_t));
930*7387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(rdmahdr_i_xdrs, &vers);
931*7387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(rdmahdr_i_xdrs, &rdma_credit);
932*7387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(rdmahdr_i_xdrs, &op);
933*7387SRobert.Gordon@Sun.COM 	(void) xdr_do_clist(rdmahdr_i_xdrs, &cl);
934*7387SRobert.Gordon@Sun.COM 
935*7387SRobert.Gordon@Sun.COM 	clnt_update_credit(conn, rdma_credit);
936*7387SRobert.Gordon@Sun.COM 
937*7387SRobert.Gordon@Sun.COM 	wlist_exists_reply = FALSE;
938*7387SRobert.Gordon@Sun.COM 	if (! xdr_decode_wlist(rdmahdr_i_xdrs, &cl_rpcreply_wlist,
939*7387SRobert.Gordon@Sun.COM 	    &wlist_exists_reply)) {
940*7387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__callit__wlist_decode);
941*7387SRobert.Gordon@Sun.COM 		p->cku_err.re_status = RPC_CANTDECODERES;
942*7387SRobert.Gordon@Sun.COM 		p->cku_err.re_errno = EIO;
943*7387SRobert.Gordon@Sun.COM 		goto done;
944*7387SRobert.Gordon@Sun.COM 	}
9450Sstevel@tonic-gate 
9460Sstevel@tonic-gate 	/*
947*7387SRobert.Gordon@Sun.COM 	 * The server shouldn't have sent a RDMA_SEND that
948*7387SRobert.Gordon@Sun.COM 	 * the client needs to RDMA_WRITE a reply back to
949*7387SRobert.Gordon@Sun.COM 	 * the server.  So silently ignoring what the
950*7387SRobert.Gordon@Sun.COM 	 * server returns in the rdma_reply section of the
951*7387SRobert.Gordon@Sun.COM 	 * header.
9520Sstevel@tonic-gate 	 */
953*7387SRobert.Gordon@Sun.COM 	(void) xdr_decode_reply_wchunk(rdmahdr_i_xdrs, &cl_rdma_reply);
954*7387SRobert.Gordon@Sun.COM 	off = xdr_getpos(rdmahdr_i_xdrs);
9550Sstevel@tonic-gate 
956*7387SRobert.Gordon@Sun.COM 	clnt_decode_long_reply(conn, cl_long_reply,
957*7387SRobert.Gordon@Sun.COM 	    cl_rdma_reply, &replyxdr, &reply_xdrp,
958*7387SRobert.Gordon@Sun.COM 	    cl, cl_recvlist, op, off);
9590Sstevel@tonic-gate 
960*7387SRobert.Gordon@Sun.COM 	if (reply_xdrp == NULL)
961*7387SRobert.Gordon@Sun.COM 		goto done;
9620Sstevel@tonic-gate 
963*7387SRobert.Gordon@Sun.COM 	if (wlist_exists_reply) {
964*7387SRobert.Gordon@Sun.COM 		XDR_CONTROL(reply_xdrp, XDR_RDMA_SET_WLIST, cl_rpcreply_wlist);
9650Sstevel@tonic-gate 	}
9660Sstevel@tonic-gate 
9670Sstevel@tonic-gate 	reply_msg.rm_direction = REPLY;
9680Sstevel@tonic-gate 	reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
9690Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_stat = SUCCESS;
9700Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_verf = _null_auth;
971*7387SRobert.Gordon@Sun.COM 
9720Sstevel@tonic-gate 	/*
9730Sstevel@tonic-gate 	 *  xdr_results will be done in AUTH_UNWRAP.
9740Sstevel@tonic-gate 	 */
9750Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_results.where = NULL;
9760Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_results.proc = xdr_void;
9770Sstevel@tonic-gate 
9780Sstevel@tonic-gate 	/*
9790Sstevel@tonic-gate 	 * Decode and validate the response.
9800Sstevel@tonic-gate 	 */
981*7387SRobert.Gordon@Sun.COM 	if (xdr_replymsg(reply_xdrp, &reply_msg)) {
9820Sstevel@tonic-gate 		enum clnt_stat re_status;
9830Sstevel@tonic-gate 
9840Sstevel@tonic-gate 		_seterr_reply(&reply_msg, &(p->cku_err));
9850Sstevel@tonic-gate 
9860Sstevel@tonic-gate 		re_status = p->cku_err.re_status;
9870Sstevel@tonic-gate 		if (re_status == RPC_SUCCESS) {
9880Sstevel@tonic-gate 			/*
9890Sstevel@tonic-gate 			 * Reply is good, check auth.
9900Sstevel@tonic-gate 			 */
9910Sstevel@tonic-gate 			if (!AUTH_VALIDATE(h->cl_auth,
9920Sstevel@tonic-gate 			    &reply_msg.acpted_rply.ar_verf)) {
9930Sstevel@tonic-gate 				p->cku_err.re_status = RPC_AUTHERROR;
9940Sstevel@tonic-gate 				p->cku_err.re_why = AUTH_INVALIDRESP;
9950Sstevel@tonic-gate 				RCSTAT_INCR(rcbadverfs);
996*7387SRobert.Gordon@Sun.COM 				DTRACE_PROBE(
997*7387SRobert.Gordon@Sun.COM 				    krpc__e__clntrdma__callit__authvalidate);
998*7387SRobert.Gordon@Sun.COM 			} else if (!AUTH_UNWRAP(h->cl_auth, reply_xdrp,
9990Sstevel@tonic-gate 			    xdr_results, resultsp)) {
10000Sstevel@tonic-gate 				p->cku_err.re_status = RPC_CANTDECODERES;
10010Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
1002*7387SRobert.Gordon@Sun.COM 				DTRACE_PROBE(
1003*7387SRobert.Gordon@Sun.COM 				    krpc__e__clntrdma__callit__authunwrap);
10040Sstevel@tonic-gate 			}
10050Sstevel@tonic-gate 		} else {
10060Sstevel@tonic-gate 			/* set errno in case we can't recover */
10070Sstevel@tonic-gate 			if (re_status != RPC_VERSMISMATCH &&
10080Sstevel@tonic-gate 			    re_status != RPC_AUTHERROR &&
10090Sstevel@tonic-gate 			    re_status != RPC_PROGVERSMISMATCH)
10100Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
10110Sstevel@tonic-gate 
10120Sstevel@tonic-gate 			if (re_status == RPC_AUTHERROR) {
1013*7387SRobert.Gordon@Sun.COM 				if ((refresh_attempt > 0) &&
1014*7387SRobert.Gordon@Sun.COM 				    AUTH_REFRESH(h->cl_auth, &reply_msg,
1015*7387SRobert.Gordon@Sun.COM 				    p->cku_cred)) {
1016*7387SRobert.Gordon@Sun.COM 					refresh_attempt--;
1017*7387SRobert.Gordon@Sun.COM 					try_call_again = 1;
1018*7387SRobert.Gordon@Sun.COM 					goto done;
1019*7387SRobert.Gordon@Sun.COM 				}
1020*7387SRobert.Gordon@Sun.COM 
1021*7387SRobert.Gordon@Sun.COM 				try_call_again = 0;
1022*7387SRobert.Gordon@Sun.COM 
1023*7387SRobert.Gordon@Sun.COM 				/*
1024*7387SRobert.Gordon@Sun.COM 				 * We have used the client handle to
1025*7387SRobert.Gordon@Sun.COM 				 * do an AUTH_REFRESH and the RPC status may
1026*7387SRobert.Gordon@Sun.COM 				 * be set to RPC_SUCCESS; Let's make sure to
1027*7387SRobert.Gordon@Sun.COM 				 * set it to RPC_AUTHERROR.
1028*7387SRobert.Gordon@Sun.COM 				 */
1029*7387SRobert.Gordon@Sun.COM 				p->cku_err.re_status = RPC_AUTHERROR;
1030*7387SRobert.Gordon@Sun.COM 
10310Sstevel@tonic-gate 				/*
10320Sstevel@tonic-gate 				 * Map recoverable and unrecoverable
10330Sstevel@tonic-gate 				 * authentication errors to appropriate
10340Sstevel@tonic-gate 				 * errno
10350Sstevel@tonic-gate 				 */
10360Sstevel@tonic-gate 				switch (p->cku_err.re_why) {
10370Sstevel@tonic-gate 				case AUTH_BADCRED:
10380Sstevel@tonic-gate 				case AUTH_BADVERF:
10390Sstevel@tonic-gate 				case AUTH_INVALIDRESP:
10400Sstevel@tonic-gate 				case AUTH_TOOWEAK:
10410Sstevel@tonic-gate 				case AUTH_FAILED:
10420Sstevel@tonic-gate 				case RPCSEC_GSS_NOCRED:
10430Sstevel@tonic-gate 				case RPCSEC_GSS_FAILED:
10440Sstevel@tonic-gate 					p->cku_err.re_errno = EACCES;
10450Sstevel@tonic-gate 					break;
10460Sstevel@tonic-gate 				case AUTH_REJECTEDCRED:
10470Sstevel@tonic-gate 				case AUTH_REJECTEDVERF:
10480Sstevel@tonic-gate 				default:
10490Sstevel@tonic-gate 					p->cku_err.re_errno = EIO;
10500Sstevel@tonic-gate 					break;
10510Sstevel@tonic-gate 				}
10520Sstevel@tonic-gate 			}
1053*7387SRobert.Gordon@Sun.COM 			DTRACE_PROBE1(krpc__e__clntrdma__callit__rpcfailed,
1054*7387SRobert.Gordon@Sun.COM 			    int, p->cku_err.re_why);
10550Sstevel@tonic-gate 		}
10560Sstevel@tonic-gate 	} else {
10570Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTDECODERES;
10580Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
1059*7387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__callit__replymsg);
10600Sstevel@tonic-gate 	}
10610Sstevel@tonic-gate 
1062*7387SRobert.Gordon@Sun.COM done:
1063*7387SRobert.Gordon@Sun.COM 	clnt_return_credit(conn);
1064*7387SRobert.Gordon@Sun.COM 
1065*7387SRobert.Gordon@Sun.COM 	if (cl_sendlist != NULL)
1066*7387SRobert.Gordon@Sun.COM 		clist_free(cl_sendlist);
1067*7387SRobert.Gordon@Sun.COM 
10680Sstevel@tonic-gate 	/*
10690Sstevel@tonic-gate 	 * If rpc reply is in a chunk, free it now.
10700Sstevel@tonic-gate 	 */
1071*7387SRobert.Gordon@Sun.COM 	if (cl_long_reply) {
1072*7387SRobert.Gordon@Sun.COM 		(void) clist_deregister(conn, cl_long_reply, CLIST_REG_DST);
1073*7387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &cl_long_reply->rb_longbuf);
1074*7387SRobert.Gordon@Sun.COM 		clist_free(cl_long_reply);
1075*7387SRobert.Gordon@Sun.COM 	}
10760Sstevel@tonic-gate 
1077*7387SRobert.Gordon@Sun.COM 	if (call_xdrp)
1078*7387SRobert.Gordon@Sun.COM 		XDR_DESTROY(call_xdrp);
10790Sstevel@tonic-gate 
1080*7387SRobert.Gordon@Sun.COM 	if (reply_xdrp) {
1081*7387SRobert.Gordon@Sun.COM 		(void) xdr_rpc_free_verifier(reply_xdrp, &reply_msg);
1082*7387SRobert.Gordon@Sun.COM 		XDR_DESTROY(reply_xdrp);
10830Sstevel@tonic-gate 	}
10840Sstevel@tonic-gate 
1085*7387SRobert.Gordon@Sun.COM 	if (cl_rdma_reply) {
1086*7387SRobert.Gordon@Sun.COM 		clist_free(cl_rdma_reply);
10870Sstevel@tonic-gate 	}
10880Sstevel@tonic-gate 
1089*7387SRobert.Gordon@Sun.COM 	if (cl_recvlist) {
1090*7387SRobert.Gordon@Sun.COM 		rdma_buf_t	recvmsg = {0};
1091*7387SRobert.Gordon@Sun.COM 		recvmsg.addr = (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3;
10920Sstevel@tonic-gate 		recvmsg.type = RECV_BUFFER;
10930Sstevel@tonic-gate 		RDMA_BUF_FREE(conn, &recvmsg);
1094*7387SRobert.Gordon@Sun.COM 		clist_free(cl_recvlist);
10950Sstevel@tonic-gate 	}
1096*7387SRobert.Gordon@Sun.COM 
10970Sstevel@tonic-gate 	RDMA_REL_CONN(conn);
1098*7387SRobert.Gordon@Sun.COM 
1099*7387SRobert.Gordon@Sun.COM 	if (try_call_again)
1100*7387SRobert.Gordon@Sun.COM 		goto call_again;
1101*7387SRobert.Gordon@Sun.COM 
11020Sstevel@tonic-gate 	if (p->cku_err.re_status != RPC_SUCCESS) {
11030Sstevel@tonic-gate 		RCSTAT_INCR(rcbadcalls);
11040Sstevel@tonic-gate 	}
11050Sstevel@tonic-gate 	return (p->cku_err.re_status);
11060Sstevel@tonic-gate }
11070Sstevel@tonic-gate 
1108*7387SRobert.Gordon@Sun.COM 
1109*7387SRobert.Gordon@Sun.COM static void
1110*7387SRobert.Gordon@Sun.COM clnt_decode_long_reply(CONN *conn,
1111*7387SRobert.Gordon@Sun.COM     struct clist *cl_long_reply,
1112*7387SRobert.Gordon@Sun.COM     struct clist *cl_rdma_reply, XDR *xdrs,
1113*7387SRobert.Gordon@Sun.COM     XDR **rxdrp, struct clist *cl,
1114*7387SRobert.Gordon@Sun.COM     struct clist *cl_recvlist,
1115*7387SRobert.Gordon@Sun.COM     uint_t  op, uint_t off)
1116*7387SRobert.Gordon@Sun.COM {
1117*7387SRobert.Gordon@Sun.COM 	if (op != RDMA_NOMSG) {
1118*7387SRobert.Gordon@Sun.COM 		DTRACE_PROBE1(krpc__i__longrepl__rdmamsg__len,
1119*7387SRobert.Gordon@Sun.COM 		    int, cl_recvlist->c_len - off);
1120*7387SRobert.Gordon@Sun.COM 		xdrrdma_create(xdrs,
1121*7387SRobert.Gordon@Sun.COM 		    (caddr_t)(uintptr_t)(cl_recvlist->w.c_saddr3 + off),
1122*7387SRobert.Gordon@Sun.COM 		    cl_recvlist->c_len - off, 0, cl, XDR_DECODE, conn);
1123*7387SRobert.Gordon@Sun.COM 		*rxdrp = xdrs;
1124*7387SRobert.Gordon@Sun.COM 		return;
1125*7387SRobert.Gordon@Sun.COM 	}
1126*7387SRobert.Gordon@Sun.COM 
1127*7387SRobert.Gordon@Sun.COM 	/* op must be RDMA_NOMSG */
1128*7387SRobert.Gordon@Sun.COM 	if (cl) {
1129*7387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__declongreply__serverreadlist);
1130*7387SRobert.Gordon@Sun.COM 		return;
1131*7387SRobert.Gordon@Sun.COM 	}
1132*7387SRobert.Gordon@Sun.COM 
1133*7387SRobert.Gordon@Sun.COM 	if (cl_long_reply->u.c_daddr) {
1134*7387SRobert.Gordon@Sun.COM 		DTRACE_PROBE1(krpc__i__longrepl__rdmanomsg__len,
1135*7387SRobert.Gordon@Sun.COM 		    int, cl_rdma_reply->c_len);
1136*7387SRobert.Gordon@Sun.COM 
1137*7387SRobert.Gordon@Sun.COM 		xdrrdma_create(xdrs, (caddr_t)cl_long_reply->u.c_daddr3,
1138*7387SRobert.Gordon@Sun.COM 		    cl_rdma_reply->c_len, 0, NULL, XDR_DECODE, conn);
1139*7387SRobert.Gordon@Sun.COM 
1140*7387SRobert.Gordon@Sun.COM 		*rxdrp = xdrs;
1141*7387SRobert.Gordon@Sun.COM 	}
1142*7387SRobert.Gordon@Sun.COM }
1143*7387SRobert.Gordon@Sun.COM 
1144*7387SRobert.Gordon@Sun.COM static void
1145*7387SRobert.Gordon@Sun.COM clnt_return_credit(CONN *conn)
1146*7387SRobert.Gordon@Sun.COM {
1147*7387SRobert.Gordon@Sun.COM 	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1148*7387SRobert.Gordon@Sun.COM 
1149*7387SRobert.Gordon@Sun.COM 	mutex_enter(&conn->c_lock);
1150*7387SRobert.Gordon@Sun.COM 	cc_info->clnt_cc_in_flight_ops--;
1151*7387SRobert.Gordon@Sun.COM 	cv_signal(&cc_info->clnt_cc_cv);
1152*7387SRobert.Gordon@Sun.COM 	mutex_exit(&conn->c_lock);
1153*7387SRobert.Gordon@Sun.COM }
1154*7387SRobert.Gordon@Sun.COM 
1155*7387SRobert.Gordon@Sun.COM static void
1156*7387SRobert.Gordon@Sun.COM clnt_update_credit(CONN *conn, uint32_t rdma_credit)
1157*7387SRobert.Gordon@Sun.COM {
1158*7387SRobert.Gordon@Sun.COM 	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1159*7387SRobert.Gordon@Sun.COM 
1160*7387SRobert.Gordon@Sun.COM 	/*
1161*7387SRobert.Gordon@Sun.COM 	 * If the granted has not altered, avoid taking the
1162*7387SRobert.Gordon@Sun.COM 	 * mutex, to essentially do nothing..
1163*7387SRobert.Gordon@Sun.COM 	 */
1164*7387SRobert.Gordon@Sun.COM 	if (cc_info->clnt_cc_granted_ops == rdma_credit)
1165*7387SRobert.Gordon@Sun.COM 		return;
1166*7387SRobert.Gordon@Sun.COM 	/*
1167*7387SRobert.Gordon@Sun.COM 	 * Get the granted number of buffers for credit control.
1168*7387SRobert.Gordon@Sun.COM 	 */
1169*7387SRobert.Gordon@Sun.COM 	mutex_enter(&conn->c_lock);
1170*7387SRobert.Gordon@Sun.COM 	cc_info->clnt_cc_granted_ops = rdma_credit;
1171*7387SRobert.Gordon@Sun.COM 	mutex_exit(&conn->c_lock);
1172*7387SRobert.Gordon@Sun.COM }
1173*7387SRobert.Gordon@Sun.COM 
1174*7387SRobert.Gordon@Sun.COM static void
1175*7387SRobert.Gordon@Sun.COM clnt_check_credit(CONN *conn)
1176*7387SRobert.Gordon@Sun.COM {
1177*7387SRobert.Gordon@Sun.COM 	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
1178*7387SRobert.Gordon@Sun.COM 
1179*7387SRobert.Gordon@Sun.COM 	/*
1180*7387SRobert.Gordon@Sun.COM 	 * Make sure we are not going over our allowed buffer use
1181*7387SRobert.Gordon@Sun.COM 	 * (and make sure we have gotten a granted value before).
1182*7387SRobert.Gordon@Sun.COM 	 */
1183*7387SRobert.Gordon@Sun.COM 	mutex_enter(&conn->c_lock);
1184*7387SRobert.Gordon@Sun.COM 	while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops &&
1185*7387SRobert.Gordon@Sun.COM 	    cc_info->clnt_cc_granted_ops != 0) {
1186*7387SRobert.Gordon@Sun.COM 		/*
1187*7387SRobert.Gordon@Sun.COM 		 * Client has maxed out its granted buffers due to
1188*7387SRobert.Gordon@Sun.COM 		 * credit control.  Current handling is to block and wait.
1189*7387SRobert.Gordon@Sun.COM 		 */
1190*7387SRobert.Gordon@Sun.COM 		cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock);
1191*7387SRobert.Gordon@Sun.COM 	}
1192*7387SRobert.Gordon@Sun.COM 	cc_info->clnt_cc_in_flight_ops++;
1193*7387SRobert.Gordon@Sun.COM 	mutex_exit(&conn->c_lock);
1194*7387SRobert.Gordon@Sun.COM }
1195*7387SRobert.Gordon@Sun.COM 
11960Sstevel@tonic-gate /* ARGSUSED */
11970Sstevel@tonic-gate static void
11980Sstevel@tonic-gate clnt_rdma_kabort(CLIENT *h)
11990Sstevel@tonic-gate {
12000Sstevel@tonic-gate }
12010Sstevel@tonic-gate 
12020Sstevel@tonic-gate static void
12030Sstevel@tonic-gate clnt_rdma_kerror(CLIENT *h, struct rpc_err *err)
12040Sstevel@tonic-gate {
12050Sstevel@tonic-gate 	struct cku_private *p = htop(h);
12060Sstevel@tonic-gate 	*err = p->cku_err;
12070Sstevel@tonic-gate }
12080Sstevel@tonic-gate 
12090Sstevel@tonic-gate static bool_t
12100Sstevel@tonic-gate clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
12110Sstevel@tonic-gate {
12120Sstevel@tonic-gate 	struct cku_private *p = htop(h);
12130Sstevel@tonic-gate 	XDR *xdrs;
12140Sstevel@tonic-gate 
12150Sstevel@tonic-gate 	xdrs = &(p->cku_outxdr);
12160Sstevel@tonic-gate 	xdrs->x_op = XDR_FREE;
12170Sstevel@tonic-gate 	return ((*xdr_res)(xdrs, res_ptr));
12180Sstevel@tonic-gate }
12190Sstevel@tonic-gate 
12200Sstevel@tonic-gate /* ARGSUSED */
12210Sstevel@tonic-gate static bool_t
12220Sstevel@tonic-gate clnt_rdma_kcontrol(CLIENT *h, int cmd, char *arg)
12230Sstevel@tonic-gate {
12240Sstevel@tonic-gate 	return (TRUE);
12250Sstevel@tonic-gate }
12260Sstevel@tonic-gate 
12270Sstevel@tonic-gate /* ARGSUSED */
12280Sstevel@tonic-gate static int
12290Sstevel@tonic-gate clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
12300Sstevel@tonic-gate 	int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg,
12310Sstevel@tonic-gate 	uint32_t xid)
12320Sstevel@tonic-gate {
12330Sstevel@tonic-gate 	RCSTAT_INCR(rctimers);
12340Sstevel@tonic-gate 	return (0);
12350Sstevel@tonic-gate }
12360Sstevel@tonic-gate 
12370Sstevel@tonic-gate int
12380Sstevel@tonic-gate rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf)
12390Sstevel@tonic-gate {
12400Sstevel@tonic-gate 	rdma_registry_t	*rp;
12410Sstevel@tonic-gate 	void *handle = NULL;
12420Sstevel@tonic-gate 	struct knetconfig *knc;
12430Sstevel@tonic-gate 	char *pf, *p;
12440Sstevel@tonic-gate 	rdma_stat status;
12450Sstevel@tonic-gate 	int error = 0;
12460Sstevel@tonic-gate 
12470Sstevel@tonic-gate 	if (!INGLOBALZONE(curproc))
12480Sstevel@tonic-gate 		return (-1);
1249*7387SRobert.Gordon@Sun.COM 
12500Sstevel@tonic-gate 	/*
12510Sstevel@tonic-gate 	 * modload the RDMA plugins if not already done.
12520Sstevel@tonic-gate 	 */
12530Sstevel@tonic-gate 	if (!rdma_modloaded) {
12540Sstevel@tonic-gate 		mutex_enter(&rdma_modload_lock);
12550Sstevel@tonic-gate 		if (!rdma_modloaded) {
12560Sstevel@tonic-gate 			error = rdma_modload();
12570Sstevel@tonic-gate 		}
12580Sstevel@tonic-gate 		mutex_exit(&rdma_modload_lock);
12590Sstevel@tonic-gate 		if (error)
12600Sstevel@tonic-gate 			return (-1);
12610Sstevel@tonic-gate 	}
12620Sstevel@tonic-gate 
12630Sstevel@tonic-gate 	if (!rdma_dev_available)
12640Sstevel@tonic-gate 		return (-1);
12650Sstevel@tonic-gate 
12660Sstevel@tonic-gate 	rw_enter(&rdma_lock, RW_READER);
12670Sstevel@tonic-gate 	rp = rdma_mod_head;
12680Sstevel@tonic-gate 	while (rp != NULL) {
12690Sstevel@tonic-gate 		status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr,
12700Sstevel@tonic-gate 		    &handle);
12710Sstevel@tonic-gate 		if (status == RDMA_SUCCESS) {
12720Sstevel@tonic-gate 			knc = kmem_zalloc(sizeof (struct knetconfig),
1273*7387SRobert.Gordon@Sun.COM 			    KM_SLEEP);
12740Sstevel@tonic-gate 			knc->knc_semantics = NC_TPI_RDMA;
12750Sstevel@tonic-gate 			pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
12760Sstevel@tonic-gate 			p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
12770Sstevel@tonic-gate 			if (addr_type == AF_INET)
12780Sstevel@tonic-gate 				(void) strncpy(pf, NC_INET, KNC_STRSIZE);
12790Sstevel@tonic-gate 			else if (addr_type == AF_INET6)
12800Sstevel@tonic-gate 				(void) strncpy(pf, NC_INET6, KNC_STRSIZE);
12810Sstevel@tonic-gate 			pf[KNC_STRSIZE - 1] = '\0';
12820Sstevel@tonic-gate 
12830Sstevel@tonic-gate 			(void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE);
12840Sstevel@tonic-gate 			p[KNC_STRSIZE - 1] = '\0';
12850Sstevel@tonic-gate 
12860Sstevel@tonic-gate 			knc->knc_protofmly = pf;
12870Sstevel@tonic-gate 			knc->knc_proto = p;
12880Sstevel@tonic-gate 			knc->knc_rdev = (dev_t)handle;
12890Sstevel@tonic-gate 			*knconf = knc;
12900Sstevel@tonic-gate 			rw_exit(&rdma_lock);
12910Sstevel@tonic-gate 			return (0);
12920Sstevel@tonic-gate 		}
12930Sstevel@tonic-gate 		rp = rp->r_next;
12940Sstevel@tonic-gate 	}
12950Sstevel@tonic-gate 	rw_exit(&rdma_lock);
12960Sstevel@tonic-gate 	return (-1);
12970Sstevel@tonic-gate }
1298*7387SRobert.Gordon@Sun.COM 
1299*7387SRobert.Gordon@Sun.COM static void
1300*7387SRobert.Gordon@Sun.COM check_dereg_wlist(CONN *conn, clist *rwc)
1301*7387SRobert.Gordon@Sun.COM {
1302*7387SRobert.Gordon@Sun.COM 	int status;
1303*7387SRobert.Gordon@Sun.COM 
1304*7387SRobert.Gordon@Sun.COM 	if (rwc == NULL)
1305*7387SRobert.Gordon@Sun.COM 		return;
1306*7387SRobert.Gordon@Sun.COM 
1307*7387SRobert.Gordon@Sun.COM 	if (rwc->c_dmemhandle.mrc_rmr && rwc->c_len) {
1308*7387SRobert.Gordon@Sun.COM 
1309*7387SRobert.Gordon@Sun.COM 		status = clist_deregister(conn, rwc, CLIST_REG_DST);
1310*7387SRobert.Gordon@Sun.COM 
1311*7387SRobert.Gordon@Sun.COM 		if (status != RDMA_SUCCESS) {
1312*7387SRobert.Gordon@Sun.COM 			DTRACE_PROBE1(krpc__e__clntrdma__dereg_wlist,
1313*7387SRobert.Gordon@Sun.COM 			    int, status);
1314*7387SRobert.Gordon@Sun.COM 		}
1315*7387SRobert.Gordon@Sun.COM 	}
1316*7387SRobert.Gordon@Sun.COM }
1317