10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57387SRobert.Gordon@Sun.COM  * Common Development and Distribution License (the "License").
67387SRobert.Gordon@Sun.COM  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*8695SRajkumar.Sivaprakasam@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
260Sstevel@tonic-gate /* All Rights Reserved */
270Sstevel@tonic-gate /*
280Sstevel@tonic-gate  * Portions of this source code were derived from Berkeley
290Sstevel@tonic-gate  * 4.3 BSD under license from the Regents of the University of
300Sstevel@tonic-gate  * California.
310Sstevel@tonic-gate  */
320Sstevel@tonic-gate 
330Sstevel@tonic-gate #include <sys/param.h>
340Sstevel@tonic-gate #include <sys/types.h>
350Sstevel@tonic-gate #include <sys/user.h>
360Sstevel@tonic-gate #include <sys/systm.h>
370Sstevel@tonic-gate #include <sys/sysmacros.h>
380Sstevel@tonic-gate #include <sys/errno.h>
390Sstevel@tonic-gate #include <sys/kmem.h>
400Sstevel@tonic-gate #include <sys/debug.h>
410Sstevel@tonic-gate #include <sys/systm.h>
420Sstevel@tonic-gate #include <sys/kstat.h>
430Sstevel@tonic-gate #include <sys/t_lock.h>
440Sstevel@tonic-gate #include <sys/ddi.h>
450Sstevel@tonic-gate #include <sys/cmn_err.h>
460Sstevel@tonic-gate #include <sys/time.h>
470Sstevel@tonic-gate #include <sys/isa_defs.h>
480Sstevel@tonic-gate #include <sys/zone.h>
497387SRobert.Gordon@Sun.COM #include <sys/sdt.h>
500Sstevel@tonic-gate 
510Sstevel@tonic-gate #include <rpc/types.h>
520Sstevel@tonic-gate #include <rpc/xdr.h>
530Sstevel@tonic-gate #include <rpc/auth.h>
540Sstevel@tonic-gate #include <rpc/clnt.h>
550Sstevel@tonic-gate #include <rpc/rpc_msg.h>
560Sstevel@tonic-gate #include <rpc/rpc_rdma.h>
577387SRobert.Gordon@Sun.COM #include <nfs/nfs.h>
587387SRobert.Gordon@Sun.COM #include <nfs/nfs4_kprot.h>
590Sstevel@tonic-gate 
607387SRobert.Gordon@Sun.COM static uint32_t rdma_bufs_rqst = RDMA_BUFS_RQST;
617387SRobert.Gordon@Sun.COM 
627387SRobert.Gordon@Sun.COM static int clnt_compose_rpcmsg(CLIENT *, rpcproc_t, rdma_buf_t *,
637387SRobert.Gordon@Sun.COM 			    XDR *, xdrproc_t, caddr_t);
647387SRobert.Gordon@Sun.COM static int  clnt_compose_rdma_header(CONN *, CLIENT *, rdma_buf_t *,
657387SRobert.Gordon@Sun.COM 		    XDR **, uint_t *);
667387SRobert.Gordon@Sun.COM static int clnt_setup_rlist(CONN *, XDR *, XDR *);
677387SRobert.Gordon@Sun.COM static int clnt_setup_wlist(CONN *, XDR *, XDR *);
687387SRobert.Gordon@Sun.COM static int clnt_setup_long_reply(CONN *, struct clist **, uint_t);
697387SRobert.Gordon@Sun.COM static void clnt_check_credit(CONN *);
707387SRobert.Gordon@Sun.COM static void clnt_return_credit(CONN *);
717387SRobert.Gordon@Sun.COM static void clnt_decode_long_reply(CONN *, struct clist *,
727387SRobert.Gordon@Sun.COM 		struct clist *, XDR *, XDR **, struct clist *,
737387SRobert.Gordon@Sun.COM 		struct clist *, uint_t, uint_t);
747387SRobert.Gordon@Sun.COM 
757387SRobert.Gordon@Sun.COM static void clnt_update_credit(CONN *, uint32_t);
767387SRobert.Gordon@Sun.COM static void check_dereg_wlist(CONN *, struct clist *);
770Sstevel@tonic-gate 
780Sstevel@tonic-gate static enum clnt_stat clnt_rdma_kcallit(CLIENT *, rpcproc_t, xdrproc_t,
790Sstevel@tonic-gate     caddr_t, xdrproc_t, caddr_t, struct timeval);
800Sstevel@tonic-gate static void	clnt_rdma_kabort(CLIENT *);
810Sstevel@tonic-gate static void	clnt_rdma_kerror(CLIENT *, struct rpc_err *);
820Sstevel@tonic-gate static bool_t	clnt_rdma_kfreeres(CLIENT *, xdrproc_t, caddr_t);
830Sstevel@tonic-gate static void	clnt_rdma_kdestroy(CLIENT *);
840Sstevel@tonic-gate static bool_t	clnt_rdma_kcontrol(CLIENT *, int, char *);
850Sstevel@tonic-gate static int	clnt_rdma_ksettimers(CLIENT *, struct rpc_timers *,
860Sstevel@tonic-gate     struct rpc_timers *, int, void(*)(int, int, caddr_t), caddr_t, uint32_t);
870Sstevel@tonic-gate 
880Sstevel@tonic-gate /*
890Sstevel@tonic-gate  * Operations vector for RDMA based RPC
900Sstevel@tonic-gate  */
910Sstevel@tonic-gate static struct clnt_ops rdma_clnt_ops = {
920Sstevel@tonic-gate 	clnt_rdma_kcallit,	/* do rpc call */
930Sstevel@tonic-gate 	clnt_rdma_kabort,	/* abort call */
940Sstevel@tonic-gate 	clnt_rdma_kerror,	/* return error status */
950Sstevel@tonic-gate 	clnt_rdma_kfreeres,	/* free results */
960Sstevel@tonic-gate 	clnt_rdma_kdestroy,	/* destroy rpc handle */
970Sstevel@tonic-gate 	clnt_rdma_kcontrol,	/* the ioctl() of rpc */
980Sstevel@tonic-gate 	clnt_rdma_ksettimers,	/* set retry timers */
990Sstevel@tonic-gate };
1000Sstevel@tonic-gate 
1010Sstevel@tonic-gate /*
1020Sstevel@tonic-gate  * The size of the preserialized RPC header information.
1030Sstevel@tonic-gate  */
#define	CKU_HDRSIZE	20

/* Status codes returned by the static clnt_* helpers in this file. */
#define	CLNT_RDMA_SUCCESS 0
#define	CLNT_RDMA_FAIL (-1)

/* Initial value of the auth refresh counter used by the call path. */
#define	AUTH_REFRESH_COUNT 2

/*
 * True when the client handle's credential flavor is RPCSEC_GSS.
 * The argument is parenthesized so that any pointer expression
 * (e.g. "&client" or "tbl + 1") may be passed safely.
 */
#define	IS_RPCSEC_GSS(authh)			\
	((authh)->cl_auth->ah_cred.oa_flavor == RPCSEC_GSS)
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate /*
1140Sstevel@tonic-gate  * Per RPC RDMA endpoint details
1150Sstevel@tonic-gate  */
1160Sstevel@tonic-gate typedef struct cku_private {
1170Sstevel@tonic-gate 	CLIENT			cku_client;	/* client handle */
1180Sstevel@tonic-gate 	rdma_mod_t		*cku_rd_mod;	/* underlying RDMA mod */
1190Sstevel@tonic-gate 	void			*cku_rd_handle;	/* underlying RDMA device */
1200Sstevel@tonic-gate 	struct netbuf		cku_addr;	/* remote netbuf address */
1210Sstevel@tonic-gate 	int			cku_addrfmly;	/* for finding addr_type */
1220Sstevel@tonic-gate 	struct rpc_err		cku_err;	/* error status */
1230Sstevel@tonic-gate 	struct cred		*cku_cred;	/* credentials */
1240Sstevel@tonic-gate 	XDR			cku_outxdr;	/* xdr stream for output */
1250Sstevel@tonic-gate 	uint32_t		cku_outsz;
1260Sstevel@tonic-gate 	XDR			cku_inxdr;	/* xdr stream for input */
1270Sstevel@tonic-gate 	char			cku_rpchdr[CKU_HDRSIZE+4]; /* rpc header */
1280Sstevel@tonic-gate 	uint32_t		cku_xid;	/* current XID */
1290Sstevel@tonic-gate } cku_private_t;
1300Sstevel@tonic-gate 
1310Sstevel@tonic-gate #define	CLNT_RDMA_DELAY	10	/* secs to delay after a connection failure */
1320Sstevel@tonic-gate static int clnt_rdma_min_delay = CLNT_RDMA_DELAY;
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate struct {
1350Sstevel@tonic-gate 	kstat_named_t	rccalls;
1360Sstevel@tonic-gate 	kstat_named_t	rcbadcalls;
1370Sstevel@tonic-gate 	kstat_named_t	rcbadxids;
1380Sstevel@tonic-gate 	kstat_named_t	rctimeouts;
1390Sstevel@tonic-gate 	kstat_named_t	rcnewcreds;
1400Sstevel@tonic-gate 	kstat_named_t	rcbadverfs;
1410Sstevel@tonic-gate 	kstat_named_t	rctimers;
1420Sstevel@tonic-gate 	kstat_named_t	rccantconn;
1430Sstevel@tonic-gate 	kstat_named_t	rcnomem;
1440Sstevel@tonic-gate 	kstat_named_t	rcintrs;
1450Sstevel@tonic-gate 	kstat_named_t	rclongrpcs;
1460Sstevel@tonic-gate } rdmarcstat = {
1470Sstevel@tonic-gate 	{ "calls",	KSTAT_DATA_UINT64 },
1480Sstevel@tonic-gate 	{ "badcalls",	KSTAT_DATA_UINT64 },
1490Sstevel@tonic-gate 	{ "badxids",	KSTAT_DATA_UINT64 },
1500Sstevel@tonic-gate 	{ "timeouts",	KSTAT_DATA_UINT64 },
1510Sstevel@tonic-gate 	{ "newcreds",	KSTAT_DATA_UINT64 },
1520Sstevel@tonic-gate 	{ "badverfs",	KSTAT_DATA_UINT64 },
1530Sstevel@tonic-gate 	{ "timers",	KSTAT_DATA_UINT64 },
1540Sstevel@tonic-gate 	{ "cantconn",	KSTAT_DATA_UINT64 },
1550Sstevel@tonic-gate 	{ "nomem",	KSTAT_DATA_UINT64 },
1560Sstevel@tonic-gate 	{ "interrupts", KSTAT_DATA_UINT64 },
1570Sstevel@tonic-gate 	{ "longrpc", 	KSTAT_DATA_UINT64 }
1580Sstevel@tonic-gate };
1590Sstevel@tonic-gate 
1600Sstevel@tonic-gate kstat_named_t *rdmarcstat_ptr = (kstat_named_t *)&rdmarcstat;
1610Sstevel@tonic-gate uint_t rdmarcstat_ndata = sizeof (rdmarcstat) / sizeof (kstat_named_t);
1620Sstevel@tonic-gate 
#ifdef DEBUG
int rdma_clnt_debug = 0;
#endif

/*
 * RCSTAT_INCR(x) bumps the rdmarcstat counter named x.  With
 * accurate_stats defined the update is done under rdmarcstat_lock;
 * otherwise it is a plain, unlocked increment.
 *
 * The locked form is wrapped in a compound statement so that the three
 * operations stay together when the macro is the body of an unbraced
 * "if" or loop.  Call sites may follow it with ';' as before.
 */
#ifdef accurate_stats
extern kmutex_t rdmarcstat_lock;    /* mutex for rcstat updates */

#define	RCSTAT_INCR(x)				\
	{					\
		mutex_enter(&rdmarcstat_lock);	\
		rdmarcstat.x.value.ui64++;	\
		mutex_exit(&rdmarcstat_lock);	\
	}
#else
#define	RCSTAT_INCR(x)			\
	rdmarcstat.x.value.ui64++;
#endif

/* Convert between the private endpoint state and its client handle. */
#define	ptoh(p)		(&((p)->cku_client))
#define	htop(h)		((cku_private_t *)((h)->cl_private))
1810Sstevel@tonic-gate 
1827387SRobert.Gordon@Sun.COM uint_t
1837387SRobert.Gordon@Sun.COM calc_length(uint_t len)
1847387SRobert.Gordon@Sun.COM {
1857387SRobert.Gordon@Sun.COM 	len = RNDUP(len);
1867387SRobert.Gordon@Sun.COM 
1877387SRobert.Gordon@Sun.COM 	if (len <= 64 * 1024) {
1887387SRobert.Gordon@Sun.COM 		if (len > 32 * 1024) {
1897387SRobert.Gordon@Sun.COM 			len = 64 * 1024;
1907387SRobert.Gordon@Sun.COM 		} else {
1917387SRobert.Gordon@Sun.COM 			if (len > 16 * 1024) {
1927387SRobert.Gordon@Sun.COM 				len = 32 * 1024;
1937387SRobert.Gordon@Sun.COM 			} else {
1947387SRobert.Gordon@Sun.COM 				if (len > 8 * 1024) {
1957387SRobert.Gordon@Sun.COM 					len = 16 * 1024;
1967387SRobert.Gordon@Sun.COM 				} else {
1977387SRobert.Gordon@Sun.COM 					len = 8 * 1024;
1987387SRobert.Gordon@Sun.COM 				}
1997387SRobert.Gordon@Sun.COM 			}
2007387SRobert.Gordon@Sun.COM 		}
2017387SRobert.Gordon@Sun.COM 	}
2027387SRobert.Gordon@Sun.COM 	return (len);
2037387SRobert.Gordon@Sun.COM }
2040Sstevel@tonic-gate int
2050Sstevel@tonic-gate clnt_rdma_kcreate(char *proto, void *handle, struct netbuf *raddr, int family,
2060Sstevel@tonic-gate     rpcprog_t pgm, rpcvers_t vers, struct cred *cred, CLIENT **cl)
2070Sstevel@tonic-gate {
2080Sstevel@tonic-gate 	CLIENT *h;
2090Sstevel@tonic-gate 	struct cku_private *p;
2100Sstevel@tonic-gate 	struct rpc_msg call_msg;
2110Sstevel@tonic-gate 	rdma_registry_t *rp;
2120Sstevel@tonic-gate 
2130Sstevel@tonic-gate 	ASSERT(INGLOBALZONE(curproc));
2140Sstevel@tonic-gate 
2150Sstevel@tonic-gate 	if (cl == NULL)
2160Sstevel@tonic-gate 		return (EINVAL);
2170Sstevel@tonic-gate 	*cl = NULL;
2180Sstevel@tonic-gate 
2190Sstevel@tonic-gate 	p = kmem_zalloc(sizeof (*p), KM_SLEEP);
2200Sstevel@tonic-gate 
2210Sstevel@tonic-gate 	/*
2220Sstevel@tonic-gate 	 * Find underlying RDMATF plugin
2230Sstevel@tonic-gate 	 */
2240Sstevel@tonic-gate 	rw_enter(&rdma_lock, RW_READER);
2250Sstevel@tonic-gate 	rp = rdma_mod_head;
2260Sstevel@tonic-gate 	while (rp != NULL) {
2270Sstevel@tonic-gate 		if (strcmp(rp->r_mod->rdma_api, proto))
2280Sstevel@tonic-gate 			rp = rp->r_next;
2290Sstevel@tonic-gate 		else {
2300Sstevel@tonic-gate 			p->cku_rd_mod = rp->r_mod;
2310Sstevel@tonic-gate 			p->cku_rd_handle = handle;
2320Sstevel@tonic-gate 			break;
2330Sstevel@tonic-gate 		}
2340Sstevel@tonic-gate 	}
2350Sstevel@tonic-gate 	rw_exit(&rdma_lock);
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	if (p->cku_rd_mod == NULL) {
2380Sstevel@tonic-gate 		/*
2390Sstevel@tonic-gate 		 * Should not happen.
2400Sstevel@tonic-gate 		 * No matching RDMATF plugin.
2410Sstevel@tonic-gate 		 */
2420Sstevel@tonic-gate 		kmem_free(p, sizeof (struct cku_private));
2430Sstevel@tonic-gate 		return (EINVAL);
2440Sstevel@tonic-gate 	}
2450Sstevel@tonic-gate 
2460Sstevel@tonic-gate 	h = ptoh(p);
2470Sstevel@tonic-gate 	h->cl_ops = &rdma_clnt_ops;
2480Sstevel@tonic-gate 	h->cl_private = (caddr_t)p;
2490Sstevel@tonic-gate 	h->cl_auth = authkern_create();
2500Sstevel@tonic-gate 
2510Sstevel@tonic-gate 	/* call message, just used to pre-serialize below */
2520Sstevel@tonic-gate 	call_msg.rm_xid = 0;
2530Sstevel@tonic-gate 	call_msg.rm_direction = CALL;
2540Sstevel@tonic-gate 	call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
2550Sstevel@tonic-gate 	call_msg.rm_call.cb_prog = pgm;
2560Sstevel@tonic-gate 	call_msg.rm_call.cb_vers = vers;
2570Sstevel@tonic-gate 
2580Sstevel@tonic-gate 	xdrmem_create(&p->cku_outxdr, p->cku_rpchdr, CKU_HDRSIZE, XDR_ENCODE);
2590Sstevel@tonic-gate 	/* pre-serialize call message header */
2600Sstevel@tonic-gate 	if (!xdr_callhdr(&p->cku_outxdr, &call_msg)) {
2610Sstevel@tonic-gate 		XDR_DESTROY(&p->cku_outxdr);
2620Sstevel@tonic-gate 		auth_destroy(h->cl_auth);
2630Sstevel@tonic-gate 		kmem_free(p, sizeof (struct cku_private));
2640Sstevel@tonic-gate 		return (EINVAL);
2650Sstevel@tonic-gate 	}
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate 	/*
2680Sstevel@tonic-gate 	 * Set up the rpc information
2690Sstevel@tonic-gate 	 */
2700Sstevel@tonic-gate 	p->cku_cred = cred;
2710Sstevel@tonic-gate 	p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
2720Sstevel@tonic-gate 	p->cku_addr.maxlen = raddr->maxlen;
2730Sstevel@tonic-gate 	p->cku_addr.len = raddr->len;
2740Sstevel@tonic-gate 	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
2750Sstevel@tonic-gate 	p->cku_addrfmly = family;
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 	*cl = h;
2780Sstevel@tonic-gate 	return (0);
2790Sstevel@tonic-gate }
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate static void
2820Sstevel@tonic-gate clnt_rdma_kdestroy(CLIENT *h)
2830Sstevel@tonic-gate {
2840Sstevel@tonic-gate 	struct cku_private *p = htop(h);
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 	kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
2870Sstevel@tonic-gate 	kmem_free(p, sizeof (*p));
2880Sstevel@tonic-gate }
2890Sstevel@tonic-gate 
2900Sstevel@tonic-gate void
2910Sstevel@tonic-gate clnt_rdma_kinit(CLIENT *h, char *proto, void *handle, struct netbuf *raddr,
2920Sstevel@tonic-gate     struct cred *cred)
2930Sstevel@tonic-gate {
2940Sstevel@tonic-gate 	struct cku_private *p = htop(h);
2950Sstevel@tonic-gate 	rdma_registry_t *rp;
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate 	ASSERT(INGLOBALZONE(curproc));
2980Sstevel@tonic-gate 	/*
2990Sstevel@tonic-gate 	 * Find underlying RDMATF plugin
3000Sstevel@tonic-gate 	 */
3010Sstevel@tonic-gate 	p->cku_rd_mod = NULL;
3020Sstevel@tonic-gate 	rw_enter(&rdma_lock, RW_READER);
3030Sstevel@tonic-gate 	rp = rdma_mod_head;
3040Sstevel@tonic-gate 	while (rp != NULL) {
3050Sstevel@tonic-gate 		if (strcmp(rp->r_mod->rdma_api, proto))
3060Sstevel@tonic-gate 			rp = rp->r_next;
3070Sstevel@tonic-gate 		else {
3080Sstevel@tonic-gate 			p->cku_rd_mod = rp->r_mod;
3090Sstevel@tonic-gate 			p->cku_rd_handle = handle;
3100Sstevel@tonic-gate 			break;
3110Sstevel@tonic-gate 		}
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	}
3140Sstevel@tonic-gate 	rw_exit(&rdma_lock);
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	/*
3170Sstevel@tonic-gate 	 * Set up the rpc information
3180Sstevel@tonic-gate 	 */
3190Sstevel@tonic-gate 	p->cku_cred = cred;
3200Sstevel@tonic-gate 	p->cku_xid = 0;
3210Sstevel@tonic-gate 
3220Sstevel@tonic-gate 	if (p->cku_addr.maxlen < raddr->len) {
3230Sstevel@tonic-gate 		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
3240Sstevel@tonic-gate 			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
3250Sstevel@tonic-gate 		p->cku_addr.buf = kmem_zalloc(raddr->maxlen, KM_SLEEP);
3260Sstevel@tonic-gate 		p->cku_addr.maxlen = raddr->maxlen;
3270Sstevel@tonic-gate 	}
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	p->cku_addr.len = raddr->len;
3300Sstevel@tonic-gate 	bcopy(raddr->buf, p->cku_addr.buf, raddr->len);
3310Sstevel@tonic-gate 	h->cl_ops = &rdma_clnt_ops;
3320Sstevel@tonic-gate }
3330Sstevel@tonic-gate 
3347387SRobert.Gordon@Sun.COM static int
3357387SRobert.Gordon@Sun.COM clnt_compose_rpcmsg(CLIENT *h, rpcproc_t procnum,
3367387SRobert.Gordon@Sun.COM     rdma_buf_t *rpcmsg, XDR *xdrs,
3377387SRobert.Gordon@Sun.COM     xdrproc_t xdr_args, caddr_t argsp)
3387387SRobert.Gordon@Sun.COM {
3397387SRobert.Gordon@Sun.COM 	cku_private_t *p = htop(h);
3407387SRobert.Gordon@Sun.COM 
3417387SRobert.Gordon@Sun.COM 	if (h->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
3427387SRobert.Gordon@Sun.COM 		/*
3437387SRobert.Gordon@Sun.COM 		 * Copy in the preserialized RPC header
3447387SRobert.Gordon@Sun.COM 		 * information.
3457387SRobert.Gordon@Sun.COM 		 */
3467387SRobert.Gordon@Sun.COM 		bcopy(p->cku_rpchdr, rpcmsg->addr, CKU_HDRSIZE);
3477387SRobert.Gordon@Sun.COM 
3487387SRobert.Gordon@Sun.COM 		/*
3497387SRobert.Gordon@Sun.COM 		 * transaction id is the 1st thing in the output
3507387SRobert.Gordon@Sun.COM 		 * buffer.
3517387SRobert.Gordon@Sun.COM 		 */
3527387SRobert.Gordon@Sun.COM 		/* LINTED pointer alignment */
3537387SRobert.Gordon@Sun.COM 		(*(uint32_t *)(rpcmsg->addr)) = p->cku_xid;
3547387SRobert.Gordon@Sun.COM 
3557387SRobert.Gordon@Sun.COM 		/* Skip the preserialized stuff. */
3567387SRobert.Gordon@Sun.COM 		XDR_SETPOS(xdrs, CKU_HDRSIZE);
3577387SRobert.Gordon@Sun.COM 
3587387SRobert.Gordon@Sun.COM 		/* Serialize dynamic stuff into the output buffer. */
3597387SRobert.Gordon@Sun.COM 		if ((!XDR_PUTINT32(xdrs, (int32_t *)&procnum)) ||
3607387SRobert.Gordon@Sun.COM 		    (!AUTH_MARSHALL(h->cl_auth, xdrs, p->cku_cred)) ||
3617387SRobert.Gordon@Sun.COM 		    (!(*xdr_args)(xdrs, argsp))) {
3627387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__dynargs);
3637387SRobert.Gordon@Sun.COM 			return (CLNT_RDMA_FAIL);
3647387SRobert.Gordon@Sun.COM 		}
3657387SRobert.Gordon@Sun.COM 		p->cku_outsz = XDR_GETPOS(xdrs);
3667387SRobert.Gordon@Sun.COM 	} else {
3677387SRobert.Gordon@Sun.COM 		uint32_t *uproc = (uint32_t *)&p->cku_rpchdr[CKU_HDRSIZE];
3687387SRobert.Gordon@Sun.COM 		IXDR_PUT_U_INT32(uproc, procnum);
3697387SRobert.Gordon@Sun.COM 		(*(uint32_t *)(&p->cku_rpchdr[0])) = p->cku_xid;
3707387SRobert.Gordon@Sun.COM 		XDR_SETPOS(xdrs, 0);
3717387SRobert.Gordon@Sun.COM 
3727387SRobert.Gordon@Sun.COM 		/* Serialize the procedure number and the arguments. */
3737387SRobert.Gordon@Sun.COM 		if (!AUTH_WRAP(h->cl_auth, (caddr_t)p->cku_rpchdr,
3747387SRobert.Gordon@Sun.COM 		    CKU_HDRSIZE+4, xdrs, xdr_args, argsp)) {
3757387SRobert.Gordon@Sun.COM 			if (rpcmsg->addr != xdrs->x_base) {
3767387SRobert.Gordon@Sun.COM 				rpcmsg->addr = xdrs->x_base;
3777387SRobert.Gordon@Sun.COM 				rpcmsg->len = xdr_getbufsize(xdrs);
3787387SRobert.Gordon@Sun.COM 			}
3797387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(krpc__e__clntrdma__rpcmsg__procnum);
3807387SRobert.Gordon@Sun.COM 			return (CLNT_RDMA_FAIL);
3817387SRobert.Gordon@Sun.COM 		}
3827387SRobert.Gordon@Sun.COM 		/*
3837387SRobert.Gordon@Sun.COM 		 * If we had to allocate a new buffer while encoding
3847387SRobert.Gordon@Sun.COM 		 * then update the addr and len.
3857387SRobert.Gordon@Sun.COM 		 */
3867387SRobert.Gordon@Sun.COM 		if (rpcmsg->addr != xdrs->x_base) {
3877387SRobert.Gordon@Sun.COM 			rpcmsg->addr = xdrs->x_base;
3887387SRobert.Gordon@Sun.COM 			rpcmsg->len = xdr_getbufsize(xdrs);
3897387SRobert.Gordon@Sun.COM 		}
3907387SRobert.Gordon@Sun.COM 
3917387SRobert.Gordon@Sun.COM 		p->cku_outsz = XDR_GETPOS(xdrs);
3927387SRobert.Gordon@Sun.COM 		DTRACE_PROBE1(krpc__i__compose__size__sec, int, p->cku_outsz)
3937387SRobert.Gordon@Sun.COM 	}
3947387SRobert.Gordon@Sun.COM 
3957387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
3967387SRobert.Gordon@Sun.COM }
3977387SRobert.Gordon@Sun.COM 
3987387SRobert.Gordon@Sun.COM static int
3997387SRobert.Gordon@Sun.COM clnt_compose_rdma_header(CONN *conn, CLIENT *h, rdma_buf_t *clmsg,
4007387SRobert.Gordon@Sun.COM     XDR **xdrs, uint_t *op)
4017387SRobert.Gordon@Sun.COM {
4027387SRobert.Gordon@Sun.COM 	cku_private_t *p = htop(h);
4037387SRobert.Gordon@Sun.COM 	uint_t vers;
4047387SRobert.Gordon@Sun.COM 	uint32_t rdma_credit = rdma_bufs_rqst;
4057387SRobert.Gordon@Sun.COM 
4067387SRobert.Gordon@Sun.COM 	vers = RPCRDMA_VERS;
4077387SRobert.Gordon@Sun.COM 	clmsg->type = SEND_BUFFER;
4087387SRobert.Gordon@Sun.COM 
4097387SRobert.Gordon@Sun.COM 	if (rdma_buf_alloc(conn, clmsg)) {
4107387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_FAIL);
4117387SRobert.Gordon@Sun.COM 	}
4127387SRobert.Gordon@Sun.COM 
4137387SRobert.Gordon@Sun.COM 	*xdrs = &p->cku_outxdr;
4147387SRobert.Gordon@Sun.COM 	xdrmem_create(*xdrs, clmsg->addr, clmsg->len, XDR_ENCODE);
4157387SRobert.Gordon@Sun.COM 
4167387SRobert.Gordon@Sun.COM 	(*(uint32_t *)clmsg->addr) = p->cku_xid;
4177387SRobert.Gordon@Sun.COM 	XDR_SETPOS(*xdrs, sizeof (uint32_t));
4187387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(*xdrs, &vers);
4197387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(*xdrs, &rdma_credit);
4207387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(*xdrs, op);
4217387SRobert.Gordon@Sun.COM 
4227387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
4237387SRobert.Gordon@Sun.COM }
4247387SRobert.Gordon@Sun.COM 
4257387SRobert.Gordon@Sun.COM /*
4267387SRobert.Gordon@Sun.COM  * If xp_cl is NULL value, then the RPC payload will NOT carry
4277387SRobert.Gordon@Sun.COM  * an RDMA READ chunk list, in this case we insert FALSE into
4287387SRobert.Gordon@Sun.COM  * the XDR stream. Otherwise we use the clist and RDMA register
4297387SRobert.Gordon@Sun.COM  * the memory and encode the clist into the outbound XDR stream.
4307387SRobert.Gordon@Sun.COM  */
4317387SRobert.Gordon@Sun.COM static int
4327387SRobert.Gordon@Sun.COM clnt_setup_rlist(CONN *conn, XDR *xdrs, XDR *call_xdrp)
4337387SRobert.Gordon@Sun.COM {
4347387SRobert.Gordon@Sun.COM 	int status;
4357387SRobert.Gordon@Sun.COM 	struct clist *rclp;
4367387SRobert.Gordon@Sun.COM 	int32_t xdr_flag = XDR_RDMA_RLIST_REG;
4377387SRobert.Gordon@Sun.COM 
4387387SRobert.Gordon@Sun.COM 	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &rclp);
4397387SRobert.Gordon@Sun.COM 
4407387SRobert.Gordon@Sun.COM 	if (rclp != NULL) {
4417387SRobert.Gordon@Sun.COM 		status = clist_register(conn, rclp, CLIST_REG_SOURCE);
4427387SRobert.Gordon@Sun.COM 		if (status != RDMA_SUCCESS) {
4437387SRobert.Gordon@Sun.COM 			return (CLNT_RDMA_FAIL);
4447387SRobert.Gordon@Sun.COM 		}
4457387SRobert.Gordon@Sun.COM 		XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag);
4467387SRobert.Gordon@Sun.COM 	}
4477387SRobert.Gordon@Sun.COM 	(void) xdr_do_clist(xdrs, &rclp);
4487387SRobert.Gordon@Sun.COM 
4497387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
4507387SRobert.Gordon@Sun.COM }
4517387SRobert.Gordon@Sun.COM 
4527387SRobert.Gordon@Sun.COM /*
4537387SRobert.Gordon@Sun.COM  * If xp_wcl is NULL value, then the RPC payload will NOT carry
4547387SRobert.Gordon@Sun.COM  * an RDMA WRITE chunk list, in this case we insert FALSE into
4557387SRobert.Gordon@Sun.COM  * the XDR stream. Otherwise we use the clist and  RDMA register
4567387SRobert.Gordon@Sun.COM  * the memory and encode the clist into the outbound XDR stream.
4577387SRobert.Gordon@Sun.COM  */
4587387SRobert.Gordon@Sun.COM static int
4597387SRobert.Gordon@Sun.COM clnt_setup_wlist(CONN *conn, XDR *xdrs, XDR *call_xdrp)
4607387SRobert.Gordon@Sun.COM {
4617387SRobert.Gordon@Sun.COM 	int status;
4627387SRobert.Gordon@Sun.COM 	struct clist *wlist;
4637387SRobert.Gordon@Sun.COM 	int32_t xdr_flag = XDR_RDMA_WLIST_REG;
4647387SRobert.Gordon@Sun.COM 
4657387SRobert.Gordon@Sun.COM 	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_WLIST, &wlist);
4667387SRobert.Gordon@Sun.COM 
4677387SRobert.Gordon@Sun.COM 	if (wlist != NULL) {
4687387SRobert.Gordon@Sun.COM 		status = clist_register(conn, wlist, CLIST_REG_DST);
4697387SRobert.Gordon@Sun.COM 		if (status != RDMA_SUCCESS) {
4707387SRobert.Gordon@Sun.COM 			return (CLNT_RDMA_FAIL);
4717387SRobert.Gordon@Sun.COM 		}
4727387SRobert.Gordon@Sun.COM 		XDR_CONTROL(call_xdrp, XDR_RDMA_SET_FLAGS, &xdr_flag);
4737387SRobert.Gordon@Sun.COM 	}
4747387SRobert.Gordon@Sun.COM 
4757387SRobert.Gordon@Sun.COM 	if (!xdr_encode_wlist(xdrs, wlist))
4767387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_FAIL);
4777387SRobert.Gordon@Sun.COM 
4787387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
4797387SRobert.Gordon@Sun.COM }
4807387SRobert.Gordon@Sun.COM 
4817387SRobert.Gordon@Sun.COM static int
4827387SRobert.Gordon@Sun.COM clnt_setup_long_reply(CONN *conn, struct clist **clpp, uint_t length)
4837387SRobert.Gordon@Sun.COM {
4847387SRobert.Gordon@Sun.COM 	if (length == 0) {
4857387SRobert.Gordon@Sun.COM 		*clpp = NULL;
4867387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_SUCCESS);
4877387SRobert.Gordon@Sun.COM 	}
4887387SRobert.Gordon@Sun.COM 
4897387SRobert.Gordon@Sun.COM 	*clpp = clist_alloc();
4907387SRobert.Gordon@Sun.COM 
4917387SRobert.Gordon@Sun.COM 	(*clpp)->rb_longbuf.len = calc_length(length);
4927387SRobert.Gordon@Sun.COM 	(*clpp)->rb_longbuf.type = RDMA_LONG_BUFFER;
4937387SRobert.Gordon@Sun.COM 
4947387SRobert.Gordon@Sun.COM 	if (rdma_buf_alloc(conn, &((*clpp)->rb_longbuf))) {
4957387SRobert.Gordon@Sun.COM 		clist_free(*clpp);
4967387SRobert.Gordon@Sun.COM 		*clpp = NULL;
4977387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_FAIL);
4987387SRobert.Gordon@Sun.COM 	}
4997387SRobert.Gordon@Sun.COM 
5007387SRobert.Gordon@Sun.COM 	(*clpp)->u.c_daddr3 = (*clpp)->rb_longbuf.addr;
5017387SRobert.Gordon@Sun.COM 	(*clpp)->c_len = (*clpp)->rb_longbuf.len;
5027387SRobert.Gordon@Sun.COM 	(*clpp)->c_next = NULL;
5037387SRobert.Gordon@Sun.COM 	(*clpp)->c_dmemhandle = (*clpp)->rb_longbuf.handle;
5047387SRobert.Gordon@Sun.COM 
5057387SRobert.Gordon@Sun.COM 	if (clist_register(conn, *clpp, CLIST_REG_DST)) {
5067387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__longrep_regbuf);
5077387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &((*clpp)->rb_longbuf));
5087387SRobert.Gordon@Sun.COM 		clist_free(*clpp);
5097387SRobert.Gordon@Sun.COM 		return (CLNT_RDMA_FAIL);
5107387SRobert.Gordon@Sun.COM 	}
5117387SRobert.Gordon@Sun.COM 
5127387SRobert.Gordon@Sun.COM 	return (CLNT_RDMA_SUCCESS);
5137387SRobert.Gordon@Sun.COM }
5147387SRobert.Gordon@Sun.COM 
5150Sstevel@tonic-gate /* ARGSUSED */
5160Sstevel@tonic-gate static enum clnt_stat
5170Sstevel@tonic-gate clnt_rdma_kcallit(CLIENT *h, rpcproc_t procnum, xdrproc_t xdr_args,
5187387SRobert.Gordon@Sun.COM     caddr_t argsp, xdrproc_t xdr_results, caddr_t resultsp,
5197387SRobert.Gordon@Sun.COM     struct timeval wait)
5200Sstevel@tonic-gate {
5210Sstevel@tonic-gate 	cku_private_t *p = htop(h);
5227387SRobert.Gordon@Sun.COM 
5237387SRobert.Gordon@Sun.COM 	int 	try_call_again;
5247387SRobert.Gordon@Sun.COM 	int	refresh_attempt = AUTH_REFRESH_COUNT;
5250Sstevel@tonic-gate 	int 	status;
5267387SRobert.Gordon@Sun.COM 	int 	msglen;
5277387SRobert.Gordon@Sun.COM 
5287387SRobert.Gordon@Sun.COM 	XDR	*call_xdrp, callxdr; /* for xdrrdma encoding the RPC call */
5297387SRobert.Gordon@Sun.COM 	XDR	*reply_xdrp, replyxdr; /* for xdrrdma decoding the RPC reply */
5307387SRobert.Gordon@Sun.COM 	XDR 	*rdmahdr_o_xdrs, *rdmahdr_i_xdrs;
5317387SRobert.Gordon@Sun.COM 
5320Sstevel@tonic-gate 	struct rpc_msg 	reply_msg;
533*8695SRajkumar.Sivaprakasam@Sun.COM 	rdma_registry_t	*m;
5347387SRobert.Gordon@Sun.COM 
5357387SRobert.Gordon@Sun.COM 	struct clist *cl_sendlist;
5367387SRobert.Gordon@Sun.COM 	struct clist *cl_recvlist;
5377387SRobert.Gordon@Sun.COM 	struct clist *cl;
5387387SRobert.Gordon@Sun.COM 	struct clist *cl_rpcmsg;
5397387SRobert.Gordon@Sun.COM 	struct clist *cl_rdma_reply;
5407387SRobert.Gordon@Sun.COM 	struct clist *cl_rpcreply_wlist;
5417387SRobert.Gordon@Sun.COM 	struct clist *cl_long_reply;
5427387SRobert.Gordon@Sun.COM 
5437387SRobert.Gordon@Sun.COM 	uint_t vers;
5447387SRobert.Gordon@Sun.COM 	uint_t op;
5450Sstevel@tonic-gate 	uint_t off;
5467387SRobert.Gordon@Sun.COM 	uint32_t seg_array_len;
5477387SRobert.Gordon@Sun.COM 	uint_t long_reply_len;
5487387SRobert.Gordon@Sun.COM 	uint_t rpcsec_gss;
5497387SRobert.Gordon@Sun.COM 	uint_t gss_i_or_p;
5507387SRobert.Gordon@Sun.COM 
5510Sstevel@tonic-gate 	CONN *conn = NULL;
5527387SRobert.Gordon@Sun.COM 	rdma_buf_t clmsg;
5537387SRobert.Gordon@Sun.COM 	rdma_buf_t rpcmsg;
5547387SRobert.Gordon@Sun.COM 	rdma_chunkinfo_lengths_t rcil;
5557387SRobert.Gordon@Sun.COM 
5560Sstevel@tonic-gate 	clock_t	ticks;
5577387SRobert.Gordon@Sun.COM 	bool_t wlist_exists_reply;
5587387SRobert.Gordon@Sun.COM 
5597387SRobert.Gordon@Sun.COM 	uint32_t rdma_credit = rdma_bufs_rqst;
5600Sstevel@tonic-gate 
5610Sstevel@tonic-gate 	RCSTAT_INCR(rccalls);
5627387SRobert.Gordon@Sun.COM 
5637387SRobert.Gordon@Sun.COM call_again:
5647387SRobert.Gordon@Sun.COM 
5657387SRobert.Gordon@Sun.COM 	bzero(&clmsg, sizeof (clmsg));
5667387SRobert.Gordon@Sun.COM 	bzero(&rpcmsg, sizeof (rpcmsg));
5677387SRobert.Gordon@Sun.COM 	try_call_again = 0;
5687387SRobert.Gordon@Sun.COM 	cl_sendlist = NULL;
5697387SRobert.Gordon@Sun.COM 	cl_recvlist = NULL;
5707387SRobert.Gordon@Sun.COM 	cl = NULL;
5717387SRobert.Gordon@Sun.COM 	cl_rpcmsg = NULL;
5727387SRobert.Gordon@Sun.COM 	cl_rdma_reply = NULL;
5737387SRobert.Gordon@Sun.COM 	call_xdrp = NULL;
5747387SRobert.Gordon@Sun.COM 	reply_xdrp = NULL;
5757387SRobert.Gordon@Sun.COM 	wlist_exists_reply  = FALSE;
5767387SRobert.Gordon@Sun.COM 	cl_rpcreply_wlist = NULL;
5777387SRobert.Gordon@Sun.COM 	cl_long_reply = NULL;
5787387SRobert.Gordon@Sun.COM 	rcil.rcil_len = 0;
5797387SRobert.Gordon@Sun.COM 	rcil.rcil_len_alt = 0;
5807387SRobert.Gordon@Sun.COM 	long_reply_len = 0;
5817387SRobert.Gordon@Sun.COM 
582*8695SRajkumar.Sivaprakasam@Sun.COM 	rw_enter(&rdma_lock, RW_READER);
583*8695SRajkumar.Sivaprakasam@Sun.COM 	m = (rdma_registry_t *)p->cku_rd_handle;
584*8695SRajkumar.Sivaprakasam@Sun.COM 	if (m->r_mod_state == RDMA_MOD_INACTIVE) {
585*8695SRajkumar.Sivaprakasam@Sun.COM 		/*
586*8695SRajkumar.Sivaprakasam@Sun.COM 		 * If we didn't find a matching RDMA module in the registry
587*8695SRajkumar.Sivaprakasam@Sun.COM 		 * then there is no transport.
588*8695SRajkumar.Sivaprakasam@Sun.COM 		 */
589*8695SRajkumar.Sivaprakasam@Sun.COM 		rw_exit(&rdma_lock);
590*8695SRajkumar.Sivaprakasam@Sun.COM 		p->cku_err.re_status = RPC_CANTSEND;
591*8695SRajkumar.Sivaprakasam@Sun.COM 		p->cku_err.re_errno = EIO;
592*8695SRajkumar.Sivaprakasam@Sun.COM 		ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
593*8695SRajkumar.Sivaprakasam@Sun.COM 		if (h->cl_nosignal == TRUE) {
594*8695SRajkumar.Sivaprakasam@Sun.COM 			delay(ticks);
595*8695SRajkumar.Sivaprakasam@Sun.COM 		} else {
596*8695SRajkumar.Sivaprakasam@Sun.COM 			if (delay_sig(ticks) == EINTR) {
597*8695SRajkumar.Sivaprakasam@Sun.COM 				p->cku_err.re_status = RPC_INTR;
598*8695SRajkumar.Sivaprakasam@Sun.COM 				p->cku_err.re_errno = EINTR;
599*8695SRajkumar.Sivaprakasam@Sun.COM 			}
600*8695SRajkumar.Sivaprakasam@Sun.COM 		}
601*8695SRajkumar.Sivaprakasam@Sun.COM 		return (RPC_CANTSEND);
602*8695SRajkumar.Sivaprakasam@Sun.COM 	}
6030Sstevel@tonic-gate 	/*
6040Sstevel@tonic-gate 	 * Get unique xid
6050Sstevel@tonic-gate 	 */
6060Sstevel@tonic-gate 	if (p->cku_xid == 0)
6070Sstevel@tonic-gate 		p->cku_xid = alloc_xid();
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate 	status = RDMA_GET_CONN(p->cku_rd_mod->rdma_ops, &p->cku_addr,
6100Sstevel@tonic-gate 	    p->cku_addrfmly, p->cku_rd_handle, &conn);
611*8695SRajkumar.Sivaprakasam@Sun.COM 	rw_exit(&rdma_lock);
6120Sstevel@tonic-gate 
6137387SRobert.Gordon@Sun.COM 	/*
6147387SRobert.Gordon@Sun.COM 	 * If there is a problem with the connection reflect the issue
6157387SRobert.Gordon@Sun.COM 	 * back to the higher level to address, we MAY delay for a short
6167387SRobert.Gordon@Sun.COM 	 * period so that we are kind to the transport.
6177387SRobert.Gordon@Sun.COM 	 */
6180Sstevel@tonic-gate 	if (conn == NULL) {
6190Sstevel@tonic-gate 		/*
6200Sstevel@tonic-gate 		 * Connect failed to server. Could be because of one
6210Sstevel@tonic-gate 		 * of several things. In some cases we don't want
6220Sstevel@tonic-gate 		 * the caller to retry immediately - delay before
6230Sstevel@tonic-gate 		 * returning to caller.
6240Sstevel@tonic-gate 		 */
6250Sstevel@tonic-gate 		switch (status) {
6260Sstevel@tonic-gate 		case RDMA_TIMEDOUT:
6270Sstevel@tonic-gate 			/*
6280Sstevel@tonic-gate 			 * Already timed out. No need to delay
6290Sstevel@tonic-gate 			 * some more.
6300Sstevel@tonic-gate 			 */
6310Sstevel@tonic-gate 			p->cku_err.re_status = RPC_TIMEDOUT;
6320Sstevel@tonic-gate 			p->cku_err.re_errno = ETIMEDOUT;
6330Sstevel@tonic-gate 			break;
6340Sstevel@tonic-gate 		case RDMA_INTR:
6350Sstevel@tonic-gate 			/*
6360Sstevel@tonic-gate 			 * Failed because of a signal. Very likely
6370Sstevel@tonic-gate 			 * the caller will not retry.
6380Sstevel@tonic-gate 			 */
6390Sstevel@tonic-gate 			p->cku_err.re_status = RPC_INTR;
6400Sstevel@tonic-gate 			p->cku_err.re_errno = EINTR;
6410Sstevel@tonic-gate 			break;
6420Sstevel@tonic-gate 		default:
6430Sstevel@tonic-gate 			/*
6440Sstevel@tonic-gate 			 * All other failures - server down or service
6450Sstevel@tonic-gate 			 * down or temporary resource failure. Delay before
6460Sstevel@tonic-gate 			 * returning to caller.
6470Sstevel@tonic-gate 			 */
6480Sstevel@tonic-gate 			ticks = clnt_rdma_min_delay * drv_usectohz(1000000);
6490Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTCONNECT;
6500Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
6510Sstevel@tonic-gate 
6520Sstevel@tonic-gate 			if (h->cl_nosignal == TRUE) {
6530Sstevel@tonic-gate 				delay(ticks);
6540Sstevel@tonic-gate 			} else {
6550Sstevel@tonic-gate 				if (delay_sig(ticks) == EINTR) {
6560Sstevel@tonic-gate 					p->cku_err.re_status = RPC_INTR;
6570Sstevel@tonic-gate 					p->cku_err.re_errno = EINTR;
6580Sstevel@tonic-gate 				}
6590Sstevel@tonic-gate 			}
6600Sstevel@tonic-gate 			break;
6610Sstevel@tonic-gate 		}
6620Sstevel@tonic-gate 
6630Sstevel@tonic-gate 		return (p->cku_err.re_status);
6640Sstevel@tonic-gate 	}
6657387SRobert.Gordon@Sun.COM 
6667387SRobert.Gordon@Sun.COM 	clnt_check_credit(conn);
6670Sstevel@tonic-gate 
6687387SRobert.Gordon@Sun.COM 	status = CLNT_RDMA_FAIL;
6697387SRobert.Gordon@Sun.COM 
6707387SRobert.Gordon@Sun.COM 	rpcsec_gss = gss_i_or_p = FALSE;
6710Sstevel@tonic-gate 
6727387SRobert.Gordon@Sun.COM 	if (IS_RPCSEC_GSS(h)) {
6737387SRobert.Gordon@Sun.COM 		rpcsec_gss = TRUE;
6747387SRobert.Gordon@Sun.COM 		if (rpc_gss_get_service_type(h->cl_auth) ==
6757387SRobert.Gordon@Sun.COM 		    rpc_gss_svc_integrity ||
6767387SRobert.Gordon@Sun.COM 		    rpc_gss_get_service_type(h->cl_auth) ==
6777387SRobert.Gordon@Sun.COM 		    rpc_gss_svc_privacy)
6787387SRobert.Gordon@Sun.COM 			gss_i_or_p = TRUE;
6790Sstevel@tonic-gate 	}
6800Sstevel@tonic-gate 
6817387SRobert.Gordon@Sun.COM 	/*
6827387SRobert.Gordon@Sun.COM 	 * Try a regular RDMA message if RPCSEC_GSS is not being used
6837387SRobert.Gordon@Sun.COM 	 * or if RPCSEC_GSS is being used for authentication only.
6847387SRobert.Gordon@Sun.COM 	 */
6857387SRobert.Gordon@Sun.COM 	if (rpcsec_gss == FALSE ||
6867387SRobert.Gordon@Sun.COM 	    (rpcsec_gss == TRUE && gss_i_or_p == FALSE)) {
6870Sstevel@tonic-gate 		/*
6887387SRobert.Gordon@Sun.COM 		 * Grab a send buffer for the request.  Try to
6897387SRobert.Gordon@Sun.COM 		 * encode it to see if it fits. If not, then it
6907387SRobert.Gordon@Sun.COM 		 * needs to be sent in a chunk.
6910Sstevel@tonic-gate 		 */
6927387SRobert.Gordon@Sun.COM 		rpcmsg.type = SEND_BUFFER;
6937387SRobert.Gordon@Sun.COM 		if (rdma_buf_alloc(conn, &rpcmsg)) {
6947387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(krpc__e__clntrdma__callit_nobufs);
6950Sstevel@tonic-gate 			goto done;
6960Sstevel@tonic-gate 		}
6977387SRobert.Gordon@Sun.COM 
6987387SRobert.Gordon@Sun.COM 		/* First try to encode into regular send buffer */
6997387SRobert.Gordon@Sun.COM 		op = RDMA_MSG;
7000Sstevel@tonic-gate 
7017387SRobert.Gordon@Sun.COM 		call_xdrp = &callxdr;
7027387SRobert.Gordon@Sun.COM 
7037387SRobert.Gordon@Sun.COM 		xdrrdma_create(call_xdrp, rpcmsg.addr, rpcmsg.len,
7047387SRobert.Gordon@Sun.COM 		    rdma_minchunk, NULL, XDR_ENCODE, conn);
7050Sstevel@tonic-gate 
7067387SRobert.Gordon@Sun.COM 		status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp,
7077387SRobert.Gordon@Sun.COM 		    xdr_args, argsp);
7087387SRobert.Gordon@Sun.COM 
7097387SRobert.Gordon@Sun.COM 		if (status != CLNT_RDMA_SUCCESS) {
7107387SRobert.Gordon@Sun.COM 			/* Clean up from previous encode attempt */
7117387SRobert.Gordon@Sun.COM 			rdma_buf_free(conn, &rpcmsg);
7127387SRobert.Gordon@Sun.COM 			XDR_DESTROY(call_xdrp);
7130Sstevel@tonic-gate 		} else {
7147387SRobert.Gordon@Sun.COM 			XDR_CONTROL(call_xdrp, XDR_RDMA_GET_CHUNK_LEN, &rcil);
7150Sstevel@tonic-gate 		}
7160Sstevel@tonic-gate 	}
7170Sstevel@tonic-gate 
7187387SRobert.Gordon@Sun.COM 	/* If the encode didn't work, then try a NOMSG */
7197387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
7207387SRobert.Gordon@Sun.COM 
7217387SRobert.Gordon@Sun.COM 		msglen = CKU_HDRSIZE + BYTES_PER_XDR_UNIT + MAX_AUTH_BYTES +
7227387SRobert.Gordon@Sun.COM 		    xdr_sizeof(xdr_args, argsp);
7237387SRobert.Gordon@Sun.COM 
7247387SRobert.Gordon@Sun.COM 		msglen = calc_length(msglen);
7257387SRobert.Gordon@Sun.COM 
7267387SRobert.Gordon@Sun.COM 		/* pick up the lengths for the reply buffer needed */
7277387SRobert.Gordon@Sun.COM 		(void) xdrrdma_sizeof(xdr_args, argsp, 0,
7287387SRobert.Gordon@Sun.COM 		    &rcil.rcil_len, &rcil.rcil_len_alt);
7297387SRobert.Gordon@Sun.COM 
7307387SRobert.Gordon@Sun.COM 		/*
7317387SRobert.Gordon@Sun.COM 		 * Construct a clist to describe the CHUNK_BUFFER
7327387SRobert.Gordon@Sun.COM 		 * for the rpcmsg.
7337387SRobert.Gordon@Sun.COM 		 */
7347387SRobert.Gordon@Sun.COM 		cl_rpcmsg = clist_alloc();
7357387SRobert.Gordon@Sun.COM 		cl_rpcmsg->c_len = msglen;
7367387SRobert.Gordon@Sun.COM 		cl_rpcmsg->rb_longbuf.type = RDMA_LONG_BUFFER;
7377387SRobert.Gordon@Sun.COM 		cl_rpcmsg->rb_longbuf.len = msglen;
7387387SRobert.Gordon@Sun.COM 		if (rdma_buf_alloc(conn, &cl_rpcmsg->rb_longbuf)) {
7397387SRobert.Gordon@Sun.COM 			clist_free(cl_rpcmsg);
7407387SRobert.Gordon@Sun.COM 			goto done;
7417387SRobert.Gordon@Sun.COM 		}
7427387SRobert.Gordon@Sun.COM 		cl_rpcmsg->w.c_saddr3 = cl_rpcmsg->rb_longbuf.addr;
7437387SRobert.Gordon@Sun.COM 
7447387SRobert.Gordon@Sun.COM 		op = RDMA_NOMSG;
7457387SRobert.Gordon@Sun.COM 		call_xdrp = &callxdr;
7467387SRobert.Gordon@Sun.COM 
7477387SRobert.Gordon@Sun.COM 		xdrrdma_create(call_xdrp, cl_rpcmsg->rb_longbuf.addr,
7487387SRobert.Gordon@Sun.COM 		    cl_rpcmsg->rb_longbuf.len, 0,
7497387SRobert.Gordon@Sun.COM 		    cl_rpcmsg, XDR_ENCODE, conn);
7507387SRobert.Gordon@Sun.COM 
7517387SRobert.Gordon@Sun.COM 		status = clnt_compose_rpcmsg(h, procnum, &rpcmsg, call_xdrp,
7527387SRobert.Gordon@Sun.COM 		    xdr_args, argsp);
7537387SRobert.Gordon@Sun.COM 
7547387SRobert.Gordon@Sun.COM 		if (status != CLNT_RDMA_SUCCESS) {
7557387SRobert.Gordon@Sun.COM 			p->cku_err.re_status = RPC_CANTENCODEARGS;
7567387SRobert.Gordon@Sun.COM 			p->cku_err.re_errno = EIO;
7577387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(krpc__e__clntrdma__callit__composemsg);
7587387SRobert.Gordon@Sun.COM 			goto done;
7597387SRobert.Gordon@Sun.COM 		}
7607387SRobert.Gordon@Sun.COM 	}
7617387SRobert.Gordon@Sun.COM 
7627387SRobert.Gordon@Sun.COM 	/*
7637387SRobert.Gordon@Sun.COM 	 * During the XDR_ENCODE we may have "allocated" an RDMA READ or
7647387SRobert.Gordon@Sun.COM 	 * RDMA WRITE clist.
7657387SRobert.Gordon@Sun.COM 	 *
7667387SRobert.Gordon@Sun.COM 	 * First pull the RDMA READ chunk list from the XDR private
7677387SRobert.Gordon@Sun.COM 	 * area to keep it handy.
7687387SRobert.Gordon@Sun.COM 	 */
7697387SRobert.Gordon@Sun.COM 	XDR_CONTROL(call_xdrp, XDR_RDMA_GET_RLIST, &cl);
7707387SRobert.Gordon@Sun.COM 
7717387SRobert.Gordon@Sun.COM 	if (gss_i_or_p) {
7727387SRobert.Gordon@Sun.COM 		long_reply_len = rcil.rcil_len + rcil.rcil_len_alt;
7737387SRobert.Gordon@Sun.COM 		long_reply_len += MAX_AUTH_BYTES;
7747387SRobert.Gordon@Sun.COM 	} else {
7757387SRobert.Gordon@Sun.COM 		long_reply_len = rcil.rcil_len;
7767387SRobert.Gordon@Sun.COM 	}
7770Sstevel@tonic-gate 
7780Sstevel@tonic-gate 	/*
7790Sstevel@tonic-gate 	 * Update the chunk size information for the Long RPC msg.
7800Sstevel@tonic-gate 	 */
7810Sstevel@tonic-gate 	if (cl && op == RDMA_NOMSG)
7820Sstevel@tonic-gate 		cl->c_len = p->cku_outsz;
7830Sstevel@tonic-gate 
7840Sstevel@tonic-gate 	/*
7857387SRobert.Gordon@Sun.COM 	 * Prepare the RDMA header. On success xdrs will hold the result
7867387SRobert.Gordon@Sun.COM 	 * of xdrmem_create() for a SEND_BUFFER.
7870Sstevel@tonic-gate 	 */
7887387SRobert.Gordon@Sun.COM 	status = clnt_compose_rdma_header(conn, h, &clmsg,
7897387SRobert.Gordon@Sun.COM 	    &rdmahdr_o_xdrs, &op);
7907387SRobert.Gordon@Sun.COM 
7917387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
7920Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTSEND;
7930Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
7940Sstevel@tonic-gate 		RCSTAT_INCR(rcnomem);
7957387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__callit__nobufs2);
7960Sstevel@tonic-gate 		goto done;
7970Sstevel@tonic-gate 	}
7987387SRobert.Gordon@Sun.COM 
7990Sstevel@tonic-gate 	/*
8007387SRobert.Gordon@Sun.COM 	 * Now insert the RDMA READ list iff present
8010Sstevel@tonic-gate 	 */
8027387SRobert.Gordon@Sun.COM 	status = clnt_setup_rlist(conn, rdmahdr_o_xdrs, call_xdrp);
8037387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
8047387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__callit__clistreg);
8057387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &clmsg);
8067387SRobert.Gordon@Sun.COM 		p->cku_err.re_status = RPC_CANTSEND;
8077387SRobert.Gordon@Sun.COM 		p->cku_err.re_errno = EIO;
8087387SRobert.Gordon@Sun.COM 		goto done;
8097387SRobert.Gordon@Sun.COM 	}
8107387SRobert.Gordon@Sun.COM 
8117387SRobert.Gordon@Sun.COM 	/*
8127387SRobert.Gordon@Sun.COM 	 * Setup RDMA WRITE chunk list for nfs read operation
8137387SRobert.Gordon@Sun.COM 	 * other operations will have a NULL which will result
8147387SRobert.Gordon@Sun.COM 	 * as a NULL list in the XDR stream.
8157387SRobert.Gordon@Sun.COM 	 */
8167387SRobert.Gordon@Sun.COM 	status = clnt_setup_wlist(conn, rdmahdr_o_xdrs, call_xdrp);
8177387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
8187387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &clmsg);
8197387SRobert.Gordon@Sun.COM 		p->cku_err.re_status = RPC_CANTSEND;
8207387SRobert.Gordon@Sun.COM 		p->cku_err.re_errno = EIO;
8217387SRobert.Gordon@Sun.COM 		goto done;
8227387SRobert.Gordon@Sun.COM 	}
8230Sstevel@tonic-gate 
8240Sstevel@tonic-gate 	/*
8257387SRobert.Gordon@Sun.COM 	 * If NULL call and RPCSEC_GSS, provide a chunk such that
8267387SRobert.Gordon@Sun.COM 	 * large responses can flow back to the client.
8277387SRobert.Gordon@Sun.COM 	 * If RPCSEC_GSS with integrity or privacy is in use, get chunk.
8280Sstevel@tonic-gate 	 */
8297387SRobert.Gordon@Sun.COM 	if ((procnum == 0 && rpcsec_gss == TRUE) ||
8307387SRobert.Gordon@Sun.COM 	    (rpcsec_gss == TRUE && gss_i_or_p == TRUE))
8317387SRobert.Gordon@Sun.COM 		long_reply_len += 1024;
8320Sstevel@tonic-gate 
8337387SRobert.Gordon@Sun.COM 	status = clnt_setup_long_reply(conn, &cl_long_reply, long_reply_len);
8340Sstevel@tonic-gate 
8357387SRobert.Gordon@Sun.COM 	if (status != CLNT_RDMA_SUCCESS) {
8367387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &clmsg);
8377387SRobert.Gordon@Sun.COM 		p->cku_err.re_status = RPC_CANTSEND;
8387387SRobert.Gordon@Sun.COM 		p->cku_err.re_errno = EIO;
8397387SRobert.Gordon@Sun.COM 		goto done;
8400Sstevel@tonic-gate 	}
8410Sstevel@tonic-gate 
8420Sstevel@tonic-gate 	/*
8437387SRobert.Gordon@Sun.COM 	 * XDR encode the RDMA_REPLY write chunk
8447387SRobert.Gordon@Sun.COM 	 */
8457387SRobert.Gordon@Sun.COM 	seg_array_len = (cl_long_reply ? 1 : 0);
8467387SRobert.Gordon@Sun.COM 	(void) xdr_encode_reply_wchunk(rdmahdr_o_xdrs, cl_long_reply,
8477387SRobert.Gordon@Sun.COM 	    seg_array_len);
8487387SRobert.Gordon@Sun.COM 
8497387SRobert.Gordon@Sun.COM 	/*
8507387SRobert.Gordon@Sun.COM 	 * Construct a clist in "sendlist" that represents what we
8517387SRobert.Gordon@Sun.COM 	 * will push over the wire.
8527387SRobert.Gordon@Sun.COM 	 *
8530Sstevel@tonic-gate 	 * Start with the RDMA header and clist (if any)
8540Sstevel@tonic-gate 	 */
8557387SRobert.Gordon@Sun.COM 	clist_add(&cl_sendlist, 0, XDR_GETPOS(rdmahdr_o_xdrs), &clmsg.handle,
8567387SRobert.Gordon@Sun.COM 	    clmsg.addr, NULL, NULL);
8570Sstevel@tonic-gate 
8580Sstevel@tonic-gate 	/*
8597387SRobert.Gordon@Sun.COM 	 * Put the RPC call message in  sendlist if small RPC
8600Sstevel@tonic-gate 	 */
8610Sstevel@tonic-gate 	if (op == RDMA_MSG) {
8627387SRobert.Gordon@Sun.COM 		clist_add(&cl_sendlist, 0, p->cku_outsz, &rpcmsg.handle,
8637387SRobert.Gordon@Sun.COM 		    rpcmsg.addr, NULL, NULL);
8640Sstevel@tonic-gate 	} else {
8650Sstevel@tonic-gate 		/* Long RPC already in chunk list */
8660Sstevel@tonic-gate 		RCSTAT_INCR(rclongrpcs);
8670Sstevel@tonic-gate 	}
8680Sstevel@tonic-gate 
8690Sstevel@tonic-gate 	/*
8700Sstevel@tonic-gate 	 * Set up a reply buffer ready for the reply
8710Sstevel@tonic-gate 	 */
8720Sstevel@tonic-gate 	status = rdma_clnt_postrecv(conn, p->cku_xid);
8730Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
8740Sstevel@tonic-gate 		rdma_buf_free(conn, &clmsg);
8750Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTSEND;
8760Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
8770Sstevel@tonic-gate 		goto done;
8780Sstevel@tonic-gate 	}
8797387SRobert.Gordon@Sun.COM 
8800Sstevel@tonic-gate 	/*
8810Sstevel@tonic-gate 	 * sync the memory for dma
8820Sstevel@tonic-gate 	 */
8830Sstevel@tonic-gate 	if (cl != NULL) {
8847387SRobert.Gordon@Sun.COM 		status = clist_syncmem(conn, cl, CLIST_REG_SOURCE);
8850Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
8867387SRobert.Gordon@Sun.COM 			(void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
8870Sstevel@tonic-gate 			rdma_buf_free(conn, &clmsg);
8880Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTSEND;
8890Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
8900Sstevel@tonic-gate 			goto done;
8910Sstevel@tonic-gate 		}
8920Sstevel@tonic-gate 	}
8930Sstevel@tonic-gate 
8940Sstevel@tonic-gate 	/*
8957387SRobert.Gordon@Sun.COM 	 * Send the RDMA Header and RPC call message to the server
8960Sstevel@tonic-gate 	 */
8977387SRobert.Gordon@Sun.COM 	status = RDMA_SEND(conn, cl_sendlist, p->cku_xid);
8980Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
8997387SRobert.Gordon@Sun.COM 		(void) rdma_clnt_postrecv_remove(conn, p->cku_xid);
9000Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTSEND;
9010Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
9020Sstevel@tonic-gate 		goto done;
9030Sstevel@tonic-gate 	}
9047387SRobert.Gordon@Sun.COM 
9057387SRobert.Gordon@Sun.COM 	/*
9067387SRobert.Gordon@Sun.COM 	 * RDMA plugin now owns the send msg buffers.
9077387SRobert.Gordon@Sun.COM 	 * Clear them out and don't free them.
9087387SRobert.Gordon@Sun.COM 	 */
9097387SRobert.Gordon@Sun.COM 	clmsg.addr = NULL;
9107387SRobert.Gordon@Sun.COM 	if (rpcmsg.type == SEND_BUFFER)
9117387SRobert.Gordon@Sun.COM 		rpcmsg.addr = NULL;
9120Sstevel@tonic-gate 
9130Sstevel@tonic-gate 	/*
9140Sstevel@tonic-gate 	 * Recv rpc reply
9150Sstevel@tonic-gate 	 */
9167387SRobert.Gordon@Sun.COM 	status = RDMA_RECV(conn, &cl_recvlist, p->cku_xid);
9170Sstevel@tonic-gate 
9180Sstevel@tonic-gate 	/*
9190Sstevel@tonic-gate 	 * Now check recv status
9200Sstevel@tonic-gate 	 */
9210Sstevel@tonic-gate 	if (status != 0) {
9220Sstevel@tonic-gate 		if (status == RDMA_INTR) {
9230Sstevel@tonic-gate 			p->cku_err.re_status = RPC_INTR;
9240Sstevel@tonic-gate 			p->cku_err.re_errno = EINTR;
9250Sstevel@tonic-gate 			RCSTAT_INCR(rcintrs);
9260Sstevel@tonic-gate 		} else if (status == RPC_TIMEDOUT) {
9270Sstevel@tonic-gate 			p->cku_err.re_status = RPC_TIMEDOUT;
9280Sstevel@tonic-gate 			p->cku_err.re_errno = ETIMEDOUT;
9290Sstevel@tonic-gate 			RCSTAT_INCR(rctimeouts);
9300Sstevel@tonic-gate 		} else {
9310Sstevel@tonic-gate 			p->cku_err.re_status = RPC_CANTRECV;
9320Sstevel@tonic-gate 			p->cku_err.re_errno = EIO;
9330Sstevel@tonic-gate 		}
9340Sstevel@tonic-gate 		goto done;
9350Sstevel@tonic-gate 	}
9367387SRobert.Gordon@Sun.COM 
9370Sstevel@tonic-gate 	/*
9380Sstevel@tonic-gate 	 * Process the reply message.
9390Sstevel@tonic-gate 	 *
9400Sstevel@tonic-gate 	 * First the chunk list (if any)
9410Sstevel@tonic-gate 	 */
9427387SRobert.Gordon@Sun.COM 	rdmahdr_i_xdrs = &(p->cku_inxdr);
9437387SRobert.Gordon@Sun.COM 	xdrmem_create(rdmahdr_i_xdrs,
9447387SRobert.Gordon@Sun.COM 	    (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3,
9457387SRobert.Gordon@Sun.COM 	    cl_recvlist->c_len, XDR_DECODE);
9467387SRobert.Gordon@Sun.COM 
9470Sstevel@tonic-gate 	/*
9480Sstevel@tonic-gate 	 * Treat xid as opaque (xid is the first entity
9490Sstevel@tonic-gate 	 * in the rpc rdma message).
9507387SRobert.Gordon@Sun.COM 	 * Skip xid and set the xdr position accordingly.
9510Sstevel@tonic-gate 	 */
9527387SRobert.Gordon@Sun.COM 	XDR_SETPOS(rdmahdr_i_xdrs, sizeof (uint32_t));
9537387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(rdmahdr_i_xdrs, &vers);
9547387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(rdmahdr_i_xdrs, &rdma_credit);
9557387SRobert.Gordon@Sun.COM 	(void) xdr_u_int(rdmahdr_i_xdrs, &op);
9567387SRobert.Gordon@Sun.COM 	(void) xdr_do_clist(rdmahdr_i_xdrs, &cl);
9577387SRobert.Gordon@Sun.COM 
9587387SRobert.Gordon@Sun.COM 	clnt_update_credit(conn, rdma_credit);
9597387SRobert.Gordon@Sun.COM 
9607387SRobert.Gordon@Sun.COM 	wlist_exists_reply = FALSE;
9617387SRobert.Gordon@Sun.COM 	if (! xdr_decode_wlist(rdmahdr_i_xdrs, &cl_rpcreply_wlist,
9627387SRobert.Gordon@Sun.COM 	    &wlist_exists_reply)) {
9637387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__callit__wlist_decode);
9647387SRobert.Gordon@Sun.COM 		p->cku_err.re_status = RPC_CANTDECODERES;
9657387SRobert.Gordon@Sun.COM 		p->cku_err.re_errno = EIO;
9667387SRobert.Gordon@Sun.COM 		goto done;
9677387SRobert.Gordon@Sun.COM 	}
9680Sstevel@tonic-gate 
9690Sstevel@tonic-gate 	/*
9707387SRobert.Gordon@Sun.COM 	 * The server shouldn't have sent a RDMA_SEND that
9717387SRobert.Gordon@Sun.COM 	 * the client needs to RDMA_WRITE a reply back to
9727387SRobert.Gordon@Sun.COM 	 * the server.  So silently ignoring what the
9737387SRobert.Gordon@Sun.COM 	 * server returns in the rdma_reply section of the
9747387SRobert.Gordon@Sun.COM 	 * header.
9750Sstevel@tonic-gate 	 */
9767387SRobert.Gordon@Sun.COM 	(void) xdr_decode_reply_wchunk(rdmahdr_i_xdrs, &cl_rdma_reply);
9777387SRobert.Gordon@Sun.COM 	off = xdr_getpos(rdmahdr_i_xdrs);
9780Sstevel@tonic-gate 
9797387SRobert.Gordon@Sun.COM 	clnt_decode_long_reply(conn, cl_long_reply,
9807387SRobert.Gordon@Sun.COM 	    cl_rdma_reply, &replyxdr, &reply_xdrp,
9817387SRobert.Gordon@Sun.COM 	    cl, cl_recvlist, op, off);
9820Sstevel@tonic-gate 
9837387SRobert.Gordon@Sun.COM 	if (reply_xdrp == NULL)
9847387SRobert.Gordon@Sun.COM 		goto done;
9850Sstevel@tonic-gate 
9867387SRobert.Gordon@Sun.COM 	if (wlist_exists_reply) {
9877387SRobert.Gordon@Sun.COM 		XDR_CONTROL(reply_xdrp, XDR_RDMA_SET_WLIST, cl_rpcreply_wlist);
9880Sstevel@tonic-gate 	}
9890Sstevel@tonic-gate 
9900Sstevel@tonic-gate 	reply_msg.rm_direction = REPLY;
9910Sstevel@tonic-gate 	reply_msg.rm_reply.rp_stat = MSG_ACCEPTED;
9920Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_stat = SUCCESS;
9930Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_verf = _null_auth;
9947387SRobert.Gordon@Sun.COM 
9950Sstevel@tonic-gate 	/*
9960Sstevel@tonic-gate 	 *  xdr_results will be done in AUTH_UNWRAP.
9970Sstevel@tonic-gate 	 */
9980Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_results.where = NULL;
9990Sstevel@tonic-gate 	reply_msg.acpted_rply.ar_results.proc = xdr_void;
10000Sstevel@tonic-gate 
10010Sstevel@tonic-gate 	/*
10020Sstevel@tonic-gate 	 * Decode and validate the response.
10030Sstevel@tonic-gate 	 */
10047387SRobert.Gordon@Sun.COM 	if (xdr_replymsg(reply_xdrp, &reply_msg)) {
10050Sstevel@tonic-gate 		enum clnt_stat re_status;
10060Sstevel@tonic-gate 
10070Sstevel@tonic-gate 		_seterr_reply(&reply_msg, &(p->cku_err));
10080Sstevel@tonic-gate 
10090Sstevel@tonic-gate 		re_status = p->cku_err.re_status;
10100Sstevel@tonic-gate 		if (re_status == RPC_SUCCESS) {
10110Sstevel@tonic-gate 			/*
10120Sstevel@tonic-gate 			 * Reply is good, check auth.
10130Sstevel@tonic-gate 			 */
10140Sstevel@tonic-gate 			if (!AUTH_VALIDATE(h->cl_auth,
10150Sstevel@tonic-gate 			    &reply_msg.acpted_rply.ar_verf)) {
10160Sstevel@tonic-gate 				p->cku_err.re_status = RPC_AUTHERROR;
10170Sstevel@tonic-gate 				p->cku_err.re_why = AUTH_INVALIDRESP;
10180Sstevel@tonic-gate 				RCSTAT_INCR(rcbadverfs);
10197387SRobert.Gordon@Sun.COM 				DTRACE_PROBE(
10207387SRobert.Gordon@Sun.COM 				    krpc__e__clntrdma__callit__authvalidate);
10217387SRobert.Gordon@Sun.COM 			} else if (!AUTH_UNWRAP(h->cl_auth, reply_xdrp,
10220Sstevel@tonic-gate 			    xdr_results, resultsp)) {
10230Sstevel@tonic-gate 				p->cku_err.re_status = RPC_CANTDECODERES;
10240Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
10257387SRobert.Gordon@Sun.COM 				DTRACE_PROBE(
10267387SRobert.Gordon@Sun.COM 				    krpc__e__clntrdma__callit__authunwrap);
10270Sstevel@tonic-gate 			}
10280Sstevel@tonic-gate 		} else {
10290Sstevel@tonic-gate 			/* set errno in case we can't recover */
10300Sstevel@tonic-gate 			if (re_status != RPC_VERSMISMATCH &&
10310Sstevel@tonic-gate 			    re_status != RPC_AUTHERROR &&
10320Sstevel@tonic-gate 			    re_status != RPC_PROGVERSMISMATCH)
10330Sstevel@tonic-gate 				p->cku_err.re_errno = EIO;
10340Sstevel@tonic-gate 
10350Sstevel@tonic-gate 			if (re_status == RPC_AUTHERROR) {
10367387SRobert.Gordon@Sun.COM 				if ((refresh_attempt > 0) &&
10377387SRobert.Gordon@Sun.COM 				    AUTH_REFRESH(h->cl_auth, &reply_msg,
10387387SRobert.Gordon@Sun.COM 				    p->cku_cred)) {
10397387SRobert.Gordon@Sun.COM 					refresh_attempt--;
10407387SRobert.Gordon@Sun.COM 					try_call_again = 1;
10417387SRobert.Gordon@Sun.COM 					goto done;
10427387SRobert.Gordon@Sun.COM 				}
10437387SRobert.Gordon@Sun.COM 
10447387SRobert.Gordon@Sun.COM 				try_call_again = 0;
10457387SRobert.Gordon@Sun.COM 
10467387SRobert.Gordon@Sun.COM 				/*
10477387SRobert.Gordon@Sun.COM 				 * We have used the client handle to
10487387SRobert.Gordon@Sun.COM 				 * do an AUTH_REFRESH and the RPC status may
10497387SRobert.Gordon@Sun.COM 				 * be set to RPC_SUCCESS; Let's make sure to
10507387SRobert.Gordon@Sun.COM 				 * set it to RPC_AUTHERROR.
10517387SRobert.Gordon@Sun.COM 				 */
10527387SRobert.Gordon@Sun.COM 				p->cku_err.re_status = RPC_AUTHERROR;
10537387SRobert.Gordon@Sun.COM 
10540Sstevel@tonic-gate 				/*
10550Sstevel@tonic-gate 				 * Map recoverable and unrecoverable
10560Sstevel@tonic-gate 				 * authentication errors to appropriate
10570Sstevel@tonic-gate 				 * errno
10580Sstevel@tonic-gate 				 */
10590Sstevel@tonic-gate 				switch (p->cku_err.re_why) {
10600Sstevel@tonic-gate 				case AUTH_BADCRED:
10610Sstevel@tonic-gate 				case AUTH_BADVERF:
10620Sstevel@tonic-gate 				case AUTH_INVALIDRESP:
10630Sstevel@tonic-gate 				case AUTH_TOOWEAK:
10640Sstevel@tonic-gate 				case AUTH_FAILED:
10650Sstevel@tonic-gate 				case RPCSEC_GSS_NOCRED:
10660Sstevel@tonic-gate 				case RPCSEC_GSS_FAILED:
10670Sstevel@tonic-gate 					p->cku_err.re_errno = EACCES;
10680Sstevel@tonic-gate 					break;
10690Sstevel@tonic-gate 				case AUTH_REJECTEDCRED:
10700Sstevel@tonic-gate 				case AUTH_REJECTEDVERF:
10710Sstevel@tonic-gate 				default:
10720Sstevel@tonic-gate 					p->cku_err.re_errno = EIO;
10730Sstevel@tonic-gate 					break;
10740Sstevel@tonic-gate 				}
10750Sstevel@tonic-gate 			}
10767387SRobert.Gordon@Sun.COM 			DTRACE_PROBE1(krpc__e__clntrdma__callit__rpcfailed,
10777387SRobert.Gordon@Sun.COM 			    int, p->cku_err.re_why);
10780Sstevel@tonic-gate 		}
10790Sstevel@tonic-gate 	} else {
10800Sstevel@tonic-gate 		p->cku_err.re_status = RPC_CANTDECODERES;
10810Sstevel@tonic-gate 		p->cku_err.re_errno = EIO;
10827387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__callit__replymsg);
10830Sstevel@tonic-gate 	}
10840Sstevel@tonic-gate 
10857387SRobert.Gordon@Sun.COM done:
10867387SRobert.Gordon@Sun.COM 	clnt_return_credit(conn);
10877387SRobert.Gordon@Sun.COM 
10887387SRobert.Gordon@Sun.COM 	if (cl_sendlist != NULL)
10897387SRobert.Gordon@Sun.COM 		clist_free(cl_sendlist);
10907387SRobert.Gordon@Sun.COM 
10910Sstevel@tonic-gate 	/*
10920Sstevel@tonic-gate 	 * If rpc reply is in a chunk, free it now.
10930Sstevel@tonic-gate 	 */
10947387SRobert.Gordon@Sun.COM 	if (cl_long_reply) {
10957387SRobert.Gordon@Sun.COM 		(void) clist_deregister(conn, cl_long_reply, CLIST_REG_DST);
10967387SRobert.Gordon@Sun.COM 		rdma_buf_free(conn, &cl_long_reply->rb_longbuf);
10977387SRobert.Gordon@Sun.COM 		clist_free(cl_long_reply);
10987387SRobert.Gordon@Sun.COM 	}
10990Sstevel@tonic-gate 
11007387SRobert.Gordon@Sun.COM 	if (call_xdrp)
11017387SRobert.Gordon@Sun.COM 		XDR_DESTROY(call_xdrp);
11020Sstevel@tonic-gate 
11037387SRobert.Gordon@Sun.COM 	if (reply_xdrp) {
11047387SRobert.Gordon@Sun.COM 		(void) xdr_rpc_free_verifier(reply_xdrp, &reply_msg);
11057387SRobert.Gordon@Sun.COM 		XDR_DESTROY(reply_xdrp);
11060Sstevel@tonic-gate 	}
11070Sstevel@tonic-gate 
11087387SRobert.Gordon@Sun.COM 	if (cl_rdma_reply) {
11097387SRobert.Gordon@Sun.COM 		clist_free(cl_rdma_reply);
11100Sstevel@tonic-gate 	}
11110Sstevel@tonic-gate 
11127387SRobert.Gordon@Sun.COM 	if (cl_recvlist) {
11137387SRobert.Gordon@Sun.COM 		rdma_buf_t	recvmsg = {0};
11147387SRobert.Gordon@Sun.COM 		recvmsg.addr = (caddr_t)(uintptr_t)cl_recvlist->w.c_saddr3;
11150Sstevel@tonic-gate 		recvmsg.type = RECV_BUFFER;
11160Sstevel@tonic-gate 		RDMA_BUF_FREE(conn, &recvmsg);
11177387SRobert.Gordon@Sun.COM 		clist_free(cl_recvlist);
11180Sstevel@tonic-gate 	}
11197387SRobert.Gordon@Sun.COM 
11200Sstevel@tonic-gate 	RDMA_REL_CONN(conn);
11217387SRobert.Gordon@Sun.COM 
11227387SRobert.Gordon@Sun.COM 	if (try_call_again)
11237387SRobert.Gordon@Sun.COM 		goto call_again;
11247387SRobert.Gordon@Sun.COM 
11250Sstevel@tonic-gate 	if (p->cku_err.re_status != RPC_SUCCESS) {
11260Sstevel@tonic-gate 		RCSTAT_INCR(rcbadcalls);
11270Sstevel@tonic-gate 	}
11280Sstevel@tonic-gate 	return (p->cku_err.re_status);
11290Sstevel@tonic-gate }
11300Sstevel@tonic-gate 
11317387SRobert.Gordon@Sun.COM 
11327387SRobert.Gordon@Sun.COM static void
11337387SRobert.Gordon@Sun.COM clnt_decode_long_reply(CONN *conn,
11347387SRobert.Gordon@Sun.COM     struct clist *cl_long_reply,
11357387SRobert.Gordon@Sun.COM     struct clist *cl_rdma_reply, XDR *xdrs,
11367387SRobert.Gordon@Sun.COM     XDR **rxdrp, struct clist *cl,
11377387SRobert.Gordon@Sun.COM     struct clist *cl_recvlist,
11387387SRobert.Gordon@Sun.COM     uint_t  op, uint_t off)
11397387SRobert.Gordon@Sun.COM {
11407387SRobert.Gordon@Sun.COM 	if (op != RDMA_NOMSG) {
11417387SRobert.Gordon@Sun.COM 		DTRACE_PROBE1(krpc__i__longrepl__rdmamsg__len,
11427387SRobert.Gordon@Sun.COM 		    int, cl_recvlist->c_len - off);
11437387SRobert.Gordon@Sun.COM 		xdrrdma_create(xdrs,
11447387SRobert.Gordon@Sun.COM 		    (caddr_t)(uintptr_t)(cl_recvlist->w.c_saddr3 + off),
11457387SRobert.Gordon@Sun.COM 		    cl_recvlist->c_len - off, 0, cl, XDR_DECODE, conn);
11467387SRobert.Gordon@Sun.COM 		*rxdrp = xdrs;
11477387SRobert.Gordon@Sun.COM 		return;
11487387SRobert.Gordon@Sun.COM 	}
11497387SRobert.Gordon@Sun.COM 
11507387SRobert.Gordon@Sun.COM 	/* op must be RDMA_NOMSG */
11517387SRobert.Gordon@Sun.COM 	if (cl) {
11527387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(krpc__e__clntrdma__declongreply__serverreadlist);
11537387SRobert.Gordon@Sun.COM 		return;
11547387SRobert.Gordon@Sun.COM 	}
11557387SRobert.Gordon@Sun.COM 
11567387SRobert.Gordon@Sun.COM 	if (cl_long_reply->u.c_daddr) {
11577387SRobert.Gordon@Sun.COM 		DTRACE_PROBE1(krpc__i__longrepl__rdmanomsg__len,
11587387SRobert.Gordon@Sun.COM 		    int, cl_rdma_reply->c_len);
11597387SRobert.Gordon@Sun.COM 
11607387SRobert.Gordon@Sun.COM 		xdrrdma_create(xdrs, (caddr_t)cl_long_reply->u.c_daddr3,
11617387SRobert.Gordon@Sun.COM 		    cl_rdma_reply->c_len, 0, NULL, XDR_DECODE, conn);
11627387SRobert.Gordon@Sun.COM 
11637387SRobert.Gordon@Sun.COM 		*rxdrp = xdrs;
11647387SRobert.Gordon@Sun.COM 	}
11657387SRobert.Gordon@Sun.COM }
11667387SRobert.Gordon@Sun.COM 
11677387SRobert.Gordon@Sun.COM static void
11687387SRobert.Gordon@Sun.COM clnt_return_credit(CONN *conn)
11697387SRobert.Gordon@Sun.COM {
11707387SRobert.Gordon@Sun.COM 	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
11717387SRobert.Gordon@Sun.COM 
11727387SRobert.Gordon@Sun.COM 	mutex_enter(&conn->c_lock);
11737387SRobert.Gordon@Sun.COM 	cc_info->clnt_cc_in_flight_ops--;
11747387SRobert.Gordon@Sun.COM 	cv_signal(&cc_info->clnt_cc_cv);
11757387SRobert.Gordon@Sun.COM 	mutex_exit(&conn->c_lock);
11767387SRobert.Gordon@Sun.COM }
11777387SRobert.Gordon@Sun.COM 
11787387SRobert.Gordon@Sun.COM static void
11797387SRobert.Gordon@Sun.COM clnt_update_credit(CONN *conn, uint32_t rdma_credit)
11807387SRobert.Gordon@Sun.COM {
11817387SRobert.Gordon@Sun.COM 	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
11827387SRobert.Gordon@Sun.COM 
11837387SRobert.Gordon@Sun.COM 	/*
11847387SRobert.Gordon@Sun.COM 	 * If the granted has not altered, avoid taking the
11857387SRobert.Gordon@Sun.COM 	 * mutex, to essentially do nothing..
11867387SRobert.Gordon@Sun.COM 	 */
11877387SRobert.Gordon@Sun.COM 	if (cc_info->clnt_cc_granted_ops == rdma_credit)
11887387SRobert.Gordon@Sun.COM 		return;
11897387SRobert.Gordon@Sun.COM 	/*
11907387SRobert.Gordon@Sun.COM 	 * Get the granted number of buffers for credit control.
11917387SRobert.Gordon@Sun.COM 	 */
11927387SRobert.Gordon@Sun.COM 	mutex_enter(&conn->c_lock);
11937387SRobert.Gordon@Sun.COM 	cc_info->clnt_cc_granted_ops = rdma_credit;
11947387SRobert.Gordon@Sun.COM 	mutex_exit(&conn->c_lock);
11957387SRobert.Gordon@Sun.COM }
11967387SRobert.Gordon@Sun.COM 
11977387SRobert.Gordon@Sun.COM static void
11987387SRobert.Gordon@Sun.COM clnt_check_credit(CONN *conn)
11997387SRobert.Gordon@Sun.COM {
12007387SRobert.Gordon@Sun.COM 	rdma_clnt_cred_ctrl_t *cc_info = &conn->rdma_conn_cred_ctrl_u.c_clnt_cc;
12017387SRobert.Gordon@Sun.COM 
12027387SRobert.Gordon@Sun.COM 	/*
12037387SRobert.Gordon@Sun.COM 	 * Make sure we are not going over our allowed buffer use
12047387SRobert.Gordon@Sun.COM 	 * (and make sure we have gotten a granted value before).
12057387SRobert.Gordon@Sun.COM 	 */
12067387SRobert.Gordon@Sun.COM 	mutex_enter(&conn->c_lock);
12077387SRobert.Gordon@Sun.COM 	while (cc_info->clnt_cc_in_flight_ops >= cc_info->clnt_cc_granted_ops &&
12087387SRobert.Gordon@Sun.COM 	    cc_info->clnt_cc_granted_ops != 0) {
12097387SRobert.Gordon@Sun.COM 		/*
12107387SRobert.Gordon@Sun.COM 		 * Client has maxed out its granted buffers due to
12117387SRobert.Gordon@Sun.COM 		 * credit control.  Current handling is to block and wait.
12127387SRobert.Gordon@Sun.COM 		 */
12137387SRobert.Gordon@Sun.COM 		cv_wait(&cc_info->clnt_cc_cv, &conn->c_lock);
12147387SRobert.Gordon@Sun.COM 	}
12157387SRobert.Gordon@Sun.COM 	cc_info->clnt_cc_in_flight_ops++;
12167387SRobert.Gordon@Sun.COM 	mutex_exit(&conn->c_lock);
12177387SRobert.Gordon@Sun.COM }
12187387SRobert.Gordon@Sun.COM 
12190Sstevel@tonic-gate /* ARGSUSED */
12200Sstevel@tonic-gate static void
12210Sstevel@tonic-gate clnt_rdma_kabort(CLIENT *h)
12220Sstevel@tonic-gate {
12230Sstevel@tonic-gate }
12240Sstevel@tonic-gate 
12250Sstevel@tonic-gate static void
12260Sstevel@tonic-gate clnt_rdma_kerror(CLIENT *h, struct rpc_err *err)
12270Sstevel@tonic-gate {
12280Sstevel@tonic-gate 	struct cku_private *p = htop(h);
12290Sstevel@tonic-gate 	*err = p->cku_err;
12300Sstevel@tonic-gate }
12310Sstevel@tonic-gate 
12320Sstevel@tonic-gate static bool_t
12330Sstevel@tonic-gate clnt_rdma_kfreeres(CLIENT *h, xdrproc_t xdr_res, caddr_t res_ptr)
12340Sstevel@tonic-gate {
12350Sstevel@tonic-gate 	struct cku_private *p = htop(h);
12360Sstevel@tonic-gate 	XDR *xdrs;
12370Sstevel@tonic-gate 
12380Sstevel@tonic-gate 	xdrs = &(p->cku_outxdr);
12390Sstevel@tonic-gate 	xdrs->x_op = XDR_FREE;
12400Sstevel@tonic-gate 	return ((*xdr_res)(xdrs, res_ptr));
12410Sstevel@tonic-gate }
12420Sstevel@tonic-gate 
12430Sstevel@tonic-gate /* ARGSUSED */
12440Sstevel@tonic-gate static bool_t
12450Sstevel@tonic-gate clnt_rdma_kcontrol(CLIENT *h, int cmd, char *arg)
12460Sstevel@tonic-gate {
12470Sstevel@tonic-gate 	return (TRUE);
12480Sstevel@tonic-gate }
12490Sstevel@tonic-gate 
12500Sstevel@tonic-gate /* ARGSUSED */
12510Sstevel@tonic-gate static int
12520Sstevel@tonic-gate clnt_rdma_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
12530Sstevel@tonic-gate 	int minimum, void(*feedback)(int, int, caddr_t), caddr_t arg,
12540Sstevel@tonic-gate 	uint32_t xid)
12550Sstevel@tonic-gate {
12560Sstevel@tonic-gate 	RCSTAT_INCR(rctimers);
12570Sstevel@tonic-gate 	return (0);
12580Sstevel@tonic-gate }
12590Sstevel@tonic-gate 
12600Sstevel@tonic-gate int
12610Sstevel@tonic-gate rdma_reachable(int addr_type, struct netbuf *addr, struct knetconfig **knconf)
12620Sstevel@tonic-gate {
12630Sstevel@tonic-gate 	rdma_registry_t	*rp;
12640Sstevel@tonic-gate 	void *handle = NULL;
12650Sstevel@tonic-gate 	struct knetconfig *knc;
12660Sstevel@tonic-gate 	char *pf, *p;
12670Sstevel@tonic-gate 	rdma_stat status;
12680Sstevel@tonic-gate 	int error = 0;
12690Sstevel@tonic-gate 
12700Sstevel@tonic-gate 	if (!INGLOBALZONE(curproc))
12710Sstevel@tonic-gate 		return (-1);
12727387SRobert.Gordon@Sun.COM 
12730Sstevel@tonic-gate 	/*
12740Sstevel@tonic-gate 	 * modload the RDMA plugins if not already done.
12750Sstevel@tonic-gate 	 */
12760Sstevel@tonic-gate 	if (!rdma_modloaded) {
12770Sstevel@tonic-gate 		mutex_enter(&rdma_modload_lock);
12780Sstevel@tonic-gate 		if (!rdma_modloaded) {
12790Sstevel@tonic-gate 			error = rdma_modload();
12800Sstevel@tonic-gate 		}
12810Sstevel@tonic-gate 		mutex_exit(&rdma_modload_lock);
12820Sstevel@tonic-gate 		if (error)
12830Sstevel@tonic-gate 			return (-1);
12840Sstevel@tonic-gate 	}
12850Sstevel@tonic-gate 
12860Sstevel@tonic-gate 	if (!rdma_dev_available)
12870Sstevel@tonic-gate 		return (-1);
12880Sstevel@tonic-gate 
12890Sstevel@tonic-gate 	rw_enter(&rdma_lock, RW_READER);
12900Sstevel@tonic-gate 	rp = rdma_mod_head;
12910Sstevel@tonic-gate 	while (rp != NULL) {
1292*8695SRajkumar.Sivaprakasam@Sun.COM 		if (rp->r_mod_state == RDMA_MOD_INACTIVE) {
1293*8695SRajkumar.Sivaprakasam@Sun.COM 			rp = rp->r_next;
1294*8695SRajkumar.Sivaprakasam@Sun.COM 			continue;
1295*8695SRajkumar.Sivaprakasam@Sun.COM 		}
12960Sstevel@tonic-gate 		status = RDMA_REACHABLE(rp->r_mod->rdma_ops, addr_type, addr,
12970Sstevel@tonic-gate 		    &handle);
12980Sstevel@tonic-gate 		if (status == RDMA_SUCCESS) {
12990Sstevel@tonic-gate 			knc = kmem_zalloc(sizeof (struct knetconfig),
13007387SRobert.Gordon@Sun.COM 			    KM_SLEEP);
13010Sstevel@tonic-gate 			knc->knc_semantics = NC_TPI_RDMA;
13020Sstevel@tonic-gate 			pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
13030Sstevel@tonic-gate 			p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
13040Sstevel@tonic-gate 			if (addr_type == AF_INET)
13050Sstevel@tonic-gate 				(void) strncpy(pf, NC_INET, KNC_STRSIZE);
13060Sstevel@tonic-gate 			else if (addr_type == AF_INET6)
13070Sstevel@tonic-gate 				(void) strncpy(pf, NC_INET6, KNC_STRSIZE);
13080Sstevel@tonic-gate 			pf[KNC_STRSIZE - 1] = '\0';
13090Sstevel@tonic-gate 
13100Sstevel@tonic-gate 			(void) strncpy(p, rp->r_mod->rdma_api, KNC_STRSIZE);
13110Sstevel@tonic-gate 			p[KNC_STRSIZE - 1] = '\0';
13120Sstevel@tonic-gate 
13130Sstevel@tonic-gate 			knc->knc_protofmly = pf;
13140Sstevel@tonic-gate 			knc->knc_proto = p;
1315*8695SRajkumar.Sivaprakasam@Sun.COM 			knc->knc_rdev = (dev_t)rp;
13160Sstevel@tonic-gate 			*knconf = knc;
13170Sstevel@tonic-gate 			rw_exit(&rdma_lock);
13180Sstevel@tonic-gate 			return (0);
13190Sstevel@tonic-gate 		}
13200Sstevel@tonic-gate 		rp = rp->r_next;
13210Sstevel@tonic-gate 	}
13220Sstevel@tonic-gate 	rw_exit(&rdma_lock);
13230Sstevel@tonic-gate 	return (-1);
13240Sstevel@tonic-gate }
13257387SRobert.Gordon@Sun.COM 
13267387SRobert.Gordon@Sun.COM static void
13277387SRobert.Gordon@Sun.COM check_dereg_wlist(CONN *conn, clist *rwc)
13287387SRobert.Gordon@Sun.COM {
13297387SRobert.Gordon@Sun.COM 	int status;
13307387SRobert.Gordon@Sun.COM 
13317387SRobert.Gordon@Sun.COM 	if (rwc == NULL)
13327387SRobert.Gordon@Sun.COM 		return;
13337387SRobert.Gordon@Sun.COM 
13347387SRobert.Gordon@Sun.COM 	if (rwc->c_dmemhandle.mrc_rmr && rwc->c_len) {
13357387SRobert.Gordon@Sun.COM 
13367387SRobert.Gordon@Sun.COM 		status = clist_deregister(conn, rwc, CLIST_REG_DST);
13377387SRobert.Gordon@Sun.COM 
13387387SRobert.Gordon@Sun.COM 		if (status != RDMA_SUCCESS) {
13397387SRobert.Gordon@Sun.COM 			DTRACE_PROBE1(krpc__e__clntrdma__dereg_wlist,
13407387SRobert.Gordon@Sun.COM 			    int, status);
13417387SRobert.Gordon@Sun.COM 		}
13427387SRobert.Gordon@Sun.COM 	}
13437387SRobert.Gordon@Sun.COM }
1344