xref: /onnv-gate/usr/src/uts/common/rpc/rpcib.c (revision 12965:b65a8427f8fe)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57387SRobert.Gordon@Sun.COM  * Common Development and Distribution License (the "License").
67387SRobert.Gordon@Sun.COM  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
2212553SKaren.Rochford@Sun.COM  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate  */
240Sstevel@tonic-gate 
257387SRobert.Gordon@Sun.COM /*
267387SRobert.Gordon@Sun.COM  * Copyright (c) 2007, The Ohio State University. All rights reserved.
277387SRobert.Gordon@Sun.COM  *
287387SRobert.Gordon@Sun.COM  * Portions of this source code is developed by the team members of
297387SRobert.Gordon@Sun.COM  * The Ohio State University's Network-Based Computing Laboratory (NBCL),
307387SRobert.Gordon@Sun.COM  * headed by Professor Dhabaleswar K. (DK) Panda.
317387SRobert.Gordon@Sun.COM  *
327387SRobert.Gordon@Sun.COM  * Acknowledgements to contributions from developors:
337387SRobert.Gordon@Sun.COM  *   Ranjit Noronha: noronha@cse.ohio-state.edu
347387SRobert.Gordon@Sun.COM  *   Lei Chai      : chail@cse.ohio-state.edu
357387SRobert.Gordon@Sun.COM  *   Weikuan Yu    : yuw@cse.ohio-state.edu
367387SRobert.Gordon@Sun.COM  *
377387SRobert.Gordon@Sun.COM  */
380Sstevel@tonic-gate 
390Sstevel@tonic-gate /*
400Sstevel@tonic-gate  * The rpcib plugin. Implements the interface for RDMATF's
410Sstevel@tonic-gate  * interaction with IBTF.
420Sstevel@tonic-gate  */
430Sstevel@tonic-gate 
440Sstevel@tonic-gate #include <sys/param.h>
450Sstevel@tonic-gate #include <sys/types.h>
460Sstevel@tonic-gate #include <sys/user.h>
470Sstevel@tonic-gate #include <sys/systm.h>
480Sstevel@tonic-gate #include <sys/sysmacros.h>
490Sstevel@tonic-gate #include <sys/proc.h>
500Sstevel@tonic-gate #include <sys/socket.h>
510Sstevel@tonic-gate #include <sys/file.h>
520Sstevel@tonic-gate #include <sys/stream.h>
530Sstevel@tonic-gate #include <sys/strsubr.h>
540Sstevel@tonic-gate #include <sys/stropts.h>
550Sstevel@tonic-gate #include <sys/errno.h>
560Sstevel@tonic-gate #include <sys/kmem.h>
570Sstevel@tonic-gate #include <sys/debug.h>
580Sstevel@tonic-gate #include <sys/pathname.h>
590Sstevel@tonic-gate #include <sys/kstat.h>
600Sstevel@tonic-gate #include <sys/t_lock.h>
610Sstevel@tonic-gate #include <sys/ddi.h>
620Sstevel@tonic-gate #include <sys/cmn_err.h>
630Sstevel@tonic-gate #include <sys/time.h>
640Sstevel@tonic-gate #include <sys/isa_defs.h>
650Sstevel@tonic-gate #include <sys/callb.h>
660Sstevel@tonic-gate #include <sys/sunddi.h>
670Sstevel@tonic-gate #include <sys/sunndi.h>
687387SRobert.Gordon@Sun.COM #include <sys/sdt.h>
690Sstevel@tonic-gate #include <sys/ib/ibtl/ibti.h>
700Sstevel@tonic-gate #include <rpc/rpc.h>
710Sstevel@tonic-gate #include <rpc/ib.h>
720Sstevel@tonic-gate #include <sys/modctl.h>
730Sstevel@tonic-gate #include <sys/kstr.h>
740Sstevel@tonic-gate #include <sys/sockio.h>
750Sstevel@tonic-gate #include <sys/vnode.h>
760Sstevel@tonic-gate #include <sys/tiuser.h>
770Sstevel@tonic-gate #include <net/if.h>
788485SPeter.Memishian@Sun.COM #include <net/if_types.h>
790Sstevel@tonic-gate #include <sys/cred.h>
807387SRobert.Gordon@Sun.COM #include <rpc/rpc_rdma.h>
817387SRobert.Gordon@Sun.COM #include <nfs/nfs.h>
827387SRobert.Gordon@Sun.COM #include <sys/atomic.h>
837387SRobert.Gordon@Sun.COM 
849348SSiddheshwar.Mahesh@Sun.COM #define	NFS_RDMA_PORT	20049
859348SSiddheshwar.Mahesh@Sun.COM 
860Sstevel@tonic-gate 
878485SPeter.Memishian@Sun.COM /*
889146SSiddheshwar.Mahesh@Sun.COM  * Convenience structures for connection management
898485SPeter.Memishian@Sun.COM  */
/*
 * rpcib_ipaddrs_t: a counted list of addresses.  ri_list is an untyped
 * buffer, so the element type is decided by whoever fills it in.
 * rpcib_get_ib_addresses() takes two of these (see its prototype below).
 */
908485SPeter.Memishian@Sun.COM typedef struct rpcib_ipaddrs {
918485SPeter.Memishian@Sun.COM 	void	*ri_list;	/* pointer to list of addresses */
928485SPeter.Memishian@Sun.COM 	uint_t	ri_count;	/* number of addresses in list */
938485SPeter.Memishian@Sun.COM 	uint_t	ri_size;	/* size of ri_list in bytes */
948485SPeter.Memishian@Sun.COM } rpcib_ipaddrs_t;
950Sstevel@tonic-gate 
969146SSiddheshwar.Mahesh@Sun.COM 
/*
 * rpcib_ping_t: bundles an HCA pointer, an IBT path description and a
 * source/destination IP address pair.  It is passed by pointer to
 * rib_ping_srv(), rib_connect() and rib_conn_to_srv() (see their
 * prototypes below).
 * NOTE(review): presumably rib_ping_srv() fills this in with the path it
 * found to the server and the connect routines consume it -- confirm
 * against those functions.
 */
979146SSiddheshwar.Mahesh@Sun.COM typedef struct rpcib_ping {
989146SSiddheshwar.Mahesh@Sun.COM 	rib_hca_t  *hca;
999146SSiddheshwar.Mahesh@Sun.COM 	ibt_path_info_t path;
1009146SSiddheshwar.Mahesh@Sun.COM 	ibt_ip_addr_t srcip;
1019146SSiddheshwar.Mahesh@Sun.COM 	ibt_ip_addr_t dstip;
1029146SSiddheshwar.Mahesh@Sun.COM } rpcib_ping_t;
1039146SSiddheshwar.Mahesh@Sun.COM 
1040Sstevel@tonic-gate /*
1050Sstevel@tonic-gate  * Prototype declarations for driver ops
1060Sstevel@tonic-gate  */
1070Sstevel@tonic-gate static int	rpcib_attach(dev_info_t *, ddi_attach_cmd_t);
1080Sstevel@tonic-gate static int	rpcib_getinfo(dev_info_t *, ddi_info_cmd_t,
1097387SRobert.Gordon@Sun.COM 				void *, void **);
1100Sstevel@tonic-gate static int	rpcib_detach(dev_info_t *, ddi_detach_cmd_t);
1118485SPeter.Memishian@Sun.COM static boolean_t rpcib_rdma_capable_interface(struct lifreq *);
1128485SPeter.Memishian@Sun.COM static int	rpcib_do_ip_ioctl(int, int, void *);
1138485SPeter.Memishian@Sun.COM static boolean_t rpcib_get_ib_addresses(rpcib_ipaddrs_t *, rpcib_ipaddrs_t *);
1147387SRobert.Gordon@Sun.COM static int rpcib_cache_kstat_update(kstat_t *, int);
1157387SRobert.Gordon@Sun.COM static void rib_force_cleanup(void *);
1169733SFaramarz.Jalalian@Sun.COM static void rib_stop_hca_services(rib_hca_t *);
1179733SFaramarz.Jalalian@Sun.COM static void rib_attach_hca(void);
1189733SFaramarz.Jalalian@Sun.COM static int rib_find_hca_connection(rib_hca_t *hca, struct netbuf *s_svcaddr,
1199733SFaramarz.Jalalian@Sun.COM 		struct netbuf *d_svcaddr, CONN **conn);
1207387SRobert.Gordon@Sun.COM 
/*
 * rpcib_kstat: the set of named kstat counters exported by this module.
 * Every counter is declared as a 64-bit unsigned value, and the
 * initializer order below must match the member order of the struct.
 * NOTE(review): presumably refreshed by rpcib_cache_kstat_update()
 * (prototyped above) -- confirm where the kstat is created.
 */
1217387SRobert.Gordon@Sun.COM struct {
1227387SRobert.Gordon@Sun.COM 	kstat_named_t cache_limit;
1237387SRobert.Gordon@Sun.COM 	kstat_named_t cache_allocation;
1247387SRobert.Gordon@Sun.COM 	kstat_named_t cache_hits;
1257387SRobert.Gordon@Sun.COM 	kstat_named_t cache_misses;
1267387SRobert.Gordon@Sun.COM 	kstat_named_t cache_misses_above_the_limit;
1277387SRobert.Gordon@Sun.COM } rpcib_kstat = {
1287387SRobert.Gordon@Sun.COM 	{"cache_limit",			KSTAT_DATA_UINT64 },
1297387SRobert.Gordon@Sun.COM 	{"cache_allocation",		KSTAT_DATA_UINT64 },
1307387SRobert.Gordon@Sun.COM 	{"cache_hits",			KSTAT_DATA_UINT64 },
1317387SRobert.Gordon@Sun.COM 	{"cache_misses",		KSTAT_DATA_UINT64 },
1327387SRobert.Gordon@Sun.COM 	{"cache_misses_above_the_limit", KSTAT_DATA_UINT64 },
1337387SRobert.Gordon@Sun.COM };
1340Sstevel@tonic-gate 
1350Sstevel@tonic-gate /* rpcib cb_ops */
1360Sstevel@tonic-gate static struct cb_ops rpcib_cbops = {
	/*
	 * Only open and close are wired to nulldev; every data-path entry
	 * point is nodev.  This table is referenced from rpcib_ops below.
	 */
1370Sstevel@tonic-gate 	nulldev,		/* open */
1380Sstevel@tonic-gate 	nulldev,		/* close */
1390Sstevel@tonic-gate 	nodev,			/* strategy */
1400Sstevel@tonic-gate 	nodev,			/* print */
1410Sstevel@tonic-gate 	nodev,			/* dump */
1420Sstevel@tonic-gate 	nodev,			/* read */
1430Sstevel@tonic-gate 	nodev,			/* write */
1440Sstevel@tonic-gate 	nodev,			/* ioctl */
1450Sstevel@tonic-gate 	nodev,			/* devmap */
1460Sstevel@tonic-gate 	nodev,			/* mmap */
1470Sstevel@tonic-gate 	nodev,			/* segmap */
1480Sstevel@tonic-gate 	nochpoll,		/* poll */
1490Sstevel@tonic-gate 	ddi_prop_op,		/* prop_op */
1500Sstevel@tonic-gate 	NULL,			/* stream */
1510Sstevel@tonic-gate 	D_MP,			/* cb_flag */
1520Sstevel@tonic-gate 	CB_REV,			/* rev */
1530Sstevel@tonic-gate 	nodev,			/* int (*cb_aread)() */
1540Sstevel@tonic-gate 	nodev			/* int (*cb_awrite)() */
1550Sstevel@tonic-gate };
1560Sstevel@tonic-gate 
1570Sstevel@tonic-gate /*
1580Sstevel@tonic-gate  * Device options
1590Sstevel@tonic-gate  */
1600Sstevel@tonic-gate static struct dev_ops rpcib_ops = {
	/*
	 * Autoconfiguration entry points: getinfo/attach/detach are the
	 * rpcib_* routines in this file.  This table is handed to the
	 * module framework through rib_modldrv below.
	 */
1610Sstevel@tonic-gate 	DEVO_REV,		/* devo_rev, */
1620Sstevel@tonic-gate 	0,			/* refcnt  */
1630Sstevel@tonic-gate 	rpcib_getinfo,		/* info */
1640Sstevel@tonic-gate 	nulldev,		/* identify */
1650Sstevel@tonic-gate 	nulldev,		/* probe */
1660Sstevel@tonic-gate 	rpcib_attach,		/* attach */
1670Sstevel@tonic-gate 	rpcib_detach,		/* detach */
1680Sstevel@tonic-gate 	nodev,			/* reset */
1690Sstevel@tonic-gate 	&rpcib_cbops,		    /* driver ops - devctl interfaces */
1700Sstevel@tonic-gate 	NULL,			/* bus operations */
1717656SSherry.Moore@Sun.COM 	NULL,			/* power */
1727656SSherry.Moore@Sun.COM 	ddi_quiesce_not_needed,		/* quiesce */
1730Sstevel@tonic-gate };
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate /*
1760Sstevel@tonic-gate  * Module linkage information.
1770Sstevel@tonic-gate  */
1780Sstevel@tonic-gate 
/* Driver linkage record; referenced by rib_modlinkage below. */
1790Sstevel@tonic-gate static struct modldrv rib_modldrv = {
1807656SSherry.Moore@Sun.COM 	&mod_driverops,		/* Driver module */
1817656SSherry.Moore@Sun.COM 	"RPCIB plugin driver",	/* Driver name and version */
1827656SSherry.Moore@Sun.COM 	&rpcib_ops,		/* Driver ops */
1830Sstevel@tonic-gate };
1840Sstevel@tonic-gate 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info().  The list holds the single driver
 * linkage record and is NULL-terminated.
 */
1850Sstevel@tonic-gate static struct modlinkage rib_modlinkage = {
1860Sstevel@tonic-gate 	MODREV_1,
1870Sstevel@tonic-gate 	(void *)&rib_modldrv,
1880Sstevel@tonic-gate 	NULL
1890Sstevel@tonic-gate };
1900Sstevel@tonic-gate 
/*
 * rib_lrc_entry_t: one buffer handed out by rib_get_cache_buf() and
 * returned through rib_free_cache_buf() (see their prototypes below).
 * Entries are doubly linked through forw/back.
 * NOTE(review): field meanings below are inferred from their names and
 * types only -- confirm against the cache routines.
 */
1917387SRobert.Gordon@Sun.COM typedef struct rib_lrc_entry {
1927387SRobert.Gordon@Sun.COM 	struct rib_lrc_entry *forw;
1937387SRobert.Gordon@Sun.COM 	struct rib_lrc_entry *back;
1947387SRobert.Gordon@Sun.COM 	char *lrc_buf;		/* presumably the data buffer itself */
1957387SRobert.Gordon@Sun.COM 
1967387SRobert.Gordon@Sun.COM 	uint32_t lrc_len;	/* presumably length of lrc_buf in bytes */
1977387SRobert.Gordon@Sun.COM 	void  *avl_node;	/* presumably the owning AVL node; untyped */
1987387SRobert.Gordon@Sun.COM 	bool_t registered;	/* presumably set once lrc_mhandle is valid */
1997387SRobert.Gordon@Sun.COM 
2007387SRobert.Gordon@Sun.COM 	struct mrc lrc_mhandle;	/* memory registration handle for the buffer */
2017387SRobert.Gordon@Sun.COM 	bool_t lrc_on_freed_list;
2027387SRobert.Gordon@Sun.COM } rib_lrc_entry_t;
2037387SRobert.Gordon@Sun.COM 
/*
 * cache_avl_struct_t: the node type stored in an AVL tree (it embeds the
 * avl_node_t linkage).  Each node carries its own mutex and an embedded
 * rib_lrc_entry_t.
 * NOTE(review): presumably the tree is keyed by len (see avl_compare()),
 * r is the head of the list of buffers of that length, and elements
 * counts them -- confirm against the cache routines.
 */
2047387SRobert.Gordon@Sun.COM typedef	struct cache_struct	{
2057387SRobert.Gordon@Sun.COM 	rib_lrc_entry_t		r;
2067387SRobert.Gordon@Sun.COM 	uint32_t		len;
2077387SRobert.Gordon@Sun.COM 	uint32_t		elements;
2087387SRobert.Gordon@Sun.COM 	kmutex_t		node_lock;
2097387SRobert.Gordon@Sun.COM 	avl_node_t		avl_link;
2107387SRobert.Gordon@Sun.COM } cache_avl_struct_t;
2117387SRobert.Gordon@Sun.COM 
2127387SRobert.Gordon@Sun.COM uint64_t	cache_limit = 100 * 1024 * 1024;
2137387SRobert.Gordon@Sun.COM static uint64_t	cache_watermark = 80 * 1024 * 1024;
2147387SRobert.Gordon@Sun.COM static bool_t	stats_enabled = FALSE;
2157387SRobert.Gordon@Sun.COM 
2167387SRobert.Gordon@Sun.COM static uint64_t max_unsignaled_rws = 5;
2179348SSiddheshwar.Mahesh@Sun.COM int nfs_rdma_port = NFS_RDMA_PORT;
2187387SRobert.Gordon@Sun.COM 
21910326SSiddheshwar.Mahesh@Sun.COM #define	RIBNETID_TCP	"tcp"
22010326SSiddheshwar.Mahesh@Sun.COM #define	RIBNETID_TCP6	"tcp6"
22110326SSiddheshwar.Mahesh@Sun.COM 
2220Sstevel@tonic-gate /*
2230Sstevel@tonic-gate  * rib_stat: private data pointer used when registering
2240Sstevel@tonic-gate  *	with the IBTF.  It is returned to the consumer
2250Sstevel@tonic-gate  *	in all callbacks.
2260Sstevel@tonic-gate  */
2270Sstevel@tonic-gate static rpcib_state_t *rib_stat = NULL;
2280Sstevel@tonic-gate 
2297387SRobert.Gordon@Sun.COM #define	RNR_RETRIES	IBT_RNR_RETRY_1
2300Sstevel@tonic-gate #define	MAX_PORTS	2
2319723SSiddheshwar.Mahesh@Sun.COM #define	RDMA_DUMMY_WRID	0x4D3A1D4D3A1D
2329723SSiddheshwar.Mahesh@Sun.COM #define	RDMA_CONN_REAP_RETRY	10	/* 10 secs */
2330Sstevel@tonic-gate 
2347387SRobert.Gordon@Sun.COM int preposted_rbufs = RDMA_BUFS_GRANT;
2350Sstevel@tonic-gate int send_threshold = 1;
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate /*
2389723SSiddheshwar.Mahesh@Sun.COM  * Old cards with Tavor driver have limited memory footprint
2399723SSiddheshwar.Mahesh@Sun.COM  * when booted in 32bit. The rib_max_rbufs tunable can be
2409723SSiddheshwar.Mahesh@Sun.COM  * tuned for more buffers if needed.
2419723SSiddheshwar.Mahesh@Sun.COM  */
2429723SSiddheshwar.Mahesh@Sun.COM 
2439723SSiddheshwar.Mahesh@Sun.COM #if !defined(_ELF64) && !defined(__sparc)
2449723SSiddheshwar.Mahesh@Sun.COM int rib_max_rbufs = MAX_BUFS;
2459723SSiddheshwar.Mahesh@Sun.COM #else
2469723SSiddheshwar.Mahesh@Sun.COM int rib_max_rbufs = 10 * MAX_BUFS;
2479723SSiddheshwar.Mahesh@Sun.COM #endif	/* !(_ELF64) && !(__sparc) */
2489723SSiddheshwar.Mahesh@Sun.COM 
2499723SSiddheshwar.Mahesh@Sun.COM int rib_conn_timeout = 60 * 12;		/* 12 minutes */
2509723SSiddheshwar.Mahesh@Sun.COM 
2519723SSiddheshwar.Mahesh@Sun.COM /*
2520Sstevel@tonic-gate  * State of the plugin.
2530Sstevel@tonic-gate  * ACCEPT = accepting new connections and requests.
2540Sstevel@tonic-gate  * NO_ACCEPT = not accepting new connections and requests.
2550Sstevel@tonic-gate  * This should eventually move to rpcib_state_t structure, since this
2560Sstevel@tonic-gate  * will tell in which state the plugin is for a particular type of service
2570Sstevel@tonic-gate  * like NFS, NLM or v4 Callback daemon. The plugin might be in accept
2580Sstevel@tonic-gate  * state for one and in no_accept state for the other.
2590Sstevel@tonic-gate  */
2600Sstevel@tonic-gate int		plugin_state;
2610Sstevel@tonic-gate kmutex_t	plugin_state_lock;
2620Sstevel@tonic-gate 
2637387SRobert.Gordon@Sun.COM ldi_ident_t rpcib_li;
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate /*
2660Sstevel@tonic-gate  * RPCIB RDMATF operations
2670Sstevel@tonic-gate  */
2680Sstevel@tonic-gate static rdma_stat rib_reachable(int addr_type, struct netbuf *, void **handle);
2690Sstevel@tonic-gate static rdma_stat rib_disconnect(CONN *conn);
2700Sstevel@tonic-gate static void rib_listen(struct rdma_svc_data *rd);
2710Sstevel@tonic-gate static void rib_listen_stop(struct rdma_svc_data *rd);
2727387SRobert.Gordon@Sun.COM static rdma_stat rib_registermem(CONN *conn, caddr_t  adsp, caddr_t buf,
2737387SRobert.Gordon@Sun.COM 	uint_t buflen, struct mrc *buf_handle);
2740Sstevel@tonic-gate static rdma_stat rib_deregistermem(CONN *conn, caddr_t buf,
2750Sstevel@tonic-gate 	struct mrc buf_handle);
2767387SRobert.Gordon@Sun.COM static rdma_stat rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp,
2777387SRobert.Gordon@Sun.COM 		caddr_t buf, uint_t buflen, struct mrc *buf_handle);
2787387SRobert.Gordon@Sun.COM static rdma_stat rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf,
2797387SRobert.Gordon@Sun.COM 		struct mrc buf_handle);
2807387SRobert.Gordon@Sun.COM static rdma_stat rib_registermemsync(CONN *conn,  caddr_t adsp, caddr_t buf,
2817387SRobert.Gordon@Sun.COM 	uint_t buflen, struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle,
2827387SRobert.Gordon@Sun.COM 	void *lrc);
2830Sstevel@tonic-gate static rdma_stat rib_deregistermemsync(CONN *conn, caddr_t buf,
2847387SRobert.Gordon@Sun.COM 	struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle, void *);
2850Sstevel@tonic-gate static rdma_stat rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle,
2860Sstevel@tonic-gate 	caddr_t buf, int len, int cpu);
2870Sstevel@tonic-gate 
2880Sstevel@tonic-gate static rdma_stat rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf);
2890Sstevel@tonic-gate 
2900Sstevel@tonic-gate static void rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf);
2910Sstevel@tonic-gate static void *rib_rbuf_alloc(CONN *, rdma_buf_t *);
2920Sstevel@tonic-gate 
2930Sstevel@tonic-gate static void rib_rbuf_free(CONN *conn, int ptype, void *buf);
2940Sstevel@tonic-gate 
2950Sstevel@tonic-gate static rdma_stat rib_send(CONN *conn, struct clist *cl, uint32_t msgid);
2960Sstevel@tonic-gate static rdma_stat rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid);
2970Sstevel@tonic-gate static rdma_stat rib_post_resp(CONN *conn, struct clist *cl, uint32_t msgid);
2987387SRobert.Gordon@Sun.COM static rdma_stat rib_post_resp_remove(CONN *conn, uint32_t msgid);
2990Sstevel@tonic-gate static rdma_stat rib_post_recv(CONN *conn, struct clist *cl);
3000Sstevel@tonic-gate static rdma_stat rib_recv(CONN *conn, struct clist **clp, uint32_t msgid);
3010Sstevel@tonic-gate static rdma_stat rib_read(CONN *conn, struct clist *cl, int wait);
3020Sstevel@tonic-gate static rdma_stat rib_write(CONN *conn, struct clist *cl, int wait);
3039146SSiddheshwar.Mahesh@Sun.COM static rdma_stat rib_ping_srv(int addr_type, struct netbuf *, rpcib_ping_t *);
3049733SFaramarz.Jalalian@Sun.COM static rdma_stat rib_conn_get(struct netbuf *, struct netbuf *,
3059733SFaramarz.Jalalian@Sun.COM 	int addr_type, void *, CONN **);
3060Sstevel@tonic-gate static rdma_stat rib_conn_release(CONN *conn);
3079803SSiddheshwar.Mahesh@Sun.COM static rdma_stat rib_connect(struct netbuf *, struct netbuf *, int,
3089803SSiddheshwar.Mahesh@Sun.COM 	rpcib_ping_t *, CONN **);
3090Sstevel@tonic-gate static rdma_stat rib_getinfo(rdma_info_t *info);
3107387SRobert.Gordon@Sun.COM 
3117387SRobert.Gordon@Sun.COM static rib_lrc_entry_t *rib_get_cache_buf(CONN *conn, uint32_t len);
3127387SRobert.Gordon@Sun.COM static void rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *buf);
3137387SRobert.Gordon@Sun.COM static void rib_destroy_cache(rib_hca_t *hca);
3147387SRobert.Gordon@Sun.COM static	void	rib_server_side_cache_reclaim(void *argp);
3157387SRobert.Gordon@Sun.COM static int avl_compare(const void *t1, const void *t2);
3167387SRobert.Gordon@Sun.COM 
3170Sstevel@tonic-gate static void rib_stop_services(rib_hca_t *);
3187387SRobert.Gordon@Sun.COM static void rib_close_channels(rib_conn_list_t *);
3199723SSiddheshwar.Mahesh@Sun.COM static void rib_conn_close(void *);
32011530SSiddheshwar.Mahesh@Sun.COM static void rib_recv_rele(rib_qp_t *);
32111530SSiddheshwar.Mahesh@Sun.COM static rdma_stat rib_conn_release_locked(CONN *conn);
3220Sstevel@tonic-gate 
3230Sstevel@tonic-gate /*
3240Sstevel@tonic-gate  * RPCIB addressing operations
3250Sstevel@tonic-gate  */
3260Sstevel@tonic-gate 
3270Sstevel@tonic-gate /*
3280Sstevel@tonic-gate  * RDMA operations the RPCIB module exports
3290Sstevel@tonic-gate  */
3300Sstevel@tonic-gate static rdmaops_t rib_ops = {
	/*
	 * Positional initializer: the order of these entries must match
	 * the member order of rdmaops_t, which is declared outside this
	 * file.  Every entry is one of the static rib_* routines
	 * prototyped above.
	 */
3310Sstevel@tonic-gate 	rib_reachable,
3320Sstevel@tonic-gate 	rib_conn_get,
3330Sstevel@tonic-gate 	rib_conn_release,
3340Sstevel@tonic-gate 	rib_listen,
3350Sstevel@tonic-gate 	rib_listen_stop,
3360Sstevel@tonic-gate 	rib_registermem,
3370Sstevel@tonic-gate 	rib_deregistermem,
3380Sstevel@tonic-gate 	rib_registermemsync,
3390Sstevel@tonic-gate 	rib_deregistermemsync,
3400Sstevel@tonic-gate 	rib_syncmem,
3410Sstevel@tonic-gate 	rib_reg_buf_alloc,
3420Sstevel@tonic-gate 	rib_reg_buf_free,
3430Sstevel@tonic-gate 	rib_send,
3440Sstevel@tonic-gate 	rib_send_resp,
3450Sstevel@tonic-gate 	rib_post_resp,
3467387SRobert.Gordon@Sun.COM 	rib_post_resp_remove,
3470Sstevel@tonic-gate 	rib_post_recv,
3480Sstevel@tonic-gate 	rib_recv,
3490Sstevel@tonic-gate 	rib_read,
3500Sstevel@tonic-gate 	rib_write,
3517387SRobert.Gordon@Sun.COM 	rib_getinfo,
3520Sstevel@tonic-gate };
3530Sstevel@tonic-gate 
3540Sstevel@tonic-gate /*
3550Sstevel@tonic-gate  * RDMATF RPCIB plugin details
3560Sstevel@tonic-gate  */
3570Sstevel@tonic-gate static rdma_mod_t rib_mod = {
3580Sstevel@tonic-gate 	"ibtf",		/* api name */
3590Sstevel@tonic-gate 	RDMATF_VERS_1,
	/*
	 * NOTE(review): presumably this 0 initializes rdma_count, which
	 * rpcib_free_hca_list() decrements once per HCA -- confirm against
	 * the rdma_mod_t member order.
	 */
3600Sstevel@tonic-gate 	0,
3610Sstevel@tonic-gate 	&rib_ops,	/* rdma op vector for ibtf */
3620Sstevel@tonic-gate };
3630Sstevel@tonic-gate 
3649733SFaramarz.Jalalian@Sun.COM static rdma_stat rpcib_open_hcas(rpcib_state_t *);
3650Sstevel@tonic-gate static rdma_stat rib_qp_init(rib_qp_t *, int);
3660Sstevel@tonic-gate static void rib_svc_scq_handler(ibt_cq_hdl_t, void *);
3670Sstevel@tonic-gate static void rib_clnt_scq_handler(ibt_cq_hdl_t, void *);
3680Sstevel@tonic-gate static void rib_clnt_rcq_handler(ibt_cq_hdl_t, void *);
3690Sstevel@tonic-gate static void rib_svc_rcq_handler(ibt_cq_hdl_t, void *);
3700Sstevel@tonic-gate static rib_bufpool_t *rib_rbufpool_create(rib_hca_t *hca, int ptype, int num);
3717387SRobert.Gordon@Sun.COM static rdma_stat rib_reg_mem(rib_hca_t *, caddr_t adsp, caddr_t, uint_t,
3727387SRobert.Gordon@Sun.COM 	ibt_mr_flags_t, ibt_mr_hdl_t *, ibt_mr_desc_t *);
3737387SRobert.Gordon@Sun.COM static rdma_stat rib_reg_mem_user(rib_hca_t *, caddr_t, uint_t, ibt_mr_flags_t,
3747387SRobert.Gordon@Sun.COM 	ibt_mr_hdl_t *, ibt_mr_desc_t *, caddr_t);
3759146SSiddheshwar.Mahesh@Sun.COM static rdma_stat rib_conn_to_srv(rib_hca_t *, rib_qp_t *, rpcib_ping_t *);
3760Sstevel@tonic-gate static rdma_stat rib_clnt_create_chan(rib_hca_t *, struct netbuf *,
3770Sstevel@tonic-gate 	rib_qp_t **);
3780Sstevel@tonic-gate static rdma_stat rib_svc_create_chan(rib_hca_t *, caddr_t, uint8_t,
3790Sstevel@tonic-gate 	rib_qp_t **);
3800Sstevel@tonic-gate static rdma_stat rib_sendwait(rib_qp_t *, struct send_wid *);
3810Sstevel@tonic-gate static struct send_wid *rib_init_sendwait(uint32_t, int, rib_qp_t *);
3820Sstevel@tonic-gate static int rib_free_sendwait(struct send_wid *);
3830Sstevel@tonic-gate static struct rdma_done_list *rdma_done_add(rib_qp_t *qp, uint32_t xid);
3840Sstevel@tonic-gate static void rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd);
3850Sstevel@tonic-gate static void rdma_done_rem_list(rib_qp_t *);
3860Sstevel@tonic-gate static void rdma_done_notify(rib_qp_t *qp, uint32_t xid);
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate static void rib_async_handler(void *,
3890Sstevel@tonic-gate 	ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *);
3900Sstevel@tonic-gate static rdma_stat rib_rem_rep(rib_qp_t *, struct reply *);
3910Sstevel@tonic-gate static struct svc_recv *rib_init_svc_recv(rib_qp_t *, ibt_wr_ds_t *);
3920Sstevel@tonic-gate static int rib_free_svc_recv(struct svc_recv *);
3930Sstevel@tonic-gate static struct recv_wid *rib_create_wid(rib_qp_t *, ibt_wr_ds_t *, uint32_t);
3940Sstevel@tonic-gate static void rib_free_wid(struct recv_wid *);
3950Sstevel@tonic-gate static rdma_stat rib_disconnect_channel(CONN *, rib_conn_list_t *);
39611530SSiddheshwar.Mahesh@Sun.COM static void rib_detach_hca(ibt_hca_hdl_t);
3979723SSiddheshwar.Mahesh@Sun.COM static void rib_close_a_channel(CONN *);
3989723SSiddheshwar.Mahesh@Sun.COM static void rib_send_hold(rib_qp_t *);
3999723SSiddheshwar.Mahesh@Sun.COM static void rib_send_rele(rib_qp_t *);
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate /*
4020Sstevel@tonic-gate  * Registration with IBTF as a consumer
4030Sstevel@tonic-gate  */
4040Sstevel@tonic-gate static struct ibt_clnt_modinfo_s rib_modinfo = {
	/*
	 * Handed to ibt_attach() from rpcib_attach(); rib_async_handler
	 * is the callback registered for asynchronous events.
	 */
4058580SBill.Taylor@Sun.COM 	IBTI_V_CURR,
4060Sstevel@tonic-gate 	IBT_GENERIC,
4070Sstevel@tonic-gate 	rib_async_handler,	/* async event handler */
4080Sstevel@tonic-gate 	NULL,			/* Memory Region Handler */
4090Sstevel@tonic-gate 	"nfs/ib"
4100Sstevel@tonic-gate };
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate /*
4130Sstevel@tonic-gate  * Global structure
4140Sstevel@tonic-gate  */
4150Sstevel@tonic-gate 
/*
 * rpcib_t: per-driver soft state; a single global instance ("rpcib") is
 * defined right after this type.
 *   rpcib_dip   - devinfo pointer recorded by rpcib_attach() and handed
 *                 back by rpcib_getinfo() for DDI_INFO_DEVT2DEVINFO.
 *   rpcib_mutex - held by rpcib_attach() while it checks and sets
 *                 rpcib_dip.
 */
4160Sstevel@tonic-gate typedef struct rpcib_s {
4170Sstevel@tonic-gate 	dev_info_t	*rpcib_dip;
4180Sstevel@tonic-gate 	kmutex_t	rpcib_mutex;
4190Sstevel@tonic-gate } rpcib_t;
4200Sstevel@tonic-gate 
4210Sstevel@tonic-gate rpcib_t rpcib;
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate /*
4240Sstevel@tonic-gate  * /etc/system controlled variable to control
4250Sstevel@tonic-gate  * debugging in rpcib kernel module.
4260Sstevel@tonic-gate  * Set it to values greater than 1 to control
4270Sstevel@tonic-gate  * the amount of debugging messages required.
4280Sstevel@tonic-gate  */
4290Sstevel@tonic-gate int rib_debug = 0;
4300Sstevel@tonic-gate 
4310Sstevel@tonic-gate int
_init(void)4320Sstevel@tonic-gate _init(void)
4330Sstevel@tonic-gate {
4348485SPeter.Memishian@Sun.COM 	int error;
4350Sstevel@tonic-gate 
4360Sstevel@tonic-gate 	error = mod_install((struct modlinkage *)&rib_modlinkage);
4370Sstevel@tonic-gate 	if (error != 0) {
4380Sstevel@tonic-gate 		/*
4390Sstevel@tonic-gate 		 * Could not load module
4400Sstevel@tonic-gate 		 */
4410Sstevel@tonic-gate 		return (error);
4420Sstevel@tonic-gate 	}
4430Sstevel@tonic-gate 	mutex_init(&plugin_state_lock, NULL, MUTEX_DRIVER, NULL);
4440Sstevel@tonic-gate 	return (0);
4450Sstevel@tonic-gate }
4460Sstevel@tonic-gate 
4470Sstevel@tonic-gate int
_fini()4480Sstevel@tonic-gate _fini()
4490Sstevel@tonic-gate {
4500Sstevel@tonic-gate 	int status;
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 	/*
4530Sstevel@tonic-gate 	 * Remove module
4540Sstevel@tonic-gate 	 */
4550Sstevel@tonic-gate 	if ((status = mod_remove(&rib_modlinkage)) != 0) {
4560Sstevel@tonic-gate 		return (status);
4570Sstevel@tonic-gate 	}
4580Sstevel@tonic-gate 	mutex_destroy(&plugin_state_lock);
4590Sstevel@tonic-gate 	return (0);
4600Sstevel@tonic-gate }
4610Sstevel@tonic-gate 
4620Sstevel@tonic-gate int
_info(struct modinfo * modinfop)4630Sstevel@tonic-gate _info(struct modinfo *modinfop)
4640Sstevel@tonic-gate {
4650Sstevel@tonic-gate 	return (mod_info(&rib_modlinkage, modinfop));
4660Sstevel@tonic-gate }
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate /*
4690Sstevel@tonic-gate  * rpcib_getinfo()
4700Sstevel@tonic-gate  * Given the device number, return the devinfo pointer or the
4710Sstevel@tonic-gate  * instance number.
4720Sstevel@tonic-gate  * Note: always succeed DDI_INFO_DEVT2INSTANCE, even before attach.
4730Sstevel@tonic-gate  */
4740Sstevel@tonic-gate 
4750Sstevel@tonic-gate /*ARGSUSED*/
4760Sstevel@tonic-gate static int
rpcib_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)4770Sstevel@tonic-gate rpcib_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
4780Sstevel@tonic-gate {
4790Sstevel@tonic-gate 	int ret = DDI_SUCCESS;
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 	switch (cmd) {
4820Sstevel@tonic-gate 	case DDI_INFO_DEVT2DEVINFO:
4830Sstevel@tonic-gate 		if (rpcib.rpcib_dip != NULL)
4840Sstevel@tonic-gate 			*result = rpcib.rpcib_dip;
4850Sstevel@tonic-gate 		else {
4860Sstevel@tonic-gate 			*result = NULL;
4870Sstevel@tonic-gate 			ret = DDI_FAILURE;
4880Sstevel@tonic-gate 		}
4890Sstevel@tonic-gate 		break;
4900Sstevel@tonic-gate 
4910Sstevel@tonic-gate 	case DDI_INFO_DEVT2INSTANCE:
4920Sstevel@tonic-gate 		*result = NULL;
4930Sstevel@tonic-gate 		break;
4940Sstevel@tonic-gate 
4950Sstevel@tonic-gate 	default:
4960Sstevel@tonic-gate 		ret = DDI_FAILURE;
4970Sstevel@tonic-gate 	}
4980Sstevel@tonic-gate 	return (ret);
4990Sstevel@tonic-gate }
5000Sstevel@tonic-gate 
5019733SFaramarz.Jalalian@Sun.COM static void
rpcib_free_hca_list()5029733SFaramarz.Jalalian@Sun.COM rpcib_free_hca_list()
5039733SFaramarz.Jalalian@Sun.COM {
5049733SFaramarz.Jalalian@Sun.COM 	rib_hca_t *hca, *hcap;
5059733SFaramarz.Jalalian@Sun.COM 
5069733SFaramarz.Jalalian@Sun.COM 	rw_enter(&rib_stat->hcas_list_lock, RW_WRITER);
5079733SFaramarz.Jalalian@Sun.COM 	hca = rib_stat->hcas_list;
5089733SFaramarz.Jalalian@Sun.COM 	rib_stat->hcas_list = NULL;
5099733SFaramarz.Jalalian@Sun.COM 	rw_exit(&rib_stat->hcas_list_lock);
5109733SFaramarz.Jalalian@Sun.COM 	while (hca != NULL) {
5119733SFaramarz.Jalalian@Sun.COM 		rw_enter(&hca->state_lock, RW_WRITER);
5129733SFaramarz.Jalalian@Sun.COM 		hcap = hca;
5139733SFaramarz.Jalalian@Sun.COM 		hca = hca->next;
5149733SFaramarz.Jalalian@Sun.COM 		rib_stat->nhca_inited--;
5159733SFaramarz.Jalalian@Sun.COM 		rib_mod.rdma_count--;
5169733SFaramarz.Jalalian@Sun.COM 		hcap->state = HCA_DETACHED;
5179733SFaramarz.Jalalian@Sun.COM 		rw_exit(&hcap->state_lock);
5189733SFaramarz.Jalalian@Sun.COM 		rib_stop_hca_services(hcap);
5199733SFaramarz.Jalalian@Sun.COM 
5209733SFaramarz.Jalalian@Sun.COM 		kmem_free(hcap, sizeof (*hcap));
5219733SFaramarz.Jalalian@Sun.COM 	}
5229733SFaramarz.Jalalian@Sun.COM }
5239733SFaramarz.Jalalian@Sun.COM 
5249733SFaramarz.Jalalian@Sun.COM static rdma_stat
rpcib_free_service_list()5259733SFaramarz.Jalalian@Sun.COM rpcib_free_service_list()
5269733SFaramarz.Jalalian@Sun.COM {
5279733SFaramarz.Jalalian@Sun.COM 	rib_service_t *service;
5289733SFaramarz.Jalalian@Sun.COM 	ibt_status_t ret;
5299733SFaramarz.Jalalian@Sun.COM 
5309733SFaramarz.Jalalian@Sun.COM 	rw_enter(&rib_stat->service_list_lock, RW_WRITER);
5319733SFaramarz.Jalalian@Sun.COM 	while (rib_stat->service_list != NULL) {
5329733SFaramarz.Jalalian@Sun.COM 		service = rib_stat->service_list;
5339733SFaramarz.Jalalian@Sun.COM 		ret = ibt_unbind_all_services(service->srv_hdl);
5349733SFaramarz.Jalalian@Sun.COM 		if (ret != IBT_SUCCESS) {
5359733SFaramarz.Jalalian@Sun.COM 			rw_exit(&rib_stat->service_list_lock);
5369733SFaramarz.Jalalian@Sun.COM #ifdef DEBUG
5379733SFaramarz.Jalalian@Sun.COM 			cmn_err(CE_NOTE, "rpcib_free_service_list: "
5389733SFaramarz.Jalalian@Sun.COM 			    "ibt_unbind_all_services failed (%d)\n", (int)ret);
5399733SFaramarz.Jalalian@Sun.COM #endif
5409733SFaramarz.Jalalian@Sun.COM 			return (RDMA_FAILED);
5419733SFaramarz.Jalalian@Sun.COM 		}
5429733SFaramarz.Jalalian@Sun.COM 		ret = ibt_deregister_service(rib_stat->ibt_clnt_hdl,
5439733SFaramarz.Jalalian@Sun.COM 		    service->srv_hdl);
5449733SFaramarz.Jalalian@Sun.COM 		if (ret != IBT_SUCCESS) {
5459733SFaramarz.Jalalian@Sun.COM 			rw_exit(&rib_stat->service_list_lock);
5469733SFaramarz.Jalalian@Sun.COM #ifdef DEBUG
5479733SFaramarz.Jalalian@Sun.COM 			cmn_err(CE_NOTE, "rpcib_free_service_list: "
5489733SFaramarz.Jalalian@Sun.COM 			    "ibt_deregister_service failed (%d)\n", (int)ret);
5499733SFaramarz.Jalalian@Sun.COM #endif
5509733SFaramarz.Jalalian@Sun.COM 			return (RDMA_FAILED);
5519733SFaramarz.Jalalian@Sun.COM 		}
5529733SFaramarz.Jalalian@Sun.COM 		rib_stat->service_list = service->next;
5539733SFaramarz.Jalalian@Sun.COM 		kmem_free(service, sizeof (rib_service_t));
5549733SFaramarz.Jalalian@Sun.COM 	}
5559733SFaramarz.Jalalian@Sun.COM 	rw_exit(&rib_stat->service_list_lock);
5569733SFaramarz.Jalalian@Sun.COM 
5579733SFaramarz.Jalalian@Sun.COM 	return (RDMA_SUCCESS);
5589733SFaramarz.Jalalian@Sun.COM }
5599733SFaramarz.Jalalian@Sun.COM 
5600Sstevel@tonic-gate static int
rpcib_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)5610Sstevel@tonic-gate rpcib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5620Sstevel@tonic-gate {
5630Sstevel@tonic-gate 	ibt_status_t	ibt_status;
5640Sstevel@tonic-gate 	rdma_stat	r_status;
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate 	switch (cmd) {
5670Sstevel@tonic-gate 	case DDI_ATTACH:
5680Sstevel@tonic-gate 		break;
5690Sstevel@tonic-gate 	case DDI_RESUME:
5700Sstevel@tonic-gate 		return (DDI_SUCCESS);
5710Sstevel@tonic-gate 	default:
5720Sstevel@tonic-gate 		return (DDI_FAILURE);
5730Sstevel@tonic-gate 	}
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate 	mutex_init(&rpcib.rpcib_mutex, NULL, MUTEX_DRIVER, NULL);
5760Sstevel@tonic-gate 
5770Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
5780Sstevel@tonic-gate 	if (rpcib.rpcib_dip != NULL) {
5790Sstevel@tonic-gate 		mutex_exit(&rpcib.rpcib_mutex);
5800Sstevel@tonic-gate 		return (DDI_FAILURE);
5810Sstevel@tonic-gate 	}
5820Sstevel@tonic-gate 	rpcib.rpcib_dip = dip;
5830Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
5840Sstevel@tonic-gate 	/*
5850Sstevel@tonic-gate 	 * Create the "rpcib" minor-node.
5860Sstevel@tonic-gate 	 */
5870Sstevel@tonic-gate 	if (ddi_create_minor_node(dip,
5880Sstevel@tonic-gate 	    "rpcib", S_IFCHR, 0, DDI_PSEUDO, 0) != DDI_SUCCESS) {
5890Sstevel@tonic-gate 		/* Error message, no cmn_err as they print on console */
5900Sstevel@tonic-gate 		return (DDI_FAILURE);
5910Sstevel@tonic-gate 	}
5920Sstevel@tonic-gate 
5930Sstevel@tonic-gate 	if (rib_stat == NULL) {
5940Sstevel@tonic-gate 		rib_stat = kmem_zalloc(sizeof (*rib_stat), KM_SLEEP);
5950Sstevel@tonic-gate 		mutex_init(&rib_stat->open_hca_lock, NULL, MUTEX_DRIVER, NULL);
5969733SFaramarz.Jalalian@Sun.COM 		rw_init(&rib_stat->hcas_list_lock, NULL, RW_DRIVER, NULL);
5979733SFaramarz.Jalalian@Sun.COM 		mutex_init(&rib_stat->listen_lock, NULL, MUTEX_DRIVER, NULL);
5980Sstevel@tonic-gate 	}
5990Sstevel@tonic-gate 
6009733SFaramarz.Jalalian@Sun.COM 	rib_stat->hca_count = ibt_get_hca_list(NULL);
6010Sstevel@tonic-gate 	if (rib_stat->hca_count < 1) {
6029733SFaramarz.Jalalian@Sun.COM 		mutex_destroy(&rib_stat->listen_lock);
6039733SFaramarz.Jalalian@Sun.COM 		rw_destroy(&rib_stat->hcas_list_lock);
6040Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
6050Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
6060Sstevel@tonic-gate 		rib_stat = NULL;
6070Sstevel@tonic-gate 		return (DDI_FAILURE);
6080Sstevel@tonic-gate 	}
6090Sstevel@tonic-gate 
6100Sstevel@tonic-gate 	ibt_status = ibt_attach(&rib_modinfo, dip,
6117387SRobert.Gordon@Sun.COM 	    (void *)rib_stat, &rib_stat->ibt_clnt_hdl);
6127387SRobert.Gordon@Sun.COM 
6130Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
6149733SFaramarz.Jalalian@Sun.COM 		mutex_destroy(&rib_stat->listen_lock);
6159733SFaramarz.Jalalian@Sun.COM 		rw_destroy(&rib_stat->hcas_list_lock);
6160Sstevel@tonic-gate 		mutex_destroy(&rib_stat->open_hca_lock);
6170Sstevel@tonic-gate 		kmem_free(rib_stat, sizeof (*rib_stat));
6180Sstevel@tonic-gate 		rib_stat = NULL;
6190Sstevel@tonic-gate 		return (DDI_FAILURE);
6200Sstevel@tonic-gate 	}
6210Sstevel@tonic-gate 
6229733SFaramarz.Jalalian@Sun.COM 	rib_stat->service_list = NULL;
6239733SFaramarz.Jalalian@Sun.COM 	rw_init(&rib_stat->service_list_lock, NULL, RW_DRIVER, NULL);
6240Sstevel@tonic-gate 	mutex_enter(&rib_stat->open_hca_lock);
6259733SFaramarz.Jalalian@Sun.COM 	if (rpcib_open_hcas(rib_stat) != RDMA_SUCCESS) {
6260Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
6278695SRajkumar.Sivaprakasam@Sun.COM 		goto open_fail;
6280Sstevel@tonic-gate 	}
6290Sstevel@tonic-gate 	mutex_exit(&rib_stat->open_hca_lock);
6300Sstevel@tonic-gate 
6318695SRajkumar.Sivaprakasam@Sun.COM 	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) !=
6328695SRajkumar.Sivaprakasam@Sun.COM 	    DDI_PROP_SUCCESS) {
6338695SRajkumar.Sivaprakasam@Sun.COM 		cmn_err(CE_WARN, "rpcib_attach: ddi-no-autodetach prop update "
6348695SRajkumar.Sivaprakasam@Sun.COM 		    "failed.");
6358695SRajkumar.Sivaprakasam@Sun.COM 		goto register_fail;
6368695SRajkumar.Sivaprakasam@Sun.COM 	}
6378695SRajkumar.Sivaprakasam@Sun.COM 
6380Sstevel@tonic-gate 	/*
6390Sstevel@tonic-gate 	 * Register with rdmatf
6400Sstevel@tonic-gate 	 */
6410Sstevel@tonic-gate 	r_status = rdma_register_mod(&rib_mod);
6420Sstevel@tonic-gate 	if (r_status != RDMA_SUCCESS && r_status != RDMA_REG_EXIST) {
6438695SRajkumar.Sivaprakasam@Sun.COM 		cmn_err(CE_WARN, "rpcib_attach:rdma_register_mod failed, "
6448695SRajkumar.Sivaprakasam@Sun.COM 		    "status = %d", r_status);
6458695SRajkumar.Sivaprakasam@Sun.COM 		goto register_fail;
6460Sstevel@tonic-gate 	}
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	return (DDI_SUCCESS);
6498695SRajkumar.Sivaprakasam@Sun.COM 
6508695SRajkumar.Sivaprakasam@Sun.COM register_fail:
6519733SFaramarz.Jalalian@Sun.COM 
6528695SRajkumar.Sivaprakasam@Sun.COM open_fail:
6538695SRajkumar.Sivaprakasam@Sun.COM 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
6549733SFaramarz.Jalalian@Sun.COM 	rpcib_free_hca_list();
6559733SFaramarz.Jalalian@Sun.COM 	(void) rpcib_free_service_list();
6569733SFaramarz.Jalalian@Sun.COM 	mutex_destroy(&rib_stat->listen_lock);
6579733SFaramarz.Jalalian@Sun.COM 	rw_destroy(&rib_stat->hcas_list_lock);
6588695SRajkumar.Sivaprakasam@Sun.COM 	mutex_destroy(&rib_stat->open_hca_lock);
6599733SFaramarz.Jalalian@Sun.COM 	rw_destroy(&rib_stat->service_list_lock);
6608695SRajkumar.Sivaprakasam@Sun.COM 	kmem_free(rib_stat, sizeof (*rib_stat));
6618695SRajkumar.Sivaprakasam@Sun.COM 	rib_stat = NULL;
6628695SRajkumar.Sivaprakasam@Sun.COM 	return (DDI_FAILURE);
6630Sstevel@tonic-gate }
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate /*ARGSUSED*/
6660Sstevel@tonic-gate static int
rpcib_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)6670Sstevel@tonic-gate rpcib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6680Sstevel@tonic-gate {
6690Sstevel@tonic-gate 	switch (cmd) {
6700Sstevel@tonic-gate 
6710Sstevel@tonic-gate 	case DDI_DETACH:
6720Sstevel@tonic-gate 		break;
6730Sstevel@tonic-gate 
6740Sstevel@tonic-gate 	case DDI_SUSPEND:
6750Sstevel@tonic-gate 	default:
6760Sstevel@tonic-gate 		return (DDI_FAILURE);
6770Sstevel@tonic-gate 	}
6780Sstevel@tonic-gate 
6790Sstevel@tonic-gate 	/*
6800Sstevel@tonic-gate 	 * Detach the hca and free resources
6810Sstevel@tonic-gate 	 */
6820Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
6830Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
6840Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
6859733SFaramarz.Jalalian@Sun.COM 
6869733SFaramarz.Jalalian@Sun.COM 	if (rpcib_free_service_list() != RDMA_SUCCESS)
6879733SFaramarz.Jalalian@Sun.COM 		return (DDI_FAILURE);
6889733SFaramarz.Jalalian@Sun.COM 	rpcib_free_hca_list();
6899733SFaramarz.Jalalian@Sun.COM 
6900Sstevel@tonic-gate 	(void) ibt_detach(rib_stat->ibt_clnt_hdl);
6919733SFaramarz.Jalalian@Sun.COM 	mutex_destroy(&rib_stat->listen_lock);
6929733SFaramarz.Jalalian@Sun.COM 	rw_destroy(&rib_stat->hcas_list_lock);
6938695SRajkumar.Sivaprakasam@Sun.COM 	mutex_destroy(&rib_stat->open_hca_lock);
6949733SFaramarz.Jalalian@Sun.COM 	rw_destroy(&rib_stat->service_list_lock);
6959733SFaramarz.Jalalian@Sun.COM 
6968695SRajkumar.Sivaprakasam@Sun.COM 	kmem_free(rib_stat, sizeof (*rib_stat));
6978695SRajkumar.Sivaprakasam@Sun.COM 	rib_stat = NULL;
6980Sstevel@tonic-gate 
6990Sstevel@tonic-gate 	mutex_enter(&rpcib.rpcib_mutex);
7000Sstevel@tonic-gate 	rpcib.rpcib_dip = NULL;
7010Sstevel@tonic-gate 	mutex_exit(&rpcib.rpcib_mutex);
7020Sstevel@tonic-gate 	mutex_destroy(&rpcib.rpcib_mutex);
7030Sstevel@tonic-gate 	return (DDI_SUCCESS);
7040Sstevel@tonic-gate }
7050Sstevel@tonic-gate 
7060Sstevel@tonic-gate 
7070Sstevel@tonic-gate static void rib_rbufpool_free(rib_hca_t *, int);
7080Sstevel@tonic-gate static void rib_rbufpool_deregister(rib_hca_t *, int);
7090Sstevel@tonic-gate static void rib_rbufpool_destroy(rib_hca_t *hca, int ptype);
7100Sstevel@tonic-gate static struct reply *rib_addreplylist(rib_qp_t *, uint32_t);
7110Sstevel@tonic-gate static rdma_stat rib_rem_replylist(rib_qp_t *);
7120Sstevel@tonic-gate static int rib_remreply(rib_qp_t *, struct reply *);
7130Sstevel@tonic-gate static rdma_stat rib_add_connlist(CONN *, rib_conn_list_t *);
7140Sstevel@tonic-gate static rdma_stat rib_rm_conn(CONN *, rib_conn_list_t *);
7150Sstevel@tonic-gate 
7167387SRobert.Gordon@Sun.COM 
7170Sstevel@tonic-gate /*
7180Sstevel@tonic-gate  * One CQ pair per HCA
7190Sstevel@tonic-gate  */
7200Sstevel@tonic-gate static rdma_stat
rib_create_cq(rib_hca_t * hca,uint32_t cq_size,ibt_cq_handler_t cq_handler,rib_cq_t ** cqp)7210Sstevel@tonic-gate rib_create_cq(rib_hca_t *hca, uint32_t cq_size, ibt_cq_handler_t cq_handler,
7229733SFaramarz.Jalalian@Sun.COM 	rib_cq_t **cqp)
7230Sstevel@tonic-gate {
7240Sstevel@tonic-gate 	rib_cq_t	*cq;
7250Sstevel@tonic-gate 	ibt_cq_attr_t	cq_attr;
7260Sstevel@tonic-gate 	uint32_t	real_size;
7270Sstevel@tonic-gate 	ibt_status_t	status;
7280Sstevel@tonic-gate 	rdma_stat	error = RDMA_SUCCESS;
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 	cq = kmem_zalloc(sizeof (rib_cq_t), KM_SLEEP);
7310Sstevel@tonic-gate 	cq->rib_hca = hca;
732*12965SWilliam.Taylor@Oracle.COM 	bzero(&cq_attr, sizeof (cq_attr));
7330Sstevel@tonic-gate 	cq_attr.cq_size = cq_size;
7340Sstevel@tonic-gate 	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
7350Sstevel@tonic-gate 	status = ibt_alloc_cq(hca->hca_hdl, &cq_attr, &cq->rib_cq_hdl,
7360Sstevel@tonic-gate 	    &real_size);
7370Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
7380Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: ibt_alloc_cq() failed,"
7397387SRobert.Gordon@Sun.COM 		    " status=%d", status);
7400Sstevel@tonic-gate 		error = RDMA_FAILED;
7410Sstevel@tonic-gate 		goto fail;
7420Sstevel@tonic-gate 	}
7439733SFaramarz.Jalalian@Sun.COM 	ibt_set_cq_handler(cq->rib_cq_hdl, cq_handler, hca);
7440Sstevel@tonic-gate 
7450Sstevel@tonic-gate 	/*
7460Sstevel@tonic-gate 	 * Enable CQ callbacks. CQ Callbacks are single shot
7470Sstevel@tonic-gate 	 * (e.g. you have to call ibt_enable_cq_notify()
7480Sstevel@tonic-gate 	 * after each callback to get another one).
7490Sstevel@tonic-gate 	 */
7500Sstevel@tonic-gate 	status = ibt_enable_cq_notify(cq->rib_cq_hdl, IBT_NEXT_COMPLETION);
7510Sstevel@tonic-gate 	if (status != IBT_SUCCESS) {
7520Sstevel@tonic-gate 		cmn_err(CE_WARN, "rib_create_cq: "
7537387SRobert.Gordon@Sun.COM 		    "enable_cq_notify failed, status %d", status);
7540Sstevel@tonic-gate 		error = RDMA_FAILED;
7550Sstevel@tonic-gate 		goto fail;
7560Sstevel@tonic-gate 	}
7570Sstevel@tonic-gate 	*cqp = cq;
7580Sstevel@tonic-gate 
7590Sstevel@tonic-gate 	return (error);
7600Sstevel@tonic-gate fail:
7610Sstevel@tonic-gate 	if (cq->rib_cq_hdl)
7620Sstevel@tonic-gate 		(void) ibt_free_cq(cq->rib_cq_hdl);
7630Sstevel@tonic-gate 	if (cq)
7640Sstevel@tonic-gate 		kmem_free(cq, sizeof (rib_cq_t));
7650Sstevel@tonic-gate 	return (error);
7660Sstevel@tonic-gate }
7670Sstevel@tonic-gate 
7689733SFaramarz.Jalalian@Sun.COM /*
7699733SFaramarz.Jalalian@Sun.COM  * rpcib_find_hca
7709733SFaramarz.Jalalian@Sun.COM  *
7719733SFaramarz.Jalalian@Sun.COM  * Caller should have already locked the hcas_lock before calling
7729733SFaramarz.Jalalian@Sun.COM  * this function.
7739733SFaramarz.Jalalian@Sun.COM  */
7749733SFaramarz.Jalalian@Sun.COM static rib_hca_t *
rpcib_find_hca(rpcib_state_t * ribstat,ib_guid_t guid)7759733SFaramarz.Jalalian@Sun.COM rpcib_find_hca(rpcib_state_t *ribstat, ib_guid_t guid)
7769733SFaramarz.Jalalian@Sun.COM {
7779733SFaramarz.Jalalian@Sun.COM 	rib_hca_t *hca = ribstat->hcas_list;
7789733SFaramarz.Jalalian@Sun.COM 
7799733SFaramarz.Jalalian@Sun.COM 	while (hca && hca->hca_guid != guid)
7809733SFaramarz.Jalalian@Sun.COM 		hca = hca->next;
7819733SFaramarz.Jalalian@Sun.COM 
7829733SFaramarz.Jalalian@Sun.COM 	return (hca);
7839733SFaramarz.Jalalian@Sun.COM }
7849733SFaramarz.Jalalian@Sun.COM 
7850Sstevel@tonic-gate static rdma_stat
rpcib_open_hcas(rpcib_state_t * ribstat)7869733SFaramarz.Jalalian@Sun.COM rpcib_open_hcas(rpcib_state_t *ribstat)
7870Sstevel@tonic-gate {
7880Sstevel@tonic-gate 	rib_hca_t		*hca;
7890Sstevel@tonic-gate 	ibt_status_t		ibt_status;
7900Sstevel@tonic-gate 	rdma_stat		status;
7910Sstevel@tonic-gate 	ibt_hca_portinfo_t	*pinfop;
7920Sstevel@tonic-gate 	ibt_pd_flags_t		pd_flags = IBT_PD_NO_FLAGS;
7930Sstevel@tonic-gate 	uint_t			size, cq_size;
7940Sstevel@tonic-gate 	int			i;
7957387SRobert.Gordon@Sun.COM 	kstat_t *ksp;
7967387SRobert.Gordon@Sun.COM 	cache_avl_struct_t example_avl_node;
7977387SRobert.Gordon@Sun.COM 	char rssc_name[32];
7989733SFaramarz.Jalalian@Sun.COM 	int old_nhca_inited = ribstat->nhca_inited;
7999733SFaramarz.Jalalian@Sun.COM 	ib_guid_t		*hca_guids;
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ribstat->open_hca_lock));
8027387SRobert.Gordon@Sun.COM 
8039733SFaramarz.Jalalian@Sun.COM 	ribstat->hca_count = ibt_get_hca_list(&hca_guids);
8049733SFaramarz.Jalalian@Sun.COM 	if (ribstat->hca_count == 0)
8059733SFaramarz.Jalalian@Sun.COM 		return (RDMA_FAILED);
8069733SFaramarz.Jalalian@Sun.COM 
8079733SFaramarz.Jalalian@Sun.COM 	rw_enter(&ribstat->hcas_list_lock, RW_WRITER);
8080Sstevel@tonic-gate 	/*
8090Sstevel@tonic-gate 	 * Open a hca and setup for RDMA
8100Sstevel@tonic-gate 	 */
8110Sstevel@tonic-gate 	for (i = 0; i < ribstat->hca_count; i++) {
8129733SFaramarz.Jalalian@Sun.COM 		if (rpcib_find_hca(ribstat, hca_guids[i]))
8139733SFaramarz.Jalalian@Sun.COM 			continue;
8149733SFaramarz.Jalalian@Sun.COM 		hca = kmem_zalloc(sizeof (rib_hca_t), KM_SLEEP);
8159733SFaramarz.Jalalian@Sun.COM 
8160Sstevel@tonic-gate 		ibt_status = ibt_open_hca(ribstat->ibt_clnt_hdl,
8179733SFaramarz.Jalalian@Sun.COM 		    hca_guids[i], &hca->hca_hdl);
8180Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
8199733SFaramarz.Jalalian@Sun.COM 			kmem_free(hca, sizeof (rib_hca_t));
8200Sstevel@tonic-gate 			continue;
8210Sstevel@tonic-gate 		}
8229733SFaramarz.Jalalian@Sun.COM 		hca->hca_guid = hca_guids[i];
8230Sstevel@tonic-gate 		hca->ibt_clnt_hdl = ribstat->ibt_clnt_hdl;
8240Sstevel@tonic-gate 		hca->state = HCA_INITED;
8250Sstevel@tonic-gate 
8260Sstevel@tonic-gate 		/*
8270Sstevel@tonic-gate 		 * query HCA info
8280Sstevel@tonic-gate 		 */
8290Sstevel@tonic-gate 		ibt_status = ibt_query_hca(hca->hca_hdl, &hca->hca_attrs);
8300Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
8310Sstevel@tonic-gate 			goto fail1;
8320Sstevel@tonic-gate 		}
8330Sstevel@tonic-gate 
8340Sstevel@tonic-gate 		/*
8350Sstevel@tonic-gate 		 * One PD (Protection Domain) per HCA.
8360Sstevel@tonic-gate 		 * A qp is allowed to access a memory region
8370Sstevel@tonic-gate 		 * only when it's in the same PD as that of
8380Sstevel@tonic-gate 		 * the memory region.
8390Sstevel@tonic-gate 		 */
8400Sstevel@tonic-gate 		ibt_status = ibt_alloc_pd(hca->hca_hdl, pd_flags, &hca->pd_hdl);
8410Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
8420Sstevel@tonic-gate 			goto fail1;
8430Sstevel@tonic-gate 		}
8440Sstevel@tonic-gate 
8450Sstevel@tonic-gate 		/*
8460Sstevel@tonic-gate 		 * query HCA ports
8470Sstevel@tonic-gate 		 */
8480Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl,
8497387SRobert.Gordon@Sun.COM 		    0, &pinfop, &hca->hca_nports, &size);
8500Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
8510Sstevel@tonic-gate 			goto fail2;
8520Sstevel@tonic-gate 		}
8530Sstevel@tonic-gate 		hca->hca_ports = pinfop;
8540Sstevel@tonic-gate 		hca->hca_pinfosz = size;
8550Sstevel@tonic-gate 		pinfop = NULL;
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate 		cq_size = DEF_CQ_SIZE; /* default cq size */
8580Sstevel@tonic-gate 		/*
8590Sstevel@tonic-gate 		 * Create 2 pairs of cq's (1 pair for client
8600Sstevel@tonic-gate 		 * and the other pair for server) on this hca.
8610Sstevel@tonic-gate 		 * If number of qp's gets too large, then several
8620Sstevel@tonic-gate 		 * cq's will be needed.
8630Sstevel@tonic-gate 		 */
8640Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_rcq_handler,
8659733SFaramarz.Jalalian@Sun.COM 		    &hca->svc_rcq);
8660Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
8670Sstevel@tonic-gate 			goto fail3;
8680Sstevel@tonic-gate 		}
8690Sstevel@tonic-gate 
8700Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_svc_scq_handler,
8719733SFaramarz.Jalalian@Sun.COM 		    &hca->svc_scq);
8720Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
8730Sstevel@tonic-gate 			goto fail3;
8740Sstevel@tonic-gate 		}
8750Sstevel@tonic-gate 
8760Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_rcq_handler,
8779733SFaramarz.Jalalian@Sun.COM 		    &hca->clnt_rcq);
8780Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
8790Sstevel@tonic-gate 			goto fail3;
8800Sstevel@tonic-gate 		}
8810Sstevel@tonic-gate 
8820Sstevel@tonic-gate 		status = rib_create_cq(hca, cq_size, rib_clnt_scq_handler,
8839733SFaramarz.Jalalian@Sun.COM 		    &hca->clnt_scq);
8840Sstevel@tonic-gate 		if (status != RDMA_SUCCESS) {
8850Sstevel@tonic-gate 			goto fail3;
8860Sstevel@tonic-gate 		}
8870Sstevel@tonic-gate 
8880Sstevel@tonic-gate 		/*
8890Sstevel@tonic-gate 		 * Create buffer pools.
8900Sstevel@tonic-gate 		 * Note rib_rbuf_create also allocates memory windows.
8910Sstevel@tonic-gate 		 */
8920Sstevel@tonic-gate 		hca->recv_pool = rib_rbufpool_create(hca,
8939723SSiddheshwar.Mahesh@Sun.COM 		    RECV_BUFFER, rib_max_rbufs);
8940Sstevel@tonic-gate 		if (hca->recv_pool == NULL) {
8950Sstevel@tonic-gate 			goto fail3;
8960Sstevel@tonic-gate 		}
8970Sstevel@tonic-gate 
8980Sstevel@tonic-gate 		hca->send_pool = rib_rbufpool_create(hca,
8999723SSiddheshwar.Mahesh@Sun.COM 		    SEND_BUFFER, rib_max_rbufs);
9000Sstevel@tonic-gate 		if (hca->send_pool == NULL) {
9010Sstevel@tonic-gate 			rib_rbufpool_destroy(hca, RECV_BUFFER);
9020Sstevel@tonic-gate 			goto fail3;
9030Sstevel@tonic-gate 		}
9040Sstevel@tonic-gate 
9057387SRobert.Gordon@Sun.COM 		if (hca->server_side_cache == NULL) {
9067387SRobert.Gordon@Sun.COM 			(void) sprintf(rssc_name,
9079733SFaramarz.Jalalian@Sun.COM 			    "rib_srvr_cache_%llx",
9089733SFaramarz.Jalalian@Sun.COM 			    (long long unsigned int) hca->hca_guid);
9097387SRobert.Gordon@Sun.COM 			hca->server_side_cache = kmem_cache_create(
9107387SRobert.Gordon@Sun.COM 			    rssc_name,
9117387SRobert.Gordon@Sun.COM 			    sizeof (cache_avl_struct_t), 0,
9127387SRobert.Gordon@Sun.COM 			    NULL,
9137387SRobert.Gordon@Sun.COM 			    NULL,
9147387SRobert.Gordon@Sun.COM 			    rib_server_side_cache_reclaim,
9157387SRobert.Gordon@Sun.COM 			    hca, NULL, 0);
9167387SRobert.Gordon@Sun.COM 		}
9177387SRobert.Gordon@Sun.COM 
9187387SRobert.Gordon@Sun.COM 		avl_create(&hca->avl_tree,
9197387SRobert.Gordon@Sun.COM 		    avl_compare,
9207387SRobert.Gordon@Sun.COM 		    sizeof (cache_avl_struct_t),
9217387SRobert.Gordon@Sun.COM 		    (uint_t)(uintptr_t)&example_avl_node.avl_link-
9227387SRobert.Gordon@Sun.COM 		    (uint_t)(uintptr_t)&example_avl_node);
9237387SRobert.Gordon@Sun.COM 
9249733SFaramarz.Jalalian@Sun.COM 		rw_init(&hca->bound_services_lock, NULL, RW_DRIVER,
9259733SFaramarz.Jalalian@Sun.COM 		    hca->iblock);
9269733SFaramarz.Jalalian@Sun.COM 		rw_init(&hca->state_lock, NULL, RW_DRIVER, hca->iblock);
9277387SRobert.Gordon@Sun.COM 		rw_init(&hca->avl_rw_lock,
9287387SRobert.Gordon@Sun.COM 		    NULL, RW_DRIVER, hca->iblock);
9299733SFaramarz.Jalalian@Sun.COM 		mutex_init(&hca->cache_allocation_lock,
9307387SRobert.Gordon@Sun.COM 		    NULL, MUTEX_DRIVER, NULL);
9317387SRobert.Gordon@Sun.COM 		hca->avl_init = TRUE;
9327387SRobert.Gordon@Sun.COM 
9337387SRobert.Gordon@Sun.COM 		/* Create kstats for the cache */
9347387SRobert.Gordon@Sun.COM 		ASSERT(INGLOBALZONE(curproc));
9357387SRobert.Gordon@Sun.COM 
9367387SRobert.Gordon@Sun.COM 		if (!stats_enabled) {
9377387SRobert.Gordon@Sun.COM 			ksp = kstat_create_zone("unix", 0, "rpcib_cache", "rpc",
9387387SRobert.Gordon@Sun.COM 			    KSTAT_TYPE_NAMED,
9397387SRobert.Gordon@Sun.COM 			    sizeof (rpcib_kstat) / sizeof (kstat_named_t),
9407387SRobert.Gordon@Sun.COM 			    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
9417387SRobert.Gordon@Sun.COM 			    GLOBAL_ZONEID);
9427387SRobert.Gordon@Sun.COM 			if (ksp) {
9437387SRobert.Gordon@Sun.COM 				ksp->ks_data = (void *) &rpcib_kstat;
9447387SRobert.Gordon@Sun.COM 				ksp->ks_update = rpcib_cache_kstat_update;
9457387SRobert.Gordon@Sun.COM 				kstat_install(ksp);
9467387SRobert.Gordon@Sun.COM 				stats_enabled = TRUE;
9477387SRobert.Gordon@Sun.COM 			}
9487387SRobert.Gordon@Sun.COM 		}
9499723SSiddheshwar.Mahesh@Sun.COM 		if (hca->cleanup_helper == NULL) {
9509733SFaramarz.Jalalian@Sun.COM 			char tq_name[sizeof (hca->hca_guid) * 2 + 1];
9519733SFaramarz.Jalalian@Sun.COM 
9529733SFaramarz.Jalalian@Sun.COM 			(void) snprintf(tq_name, sizeof (tq_name), "%llX",
9539733SFaramarz.Jalalian@Sun.COM 			    (unsigned long long int) hca->hca_guid);
9549723SSiddheshwar.Mahesh@Sun.COM 			hca->cleanup_helper = ddi_taskq_create(NULL,
9559733SFaramarz.Jalalian@Sun.COM 			    tq_name, 1, TASKQ_DEFAULTPRI, 0);
9567387SRobert.Gordon@Sun.COM 		}
9577387SRobert.Gordon@Sun.COM 
9580Sstevel@tonic-gate 		mutex_init(&hca->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
9590Sstevel@tonic-gate 		cv_init(&hca->cb_cv, NULL, CV_DRIVER, NULL);
9600Sstevel@tonic-gate 		rw_init(&hca->cl_conn_list.conn_lock, NULL, RW_DRIVER,
9617387SRobert.Gordon@Sun.COM 		    hca->iblock);
9620Sstevel@tonic-gate 		rw_init(&hca->srv_conn_list.conn_lock, NULL, RW_DRIVER,
9637387SRobert.Gordon@Sun.COM 		    hca->iblock);
9640Sstevel@tonic-gate 		mutex_init(&hca->inuse_lock, NULL, MUTEX_DRIVER, hca->iblock);
9650Sstevel@tonic-gate 		hca->inuse = TRUE;
9669733SFaramarz.Jalalian@Sun.COM 
9679733SFaramarz.Jalalian@Sun.COM 		hca->next = ribstat->hcas_list;
9689733SFaramarz.Jalalian@Sun.COM 		ribstat->hcas_list = hca;
9690Sstevel@tonic-gate 		ribstat->nhca_inited++;
9700Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
9719733SFaramarz.Jalalian@Sun.COM 		continue;
9720Sstevel@tonic-gate 
9730Sstevel@tonic-gate fail3:
9740Sstevel@tonic-gate 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
9750Sstevel@tonic-gate fail2:
9760Sstevel@tonic-gate 		(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
9770Sstevel@tonic-gate fail1:
9780Sstevel@tonic-gate 		(void) ibt_close_hca(hca->hca_hdl);
9799733SFaramarz.Jalalian@Sun.COM 		kmem_free(hca, sizeof (rib_hca_t));
9800Sstevel@tonic-gate 	}
9819733SFaramarz.Jalalian@Sun.COM 	rw_exit(&ribstat->hcas_list_lock);
9829733SFaramarz.Jalalian@Sun.COM 	ibt_free_hca_list(hca_guids, ribstat->hca_count);
9839733SFaramarz.Jalalian@Sun.COM 	rib_mod.rdma_count = rib_stat->nhca_inited;
9849733SFaramarz.Jalalian@Sun.COM 
9859733SFaramarz.Jalalian@Sun.COM 	/*
9869733SFaramarz.Jalalian@Sun.COM 	 * return success if at least one new hca has been configured.
9879733SFaramarz.Jalalian@Sun.COM 	 */
9889733SFaramarz.Jalalian@Sun.COM 	if (ribstat->nhca_inited != old_nhca_inited)
9890Sstevel@tonic-gate 		return (RDMA_SUCCESS);
9900Sstevel@tonic-gate 	else
9910Sstevel@tonic-gate 		return (RDMA_FAILED);
9920Sstevel@tonic-gate }
9930Sstevel@tonic-gate 
9940Sstevel@tonic-gate /*
9950Sstevel@tonic-gate  * Callback routines
9960Sstevel@tonic-gate  */
9970Sstevel@tonic-gate 
9980Sstevel@tonic-gate /*
9990Sstevel@tonic-gate  * SCQ handlers
10000Sstevel@tonic-gate  */
10010Sstevel@tonic-gate /* ARGSUSED */
10020Sstevel@tonic-gate static void
rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl,void * arg)10030Sstevel@tonic-gate rib_clnt_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
10040Sstevel@tonic-gate {
10050Sstevel@tonic-gate 	ibt_status_t	ibt_status;
10060Sstevel@tonic-gate 	ibt_wc_t	wc;
10079723SSiddheshwar.Mahesh@Sun.COM 	struct send_wid	*wd;
10089723SSiddheshwar.Mahesh@Sun.COM 	CONN		*conn;
10099723SSiddheshwar.Mahesh@Sun.COM 	rib_qp_t	*qp;
10100Sstevel@tonic-gate 	int		i;
10110Sstevel@tonic-gate 
10120Sstevel@tonic-gate 	/*
10130Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
10140Sstevel@tonic-gate 	 * completion queue notification.
10150Sstevel@tonic-gate 	 */
10160Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
10170Sstevel@tonic-gate 
10180Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
10190Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
10207387SRobert.Gordon@Sun.COM 		bzero(&wc, sizeof (wc));
10217387SRobert.Gordon@Sun.COM 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
10227387SRobert.Gordon@Sun.COM 		if (ibt_status != IBT_SUCCESS)
10237387SRobert.Gordon@Sun.COM 			return;
10247387SRobert.Gordon@Sun.COM 
10257387SRobert.Gordon@Sun.COM 		/*
10267387SRobert.Gordon@Sun.COM 		 * Got a send completion
10277387SRobert.Gordon@Sun.COM 		 */
10289723SSiddheshwar.Mahesh@Sun.COM 		if (wc.wc_id != RDMA_DUMMY_WRID) {
10299723SSiddheshwar.Mahesh@Sun.COM 			wd = (struct send_wid *)(uintptr_t)wc.wc_id;
10309723SSiddheshwar.Mahesh@Sun.COM 			qp = wd->qp;
10319723SSiddheshwar.Mahesh@Sun.COM 			conn = qptoc(qp);
10329723SSiddheshwar.Mahesh@Sun.COM 
10337387SRobert.Gordon@Sun.COM 			mutex_enter(&wd->sendwait_lock);
10349723SSiddheshwar.Mahesh@Sun.COM 			switch (wc.wc_status) {
10359723SSiddheshwar.Mahesh@Sun.COM 			case IBT_WC_SUCCESS:
10369723SSiddheshwar.Mahesh@Sun.COM 				wd->status = RDMA_SUCCESS;
10379723SSiddheshwar.Mahesh@Sun.COM 				break;
10389723SSiddheshwar.Mahesh@Sun.COM 			default:
10399723SSiddheshwar.Mahesh@Sun.COM /*
10409723SSiddheshwar.Mahesh@Sun.COM  *    RC Send Q Error Code		Local state     Remote State
10419723SSiddheshwar.Mahesh@Sun.COM  *    ==================== 		===========     ============
10429723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_BAD_RESPONSE_ERR             ERROR           None
10439723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_LOCAL_LEN_ERR                ERROR           None
10449723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           None
10459723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_LOCAL_PROTECT_ERR            ERROR           None
10469723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_MEM_WIN_BIND_ERR             ERROR           None
10479723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR
10489723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_REMOTE_ACCESS_ERR            ERROR           ERROR
10499723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_REMOTE_OP_ERR                ERROR           ERROR
10509723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_RNR_NAK_TIMEOUT_ERR          ERROR           None
10519723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_TRANS_TIMEOUT_ERR            ERROR           None
10529723SSiddheshwar.Mahesh@Sun.COM  *    IBT_WC_WR_FLUSHED_ERR               ERROR           None
10539723SSiddheshwar.Mahesh@Sun.COM  */
10549723SSiddheshwar.Mahesh@Sun.COM 				/*
10559723SSiddheshwar.Mahesh@Sun.COM 				 * Channel in error state. Set connection to
10569723SSiddheshwar.Mahesh@Sun.COM 				 * ERROR and cleanup will happen either from
10579723SSiddheshwar.Mahesh@Sun.COM 				 * conn_release  or from rib_conn_get
10589723SSiddheshwar.Mahesh@Sun.COM 				 */
10599723SSiddheshwar.Mahesh@Sun.COM 				wd->status = RDMA_FAILED;
10609723SSiddheshwar.Mahesh@Sun.COM 				mutex_enter(&conn->c_lock);
10619723SSiddheshwar.Mahesh@Sun.COM 				if (conn->c_state != C_DISCONN_PEND)
10629723SSiddheshwar.Mahesh@Sun.COM 					conn->c_state = C_ERROR_CONN;
10639723SSiddheshwar.Mahesh@Sun.COM 				mutex_exit(&conn->c_lock);
10649723SSiddheshwar.Mahesh@Sun.COM 				break;
10659723SSiddheshwar.Mahesh@Sun.COM 			}
10669723SSiddheshwar.Mahesh@Sun.COM 
10677387SRobert.Gordon@Sun.COM 			if (wd->cv_sig == 1) {
10687387SRobert.Gordon@Sun.COM 				/*
10699723SSiddheshwar.Mahesh@Sun.COM 				 * Notify poster
10707387SRobert.Gordon@Sun.COM 				 */
10717387SRobert.Gordon@Sun.COM 				cv_signal(&wd->wait_cv);
10727387SRobert.Gordon@Sun.COM 				mutex_exit(&wd->sendwait_lock);
10737387SRobert.Gordon@Sun.COM 			} else {
10747387SRobert.Gordon@Sun.COM 				/*
10757387SRobert.Gordon@Sun.COM 				 * Poster not waiting for notification.
10767387SRobert.Gordon@Sun.COM 				 * Free the send buffers and send_wid
10777387SRobert.Gordon@Sun.COM 				 */
10787387SRobert.Gordon@Sun.COM 				for (i = 0; i < wd->nsbufs; i++) {
10797387SRobert.Gordon@Sun.COM 					rib_rbuf_free(qptoc(wd->qp),
10807387SRobert.Gordon@Sun.COM 					    SEND_BUFFER,
10817387SRobert.Gordon@Sun.COM 					    (void *)(uintptr_t)wd->sbufaddr[i]);
10827387SRobert.Gordon@Sun.COM 				}
10839723SSiddheshwar.Mahesh@Sun.COM 
10849723SSiddheshwar.Mahesh@Sun.COM 				/* decrement the send ref count */
10859723SSiddheshwar.Mahesh@Sun.COM 				rib_send_rele(qp);
10869723SSiddheshwar.Mahesh@Sun.COM 
10879723SSiddheshwar.Mahesh@Sun.COM 				mutex_exit(&wd->sendwait_lock);
10889723SSiddheshwar.Mahesh@Sun.COM 				(void) rib_free_sendwait(wd);
10899723SSiddheshwar.Mahesh@Sun.COM 			}
10909723SSiddheshwar.Mahesh@Sun.COM 		}
10919723SSiddheshwar.Mahesh@Sun.COM 	}
10929723SSiddheshwar.Mahesh@Sun.COM }
10939723SSiddheshwar.Mahesh@Sun.COM 
10949723SSiddheshwar.Mahesh@Sun.COM /* ARGSUSED */
10959723SSiddheshwar.Mahesh@Sun.COM static void
rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl,void * arg)10969723SSiddheshwar.Mahesh@Sun.COM rib_svc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
10979723SSiddheshwar.Mahesh@Sun.COM {
10989723SSiddheshwar.Mahesh@Sun.COM 	ibt_status_t	ibt_status;
10999723SSiddheshwar.Mahesh@Sun.COM 	ibt_wc_t	wc;
11009723SSiddheshwar.Mahesh@Sun.COM 	struct send_wid	*wd;
11019723SSiddheshwar.Mahesh@Sun.COM 	rib_qp_t	*qp;
11029723SSiddheshwar.Mahesh@Sun.COM 	CONN		*conn;
11039723SSiddheshwar.Mahesh@Sun.COM 	int		i;
11049723SSiddheshwar.Mahesh@Sun.COM 
11059723SSiddheshwar.Mahesh@Sun.COM 	/*
11069723SSiddheshwar.Mahesh@Sun.COM 	 * Re-enable cq notify here to avoid missing any
11079723SSiddheshwar.Mahesh@Sun.COM 	 * completion queue notification.
11089723SSiddheshwar.Mahesh@Sun.COM 	 */
11099723SSiddheshwar.Mahesh@Sun.COM 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
11109723SSiddheshwar.Mahesh@Sun.COM 
11119723SSiddheshwar.Mahesh@Sun.COM 	ibt_status = IBT_SUCCESS;
11129723SSiddheshwar.Mahesh@Sun.COM 	while (ibt_status != IBT_CQ_EMPTY) {
11139723SSiddheshwar.Mahesh@Sun.COM 		bzero(&wc, sizeof (wc));
11149723SSiddheshwar.Mahesh@Sun.COM 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
11159723SSiddheshwar.Mahesh@Sun.COM 		if (ibt_status != IBT_SUCCESS)
11169723SSiddheshwar.Mahesh@Sun.COM 			return;
11179723SSiddheshwar.Mahesh@Sun.COM 
11189723SSiddheshwar.Mahesh@Sun.COM 		/*
11199723SSiddheshwar.Mahesh@Sun.COM 		 * Got a send completion
11209723SSiddheshwar.Mahesh@Sun.COM 		 */
11219723SSiddheshwar.Mahesh@Sun.COM 		if (wc.wc_id != RDMA_DUMMY_WRID) {
11229723SSiddheshwar.Mahesh@Sun.COM 			wd = (struct send_wid *)(uintptr_t)wc.wc_id;
11239723SSiddheshwar.Mahesh@Sun.COM 			qp = wd->qp;
11249723SSiddheshwar.Mahesh@Sun.COM 			conn = qptoc(qp);
11259723SSiddheshwar.Mahesh@Sun.COM 			mutex_enter(&wd->sendwait_lock);
11269723SSiddheshwar.Mahesh@Sun.COM 
11279723SSiddheshwar.Mahesh@Sun.COM 			switch (wc.wc_status) {
11289723SSiddheshwar.Mahesh@Sun.COM 			case IBT_WC_SUCCESS:
11299723SSiddheshwar.Mahesh@Sun.COM 				wd->status = RDMA_SUCCESS;
11309723SSiddheshwar.Mahesh@Sun.COM 				break;
11319723SSiddheshwar.Mahesh@Sun.COM 			default:
11329723SSiddheshwar.Mahesh@Sun.COM 				/*
11339723SSiddheshwar.Mahesh@Sun.COM 				 * Channel in error state. Set connection to
11349723SSiddheshwar.Mahesh@Sun.COM 				 * ERROR and cleanup will happen either from
11359723SSiddheshwar.Mahesh@Sun.COM 				 * conn_release  or conn timeout.
11369723SSiddheshwar.Mahesh@Sun.COM 				 */
11379723SSiddheshwar.Mahesh@Sun.COM 				wd->status = RDMA_FAILED;
11389723SSiddheshwar.Mahesh@Sun.COM 				mutex_enter(&conn->c_lock);
11399723SSiddheshwar.Mahesh@Sun.COM 				if (conn->c_state != C_DISCONN_PEND)
11409723SSiddheshwar.Mahesh@Sun.COM 					conn->c_state = C_ERROR_CONN;
11419723SSiddheshwar.Mahesh@Sun.COM 				mutex_exit(&conn->c_lock);
11429723SSiddheshwar.Mahesh@Sun.COM 				break;
11439723SSiddheshwar.Mahesh@Sun.COM 			}
11449723SSiddheshwar.Mahesh@Sun.COM 
11459723SSiddheshwar.Mahesh@Sun.COM 			if (wd->cv_sig == 1) {
11469723SSiddheshwar.Mahesh@Sun.COM 				/*
11479723SSiddheshwar.Mahesh@Sun.COM 				 * Update completion status and notify poster
11489723SSiddheshwar.Mahesh@Sun.COM 				 */
11499723SSiddheshwar.Mahesh@Sun.COM 				cv_signal(&wd->wait_cv);
11509723SSiddheshwar.Mahesh@Sun.COM 				mutex_exit(&wd->sendwait_lock);
11519723SSiddheshwar.Mahesh@Sun.COM 			} else {
11529723SSiddheshwar.Mahesh@Sun.COM 				/*
11539723SSiddheshwar.Mahesh@Sun.COM 				 * Poster not waiting for notification.
11549723SSiddheshwar.Mahesh@Sun.COM 				 * Free the send buffers and send_wid
11559723SSiddheshwar.Mahesh@Sun.COM 				 */
11569723SSiddheshwar.Mahesh@Sun.COM 				for (i = 0; i < wd->nsbufs; i++) {
11579723SSiddheshwar.Mahesh@Sun.COM 					rib_rbuf_free(qptoc(wd->qp),
11589723SSiddheshwar.Mahesh@Sun.COM 					    SEND_BUFFER,
11599723SSiddheshwar.Mahesh@Sun.COM 					    (void *)(uintptr_t)wd->sbufaddr[i]);
11609723SSiddheshwar.Mahesh@Sun.COM 				}
11619723SSiddheshwar.Mahesh@Sun.COM 
11629723SSiddheshwar.Mahesh@Sun.COM 				/* decrement the send ref count */
11639723SSiddheshwar.Mahesh@Sun.COM 				rib_send_rele(qp);
11649723SSiddheshwar.Mahesh@Sun.COM 
11657387SRobert.Gordon@Sun.COM 				mutex_exit(&wd->sendwait_lock);
11667387SRobert.Gordon@Sun.COM 				(void) rib_free_sendwait(wd);
11670Sstevel@tonic-gate 			}
11680Sstevel@tonic-gate 		}
11690Sstevel@tonic-gate 	}
11700Sstevel@tonic-gate }
11710Sstevel@tonic-gate 
11720Sstevel@tonic-gate /*
11730Sstevel@tonic-gate  * RCQ handler
11740Sstevel@tonic-gate  */
11750Sstevel@tonic-gate /* ARGSUSED */
11760Sstevel@tonic-gate static void
rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl,void * arg)11770Sstevel@tonic-gate rib_clnt_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
11780Sstevel@tonic-gate {
11790Sstevel@tonic-gate 	rib_qp_t	*qp;
11800Sstevel@tonic-gate 	ibt_status_t	ibt_status;
11810Sstevel@tonic-gate 	ibt_wc_t	wc;
11820Sstevel@tonic-gate 	struct recv_wid	*rwid;
11830Sstevel@tonic-gate 
11840Sstevel@tonic-gate 	/*
11850Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
11860Sstevel@tonic-gate 	 * completion queue notification.
11870Sstevel@tonic-gate 	 */
11880Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
11890Sstevel@tonic-gate 
11900Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
11910Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
11920Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
11930Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
11940Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
11957387SRobert.Gordon@Sun.COM 			return;
11960Sstevel@tonic-gate 
1197249Sjwahlig 		rwid = (struct recv_wid *)(uintptr_t)wc.wc_id;
11980Sstevel@tonic-gate 		qp = rwid->qp;
119911530SSiddheshwar.Mahesh@Sun.COM 
12000Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
12017387SRobert.Gordon@Sun.COM 			XDR	inxdrs, *xdrs;
12027387SRobert.Gordon@Sun.COM 			uint_t	xid, vers, op, find_xid = 0;
12037387SRobert.Gordon@Sun.COM 			struct reply	*r;
12047387SRobert.Gordon@Sun.COM 			CONN *conn = qptoc(qp);
12057387SRobert.Gordon@Sun.COM 			uint32_t rdma_credit = 0;
12067387SRobert.Gordon@Sun.COM 
12077387SRobert.Gordon@Sun.COM 			xdrs = &inxdrs;
12087387SRobert.Gordon@Sun.COM 			xdrmem_create(xdrs, (caddr_t)(uintptr_t)rwid->addr,
12097387SRobert.Gordon@Sun.COM 			    wc.wc_bytes_xfer, XDR_DECODE);
12100Sstevel@tonic-gate 			/*
12117387SRobert.Gordon@Sun.COM 			 * Treat xid as opaque (xid is the first entity
12127387SRobert.Gordon@Sun.COM 			 * in the rpc rdma message).
12130Sstevel@tonic-gate 			 */
12147387SRobert.Gordon@Sun.COM 			xid = *(uint32_t *)(uintptr_t)rwid->addr;
12157387SRobert.Gordon@Sun.COM 
12167387SRobert.Gordon@Sun.COM 			/* Skip xid and set the xdr position accordingly. */
12177387SRobert.Gordon@Sun.COM 			XDR_SETPOS(xdrs, sizeof (uint32_t));
12187387SRobert.Gordon@Sun.COM 			(void) xdr_u_int(xdrs, &vers);
12197387SRobert.Gordon@Sun.COM 			(void) xdr_u_int(xdrs, &rdma_credit);
12207387SRobert.Gordon@Sun.COM 			(void) xdr_u_int(xdrs, &op);
12217387SRobert.Gordon@Sun.COM 			XDR_DESTROY(xdrs);
12227387SRobert.Gordon@Sun.COM 
12237387SRobert.Gordon@Sun.COM 			if (vers != RPCRDMA_VERS) {
12247387SRobert.Gordon@Sun.COM 				/*
12257387SRobert.Gordon@Sun.COM 				 * Invalid RPC/RDMA version. Cannot
12267387SRobert.Gordon@Sun.COM 				 * interoperate.  Set connection to
12277387SRobert.Gordon@Sun.COM 				 * ERROR state and bail out.
12287387SRobert.Gordon@Sun.COM 				 */
12297387SRobert.Gordon@Sun.COM 				mutex_enter(&conn->c_lock);
12307387SRobert.Gordon@Sun.COM 				if (conn->c_state != C_DISCONN_PEND)
12317387SRobert.Gordon@Sun.COM 					conn->c_state = C_ERROR_CONN;
12327387SRobert.Gordon@Sun.COM 				mutex_exit(&conn->c_lock);
12337387SRobert.Gordon@Sun.COM 				rib_rbuf_free(conn, RECV_BUFFER,
12347387SRobert.Gordon@Sun.COM 				    (void *)(uintptr_t)rwid->addr);
12357387SRobert.Gordon@Sun.COM 				rib_free_wid(rwid);
123611530SSiddheshwar.Mahesh@Sun.COM 				rib_recv_rele(qp);
12377387SRobert.Gordon@Sun.COM 				continue;
12387387SRobert.Gordon@Sun.COM 			}
12397387SRobert.Gordon@Sun.COM 
12407387SRobert.Gordon@Sun.COM 			mutex_enter(&qp->replylist_lock);
12417387SRobert.Gordon@Sun.COM 			for (r = qp->replylist; r != NULL; r = r->next) {
12427387SRobert.Gordon@Sun.COM 				if (r->xid == xid) {
12437387SRobert.Gordon@Sun.COM 					find_xid = 1;
12447387SRobert.Gordon@Sun.COM 					switch (op) {
12457387SRobert.Gordon@Sun.COM 					case RDMA_MSG:
12467387SRobert.Gordon@Sun.COM 					case RDMA_NOMSG:
12477387SRobert.Gordon@Sun.COM 					case RDMA_MSGP:
12487387SRobert.Gordon@Sun.COM 						r->status = RDMA_SUCCESS;
12497387SRobert.Gordon@Sun.COM 						r->vaddr_cq = rwid->addr;
12507387SRobert.Gordon@Sun.COM 						r->bytes_xfer =
12517387SRobert.Gordon@Sun.COM 						    wc.wc_bytes_xfer;
12527387SRobert.Gordon@Sun.COM 						cv_signal(&r->wait_cv);
12537387SRobert.Gordon@Sun.COM 						break;
12547387SRobert.Gordon@Sun.COM 					default:
12557387SRobert.Gordon@Sun.COM 						rib_rbuf_free(qptoc(qp),
12567387SRobert.Gordon@Sun.COM 						    RECV_BUFFER,
12577387SRobert.Gordon@Sun.COM 						    (void *)(uintptr_t)
12587387SRobert.Gordon@Sun.COM 						    rwid->addr);
12597387SRobert.Gordon@Sun.COM 						break;
12607387SRobert.Gordon@Sun.COM 					}
12617387SRobert.Gordon@Sun.COM 					break;
12627387SRobert.Gordon@Sun.COM 				}
12637387SRobert.Gordon@Sun.COM 			}
12647387SRobert.Gordon@Sun.COM 			mutex_exit(&qp->replylist_lock);
12657387SRobert.Gordon@Sun.COM 			if (find_xid == 0) {
12667387SRobert.Gordon@Sun.COM 				/* RPC caller not waiting for reply */
12677387SRobert.Gordon@Sun.COM 
12687387SRobert.Gordon@Sun.COM 				DTRACE_PROBE1(rpcib__i__nomatchxid1,
12697387SRobert.Gordon@Sun.COM 				    int, xid);
12707387SRobert.Gordon@Sun.COM 
12710Sstevel@tonic-gate 				rib_rbuf_free(qptoc(qp), RECV_BUFFER,
12727387SRobert.Gordon@Sun.COM 				    (void *)(uintptr_t)rwid->addr);
12730Sstevel@tonic-gate 			}
12740Sstevel@tonic-gate 		} else if (wc.wc_status == IBT_WC_WR_FLUSHED_ERR) {
12750Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
12760Sstevel@tonic-gate 
12770Sstevel@tonic-gate 			/*
12780Sstevel@tonic-gate 			 * Connection being flushed. Just free
12790Sstevel@tonic-gate 			 * the posted buffer
12800Sstevel@tonic-gate 			 */
1281249Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
12827387SRobert.Gordon@Sun.COM 			    (void *)(uintptr_t)rwid->addr);
12830Sstevel@tonic-gate 		} else {
12840Sstevel@tonic-gate 			CONN *conn = qptoc(qp);
12850Sstevel@tonic-gate /*
12860Sstevel@tonic-gate  *  RC Recv Q Error Code		Local state     Remote State
12870Sstevel@tonic-gate  *  ====================		===========     ============
12880Sstevel@tonic-gate  *  IBT_WC_LOCAL_ACCESS_ERR             ERROR           ERROR when NAK recvd
12890Sstevel@tonic-gate  *  IBT_WC_LOCAL_LEN_ERR                ERROR           ERROR when NAK recvd
12900Sstevel@tonic-gate  *  IBT_WC_LOCAL_PROTECT_ERR            ERROR           ERROR when NAK recvd
12910Sstevel@tonic-gate  *  IBT_WC_LOCAL_CHAN_OP_ERR            ERROR           ERROR when NAK recvd
12920Sstevel@tonic-gate  *  IBT_WC_REMOTE_INVALID_REQ_ERR       ERROR           ERROR when NAK recvd
12930Sstevel@tonic-gate  *  IBT_WC_WR_FLUSHED_ERR               None            None
12940Sstevel@tonic-gate  */
12950Sstevel@tonic-gate 			/*
12960Sstevel@tonic-gate 			 * Channel in error state. Set connection
12970Sstevel@tonic-gate 			 * in ERROR state.
12980Sstevel@tonic-gate 			 */
12990Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
13000Sstevel@tonic-gate 			if (conn->c_state != C_DISCONN_PEND)
13017387SRobert.Gordon@Sun.COM 				conn->c_state = C_ERROR_CONN;
13020Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
1303249Sjwahlig 			rib_rbuf_free(conn, RECV_BUFFER,
13047387SRobert.Gordon@Sun.COM 			    (void *)(uintptr_t)rwid->addr);
13050Sstevel@tonic-gate 		}
13060Sstevel@tonic-gate 		rib_free_wid(rwid);
130711530SSiddheshwar.Mahesh@Sun.COM 		rib_recv_rele(qp);
13080Sstevel@tonic-gate 	}
13090Sstevel@tonic-gate }
13100Sstevel@tonic-gate 
13110Sstevel@tonic-gate /* Server side */
13120Sstevel@tonic-gate /* ARGSUSED */
13130Sstevel@tonic-gate static void
rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl,void * arg)13140Sstevel@tonic-gate rib_svc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
13150Sstevel@tonic-gate {
13167387SRobert.Gordon@Sun.COM 	rdma_recv_data_t *rdp;
13170Sstevel@tonic-gate 	rib_qp_t	*qp;
13180Sstevel@tonic-gate 	ibt_status_t	ibt_status;
13190Sstevel@tonic-gate 	ibt_wc_t	wc;
13200Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
13210Sstevel@tonic-gate 	CONN		*conn;
13220Sstevel@tonic-gate 	mblk_t		*mp;
13230Sstevel@tonic-gate 
13240Sstevel@tonic-gate 	/*
13250Sstevel@tonic-gate 	 * Re-enable cq notify here to avoid missing any
13260Sstevel@tonic-gate 	 * completion queue notification.
13270Sstevel@tonic-gate 	 */
13280Sstevel@tonic-gate 	(void) ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION);
13290Sstevel@tonic-gate 
13300Sstevel@tonic-gate 	ibt_status = IBT_SUCCESS;
13310Sstevel@tonic-gate 	while (ibt_status != IBT_CQ_EMPTY) {
13320Sstevel@tonic-gate 		bzero(&wc, sizeof (wc));
13330Sstevel@tonic-gate 		ibt_status = ibt_poll_cq(cq_hdl, &wc, 1, NULL);
13340Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS)
13357387SRobert.Gordon@Sun.COM 			return;
13360Sstevel@tonic-gate 
1337249Sjwahlig 		s_recvp = (struct svc_recv *)(uintptr_t)wc.wc_id;
13380Sstevel@tonic-gate 		qp = s_recvp->qp;
13390Sstevel@tonic-gate 		conn = qptoc(qp);
13400Sstevel@tonic-gate 
13410Sstevel@tonic-gate 		if (wc.wc_status == IBT_WC_SUCCESS) {
13427387SRobert.Gordon@Sun.COM 			XDR	inxdrs, *xdrs;
13437387SRobert.Gordon@Sun.COM 			uint_t	xid, vers, op;
13447387SRobert.Gordon@Sun.COM 			uint32_t rdma_credit;
13457387SRobert.Gordon@Sun.COM 
13467387SRobert.Gordon@Sun.COM 			xdrs = &inxdrs;
13477387SRobert.Gordon@Sun.COM 			/* s_recvp->vaddr stores data */
13487387SRobert.Gordon@Sun.COM 			xdrmem_create(xdrs, (caddr_t)(uintptr_t)s_recvp->vaddr,
13497387SRobert.Gordon@Sun.COM 			    wc.wc_bytes_xfer, XDR_DECODE);
13507387SRobert.Gordon@Sun.COM 
13517387SRobert.Gordon@Sun.COM 			/*
13527387SRobert.Gordon@Sun.COM 			 * Treat xid as opaque (xid is the first entity
13537387SRobert.Gordon@Sun.COM 			 * in the rpc rdma message).
13547387SRobert.Gordon@Sun.COM 			 */
13557387SRobert.Gordon@Sun.COM 			xid = *(uint32_t *)(uintptr_t)s_recvp->vaddr;
13567387SRobert.Gordon@Sun.COM 			/* Skip xid and set the xdr position accordingly. */
13577387SRobert.Gordon@Sun.COM 			XDR_SETPOS(xdrs, sizeof (uint32_t));
13587387SRobert.Gordon@Sun.COM 			if (!xdr_u_int(xdrs, &vers) ||
13597387SRobert.Gordon@Sun.COM 			    !xdr_u_int(xdrs, &rdma_credit) ||
13607387SRobert.Gordon@Sun.COM 			    !xdr_u_int(xdrs, &op)) {
13617387SRobert.Gordon@Sun.COM 				rib_rbuf_free(conn, RECV_BUFFER,
13627387SRobert.Gordon@Sun.COM 				    (void *)(uintptr_t)s_recvp->vaddr);
13637387SRobert.Gordon@Sun.COM 				XDR_DESTROY(xdrs);
136411530SSiddheshwar.Mahesh@Sun.COM 				rib_recv_rele(qp);
13657387SRobert.Gordon@Sun.COM 				(void) rib_free_svc_recv(s_recvp);
13667387SRobert.Gordon@Sun.COM 				continue;
13677387SRobert.Gordon@Sun.COM 			}
13680Sstevel@tonic-gate 			XDR_DESTROY(xdrs);
13697387SRobert.Gordon@Sun.COM 
13707387SRobert.Gordon@Sun.COM 			if (vers != RPCRDMA_VERS) {
13717387SRobert.Gordon@Sun.COM 				/*
13727387SRobert.Gordon@Sun.COM 				 * Invalid RPC/RDMA version.
13737387SRobert.Gordon@Sun.COM 				 * Drop rpc rdma message.
13747387SRobert.Gordon@Sun.COM 				 */
13757387SRobert.Gordon@Sun.COM 				rib_rbuf_free(conn, RECV_BUFFER,
13767387SRobert.Gordon@Sun.COM 				    (void *)(uintptr_t)s_recvp->vaddr);
137711530SSiddheshwar.Mahesh@Sun.COM 				rib_recv_rele(qp);
13787387SRobert.Gordon@Sun.COM 				(void) rib_free_svc_recv(s_recvp);
13797387SRobert.Gordon@Sun.COM 				continue;
13807387SRobert.Gordon@Sun.COM 			}
13810Sstevel@tonic-gate 			/*
13820Sstevel@tonic-gate 			 * Is this for RDMA_DONE?
13830Sstevel@tonic-gate 			 */
13847387SRobert.Gordon@Sun.COM 			if (op == RDMA_DONE) {
13857387SRobert.Gordon@Sun.COM 				rib_rbuf_free(conn, RECV_BUFFER,
13867387SRobert.Gordon@Sun.COM 				    (void *)(uintptr_t)s_recvp->vaddr);
13877387SRobert.Gordon@Sun.COM 				/*
13887387SRobert.Gordon@Sun.COM 				 * Wake up the thread waiting on
13897387SRobert.Gordon@Sun.COM 				 * a RDMA_DONE for xid
13907387SRobert.Gordon@Sun.COM 				 */
13917387SRobert.Gordon@Sun.COM 				mutex_enter(&qp->rdlist_lock);
13927387SRobert.Gordon@Sun.COM 				rdma_done_notify(qp, xid);
13937387SRobert.Gordon@Sun.COM 				mutex_exit(&qp->rdlist_lock);
139411530SSiddheshwar.Mahesh@Sun.COM 				rib_recv_rele(qp);
13957387SRobert.Gordon@Sun.COM 				(void) rib_free_svc_recv(s_recvp);
13967387SRobert.Gordon@Sun.COM 				continue;
13977387SRobert.Gordon@Sun.COM 			}
13987387SRobert.Gordon@Sun.COM 
13997387SRobert.Gordon@Sun.COM 			mutex_enter(&plugin_state_lock);
140011613SFaramarz.Jalalian@Sun.COM 			mutex_enter(&conn->c_lock);
140111613SFaramarz.Jalalian@Sun.COM 			if ((plugin_state == ACCEPT) &&
140211613SFaramarz.Jalalian@Sun.COM 			    (conn->c_state == C_CONNECTED)) {
140311613SFaramarz.Jalalian@Sun.COM 				conn->c_ref++;
140411613SFaramarz.Jalalian@Sun.COM 				mutex_exit(&conn->c_lock);
14057387SRobert.Gordon@Sun.COM 				while ((mp = allocb(sizeof (*rdp), BPRI_LO))
14067387SRobert.Gordon@Sun.COM 				    == NULL)
14077387SRobert.Gordon@Sun.COM 					(void) strwaitbuf(
14087387SRobert.Gordon@Sun.COM 					    sizeof (*rdp), BPRI_LO);
14097387SRobert.Gordon@Sun.COM 				/*
14107387SRobert.Gordon@Sun.COM 				 * Plugin is in accept state, hence the master
14117387SRobert.Gordon@Sun.COM 				 * transport queue for this is still accepting
14127387SRobert.Gordon@Sun.COM 				 * requests. Hence we can call svc_queuereq to
14137387SRobert.Gordon@Sun.COM 				 * queue this recieved msg.
14147387SRobert.Gordon@Sun.COM 				 */
14157387SRobert.Gordon@Sun.COM 				rdp = (rdma_recv_data_t *)mp->b_rptr;
14167387SRobert.Gordon@Sun.COM 				rdp->conn = conn;
14177387SRobert.Gordon@Sun.COM 				rdp->rpcmsg.addr =
14187387SRobert.Gordon@Sun.COM 				    (caddr_t)(uintptr_t)s_recvp->vaddr;
14197387SRobert.Gordon@Sun.COM 				rdp->rpcmsg.type = RECV_BUFFER;
14207387SRobert.Gordon@Sun.COM 				rdp->rpcmsg.len = wc.wc_bytes_xfer;
14217387SRobert.Gordon@Sun.COM 				rdp->status = wc.wc_status;
14227387SRobert.Gordon@Sun.COM 				mp->b_wptr += sizeof (*rdp);
14237387SRobert.Gordon@Sun.COM 				svc_queuereq((queue_t *)rib_stat->q, mp);
14247387SRobert.Gordon@Sun.COM 				mutex_exit(&plugin_state_lock);
14257387SRobert.Gordon@Sun.COM 			} else {
14267387SRobert.Gordon@Sun.COM 				/*
14277387SRobert.Gordon@Sun.COM 				 * The master transport for this is going
14287387SRobert.Gordon@Sun.COM 				 * away and the queue is not accepting anymore
14297387SRobert.Gordon@Sun.COM 				 * requests for krpc, so don't do anything, just
14307387SRobert.Gordon@Sun.COM 				 * free the msg.
14317387SRobert.Gordon@Sun.COM 				 */
143211613SFaramarz.Jalalian@Sun.COM 				mutex_exit(&conn->c_lock);
14337387SRobert.Gordon@Sun.COM 				mutex_exit(&plugin_state_lock);
14347387SRobert.Gordon@Sun.COM 				rib_rbuf_free(conn, RECV_BUFFER,
14357387SRobert.Gordon@Sun.COM 				    (void *)(uintptr_t)s_recvp->vaddr);
14367387SRobert.Gordon@Sun.COM 			}
14370Sstevel@tonic-gate 		} else {
14380Sstevel@tonic-gate 			rib_rbuf_free(conn, RECV_BUFFER,
14397387SRobert.Gordon@Sun.COM 			    (void *)(uintptr_t)s_recvp->vaddr);
14400Sstevel@tonic-gate 		}
144111530SSiddheshwar.Mahesh@Sun.COM 		rib_recv_rele(qp);
14420Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
14430Sstevel@tonic-gate 	}
14440Sstevel@tonic-gate }
14450Sstevel@tonic-gate 
14469733SFaramarz.Jalalian@Sun.COM static void
rib_attach_hca()14479733SFaramarz.Jalalian@Sun.COM rib_attach_hca()
14489733SFaramarz.Jalalian@Sun.COM {
14499733SFaramarz.Jalalian@Sun.COM 	mutex_enter(&rib_stat->open_hca_lock);
14509803SSiddheshwar.Mahesh@Sun.COM 	(void) rpcib_open_hcas(rib_stat);
14519733SFaramarz.Jalalian@Sun.COM 	rib_listen(NULL);
14529733SFaramarz.Jalalian@Sun.COM 	mutex_exit(&rib_stat->open_hca_lock);
14539733SFaramarz.Jalalian@Sun.COM }
14549733SFaramarz.Jalalian@Sun.COM 
14550Sstevel@tonic-gate /*
14560Sstevel@tonic-gate  * Handles DR event of IBT_HCA_DETACH_EVENT.
14570Sstevel@tonic-gate  */
14580Sstevel@tonic-gate /* ARGSUSED */
14590Sstevel@tonic-gate static void
rib_async_handler(void * clnt_private,ibt_hca_hdl_t hca_hdl,ibt_async_code_t code,ibt_async_event_t * event)14600Sstevel@tonic-gate rib_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
14610Sstevel@tonic-gate 	ibt_async_code_t code, ibt_async_event_t *event)
14620Sstevel@tonic-gate {
14630Sstevel@tonic-gate 	switch (code) {
14640Sstevel@tonic-gate 	case IBT_HCA_ATTACH_EVENT:
14659733SFaramarz.Jalalian@Sun.COM 		rib_attach_hca();
14660Sstevel@tonic-gate 		break;
14670Sstevel@tonic-gate 	case IBT_HCA_DETACH_EVENT:
146811530SSiddheshwar.Mahesh@Sun.COM 		rib_detach_hca(hca_hdl);
14690Sstevel@tonic-gate #ifdef DEBUG
14707387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): HCA being detached!\n");
14710Sstevel@tonic-gate #endif
14720Sstevel@tonic-gate 		break;
14739733SFaramarz.Jalalian@Sun.COM 	case IBT_EVENT_PORT_UP:
14749733SFaramarz.Jalalian@Sun.COM 		/*
14759733SFaramarz.Jalalian@Sun.COM 		 * A port is up. We should call rib_listen() since there is
14769733SFaramarz.Jalalian@Sun.COM 		 * a chance that rib_listen() may have failed during
14779733SFaramarz.Jalalian@Sun.COM 		 * rib_attach_hca() because the port had not been up yet.
14789733SFaramarz.Jalalian@Sun.COM 		 */
14799733SFaramarz.Jalalian@Sun.COM 		rib_listen(NULL);
14809733SFaramarz.Jalalian@Sun.COM #ifdef DEBUG
14819733SFaramarz.Jalalian@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_PORT_UP\n");
14829733SFaramarz.Jalalian@Sun.COM #endif
14839733SFaramarz.Jalalian@Sun.COM 		break;
14840Sstevel@tonic-gate #ifdef DEBUG
14850Sstevel@tonic-gate 	case IBT_EVENT_PATH_MIGRATED:
14867387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): "
14877387SRobert.Gordon@Sun.COM 		    "IBT_EVENT_PATH_MIGRATED\n");
14880Sstevel@tonic-gate 		break;
14890Sstevel@tonic-gate 	case IBT_EVENT_SQD:
14907387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_SQD\n");
14910Sstevel@tonic-gate 		break;
14920Sstevel@tonic-gate 	case IBT_EVENT_COM_EST:
14937387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_EVENT_COM_EST\n");
14940Sstevel@tonic-gate 		break;
14950Sstevel@tonic-gate 	case IBT_ERROR_CATASTROPHIC_CHAN:
14967387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): "
14977387SRobert.Gordon@Sun.COM 		    "IBT_ERROR_CATASTROPHIC_CHAN\n");
14980Sstevel@tonic-gate 		break;
14990Sstevel@tonic-gate 	case IBT_ERROR_INVALID_REQUEST_CHAN:
15007387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): "
15017387SRobert.Gordon@Sun.COM 		    "IBT_ERROR_INVALID_REQUEST_CHAN\n");
15020Sstevel@tonic-gate 		break;
15030Sstevel@tonic-gate 	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
15047387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): "
15057387SRobert.Gordon@Sun.COM 		    "IBT_ERROR_ACCESS_VIOLATION_CHAN\n");
15060Sstevel@tonic-gate 		break;
15070Sstevel@tonic-gate 	case IBT_ERROR_PATH_MIGRATE_REQ:
15087387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): "
15097387SRobert.Gordon@Sun.COM 		    "IBT_ERROR_PATH_MIGRATE_REQ\n");
15100Sstevel@tonic-gate 		break;
15110Sstevel@tonic-gate 	case IBT_ERROR_CQ:
15127387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_CQ\n");
15130Sstevel@tonic-gate 		break;
15140Sstevel@tonic-gate 	case IBT_ERROR_PORT_DOWN:
15157387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ERROR_PORT_DOWN\n");
15160Sstevel@tonic-gate 		break;
15170Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE1:
15187387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE1\n");
15190Sstevel@tonic-gate 		break;
15200Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE2:
15217387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE2\n");
15220Sstevel@tonic-gate 		break;
15230Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE3:
15247387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE3\n");
15250Sstevel@tonic-gate 		break;
15260Sstevel@tonic-gate 	case IBT_ASYNC_OPAQUE4:
15277387SRobert.Gordon@Sun.COM 		cmn_err(CE_NOTE, "rib_async_handler(): IBT_ASYNC_OPAQUE4\n");
15280Sstevel@tonic-gate 		break;
15290Sstevel@tonic-gate #endif
15300Sstevel@tonic-gate 	default:
15310Sstevel@tonic-gate 		break;
15320Sstevel@tonic-gate 	}
15330Sstevel@tonic-gate }
15340Sstevel@tonic-gate 
15350Sstevel@tonic-gate /*
15360Sstevel@tonic-gate  * Client's reachable function.
15370Sstevel@tonic-gate  */
15380Sstevel@tonic-gate static rdma_stat
rib_reachable(int addr_type,struct netbuf * raddr,void ** handle)15390Sstevel@tonic-gate rib_reachable(int addr_type, struct netbuf *raddr, void **handle)
15400Sstevel@tonic-gate {
15410Sstevel@tonic-gate 	rdma_stat	status;
15429146SSiddheshwar.Mahesh@Sun.COM 	rpcib_ping_t	rpt;
15439803SSiddheshwar.Mahesh@Sun.COM 	struct netbuf	saddr;
15449803SSiddheshwar.Mahesh@Sun.COM 	CONN		*conn;
15459803SSiddheshwar.Mahesh@Sun.COM 
15469803SSiddheshwar.Mahesh@Sun.COM 	bzero(&saddr, sizeof (struct netbuf));
15479803SSiddheshwar.Mahesh@Sun.COM 	status = rib_connect(&saddr, raddr, addr_type, &rpt, &conn);
15480Sstevel@tonic-gate 
15490Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
15509146SSiddheshwar.Mahesh@Sun.COM 		*handle = (void *)rpt.hca;
15519803SSiddheshwar.Mahesh@Sun.COM 		/* release the reference */
15529803SSiddheshwar.Mahesh@Sun.COM 		(void) rib_conn_release(conn);
15537387SRobert.Gordon@Sun.COM 		return (RDMA_SUCCESS);
15540Sstevel@tonic-gate 	} else {
15550Sstevel@tonic-gate 		*handle = NULL;
15567387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(rpcib__i__pingfailed);
15570Sstevel@tonic-gate 		return (RDMA_FAILED);
15580Sstevel@tonic-gate 	}
15590Sstevel@tonic-gate }
15600Sstevel@tonic-gate 
15610Sstevel@tonic-gate /* Client side qp creation */
15620Sstevel@tonic-gate static rdma_stat
rib_clnt_create_chan(rib_hca_t * hca,struct netbuf * raddr,rib_qp_t ** qp)15630Sstevel@tonic-gate rib_clnt_create_chan(rib_hca_t *hca, struct netbuf *raddr, rib_qp_t **qp)
15640Sstevel@tonic-gate {
15650Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
15660Sstevel@tonic-gate 	CONN		*conn;
15677387SRobert.Gordon@Sun.COM 	rdma_clnt_cred_ctrl_t *cc_info;
15680Sstevel@tonic-gate 
15690Sstevel@tonic-gate 	ASSERT(qp != NULL);
15700Sstevel@tonic-gate 	*qp = NULL;
15710Sstevel@tonic-gate 
15720Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
15730Sstevel@tonic-gate 	conn = qptoc(kqp);
15740Sstevel@tonic-gate 	kqp->hca = hca;
15750Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
15760Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
15770Sstevel@tonic-gate 
15780Sstevel@tonic-gate 	kqp->mode = RIB_CLIENT;
15790Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
15800Sstevel@tonic-gate 	conn->c_raddr.buf = kmem_alloc(raddr->len, KM_SLEEP);
15810Sstevel@tonic-gate 	bcopy(raddr->buf, conn->c_raddr.buf, raddr->len);
15820Sstevel@tonic-gate 	conn->c_raddr.len = conn->c_raddr.maxlen = raddr->len;
15830Sstevel@tonic-gate 	/*
15840Sstevel@tonic-gate 	 * Initialize
15850Sstevel@tonic-gate 	 */
15860Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
15870Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
15880Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
15899723SSiddheshwar.Mahesh@Sun.COM 	cv_init(&kqp->send_rbufs_cv, NULL, CV_DEFAULT, NULL);
15909723SSiddheshwar.Mahesh@Sun.COM 	mutex_init(&kqp->send_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
15910Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DRIVER, hca->iblock);
15920Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
15930Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
15940Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
15950Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
15967387SRobert.Gordon@Sun.COM 	/*
15977387SRobert.Gordon@Sun.COM 	 * Initialize the client credit control
15987387SRobert.Gordon@Sun.COM 	 * portion of the rdmaconn struct.
15997387SRobert.Gordon@Sun.COM 	 */
16007387SRobert.Gordon@Sun.COM 	kqp->rdmaconn.c_cc_type = RDMA_CC_CLNT;
16017387SRobert.Gordon@Sun.COM 	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
16027387SRobert.Gordon@Sun.COM 	cc_info->clnt_cc_granted_ops = 0;
16037387SRobert.Gordon@Sun.COM 	cc_info->clnt_cc_in_flight_ops = 0;
16047387SRobert.Gordon@Sun.COM 	cv_init(&cc_info->clnt_cc_cv, NULL, CV_DEFAULT, NULL);
16050Sstevel@tonic-gate 
16060Sstevel@tonic-gate 	*qp = kqp;
16070Sstevel@tonic-gate 	return (RDMA_SUCCESS);
16080Sstevel@tonic-gate }
16090Sstevel@tonic-gate 
16100Sstevel@tonic-gate /* Server side qp creation */
16110Sstevel@tonic-gate static rdma_stat
rib_svc_create_chan(rib_hca_t * hca,caddr_t q,uint8_t port,rib_qp_t ** qp)16120Sstevel@tonic-gate rib_svc_create_chan(rib_hca_t *hca, caddr_t q, uint8_t port, rib_qp_t **qp)
16130Sstevel@tonic-gate {
16140Sstevel@tonic-gate 	rib_qp_t	*kqp = NULL;
16150Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
16160Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
16170Sstevel@tonic-gate 	ibt_status_t		ibt_status;
16187387SRobert.Gordon@Sun.COM 	rdma_srv_cred_ctrl_t *cc_info;
16197387SRobert.Gordon@Sun.COM 
16200Sstevel@tonic-gate 	*qp = NULL;
16210Sstevel@tonic-gate 
16220Sstevel@tonic-gate 	kqp = kmem_zalloc(sizeof (rib_qp_t), KM_SLEEP);
16230Sstevel@tonic-gate 	kqp->hca = hca;
16240Sstevel@tonic-gate 	kqp->port_num = port;
16250Sstevel@tonic-gate 	kqp->rdmaconn.c_rdmamod = &rib_mod;
16260Sstevel@tonic-gate 	kqp->rdmaconn.c_private = (caddr_t)kqp;
16270Sstevel@tonic-gate 
16280Sstevel@tonic-gate 	/*
16290Sstevel@tonic-gate 	 * Create the qp handle
16300Sstevel@tonic-gate 	 */
16310Sstevel@tonic-gate 	bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
16320Sstevel@tonic-gate 	qp_attr.rc_scq = hca->svc_scq->rib_cq_hdl;
16330Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->svc_rcq->rib_cq_hdl;
16340Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
16350Sstevel@tonic-gate 	qp_attr.rc_hca_port_num = port;
16360Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
16370Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
16380Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
16390Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
16400Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
16410Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
16420Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
16430Sstevel@tonic-gate 
16440Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
16450Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
16460Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
16477387SRobert.Gordon@Sun.COM 		    IBT_ACHAN_NO_FLAGS, &qp_attr, &kqp->qp_hdl,
16487387SRobert.Gordon@Sun.COM 		    &chan_sizes);
16490Sstevel@tonic-gate 	} else {
16500Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
16510Sstevel@tonic-gate 		goto fail;
16520Sstevel@tonic-gate 	}
16530Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
16540Sstevel@tonic-gate 
16550Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
16567387SRobert.Gordon@Sun.COM 		DTRACE_PROBE1(rpcib__i_svccreatechanfail,
16577387SRobert.Gordon@Sun.COM 		    int, ibt_status);
16580Sstevel@tonic-gate 		goto fail;
16590Sstevel@tonic-gate 	}
16600Sstevel@tonic-gate 
16610Sstevel@tonic-gate 	kqp->mode = RIB_SERVER;
16620Sstevel@tonic-gate 	kqp->chan_flags = IBT_BLOCKING;
16630Sstevel@tonic-gate 	kqp->q = q;	/* server ONLY */
16640Sstevel@tonic-gate 
16650Sstevel@tonic-gate 	cv_init(&kqp->cb_conn_cv, NULL, CV_DEFAULT, NULL);
16660Sstevel@tonic-gate 	cv_init(&kqp->posted_rbufs_cv, NULL, CV_DEFAULT, NULL);
16670Sstevel@tonic-gate 	mutex_init(&kqp->replylist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
16680Sstevel@tonic-gate 	mutex_init(&kqp->posted_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
16699723SSiddheshwar.Mahesh@Sun.COM 	cv_init(&kqp->send_rbufs_cv, NULL, CV_DEFAULT, NULL);
16709723SSiddheshwar.Mahesh@Sun.COM 	mutex_init(&kqp->send_rbufs_lock, NULL, MUTEX_DRIVER, hca->iblock);
16710Sstevel@tonic-gate 	mutex_init(&kqp->rdlist_lock, NULL, MUTEX_DEFAULT, hca->iblock);
16720Sstevel@tonic-gate 	mutex_init(&kqp->cb_lock, NULL, MUTEX_DRIVER, hca->iblock);
16730Sstevel@tonic-gate 	cv_init(&kqp->rdmaconn.c_cv, NULL, CV_DEFAULT, NULL);
16740Sstevel@tonic-gate 	mutex_init(&kqp->rdmaconn.c_lock, NULL, MUTEX_DRIVER, hca->iblock);
16750Sstevel@tonic-gate 	/*
16760Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
16770Sstevel@tonic-gate 	 */
16780Sstevel@tonic-gate 	ibt_set_chan_private(kqp->qp_hdl, (void *)kqp);
16790Sstevel@tonic-gate 	kqp->rdmaconn.c_state = C_CONNECTED;
16807387SRobert.Gordon@Sun.COM 
16817387SRobert.Gordon@Sun.COM 	/*
16827387SRobert.Gordon@Sun.COM 	 * Initialize the server credit control
16837387SRobert.Gordon@Sun.COM 	 * portion of the rdmaconn struct.
16847387SRobert.Gordon@Sun.COM 	 */
16857387SRobert.Gordon@Sun.COM 	kqp->rdmaconn.c_cc_type = RDMA_CC_SRV;
16867387SRobert.Gordon@Sun.COM 	cc_info = &kqp->rdmaconn.rdma_conn_cred_ctrl_u.c_srv_cc;
16877387SRobert.Gordon@Sun.COM 	cc_info->srv_cc_buffers_granted = preposted_rbufs;
16887387SRobert.Gordon@Sun.COM 	cc_info->srv_cc_cur_buffers_used = 0;
16897387SRobert.Gordon@Sun.COM 	cc_info->srv_cc_posted = preposted_rbufs;
16907387SRobert.Gordon@Sun.COM 
16910Sstevel@tonic-gate 	*qp = kqp;
16927387SRobert.Gordon@Sun.COM 
16930Sstevel@tonic-gate 	return (RDMA_SUCCESS);
16940Sstevel@tonic-gate fail:
16950Sstevel@tonic-gate 	if (kqp)
16960Sstevel@tonic-gate 		kmem_free(kqp, sizeof (rib_qp_t));
16970Sstevel@tonic-gate 
16980Sstevel@tonic-gate 	return (RDMA_FAILED);
16990Sstevel@tonic-gate }
17000Sstevel@tonic-gate 
17010Sstevel@tonic-gate /* ARGSUSED */
17020Sstevel@tonic-gate ibt_cm_status_t
rib_clnt_cm_handler(void * clnt_hdl,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)17030Sstevel@tonic-gate rib_clnt_cm_handler(void *clnt_hdl, ibt_cm_event_t *event,
17040Sstevel@tonic-gate     ibt_cm_return_args_t *ret_args, void *priv_data,
17050Sstevel@tonic-gate     ibt_priv_data_len_t len)
17060Sstevel@tonic-gate {
17070Sstevel@tonic-gate 	rib_hca_t	*hca;
17080Sstevel@tonic-gate 
17099733SFaramarz.Jalalian@Sun.COM 	hca = (rib_hca_t *)clnt_hdl;
17100Sstevel@tonic-gate 
17110Sstevel@tonic-gate 	switch (event->cm_type) {
17120Sstevel@tonic-gate 
17130Sstevel@tonic-gate 	/* got a connection close event */
17140Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
17150Sstevel@tonic-gate 	{
17160Sstevel@tonic-gate 		CONN	*conn;
17170Sstevel@tonic-gate 		rib_qp_t *qp;
17180Sstevel@tonic-gate 
17190Sstevel@tonic-gate 		/* check reason why connection was closed */
17200Sstevel@tonic-gate 		switch (event->cm_event.closed) {
17210Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
17220Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
17230Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
17240Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
17250Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
17260Sstevel@tonic-gate 			/*
17270Sstevel@tonic-gate 			 * These cases indicate the local end initiated
17280Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
17290Sstevel@tonic-gate 			 */
17300Sstevel@tonic-gate 			break;
17310Sstevel@tonic-gate 		default:
17320Sstevel@tonic-gate 			/*
17330Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
17340Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
17350Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
17360Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
17370Sstevel@tonic-gate 			 * cases free the channel and transition to error
17380Sstevel@tonic-gate 			 * state
17390Sstevel@tonic-gate 			 */
17400Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
17410Sstevel@tonic-gate 			conn = qptoc(qp);
17420Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
17430Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
17440Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
17450Sstevel@tonic-gate 				break;
17460Sstevel@tonic-gate 			}
17470Sstevel@tonic-gate 
17487387SRobert.Gordon@Sun.COM 			conn->c_state = C_ERROR_CONN;
17490Sstevel@tonic-gate 
17500Sstevel@tonic-gate 			/*
17510Sstevel@tonic-gate 			 * Free the conn if c_ref is down to 0 already
17520Sstevel@tonic-gate 			 */
17530Sstevel@tonic-gate 			if (conn->c_ref == 0) {
17540Sstevel@tonic-gate 				/*
17550Sstevel@tonic-gate 				 * Remove from list and free conn
17560Sstevel@tonic-gate 				 */
17570Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
17580Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
17599733SFaramarz.Jalalian@Sun.COM 				rw_enter(&hca->state_lock, RW_READER);
17609733SFaramarz.Jalalian@Sun.COM 				if (hca->state != HCA_DETACHED)
17619733SFaramarz.Jalalian@Sun.COM 					(void) rib_disconnect_channel(conn,
17629733SFaramarz.Jalalian@Sun.COM 					    &hca->cl_conn_list);
17639733SFaramarz.Jalalian@Sun.COM 				rw_exit(&hca->state_lock);
17640Sstevel@tonic-gate 			} else {
17659723SSiddheshwar.Mahesh@Sun.COM 				/*
17669723SSiddheshwar.Mahesh@Sun.COM 				 * conn will be freed when c_ref goes to 0.
17679723SSiddheshwar.Mahesh@Sun.COM 				 * Indicate to cleaning thread not to close
17689723SSiddheshwar.Mahesh@Sun.COM 				 * the connection, but just free the channel.
17699723SSiddheshwar.Mahesh@Sun.COM 				 */
17709723SSiddheshwar.Mahesh@Sun.COM 				conn->c_flags |= C_CLOSE_NOTNEEDED;
17710Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
17720Sstevel@tonic-gate 			}
17730Sstevel@tonic-gate #ifdef DEBUG
17740Sstevel@tonic-gate 			if (rib_debug)
17750Sstevel@tonic-gate 				cmn_err(CE_NOTE, "rib_clnt_cm_handler: "
17767387SRobert.Gordon@Sun.COM 				    "(CONN_CLOSED) channel disconnected");
17770Sstevel@tonic-gate #endif
17780Sstevel@tonic-gate 			break;
17790Sstevel@tonic-gate 		}
17800Sstevel@tonic-gate 		break;
17810Sstevel@tonic-gate 	}
17820Sstevel@tonic-gate 	default:
17830Sstevel@tonic-gate 		break;
17840Sstevel@tonic-gate 	}
17850Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
17860Sstevel@tonic-gate }
17870Sstevel@tonic-gate 
17880Sstevel@tonic-gate /*
17890Sstevel@tonic-gate  * Connect to the server.
17900Sstevel@tonic-gate  */
17910Sstevel@tonic-gate rdma_stat
rib_conn_to_srv(rib_hca_t * hca,rib_qp_t * qp,rpcib_ping_t * rptp)17929146SSiddheshwar.Mahesh@Sun.COM rib_conn_to_srv(rib_hca_t *hca, rib_qp_t *qp, rpcib_ping_t *rptp)
17930Sstevel@tonic-gate {
17940Sstevel@tonic-gate 	ibt_chan_open_args_t	chan_args;	/* channel args */
17950Sstevel@tonic-gate 	ibt_chan_sizes_t	chan_sizes;
17960Sstevel@tonic-gate 	ibt_rc_chan_alloc_args_t	qp_attr;
17970Sstevel@tonic-gate 	ibt_status_t		ibt_status;
17980Sstevel@tonic-gate 	ibt_rc_returns_t	ret_args;   	/* conn reject info */
17990Sstevel@tonic-gate 	int refresh = REFRESH_ATTEMPTS;	/* refresh if IBT_CM_CONN_STALE */
18007387SRobert.Gordon@Sun.COM 	ibt_ip_cm_info_t	ipcm_info;
18017387SRobert.Gordon@Sun.COM 	uint8_t cmp_ip_pvt[IBT_IP_HDR_PRIV_DATA_SZ];
18027387SRobert.Gordon@Sun.COM 
18030Sstevel@tonic-gate 
18040Sstevel@tonic-gate 	(void) bzero(&chan_args, sizeof (chan_args));
18050Sstevel@tonic-gate 	(void) bzero(&qp_attr, sizeof (ibt_rc_chan_alloc_args_t));
18067387SRobert.Gordon@Sun.COM 	(void) bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
18077387SRobert.Gordon@Sun.COM 
18089146SSiddheshwar.Mahesh@Sun.COM 	ipcm_info.src_addr.family = rptp->srcip.family;
18099146SSiddheshwar.Mahesh@Sun.COM 	switch (ipcm_info.src_addr.family) {
18107387SRobert.Gordon@Sun.COM 	case AF_INET:
18119146SSiddheshwar.Mahesh@Sun.COM 		ipcm_info.src_addr.un.ip4addr = rptp->srcip.un.ip4addr;
18127387SRobert.Gordon@Sun.COM 		break;
18137387SRobert.Gordon@Sun.COM 	case AF_INET6:
18149146SSiddheshwar.Mahesh@Sun.COM 		ipcm_info.src_addr.un.ip6addr = rptp->srcip.un.ip6addr;
18157387SRobert.Gordon@Sun.COM 		break;
18167387SRobert.Gordon@Sun.COM 	}
18177387SRobert.Gordon@Sun.COM 
18189146SSiddheshwar.Mahesh@Sun.COM 	ipcm_info.dst_addr.family = rptp->srcip.family;
18199146SSiddheshwar.Mahesh@Sun.COM 	switch (ipcm_info.dst_addr.family) {
18207387SRobert.Gordon@Sun.COM 	case AF_INET:
18219146SSiddheshwar.Mahesh@Sun.COM 		ipcm_info.dst_addr.un.ip4addr = rptp->dstip.un.ip4addr;
18227387SRobert.Gordon@Sun.COM 		break;
18237387SRobert.Gordon@Sun.COM 	case AF_INET6:
18249146SSiddheshwar.Mahesh@Sun.COM 		ipcm_info.dst_addr.un.ip6addr = rptp->dstip.un.ip6addr;
18257387SRobert.Gordon@Sun.COM 		break;
18267387SRobert.Gordon@Sun.COM 	}
18277387SRobert.Gordon@Sun.COM 
18289348SSiddheshwar.Mahesh@Sun.COM 	ipcm_info.src_port = (in_port_t)nfs_rdma_port;
18297387SRobert.Gordon@Sun.COM 
18307387SRobert.Gordon@Sun.COM 	ibt_status = ibt_format_ip_private_data(&ipcm_info,
18317387SRobert.Gordon@Sun.COM 	    IBT_IP_HDR_PRIV_DATA_SZ, cmp_ip_pvt);
18327387SRobert.Gordon@Sun.COM 
18337387SRobert.Gordon@Sun.COM 	if (ibt_status != IBT_SUCCESS) {
18347387SRobert.Gordon@Sun.COM 		cmn_err(CE_WARN, "ibt_format_ip_private_data failed\n");
18357387SRobert.Gordon@Sun.COM 		return (-1);
18367387SRobert.Gordon@Sun.COM 	}
18370Sstevel@tonic-gate 
18389146SSiddheshwar.Mahesh@Sun.COM 	qp_attr.rc_hca_port_num = rptp->path.pi_prim_cep_path.cep_hca_port_num;
18390Sstevel@tonic-gate 	/* Alloc a RC channel */
18400Sstevel@tonic-gate 	qp_attr.rc_scq = hca->clnt_scq->rib_cq_hdl;
18410Sstevel@tonic-gate 	qp_attr.rc_rcq = hca->clnt_rcq->rib_cq_hdl;
18420Sstevel@tonic-gate 	qp_attr.rc_pd = hca->pd_hdl;
18430Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq_sgl = DSEG_MAX;
18440Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq_sgl = RQ_DSEG_MAX;
18450Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_sq = DEF_SQ_SIZE;
18460Sstevel@tonic-gate 	qp_attr.rc_sizes.cs_rq = DEF_RQ_SIZE;
18470Sstevel@tonic-gate 	qp_attr.rc_clone_chan = NULL;
18480Sstevel@tonic-gate 	qp_attr.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
18490Sstevel@tonic-gate 	qp_attr.rc_flags = IBT_WR_SIGNALED;
18500Sstevel@tonic-gate 
18519348SSiddheshwar.Mahesh@Sun.COM 	rptp->path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, nfs_rdma_port);
18529146SSiddheshwar.Mahesh@Sun.COM 	chan_args.oc_path = &rptp->path;
18539348SSiddheshwar.Mahesh@Sun.COM 
18540Sstevel@tonic-gate 	chan_args.oc_cm_handler = rib_clnt_cm_handler;
18559733SFaramarz.Jalalian@Sun.COM 	chan_args.oc_cm_clnt_private = (void *)hca;
18567387SRobert.Gordon@Sun.COM 	chan_args.oc_rdma_ra_out = 4;
18577387SRobert.Gordon@Sun.COM 	chan_args.oc_rdma_ra_in = 4;
18580Sstevel@tonic-gate 	chan_args.oc_path_retry_cnt = 2;
18590Sstevel@tonic-gate 	chan_args.oc_path_rnr_retry_cnt = RNR_RETRIES;
18607387SRobert.Gordon@Sun.COM 	chan_args.oc_priv_data = cmp_ip_pvt;
18617387SRobert.Gordon@Sun.COM 	chan_args.oc_priv_data_len = IBT_IP_HDR_PRIV_DATA_SZ;
18620Sstevel@tonic-gate 
18630Sstevel@tonic-gate refresh:
18640Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
18650Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
18660Sstevel@tonic-gate 		ibt_status = ibt_alloc_rc_channel(hca->hca_hdl,
18677387SRobert.Gordon@Sun.COM 		    IBT_ACHAN_NO_FLAGS,
18687387SRobert.Gordon@Sun.COM 		    &qp_attr, &qp->qp_hdl,
18697387SRobert.Gordon@Sun.COM 		    &chan_sizes);
18700Sstevel@tonic-gate 	} else {
18710Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
18720Sstevel@tonic-gate 		return (RDMA_FAILED);
18730Sstevel@tonic-gate 	}
18740Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
18750Sstevel@tonic-gate 
18760Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
18777387SRobert.Gordon@Sun.COM 		DTRACE_PROBE1(rpcib__i_conntosrv,
18787387SRobert.Gordon@Sun.COM 		    int, ibt_status);
18790Sstevel@tonic-gate 		return (RDMA_FAILED);
18800Sstevel@tonic-gate 	}
18810Sstevel@tonic-gate 
18820Sstevel@tonic-gate 	/* Connect to the Server */
18830Sstevel@tonic-gate 	(void) bzero(&ret_args, sizeof (ret_args));
18840Sstevel@tonic-gate 	mutex_enter(&qp->cb_lock);
18850Sstevel@tonic-gate 	ibt_status = ibt_open_rc_channel(qp->qp_hdl, IBT_OCHAN_NO_FLAGS,
18867387SRobert.Gordon@Sun.COM 	    IBT_BLOCKING, &chan_args, &ret_args);
18870Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
18887387SRobert.Gordon@Sun.COM 		DTRACE_PROBE2(rpcib__i_openrctosrv,
18897387SRobert.Gordon@Sun.COM 		    int, ibt_status, int, ret_args.rc_status);
18907387SRobert.Gordon@Sun.COM 
18910Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
18920Sstevel@tonic-gate 		qp->qp_hdl = NULL;
18930Sstevel@tonic-gate 		mutex_exit(&qp->cb_lock);
18940Sstevel@tonic-gate 		if (refresh-- && ibt_status == IBT_CM_FAILURE &&
18957387SRobert.Gordon@Sun.COM 		    ret_args.rc_status == IBT_CM_CONN_STALE) {
18960Sstevel@tonic-gate 			/*
18970Sstevel@tonic-gate 			 * Got IBT_CM_CONN_STALE probably because of stale
18980Sstevel@tonic-gate 			 * data on the passive end of a channel that existed
18990Sstevel@tonic-gate 			 * prior to reboot. Retry establishing a channel
19000Sstevel@tonic-gate 			 * REFRESH_ATTEMPTS times, during which time the
19010Sstevel@tonic-gate 			 * stale conditions on the server might clear up.
19020Sstevel@tonic-gate 			 */
19030Sstevel@tonic-gate 			goto refresh;
19040Sstevel@tonic-gate 		}
19050Sstevel@tonic-gate 		return (RDMA_FAILED);
19060Sstevel@tonic-gate 	}
19070Sstevel@tonic-gate 	mutex_exit(&qp->cb_lock);
19080Sstevel@tonic-gate 	/*
19090Sstevel@tonic-gate 	 * Set the private data area to qp to be used in callbacks
19100Sstevel@tonic-gate 	 */
19110Sstevel@tonic-gate 	ibt_set_chan_private(qp->qp_hdl, (void *)qp);
19120Sstevel@tonic-gate 	return (RDMA_SUCCESS);
19130Sstevel@tonic-gate }
19140Sstevel@tonic-gate 
19150Sstevel@tonic-gate rdma_stat
rib_ping_srv(int addr_type,struct netbuf * raddr,rpcib_ping_t * rptp)19169146SSiddheshwar.Mahesh@Sun.COM rib_ping_srv(int addr_type, struct netbuf *raddr, rpcib_ping_t *rptp)
19170Sstevel@tonic-gate {
19189733SFaramarz.Jalalian@Sun.COM 	uint_t			i, addr_count;
19190Sstevel@tonic-gate 	ibt_status_t		ibt_status;
19207387SRobert.Gordon@Sun.COM 	uint8_t			num_paths_p;
19217387SRobert.Gordon@Sun.COM 	ibt_ip_path_attr_t	ipattr;
19227387SRobert.Gordon@Sun.COM 	ibt_path_ip_src_t	srcip;
19238485SPeter.Memishian@Sun.COM 	rpcib_ipaddrs_t		addrs4;
19248485SPeter.Memishian@Sun.COM 	rpcib_ipaddrs_t		addrs6;
19258485SPeter.Memishian@Sun.COM 	struct sockaddr_in	*sinp;
19268485SPeter.Memishian@Sun.COM 	struct sockaddr_in6	*sin6p;
19279733SFaramarz.Jalalian@Sun.COM 	rdma_stat		retval = RDMA_FAILED;
19289733SFaramarz.Jalalian@Sun.COM 	rib_hca_t *hca;
19299733SFaramarz.Jalalian@Sun.COM 
19309733SFaramarz.Jalalian@Sun.COM 	if ((addr_type != AF_INET) && (addr_type != AF_INET6))
19319733SFaramarz.Jalalian@Sun.COM 		return (RDMA_INVAL);
19320Sstevel@tonic-gate 	ASSERT(raddr->buf != NULL);
19330Sstevel@tonic-gate 
19347387SRobert.Gordon@Sun.COM 	bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
19357387SRobert.Gordon@Sun.COM 
19368485SPeter.Memishian@Sun.COM 	if (!rpcib_get_ib_addresses(&addrs4, &addrs6) ||
19378485SPeter.Memishian@Sun.COM 	    (addrs4.ri_count == 0 && addrs6.ri_count == 0)) {
19388485SPeter.Memishian@Sun.COM 		retval = RDMA_FAILED;
19399733SFaramarz.Jalalian@Sun.COM 		goto done2;
19407387SRobert.Gordon@Sun.COM 	}
19417387SRobert.Gordon@Sun.COM 
19429733SFaramarz.Jalalian@Sun.COM 	if (addr_type == AF_INET) {
19439733SFaramarz.Jalalian@Sun.COM 		addr_count = addrs4.ri_count;
19448485SPeter.Memishian@Sun.COM 		sinp = (struct sockaddr_in *)raddr->buf;
19459146SSiddheshwar.Mahesh@Sun.COM 		rptp->dstip.family = AF_INET;
19469146SSiddheshwar.Mahesh@Sun.COM 		rptp->dstip.un.ip4addr = sinp->sin_addr.s_addr;
19478485SPeter.Memishian@Sun.COM 		sinp = addrs4.ri_list;
19489733SFaramarz.Jalalian@Sun.COM 	} else {
19499733SFaramarz.Jalalian@Sun.COM 		addr_count = addrs6.ri_count;
19509733SFaramarz.Jalalian@Sun.COM 		sin6p = (struct sockaddr_in6 *)raddr->buf;
19519733SFaramarz.Jalalian@Sun.COM 		rptp->dstip.family = AF_INET6;
19529733SFaramarz.Jalalian@Sun.COM 		rptp->dstip.un.ip6addr = sin6p->sin6_addr;
19539733SFaramarz.Jalalian@Sun.COM 		sin6p = addrs6.ri_list;
19549733SFaramarz.Jalalian@Sun.COM 	}
19559733SFaramarz.Jalalian@Sun.COM 
19569733SFaramarz.Jalalian@Sun.COM 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
19579733SFaramarz.Jalalian@Sun.COM 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
19589733SFaramarz.Jalalian@Sun.COM 		rw_enter(&hca->state_lock, RW_READER);
19599733SFaramarz.Jalalian@Sun.COM 		if (hca->state == HCA_DETACHED) {
19609733SFaramarz.Jalalian@Sun.COM 			rw_exit(&hca->state_lock);
19619733SFaramarz.Jalalian@Sun.COM 			continue;
19629733SFaramarz.Jalalian@Sun.COM 		}
19638485SPeter.Memishian@Sun.COM 
19649146SSiddheshwar.Mahesh@Sun.COM 		ipattr.ipa_dst_ip 	= &rptp->dstip;
19659733SFaramarz.Jalalian@Sun.COM 		ipattr.ipa_hca_guid	= hca->hca_guid;
19669146SSiddheshwar.Mahesh@Sun.COM 		ipattr.ipa_ndst		= 1;
19679146SSiddheshwar.Mahesh@Sun.COM 		ipattr.ipa_max_paths	= 1;
19689146SSiddheshwar.Mahesh@Sun.COM 		ipattr.ipa_src_ip.family = rptp->dstip.family;
19699733SFaramarz.Jalalian@Sun.COM 		for (i = 0; i < addr_count; i++) {
19707387SRobert.Gordon@Sun.COM 			num_paths_p = 0;
19719733SFaramarz.Jalalian@Sun.COM 			if (addr_type == AF_INET) {
19729733SFaramarz.Jalalian@Sun.COM 				ipattr.ipa_src_ip.un.ip4addr =
19739733SFaramarz.Jalalian@Sun.COM 				    sinp[i].sin_addr.s_addr;
19749733SFaramarz.Jalalian@Sun.COM 			} else {
19759733SFaramarz.Jalalian@Sun.COM 				ipattr.ipa_src_ip.un.ip6addr =
19769733SFaramarz.Jalalian@Sun.COM 				    sin6p[i].sin6_addr;
19779733SFaramarz.Jalalian@Sun.COM 			}
19789146SSiddheshwar.Mahesh@Sun.COM 			bzero(&srcip, sizeof (ibt_path_ip_src_t));
19797387SRobert.Gordon@Sun.COM 
19807387SRobert.Gordon@Sun.COM 			ibt_status = ibt_get_ip_paths(rib_stat->ibt_clnt_hdl,
19819146SSiddheshwar.Mahesh@Sun.COM 			    IBT_PATH_NO_FLAGS, &ipattr, &rptp->path,
19829146SSiddheshwar.Mahesh@Sun.COM 			    &num_paths_p, &srcip);
19837387SRobert.Gordon@Sun.COM 			if (ibt_status == IBT_SUCCESS &&
19847387SRobert.Gordon@Sun.COM 			    num_paths_p != 0 &&
19859733SFaramarz.Jalalian@Sun.COM 			    rptp->path.pi_hca_guid == hca->hca_guid) {
19869733SFaramarz.Jalalian@Sun.COM 				rptp->hca = hca;
19879733SFaramarz.Jalalian@Sun.COM 				rw_exit(&hca->state_lock);
19889733SFaramarz.Jalalian@Sun.COM 				if (addr_type == AF_INET) {
19899733SFaramarz.Jalalian@Sun.COM 					rptp->srcip.family = AF_INET;
19909733SFaramarz.Jalalian@Sun.COM 					rptp->srcip.un.ip4addr =
19919733SFaramarz.Jalalian@Sun.COM 					    srcip.ip_primary.un.ip4addr;
19929733SFaramarz.Jalalian@Sun.COM 				} else {
19939733SFaramarz.Jalalian@Sun.COM 					rptp->srcip.family = AF_INET6;
19949733SFaramarz.Jalalian@Sun.COM 					rptp->srcip.un.ip6addr =
19959733SFaramarz.Jalalian@Sun.COM 					    srcip.ip_primary.un.ip6addr;
19969733SFaramarz.Jalalian@Sun.COM 
19979733SFaramarz.Jalalian@Sun.COM 				}
19989733SFaramarz.Jalalian@Sun.COM 				retval = RDMA_SUCCESS;
19999733SFaramarz.Jalalian@Sun.COM 				goto done1;
20007387SRobert.Gordon@Sun.COM 			}
20017387SRobert.Gordon@Sun.COM 		}
20029733SFaramarz.Jalalian@Sun.COM 		rw_exit(&hca->state_lock);
20030Sstevel@tonic-gate 	}
20049733SFaramarz.Jalalian@Sun.COM done1:
20059733SFaramarz.Jalalian@Sun.COM 	rw_exit(&rib_stat->hcas_list_lock);
20069733SFaramarz.Jalalian@Sun.COM done2:
20078485SPeter.Memishian@Sun.COM 	if (addrs4.ri_size > 0)
20088485SPeter.Memishian@Sun.COM 		kmem_free(addrs4.ri_list, addrs4.ri_size);
20098485SPeter.Memishian@Sun.COM 	if (addrs6.ri_size > 0)
20108485SPeter.Memishian@Sun.COM 		kmem_free(addrs6.ri_list, addrs6.ri_size);
20118485SPeter.Memishian@Sun.COM 	return (retval);
20120Sstevel@tonic-gate }
20130Sstevel@tonic-gate 
20140Sstevel@tonic-gate /*
20150Sstevel@tonic-gate  * Close channel, remove from connection list and
20160Sstevel@tonic-gate  * free up resources allocated for that channel.
20170Sstevel@tonic-gate  */
20180Sstevel@tonic-gate rdma_stat
rib_disconnect_channel(CONN * conn,rib_conn_list_t * conn_list)20190Sstevel@tonic-gate rib_disconnect_channel(CONN *conn, rib_conn_list_t *conn_list)
20200Sstevel@tonic-gate {
20210Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
20220Sstevel@tonic-gate 	rib_hca_t	*hca;
20230Sstevel@tonic-gate 
20249723SSiddheshwar.Mahesh@Sun.COM 	mutex_enter(&conn->c_lock);
20259723SSiddheshwar.Mahesh@Sun.COM 	if (conn->c_timeout != NULL) {
20269723SSiddheshwar.Mahesh@Sun.COM 		mutex_exit(&conn->c_lock);
20279723SSiddheshwar.Mahesh@Sun.COM 		(void) untimeout(conn->c_timeout);
20289723SSiddheshwar.Mahesh@Sun.COM 		mutex_enter(&conn->c_lock);
20299723SSiddheshwar.Mahesh@Sun.COM 	}
20309723SSiddheshwar.Mahesh@Sun.COM 
20319723SSiddheshwar.Mahesh@Sun.COM 	while (conn->c_flags & C_CLOSE_PENDING) {
20329723SSiddheshwar.Mahesh@Sun.COM 		cv_wait(&conn->c_cv, &conn->c_lock);
20339723SSiddheshwar.Mahesh@Sun.COM 	}
20349723SSiddheshwar.Mahesh@Sun.COM 	mutex_exit(&conn->c_lock);
20359723SSiddheshwar.Mahesh@Sun.COM 
20360Sstevel@tonic-gate 	/*
20370Sstevel@tonic-gate 	 * c_ref == 0 and connection is in C_DISCONN_PEND
20380Sstevel@tonic-gate 	 */
20390Sstevel@tonic-gate 	hca = qp->hca;
20400Sstevel@tonic-gate 	if (conn_list != NULL)
20410Sstevel@tonic-gate 		(void) rib_rm_conn(conn, conn_list);
20427387SRobert.Gordon@Sun.COM 
20439723SSiddheshwar.Mahesh@Sun.COM 	/*
20449723SSiddheshwar.Mahesh@Sun.COM 	 * There is only one case where we get here with
20459723SSiddheshwar.Mahesh@Sun.COM 	 * qp_hdl = NULL, which is during connection setup on
20469723SSiddheshwar.Mahesh@Sun.COM 	 * the client. In such a case there are no posted
20479723SSiddheshwar.Mahesh@Sun.COM 	 * send/recv buffers.
20489723SSiddheshwar.Mahesh@Sun.COM 	 */
20490Sstevel@tonic-gate 	if (qp->qp_hdl != NULL) {
20500Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
20510Sstevel@tonic-gate 		while (qp->n_posted_rbufs)
20520Sstevel@tonic-gate 			cv_wait(&qp->posted_rbufs_cv, &qp->posted_rbufs_lock);
20530Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
20549723SSiddheshwar.Mahesh@Sun.COM 
20559723SSiddheshwar.Mahesh@Sun.COM 		mutex_enter(&qp->send_rbufs_lock);
20569723SSiddheshwar.Mahesh@Sun.COM 		while (qp->n_send_rbufs)
20579723SSiddheshwar.Mahesh@Sun.COM 			cv_wait(&qp->send_rbufs_cv, &qp->send_rbufs_lock);
205811530SSiddheshwar.Mahesh@Sun.COM 			mutex_exit(&qp->send_rbufs_lock);
20599723SSiddheshwar.Mahesh@Sun.COM 
20600Sstevel@tonic-gate 		(void) ibt_free_channel(qp->qp_hdl);
206111530SSiddheshwar.Mahesh@Sun.COM 			qp->qp_hdl = NULL;
20620Sstevel@tonic-gate 	}
20637387SRobert.Gordon@Sun.COM 
20640Sstevel@tonic-gate 	ASSERT(qp->rdlist == NULL);
20657387SRobert.Gordon@Sun.COM 
20660Sstevel@tonic-gate 	if (qp->replylist != NULL) {
20670Sstevel@tonic-gate 		(void) rib_rem_replylist(qp);
20680Sstevel@tonic-gate 	}
20690Sstevel@tonic-gate 
20700Sstevel@tonic-gate 	cv_destroy(&qp->cb_conn_cv);
20710Sstevel@tonic-gate 	cv_destroy(&qp->posted_rbufs_cv);
20729723SSiddheshwar.Mahesh@Sun.COM 	cv_destroy(&qp->send_rbufs_cv);
20730Sstevel@tonic-gate 	mutex_destroy(&qp->cb_lock);
20740Sstevel@tonic-gate 	mutex_destroy(&qp->replylist_lock);
20750Sstevel@tonic-gate 	mutex_destroy(&qp->posted_rbufs_lock);
20769723SSiddheshwar.Mahesh@Sun.COM 	mutex_destroy(&qp->send_rbufs_lock);
20770Sstevel@tonic-gate 	mutex_destroy(&qp->rdlist_lock);
20780Sstevel@tonic-gate 
20790Sstevel@tonic-gate 	cv_destroy(&conn->c_cv);
20800Sstevel@tonic-gate 	mutex_destroy(&conn->c_lock);
20810Sstevel@tonic-gate 
20820Sstevel@tonic-gate 	if (conn->c_raddr.buf != NULL) {
20830Sstevel@tonic-gate 		kmem_free(conn->c_raddr.buf, conn->c_raddr.len);
20840Sstevel@tonic-gate 	}
20850Sstevel@tonic-gate 	if (conn->c_laddr.buf != NULL) {
20860Sstevel@tonic-gate 		kmem_free(conn->c_laddr.buf, conn->c_laddr.len);
20870Sstevel@tonic-gate 	}
208810326SSiddheshwar.Mahesh@Sun.COM 	if (conn->c_netid != NULL) {
208910326SSiddheshwar.Mahesh@Sun.COM 		kmem_free(conn->c_netid, (strlen(conn->c_netid) + 1));
209010326SSiddheshwar.Mahesh@Sun.COM 	}
209112553SKaren.Rochford@Sun.COM 	if (conn->c_addrmask.buf != NULL) {
209212553SKaren.Rochford@Sun.COM 		kmem_free(conn->c_addrmask.buf, conn->c_addrmask.len);
209312553SKaren.Rochford@Sun.COM 	}
20947387SRobert.Gordon@Sun.COM 
20957387SRobert.Gordon@Sun.COM 	/*
20967387SRobert.Gordon@Sun.COM 	 * Credit control cleanup.
20977387SRobert.Gordon@Sun.COM 	 */
20987387SRobert.Gordon@Sun.COM 	if (qp->rdmaconn.c_cc_type == RDMA_CC_CLNT) {
20997387SRobert.Gordon@Sun.COM 		rdma_clnt_cred_ctrl_t *cc_info;
21007387SRobert.Gordon@Sun.COM 		cc_info = &qp->rdmaconn.rdma_conn_cred_ctrl_u.c_clnt_cc;
21017387SRobert.Gordon@Sun.COM 		cv_destroy(&cc_info->clnt_cc_cv);
21027387SRobert.Gordon@Sun.COM 	}
21037387SRobert.Gordon@Sun.COM 
21040Sstevel@tonic-gate 	kmem_free(qp, sizeof (rib_qp_t));
21050Sstevel@tonic-gate 
21060Sstevel@tonic-gate 	/*
21070Sstevel@tonic-gate 	 * If HCA has been DETACHED and the srv/clnt_conn_list is NULL,
21080Sstevel@tonic-gate 	 * then the hca is no longer being used.
21090Sstevel@tonic-gate 	 */
21100Sstevel@tonic-gate 	if (conn_list != NULL) {
21110Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
21120Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
21130Sstevel@tonic-gate 			rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
21140Sstevel@tonic-gate 			if (hca->srv_conn_list.conn_hd == NULL) {
21150Sstevel@tonic-gate 				rw_enter(&hca->cl_conn_list.conn_lock,
21167387SRobert.Gordon@Sun.COM 				    RW_READER);
21177387SRobert.Gordon@Sun.COM 
21180Sstevel@tonic-gate 				if (hca->cl_conn_list.conn_hd == NULL) {
21190Sstevel@tonic-gate 					mutex_enter(&hca->inuse_lock);
21200Sstevel@tonic-gate 					hca->inuse = FALSE;
21210Sstevel@tonic-gate 					cv_signal(&hca->cb_cv);
21220Sstevel@tonic-gate 					mutex_exit(&hca->inuse_lock);
21230Sstevel@tonic-gate 				}
21240Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
21250Sstevel@tonic-gate 			}
21260Sstevel@tonic-gate 			rw_exit(&hca->srv_conn_list.conn_lock);
21270Sstevel@tonic-gate 		}
21280Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
21290Sstevel@tonic-gate 	}
21307387SRobert.Gordon@Sun.COM 
21310Sstevel@tonic-gate 	return (RDMA_SUCCESS);
21320Sstevel@tonic-gate }
21330Sstevel@tonic-gate 
21340Sstevel@tonic-gate /*
21359723SSiddheshwar.Mahesh@Sun.COM  * All sends are done under the protection of
21369723SSiddheshwar.Mahesh@Sun.COM  * the wdesc->sendwait_lock. n_send_rbufs count
21379723SSiddheshwar.Mahesh@Sun.COM  * is protected using the send_rbufs_lock.
21389723SSiddheshwar.Mahesh@Sun.COM  * lock ordering is:
21399723SSiddheshwar.Mahesh@Sun.COM  * sendwait_lock -> send_rbufs_lock
21409723SSiddheshwar.Mahesh@Sun.COM  */
21419723SSiddheshwar.Mahesh@Sun.COM 
21429723SSiddheshwar.Mahesh@Sun.COM void
rib_send_hold(rib_qp_t * qp)21439723SSiddheshwar.Mahesh@Sun.COM rib_send_hold(rib_qp_t *qp)
21449723SSiddheshwar.Mahesh@Sun.COM {
21459723SSiddheshwar.Mahesh@Sun.COM 	mutex_enter(&qp->send_rbufs_lock);
21469723SSiddheshwar.Mahesh@Sun.COM 	qp->n_send_rbufs++;
21479723SSiddheshwar.Mahesh@Sun.COM 	mutex_exit(&qp->send_rbufs_lock);
21489723SSiddheshwar.Mahesh@Sun.COM }
21499723SSiddheshwar.Mahesh@Sun.COM 
21509723SSiddheshwar.Mahesh@Sun.COM void
rib_send_rele(rib_qp_t * qp)21519723SSiddheshwar.Mahesh@Sun.COM rib_send_rele(rib_qp_t *qp)
21529723SSiddheshwar.Mahesh@Sun.COM {
21539723SSiddheshwar.Mahesh@Sun.COM 	mutex_enter(&qp->send_rbufs_lock);
21549723SSiddheshwar.Mahesh@Sun.COM 	qp->n_send_rbufs--;
21559723SSiddheshwar.Mahesh@Sun.COM 	if (qp->n_send_rbufs == 0)
21569723SSiddheshwar.Mahesh@Sun.COM 		cv_signal(&qp->send_rbufs_cv);
21579723SSiddheshwar.Mahesh@Sun.COM 	mutex_exit(&qp->send_rbufs_lock);
21589723SSiddheshwar.Mahesh@Sun.COM }
21599723SSiddheshwar.Mahesh@Sun.COM 
216011530SSiddheshwar.Mahesh@Sun.COM void
rib_recv_rele(rib_qp_t * qp)216111530SSiddheshwar.Mahesh@Sun.COM rib_recv_rele(rib_qp_t *qp)
216211530SSiddheshwar.Mahesh@Sun.COM {
216311530SSiddheshwar.Mahesh@Sun.COM 	mutex_enter(&qp->posted_rbufs_lock);
216411530SSiddheshwar.Mahesh@Sun.COM 	qp->n_posted_rbufs--;
216511530SSiddheshwar.Mahesh@Sun.COM 	if (qp->n_posted_rbufs == 0)
216611530SSiddheshwar.Mahesh@Sun.COM 		cv_signal(&qp->posted_rbufs_cv);
216711530SSiddheshwar.Mahesh@Sun.COM 	mutex_exit(&qp->posted_rbufs_lock);
216811530SSiddheshwar.Mahesh@Sun.COM }
216911530SSiddheshwar.Mahesh@Sun.COM 
21709723SSiddheshwar.Mahesh@Sun.COM /*
21710Sstevel@tonic-gate  * Wait for send completion notification. Only on receiving a
21720Sstevel@tonic-gate  * notification be it a successful or error completion, free the
21730Sstevel@tonic-gate  * send_wid.
21740Sstevel@tonic-gate  */
21750Sstevel@tonic-gate static rdma_stat
rib_sendwait(rib_qp_t * qp,struct send_wid * wd)21760Sstevel@tonic-gate rib_sendwait(rib_qp_t *qp, struct send_wid *wd)
21770Sstevel@tonic-gate {
21780Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
21790Sstevel@tonic-gate 	rdma_stat error = RDMA_SUCCESS;
21800Sstevel@tonic-gate 	int	i;
21810Sstevel@tonic-gate 
21820Sstevel@tonic-gate 	/*
21830Sstevel@tonic-gate 	 * Wait for send to complete
21840Sstevel@tonic-gate 	 */
21850Sstevel@tonic-gate 	ASSERT(wd != NULL);
21860Sstevel@tonic-gate 	mutex_enter(&wd->sendwait_lock);
21870Sstevel@tonic-gate 	if (wd->status == (uint_t)SEND_WAIT) {
21880Sstevel@tonic-gate 		timout = drv_usectohz(SEND_WAIT_TIME * 1000000) +
21890Sstevel@tonic-gate 		    ddi_get_lbolt();
21907387SRobert.Gordon@Sun.COM 
21910Sstevel@tonic-gate 		if (qp->mode == RIB_SERVER) {
21920Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait(&wd->wait_cv,
21937387SRobert.Gordon@Sun.COM 			    &wd->sendwait_lock, timout)) > 0 &&
21940Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
21950Sstevel@tonic-gate 				;
21960Sstevel@tonic-gate 			switch (cv_wait_ret) {
21970Sstevel@tonic-gate 			case -1:	/* timeout */
21987387SRobert.Gordon@Sun.COM 				DTRACE_PROBE(rpcib__i__srvsendwait__timeout);
21997387SRobert.Gordon@Sun.COM 
22000Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
22010Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
22020Sstevel@tonic-gate 				break;
22030Sstevel@tonic-gate 			default:	/* got send completion */
22040Sstevel@tonic-gate 				break;
22050Sstevel@tonic-gate 			}
22060Sstevel@tonic-gate 		} else {
22070Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&wd->wait_cv,
22087387SRobert.Gordon@Sun.COM 			    &wd->sendwait_lock, timout)) > 0 &&
22090Sstevel@tonic-gate 			    wd->status == (uint_t)SEND_WAIT)
22100Sstevel@tonic-gate 				;
22110Sstevel@tonic-gate 			switch (cv_wait_ret) {
22120Sstevel@tonic-gate 			case -1:	/* timeout */
22137387SRobert.Gordon@Sun.COM 				DTRACE_PROBE(rpcib__i__clntsendwait__timeout);
22147387SRobert.Gordon@Sun.COM 
22150Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
22160Sstevel@tonic-gate 				error = RDMA_TIMEDOUT;
22170Sstevel@tonic-gate 				break;
22180Sstevel@tonic-gate 			case 0:		/* interrupted */
22197387SRobert.Gordon@Sun.COM 				DTRACE_PROBE(rpcib__i__clntsendwait__intr);
22207387SRobert.Gordon@Sun.COM 
22210Sstevel@tonic-gate 				wd->cv_sig = 0;		/* no signal needed */
22220Sstevel@tonic-gate 				error = RDMA_INTR;
22230Sstevel@tonic-gate 				break;
22240Sstevel@tonic-gate 			default:	/* got send completion */
22250Sstevel@tonic-gate 				break;
22260Sstevel@tonic-gate 			}
22270Sstevel@tonic-gate 		}
22280Sstevel@tonic-gate 	}
22290Sstevel@tonic-gate 
22300Sstevel@tonic-gate 	if (wd->status != (uint_t)SEND_WAIT) {
22310Sstevel@tonic-gate 		/* got send completion */
22320Sstevel@tonic-gate 		if (wd->status != RDMA_SUCCESS) {
22339723SSiddheshwar.Mahesh@Sun.COM 			switch (wd->status) {
22349723SSiddheshwar.Mahesh@Sun.COM 			case RDMA_CONNLOST:
22359723SSiddheshwar.Mahesh@Sun.COM 				error = RDMA_CONNLOST;
22369723SSiddheshwar.Mahesh@Sun.COM 				break;
22379723SSiddheshwar.Mahesh@Sun.COM 			default:
22389348SSiddheshwar.Mahesh@Sun.COM 				error = RDMA_FAILED;
22399723SSiddheshwar.Mahesh@Sun.COM 				break;
22409348SSiddheshwar.Mahesh@Sun.COM 			}
22410Sstevel@tonic-gate 		}
22420Sstevel@tonic-gate 		for (i = 0; i < wd->nsbufs; i++) {
22430Sstevel@tonic-gate 			rib_rbuf_free(qptoc(qp), SEND_BUFFER,
22447387SRobert.Gordon@Sun.COM 			    (void *)(uintptr_t)wd->sbufaddr[i]);
22450Sstevel@tonic-gate 		}
22469723SSiddheshwar.Mahesh@Sun.COM 
22479723SSiddheshwar.Mahesh@Sun.COM 		rib_send_rele(qp);
22489723SSiddheshwar.Mahesh@Sun.COM 
22490Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
22500Sstevel@tonic-gate 		(void) rib_free_sendwait(wd);
22519723SSiddheshwar.Mahesh@Sun.COM 
22520Sstevel@tonic-gate 	} else {
22530Sstevel@tonic-gate 		mutex_exit(&wd->sendwait_lock);
22540Sstevel@tonic-gate 	}
22550Sstevel@tonic-gate 	return (error);
22560Sstevel@tonic-gate }
22570Sstevel@tonic-gate 
22580Sstevel@tonic-gate static struct send_wid *
rib_init_sendwait(uint32_t xid,int cv_sig,rib_qp_t * qp)22590Sstevel@tonic-gate rib_init_sendwait(uint32_t xid, int cv_sig, rib_qp_t *qp)
22600Sstevel@tonic-gate {
22610Sstevel@tonic-gate 	struct send_wid	*wd;
22620Sstevel@tonic-gate 
22630Sstevel@tonic-gate 	wd = kmem_zalloc(sizeof (struct send_wid), KM_SLEEP);
22640Sstevel@tonic-gate 	wd->xid = xid;
22650Sstevel@tonic-gate 	wd->cv_sig = cv_sig;
22660Sstevel@tonic-gate 	wd->qp = qp;
22670Sstevel@tonic-gate 	cv_init(&wd->wait_cv, NULL, CV_DEFAULT, NULL);
22680Sstevel@tonic-gate 	mutex_init(&wd->sendwait_lock, NULL, MUTEX_DRIVER, NULL);
22690Sstevel@tonic-gate 	wd->status = (uint_t)SEND_WAIT;
22700Sstevel@tonic-gate 
22710Sstevel@tonic-gate 	return (wd);
22720Sstevel@tonic-gate }
22730Sstevel@tonic-gate 
22740Sstevel@tonic-gate static int
rib_free_sendwait(struct send_wid * wdesc)22750Sstevel@tonic-gate rib_free_sendwait(struct send_wid *wdesc)
22760Sstevel@tonic-gate {
22770Sstevel@tonic-gate 	cv_destroy(&wdesc->wait_cv);
22780Sstevel@tonic-gate 	mutex_destroy(&wdesc->sendwait_lock);
22790Sstevel@tonic-gate 	kmem_free(wdesc, sizeof (*wdesc));
22800Sstevel@tonic-gate 
22810Sstevel@tonic-gate 	return (0);
22820Sstevel@tonic-gate }
22830Sstevel@tonic-gate 
22840Sstevel@tonic-gate static rdma_stat
rib_rem_rep(rib_qp_t * qp,struct reply * rep)22850Sstevel@tonic-gate rib_rem_rep(rib_qp_t *qp, struct reply *rep)
22860Sstevel@tonic-gate {
22870Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
22880Sstevel@tonic-gate 	if (rep != NULL) {
22897387SRobert.Gordon@Sun.COM 		(void) rib_remreply(qp, rep);
22907387SRobert.Gordon@Sun.COM 		mutex_exit(&qp->replylist_lock);
22917387SRobert.Gordon@Sun.COM 		return (RDMA_SUCCESS);
22920Sstevel@tonic-gate 	}
22930Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
22940Sstevel@tonic-gate 	return (RDMA_FAILED);
22950Sstevel@tonic-gate }
22960Sstevel@tonic-gate 
22970Sstevel@tonic-gate /*
22980Sstevel@tonic-gate  * Send buffers are freed here only in case of error in posting
22990Sstevel@tonic-gate  * on QP. If the post succeeded, the send buffers are freed upon
23000Sstevel@tonic-gate  * send completion in rib_sendwait() or in the scq_handler.
23010Sstevel@tonic-gate  */
23020Sstevel@tonic-gate rdma_stat
rib_send_and_wait(CONN * conn,struct clist * cl,uint32_t msgid,int send_sig,int cv_sig,caddr_t * swid)23030Sstevel@tonic-gate rib_send_and_wait(CONN *conn, struct clist *cl, uint32_t msgid,
23047387SRobert.Gordon@Sun.COM 	int send_sig, int cv_sig, caddr_t *swid)
23050Sstevel@tonic-gate {
23060Sstevel@tonic-gate 	struct send_wid	*wdesc;
23070Sstevel@tonic-gate 	struct clist	*clp;
23080Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
23090Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
23100Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
23110Sstevel@tonic-gate 	int		i, nds;
23120Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
23130Sstevel@tonic-gate 	uint_t		total_msg_size;
23147387SRobert.Gordon@Sun.COM 	rib_qp_t	*qp;
23157387SRobert.Gordon@Sun.COM 
23167387SRobert.Gordon@Sun.COM 	qp = ctoqp(conn);
23170Sstevel@tonic-gate 
23180Sstevel@tonic-gate 	ASSERT(cl != NULL);
23190Sstevel@tonic-gate 
23200Sstevel@tonic-gate 	bzero(&tx_wr, sizeof (ibt_send_wr_t));
23210Sstevel@tonic-gate 
23220Sstevel@tonic-gate 	nds = 0;
23230Sstevel@tonic-gate 	total_msg_size = 0;
23240Sstevel@tonic-gate 	clp = cl;
23250Sstevel@tonic-gate 	while (clp != NULL) {
23260Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
23277387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(rpcib__i__sendandwait_dsegmax_exceeded);
23280Sstevel@tonic-gate 			return (RDMA_FAILED);
23290Sstevel@tonic-gate 		}
23307387SRobert.Gordon@Sun.COM 		sgl[nds].ds_va = clp->w.c_saddr;
23310Sstevel@tonic-gate 		sgl[nds].ds_key = clp->c_smemhandle.mrc_lmr; /* lkey */
23320Sstevel@tonic-gate 		sgl[nds].ds_len = clp->c_len;
23330Sstevel@tonic-gate 		total_msg_size += clp->c_len;
23340Sstevel@tonic-gate 		clp = clp->c_next;
23350Sstevel@tonic-gate 		nds++;
23360Sstevel@tonic-gate 	}
23370Sstevel@tonic-gate 
23380Sstevel@tonic-gate 	if (send_sig) {
23390Sstevel@tonic-gate 		/* Set SEND_SIGNAL flag. */
23400Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
23410Sstevel@tonic-gate 		wdesc = rib_init_sendwait(msgid, cv_sig, qp);
23427387SRobert.Gordon@Sun.COM 		*swid = (caddr_t)wdesc;
23439723SSiddheshwar.Mahesh@Sun.COM 		tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
23449723SSiddheshwar.Mahesh@Sun.COM 		mutex_enter(&wdesc->sendwait_lock);
23459723SSiddheshwar.Mahesh@Sun.COM 		wdesc->nsbufs = nds;
23469723SSiddheshwar.Mahesh@Sun.COM 		for (i = 0; i < nds; i++) {
23479723SSiddheshwar.Mahesh@Sun.COM 			wdesc->sbufaddr[i] = sgl[i].ds_va;
23489723SSiddheshwar.Mahesh@Sun.COM 		}
23490Sstevel@tonic-gate 	} else {
23500Sstevel@tonic-gate 		tx_wr.wr_flags = IBT_WR_NO_FLAGS;
23519723SSiddheshwar.Mahesh@Sun.COM 		*swid = NULL;
23529723SSiddheshwar.Mahesh@Sun.COM 		tx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID;
23530Sstevel@tonic-gate 	}
23549723SSiddheshwar.Mahesh@Sun.COM 
23550Sstevel@tonic-gate 	tx_wr.wr_opcode = IBT_WRC_SEND;
23560Sstevel@tonic-gate 	tx_wr.wr_trans = IBT_RC_SRV;
23570Sstevel@tonic-gate 	tx_wr.wr_nds = nds;
23580Sstevel@tonic-gate 	tx_wr.wr_sgl = sgl;
23590Sstevel@tonic-gate 
23600Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
23617387SRobert.Gordon@Sun.COM 	if (conn->c_state == C_CONNECTED) {
23620Sstevel@tonic-gate 		ibt_status = ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
23630Sstevel@tonic-gate 	}
23647387SRobert.Gordon@Sun.COM 	if (conn->c_state != C_CONNECTED ||
23657387SRobert.Gordon@Sun.COM 	    ibt_status != IBT_SUCCESS) {
23667387SRobert.Gordon@Sun.COM 		if (conn->c_state != C_DISCONN_PEND)
23677387SRobert.Gordon@Sun.COM 			conn->c_state = C_ERROR_CONN;
23680Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
23699723SSiddheshwar.Mahesh@Sun.COM 		if (send_sig) {
23709723SSiddheshwar.Mahesh@Sun.COM 			for (i = 0; i < nds; i++) {
23719723SSiddheshwar.Mahesh@Sun.COM 				rib_rbuf_free(conn, SEND_BUFFER,
23729723SSiddheshwar.Mahesh@Sun.COM 				    (void *)(uintptr_t)wdesc->sbufaddr[i]);
23739723SSiddheshwar.Mahesh@Sun.COM 			}
23749723SSiddheshwar.Mahesh@Sun.COM 			mutex_exit(&wdesc->sendwait_lock);
23759723SSiddheshwar.Mahesh@Sun.COM 			(void) rib_free_sendwait(wdesc);
23760Sstevel@tonic-gate 		}
23777387SRobert.Gordon@Sun.COM 		return (RDMA_CONNLOST);
23780Sstevel@tonic-gate 	}
23799723SSiddheshwar.Mahesh@Sun.COM 
23800Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
23810Sstevel@tonic-gate 
23820Sstevel@tonic-gate 	if (send_sig) {
23839723SSiddheshwar.Mahesh@Sun.COM 		rib_send_hold(qp);
23849723SSiddheshwar.Mahesh@Sun.COM 		mutex_exit(&wdesc->sendwait_lock);
23857387SRobert.Gordon@Sun.COM 		if (cv_sig) {
23867387SRobert.Gordon@Sun.COM 			/*
23877387SRobert.Gordon@Sun.COM 			 * cv_wait for send to complete.
23887387SRobert.Gordon@Sun.COM 			 * We can fail due to a timeout or signal or
23897387SRobert.Gordon@Sun.COM 			 * unsuccessful send.
23907387SRobert.Gordon@Sun.COM 			 */
23917387SRobert.Gordon@Sun.COM 			ret = rib_sendwait(qp, wdesc);
23927387SRobert.Gordon@Sun.COM 
23937387SRobert.Gordon@Sun.COM 			return (ret);
23940Sstevel@tonic-gate 		}
23950Sstevel@tonic-gate 	}
23960Sstevel@tonic-gate 
23970Sstevel@tonic-gate 	return (RDMA_SUCCESS);
23980Sstevel@tonic-gate }
23990Sstevel@tonic-gate 
24007387SRobert.Gordon@Sun.COM 
24010Sstevel@tonic-gate rdma_stat
rib_send(CONN * conn,struct clist * cl,uint32_t msgid)24020Sstevel@tonic-gate rib_send(CONN *conn, struct clist *cl, uint32_t msgid)
24030Sstevel@tonic-gate {
24040Sstevel@tonic-gate 	rdma_stat	ret;
24057387SRobert.Gordon@Sun.COM 	caddr_t		wd;
24060Sstevel@tonic-gate 
24070Sstevel@tonic-gate 	/* send-wait & cv_signal */
24087387SRobert.Gordon@Sun.COM 	ret = rib_send_and_wait(conn, cl, msgid, 1, 1, &wd);
24090Sstevel@tonic-gate 	return (ret);
24100Sstevel@tonic-gate }
24110Sstevel@tonic-gate 
24120Sstevel@tonic-gate /*
24139723SSiddheshwar.Mahesh@Sun.COM  * Deprecated/obsolete interface not used currently
24149723SSiddheshwar.Mahesh@Sun.COM  * but earlier used for READ-READ protocol.
24150Sstevel@tonic-gate  * Send RPC reply and wait for RDMA_DONE.
24160Sstevel@tonic-gate  */
24170Sstevel@tonic-gate rdma_stat
rib_send_resp(CONN * conn,struct clist * cl,uint32_t msgid)24180Sstevel@tonic-gate rib_send_resp(CONN *conn, struct clist *cl, uint32_t msgid)
24190Sstevel@tonic-gate {
24200Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
24210Sstevel@tonic-gate 	struct rdma_done_list *rd;
242211066Srafael.vanoni@sun.com 	clock_t cv_wait_ret;
24237387SRobert.Gordon@Sun.COM 	caddr_t *wid = NULL;
24240Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
24250Sstevel@tonic-gate 
24260Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
24270Sstevel@tonic-gate 	rd = rdma_done_add(qp, msgid);
24280Sstevel@tonic-gate 
24290Sstevel@tonic-gate 	/* No cv_signal (whether send-wait or no-send-wait) */
24307387SRobert.Gordon@Sun.COM 	ret = rib_send_and_wait(conn, cl, msgid, 1, 0, wid);
24317387SRobert.Gordon@Sun.COM 
24320Sstevel@tonic-gate 	if (ret != RDMA_SUCCESS) {
24337387SRobert.Gordon@Sun.COM 		rdma_done_rm(qp, rd);
24347387SRobert.Gordon@Sun.COM 	} else {
24357387SRobert.Gordon@Sun.COM 		/*
24367387SRobert.Gordon@Sun.COM 		 * Wait for RDMA_DONE from remote end
24377387SRobert.Gordon@Sun.COM 		 */
243811066Srafael.vanoni@sun.com 		cv_wait_ret = cv_reltimedwait(&rd->rdma_done_cv,
243911066Srafael.vanoni@sun.com 		    &qp->rdlist_lock, drv_usectohz(REPLY_WAIT_TIME * 1000000),
244011066Srafael.vanoni@sun.com 		    TR_CLOCK_TICK);
24417387SRobert.Gordon@Sun.COM 
24427387SRobert.Gordon@Sun.COM 		rdma_done_rm(qp, rd);
24437387SRobert.Gordon@Sun.COM 
24447387SRobert.Gordon@Sun.COM 		if (cv_wait_ret < 0) {
24457387SRobert.Gordon@Sun.COM 			ret = RDMA_TIMEDOUT;
24467387SRobert.Gordon@Sun.COM 		}
24470Sstevel@tonic-gate 	}
24480Sstevel@tonic-gate 
24490Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
24500Sstevel@tonic-gate 	return (ret);
24510Sstevel@tonic-gate }
24520Sstevel@tonic-gate 
24530Sstevel@tonic-gate static struct recv_wid *
rib_create_wid(rib_qp_t * qp,ibt_wr_ds_t * sgl,uint32_t msgid)24540Sstevel@tonic-gate rib_create_wid(rib_qp_t *qp, ibt_wr_ds_t *sgl, uint32_t msgid)
24550Sstevel@tonic-gate {
24560Sstevel@tonic-gate 	struct recv_wid	*rwid;
24570Sstevel@tonic-gate 
24580Sstevel@tonic-gate 	rwid = kmem_zalloc(sizeof (struct recv_wid), KM_SLEEP);
24590Sstevel@tonic-gate 	rwid->xid = msgid;
24600Sstevel@tonic-gate 	rwid->addr = sgl->ds_va;
24610Sstevel@tonic-gate 	rwid->qp = qp;
24620Sstevel@tonic-gate 
24630Sstevel@tonic-gate 	return (rwid);
24640Sstevel@tonic-gate }
24650Sstevel@tonic-gate 
24660Sstevel@tonic-gate static void
rib_free_wid(struct recv_wid * rwid)24670Sstevel@tonic-gate rib_free_wid(struct recv_wid *rwid)
24680Sstevel@tonic-gate {
24690Sstevel@tonic-gate 	kmem_free(rwid, sizeof (struct recv_wid));
24700Sstevel@tonic-gate }
24710Sstevel@tonic-gate 
24720Sstevel@tonic-gate rdma_stat
rib_clnt_post(CONN * conn,struct clist * cl,uint32_t msgid)24730Sstevel@tonic-gate rib_clnt_post(CONN* conn, struct clist *cl, uint32_t msgid)
24740Sstevel@tonic-gate {
24750Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
24760Sstevel@tonic-gate 	struct clist	*clp = cl;
24770Sstevel@tonic-gate 	struct reply	*rep;
24780Sstevel@tonic-gate 	struct recv_wid	*rwid;
24790Sstevel@tonic-gate 	int		nds;
24800Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
24810Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
24820Sstevel@tonic-gate 	rdma_stat	ret;
24830Sstevel@tonic-gate 	ibt_status_t	ibt_status;
24840Sstevel@tonic-gate 
24850Sstevel@tonic-gate 	/*
24860Sstevel@tonic-gate 	 * rdma_clnt_postrecv uses RECV_BUFFER.
24870Sstevel@tonic-gate 	 */
24880Sstevel@tonic-gate 
24890Sstevel@tonic-gate 	nds = 0;
24900Sstevel@tonic-gate 	while (cl != NULL) {
24910Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
24927387SRobert.Gordon@Sun.COM 			ret = RDMA_FAILED;
24937387SRobert.Gordon@Sun.COM 			goto done;
24940Sstevel@tonic-gate 		}
24957387SRobert.Gordon@Sun.COM 		sgl[nds].ds_va = cl->w.c_saddr;
24960Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
24970Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
24980Sstevel@tonic-gate 		cl = cl->c_next;
24990Sstevel@tonic-gate 		nds++;
25000Sstevel@tonic-gate 	}
25010Sstevel@tonic-gate 
25020Sstevel@tonic-gate 	if (nds != 1) {
25037387SRobert.Gordon@Sun.COM 		ret = RDMA_FAILED;
25047387SRobert.Gordon@Sun.COM 		goto done;
25050Sstevel@tonic-gate 	}
25067387SRobert.Gordon@Sun.COM 
25070Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
25080Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
25090Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
25100Sstevel@tonic-gate 
25110Sstevel@tonic-gate 	rwid = rib_create_wid(qp, &sgl[0], msgid);
25120Sstevel@tonic-gate 	if (rwid) {
25137387SRobert.Gordon@Sun.COM 		recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)rwid;
25140Sstevel@tonic-gate 	} else {
25150Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
25160Sstevel@tonic-gate 		goto done;
25170Sstevel@tonic-gate 	}
25180Sstevel@tonic-gate 	rep = rib_addreplylist(qp, msgid);
25190Sstevel@tonic-gate 	if (!rep) {
25200Sstevel@tonic-gate 		rib_free_wid(rwid);
25210Sstevel@tonic-gate 		ret = RDMA_NORESOURCE;
25220Sstevel@tonic-gate 		goto done;
25230Sstevel@tonic-gate 	}
25240Sstevel@tonic-gate 
25250Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
25267387SRobert.Gordon@Sun.COM 
25277387SRobert.Gordon@Sun.COM 	if (conn->c_state == C_CONNECTED) {
25280Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
25290Sstevel@tonic-gate 	}
25307387SRobert.Gordon@Sun.COM 
25317387SRobert.Gordon@Sun.COM 	if (conn->c_state != C_CONNECTED ||
25327387SRobert.Gordon@Sun.COM 	    ibt_status != IBT_SUCCESS) {
25337387SRobert.Gordon@Sun.COM 		if (conn->c_state != C_DISCONN_PEND)
25347387SRobert.Gordon@Sun.COM 			conn->c_state = C_ERROR_CONN;
25350Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
25360Sstevel@tonic-gate 		rib_free_wid(rwid);
25370Sstevel@tonic-gate 		(void) rib_rem_rep(qp, rep);
25387387SRobert.Gordon@Sun.COM 		ret = RDMA_CONNLOST;
25390Sstevel@tonic-gate 		goto done;
25400Sstevel@tonic-gate 	}
254111530SSiddheshwar.Mahesh@Sun.COM 
254211530SSiddheshwar.Mahesh@Sun.COM 	mutex_enter(&qp->posted_rbufs_lock);
254311530SSiddheshwar.Mahesh@Sun.COM 	qp->n_posted_rbufs++;
254411530SSiddheshwar.Mahesh@Sun.COM 	mutex_exit(&qp->posted_rbufs_lock);
254511530SSiddheshwar.Mahesh@Sun.COM 
25460Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
25470Sstevel@tonic-gate 	return (RDMA_SUCCESS);
25480Sstevel@tonic-gate 
25490Sstevel@tonic-gate done:
25500Sstevel@tonic-gate 	while (clp != NULL) {
25517387SRobert.Gordon@Sun.COM 		rib_rbuf_free(conn, RECV_BUFFER,
25527387SRobert.Gordon@Sun.COM 		    (void *)(uintptr_t)clp->w.c_saddr3);
25537387SRobert.Gordon@Sun.COM 		clp = clp->c_next;
25540Sstevel@tonic-gate 	}
25550Sstevel@tonic-gate 	return (ret);
25560Sstevel@tonic-gate }
25570Sstevel@tonic-gate 
25580Sstevel@tonic-gate rdma_stat
rib_svc_post(CONN * conn,struct clist * cl)25590Sstevel@tonic-gate rib_svc_post(CONN* conn, struct clist *cl)
25600Sstevel@tonic-gate {
25610Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
25620Sstevel@tonic-gate 	struct svc_recv	*s_recvp;
25630Sstevel@tonic-gate 	int		nds;
25640Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
25650Sstevel@tonic-gate 	ibt_recv_wr_t	recv_wr;
25660Sstevel@tonic-gate 	ibt_status_t	ibt_status;
25670Sstevel@tonic-gate 
25680Sstevel@tonic-gate 	nds = 0;
25690Sstevel@tonic-gate 	while (cl != NULL) {
25700Sstevel@tonic-gate 		if (nds >= DSEG_MAX) {
25717387SRobert.Gordon@Sun.COM 			return (RDMA_FAILED);
25720Sstevel@tonic-gate 		}
25737387SRobert.Gordon@Sun.COM 		sgl[nds].ds_va = cl->w.c_saddr;
25740Sstevel@tonic-gate 		sgl[nds].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
25750Sstevel@tonic-gate 		sgl[nds].ds_len = cl->c_len;
25760Sstevel@tonic-gate 		cl = cl->c_next;
25770Sstevel@tonic-gate 		nds++;
25780Sstevel@tonic-gate 	}
25790Sstevel@tonic-gate 
25800Sstevel@tonic-gate 	if (nds != 1) {
25817387SRobert.Gordon@Sun.COM 		rib_rbuf_free(conn, RECV_BUFFER,
25827387SRobert.Gordon@Sun.COM 		    (caddr_t)(uintptr_t)sgl[0].ds_va);
25837387SRobert.Gordon@Sun.COM 
25847387SRobert.Gordon@Sun.COM 		return (RDMA_FAILED);
25850Sstevel@tonic-gate 	}
25867387SRobert.Gordon@Sun.COM 
25870Sstevel@tonic-gate 	bzero(&recv_wr, sizeof (ibt_recv_wr_t));
25880Sstevel@tonic-gate 	recv_wr.wr_nds = nds;
25890Sstevel@tonic-gate 	recv_wr.wr_sgl = sgl;
25900Sstevel@tonic-gate 
25910Sstevel@tonic-gate 	s_recvp = rib_init_svc_recv(qp, &sgl[0]);
2592249Sjwahlig 	/* Use s_recvp's addr as wr id */
2593249Sjwahlig 	recv_wr.wr_id = (ibt_wrid_t)(uintptr_t)s_recvp;
25940Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
25957387SRobert.Gordon@Sun.COM 	if (conn->c_state == C_CONNECTED) {
25960Sstevel@tonic-gate 		ibt_status = ibt_post_recv(qp->qp_hdl, &recv_wr, 1, NULL);
25970Sstevel@tonic-gate 	}
25987387SRobert.Gordon@Sun.COM 	if (conn->c_state != C_CONNECTED ||
25997387SRobert.Gordon@Sun.COM 	    ibt_status != IBT_SUCCESS) {
26007387SRobert.Gordon@Sun.COM 		if (conn->c_state != C_DISCONN_PEND)
26017387SRobert.Gordon@Sun.COM 			conn->c_state = C_ERROR_CONN;
26020Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
2603249Sjwahlig 		rib_rbuf_free(conn, RECV_BUFFER,
26047387SRobert.Gordon@Sun.COM 		    (caddr_t)(uintptr_t)sgl[0].ds_va);
26050Sstevel@tonic-gate 		(void) rib_free_svc_recv(s_recvp);
26067387SRobert.Gordon@Sun.COM 
26077387SRobert.Gordon@Sun.COM 		return (RDMA_CONNLOST);
26080Sstevel@tonic-gate 	}
26090Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
26100Sstevel@tonic-gate 
26110Sstevel@tonic-gate 	return (RDMA_SUCCESS);
26120Sstevel@tonic-gate }
26130Sstevel@tonic-gate 
26140Sstevel@tonic-gate /* Client */
26150Sstevel@tonic-gate rdma_stat
rib_post_resp(CONN * conn,struct clist * cl,uint32_t msgid)26160Sstevel@tonic-gate rib_post_resp(CONN* conn, struct clist *cl, uint32_t msgid)
26170Sstevel@tonic-gate {
26180Sstevel@tonic-gate 	return (rib_clnt_post(conn, cl, msgid));
26190Sstevel@tonic-gate }
26200Sstevel@tonic-gate 
26217387SRobert.Gordon@Sun.COM /* Client */
26227387SRobert.Gordon@Sun.COM rdma_stat
rib_post_resp_remove(CONN * conn,uint32_t msgid)26237387SRobert.Gordon@Sun.COM rib_post_resp_remove(CONN* conn, uint32_t msgid)
26247387SRobert.Gordon@Sun.COM {
26257387SRobert.Gordon@Sun.COM 	rib_qp_t	*qp = ctoqp(conn);
26267387SRobert.Gordon@Sun.COM 	struct reply	*rep;
26277387SRobert.Gordon@Sun.COM 
26287387SRobert.Gordon@Sun.COM 	mutex_enter(&qp->replylist_lock);
26297387SRobert.Gordon@Sun.COM 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
26307387SRobert.Gordon@Sun.COM 		if (rep->xid == msgid) {
26317387SRobert.Gordon@Sun.COM 			if (rep->vaddr_cq) {
26327387SRobert.Gordon@Sun.COM 				rib_rbuf_free(conn, RECV_BUFFER,
26337387SRobert.Gordon@Sun.COM 				    (caddr_t)(uintptr_t)rep->vaddr_cq);
26347387SRobert.Gordon@Sun.COM 			}
26357387SRobert.Gordon@Sun.COM 			(void) rib_remreply(qp, rep);
26367387SRobert.Gordon@Sun.COM 			break;
26377387SRobert.Gordon@Sun.COM 		}
26387387SRobert.Gordon@Sun.COM 	}
26397387SRobert.Gordon@Sun.COM 	mutex_exit(&qp->replylist_lock);
26407387SRobert.Gordon@Sun.COM 
26417387SRobert.Gordon@Sun.COM 	return (RDMA_SUCCESS);
26427387SRobert.Gordon@Sun.COM }
26437387SRobert.Gordon@Sun.COM 
26440Sstevel@tonic-gate /* Server */
26450Sstevel@tonic-gate rdma_stat
rib_post_recv(CONN * conn,struct clist * cl)26460Sstevel@tonic-gate rib_post_recv(CONN *conn, struct clist *cl)
26470Sstevel@tonic-gate {
26480Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
26490Sstevel@tonic-gate 
26500Sstevel@tonic-gate 	if (rib_svc_post(conn, cl) == RDMA_SUCCESS) {
26510Sstevel@tonic-gate 		mutex_enter(&qp->posted_rbufs_lock);
26520Sstevel@tonic-gate 		qp->n_posted_rbufs++;
26530Sstevel@tonic-gate 		mutex_exit(&qp->posted_rbufs_lock);
26540Sstevel@tonic-gate 		return (RDMA_SUCCESS);
26550Sstevel@tonic-gate 	}
26560Sstevel@tonic-gate 	return (RDMA_FAILED);
26570Sstevel@tonic-gate }
26580Sstevel@tonic-gate 
26590Sstevel@tonic-gate /*
26600Sstevel@tonic-gate  * Client side only interface to "recv" the rpc reply buf
26610Sstevel@tonic-gate  * posted earlier by rib_post_resp(conn, cl, msgid).
26620Sstevel@tonic-gate  */
26630Sstevel@tonic-gate rdma_stat
rib_recv(CONN * conn,struct clist ** clp,uint32_t msgid)26640Sstevel@tonic-gate rib_recv(CONN *conn, struct clist **clp, uint32_t msgid)
26650Sstevel@tonic-gate {
26660Sstevel@tonic-gate 	struct reply *rep = NULL;
26670Sstevel@tonic-gate 	clock_t timout, cv_wait_ret;
26680Sstevel@tonic-gate 	rdma_stat ret = RDMA_SUCCESS;
26690Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
26700Sstevel@tonic-gate 
26710Sstevel@tonic-gate 	/*
26720Sstevel@tonic-gate 	 * Find the reply structure for this msgid
26730Sstevel@tonic-gate 	 */
26740Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
26750Sstevel@tonic-gate 
26760Sstevel@tonic-gate 	for (rep = qp->replylist; rep != NULL; rep = rep->next) {
26777387SRobert.Gordon@Sun.COM 		if (rep->xid == msgid)
26787387SRobert.Gordon@Sun.COM 			break;
26790Sstevel@tonic-gate 	}
26807387SRobert.Gordon@Sun.COM 
26810Sstevel@tonic-gate 	if (rep != NULL) {
26820Sstevel@tonic-gate 		/*
26830Sstevel@tonic-gate 		 * If message not yet received, wait.
26840Sstevel@tonic-gate 		 */
26850Sstevel@tonic-gate 		if (rep->status == (uint_t)REPLY_WAIT) {
26860Sstevel@tonic-gate 			timout = ddi_get_lbolt() +
26870Sstevel@tonic-gate 			    drv_usectohz(REPLY_WAIT_TIME * 1000000);
26887387SRobert.Gordon@Sun.COM 
26890Sstevel@tonic-gate 			while ((cv_wait_ret = cv_timedwait_sig(&rep->wait_cv,
26907387SRobert.Gordon@Sun.COM 			    &qp->replylist_lock, timout)) > 0 &&
26917387SRobert.Gordon@Sun.COM 			    rep->status == (uint_t)REPLY_WAIT)
26927387SRobert.Gordon@Sun.COM 				;
26930Sstevel@tonic-gate 
26940Sstevel@tonic-gate 			switch (cv_wait_ret) {
26950Sstevel@tonic-gate 			case -1:	/* timeout */
26960Sstevel@tonic-gate 				ret = RDMA_TIMEDOUT;
26970Sstevel@tonic-gate 				break;
26980Sstevel@tonic-gate 			case 0:
26990Sstevel@tonic-gate 				ret = RDMA_INTR;
27000Sstevel@tonic-gate 				break;
27010Sstevel@tonic-gate 			default:
27020Sstevel@tonic-gate 				break;
27030Sstevel@tonic-gate 			}
27040Sstevel@tonic-gate 		}
27050Sstevel@tonic-gate 
27060Sstevel@tonic-gate 		if (rep->status == RDMA_SUCCESS) {
27070Sstevel@tonic-gate 			struct clist *cl = NULL;
27080Sstevel@tonic-gate 
27090Sstevel@tonic-gate 			/*
27100Sstevel@tonic-gate 			 * Got message successfully
27110Sstevel@tonic-gate 			 */
27120Sstevel@tonic-gate 			clist_add(&cl, 0, rep->bytes_xfer, NULL,
2713249Sjwahlig 			    (caddr_t)(uintptr_t)rep->vaddr_cq, NULL, NULL);
27140Sstevel@tonic-gate 			*clp = cl;
27150Sstevel@tonic-gate 		} else {
27160Sstevel@tonic-gate 			if (rep->status != (uint_t)REPLY_WAIT) {
27170Sstevel@tonic-gate 				/*
27180Sstevel@tonic-gate 				 * Got error in reply message. Free
27190Sstevel@tonic-gate 				 * recv buffer here.
27200Sstevel@tonic-gate 				 */
27210Sstevel@tonic-gate 				ret = rep->status;
27220Sstevel@tonic-gate 				rib_rbuf_free(conn, RECV_BUFFER,
27237387SRobert.Gordon@Sun.COM 				    (caddr_t)(uintptr_t)rep->vaddr_cq);
27240Sstevel@tonic-gate 			}
27250Sstevel@tonic-gate 		}
27260Sstevel@tonic-gate 		(void) rib_remreply(qp, rep);
27270Sstevel@tonic-gate 	} else {
27280Sstevel@tonic-gate 		/*
27290Sstevel@tonic-gate 		 * No matching reply structure found for given msgid on the
27300Sstevel@tonic-gate 		 * reply wait list.
27310Sstevel@tonic-gate 		 */
27320Sstevel@tonic-gate 		ret = RDMA_INVAL;
27337387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(rpcib__i__nomatchxid2);
27340Sstevel@tonic-gate 	}
27350Sstevel@tonic-gate 
27360Sstevel@tonic-gate 	/*
27370Sstevel@tonic-gate 	 * Done.
27380Sstevel@tonic-gate 	 */
27390Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
27400Sstevel@tonic-gate 	return (ret);
27410Sstevel@tonic-gate }
27420Sstevel@tonic-gate 
27430Sstevel@tonic-gate /*
27440Sstevel@tonic-gate  * RDMA write a buffer to the remote address.
27450Sstevel@tonic-gate  */
27460Sstevel@tonic-gate rdma_stat
rib_write(CONN * conn,struct clist * cl,int wait)27470Sstevel@tonic-gate rib_write(CONN *conn, struct clist *cl, int wait)
27480Sstevel@tonic-gate {
27490Sstevel@tonic-gate 	ibt_send_wr_t	tx_wr;
27500Sstevel@tonic-gate 	int		cv_sig;
27510Sstevel@tonic-gate 	ibt_wr_ds_t	sgl[DSEG_MAX];
27520Sstevel@tonic-gate 	struct send_wid	*wdesc;
27530Sstevel@tonic-gate 	ibt_status_t	ibt_status;
27540Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
27550Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
27567387SRobert.Gordon@Sun.COM 	uint64_t	n_writes = 0;
27570Sstevel@tonic-gate 
27580Sstevel@tonic-gate 	if (cl == NULL) {
27590Sstevel@tonic-gate 		return (RDMA_FAILED);
27600Sstevel@tonic-gate 	}
27610Sstevel@tonic-gate 
27627387SRobert.Gordon@Sun.COM 	while ((cl != NULL)) {
27637387SRobert.Gordon@Sun.COM 		if (cl->c_len > 0) {
27647387SRobert.Gordon@Sun.COM 			bzero(&tx_wr, sizeof (ibt_send_wr_t));
27657387SRobert.Gordon@Sun.COM 			tx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->u.c_daddr;
27667387SRobert.Gordon@Sun.COM 			tx_wr.wr.rc.rcwr.rdma.rdma_rkey =
27677387SRobert.Gordon@Sun.COM 			    cl->c_dmemhandle.mrc_rmr; /* rkey */
27687387SRobert.Gordon@Sun.COM 			sgl[0].ds_va = cl->w.c_saddr;
27697387SRobert.Gordon@Sun.COM 			sgl[0].ds_key = cl->c_smemhandle.mrc_lmr; /* lkey */
27707387SRobert.Gordon@Sun.COM 			sgl[0].ds_len = cl->c_len;
27717387SRobert.Gordon@Sun.COM 
27727387SRobert.Gordon@Sun.COM 			if (wait) {
27737387SRobert.Gordon@Sun.COM 				cv_sig = 1;
27747387SRobert.Gordon@Sun.COM 			} else {
27757387SRobert.Gordon@Sun.COM 				if (n_writes > max_unsignaled_rws) {
27767387SRobert.Gordon@Sun.COM 					n_writes = 0;
27777387SRobert.Gordon@Sun.COM 					cv_sig = 1;
27787387SRobert.Gordon@Sun.COM 				} else {
27797387SRobert.Gordon@Sun.COM 					cv_sig = 0;
27807387SRobert.Gordon@Sun.COM 				}
27817387SRobert.Gordon@Sun.COM 			}
27827387SRobert.Gordon@Sun.COM 
27839723SSiddheshwar.Mahesh@Sun.COM 			if (cv_sig) {
27849723SSiddheshwar.Mahesh@Sun.COM 				tx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
27859723SSiddheshwar.Mahesh@Sun.COM 				wdesc = rib_init_sendwait(0, cv_sig, qp);
27869723SSiddheshwar.Mahesh@Sun.COM 				tx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
27879723SSiddheshwar.Mahesh@Sun.COM 				mutex_enter(&wdesc->sendwait_lock);
27889723SSiddheshwar.Mahesh@Sun.COM 			} else {
27899723SSiddheshwar.Mahesh@Sun.COM 				tx_wr.wr_flags = IBT_WR_NO_FLAGS;
27909723SSiddheshwar.Mahesh@Sun.COM 				tx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID;
27919723SSiddheshwar.Mahesh@Sun.COM 			}
27927387SRobert.Gordon@Sun.COM 			tx_wr.wr_opcode = IBT_WRC_RDMAW;
27937387SRobert.Gordon@Sun.COM 			tx_wr.wr_trans = IBT_RC_SRV;
27947387SRobert.Gordon@Sun.COM 			tx_wr.wr_nds = 1;
27957387SRobert.Gordon@Sun.COM 			tx_wr.wr_sgl = sgl;
27967387SRobert.Gordon@Sun.COM 
27977387SRobert.Gordon@Sun.COM 			mutex_enter(&conn->c_lock);
27987387SRobert.Gordon@Sun.COM 			if (conn->c_state == C_CONNECTED) {
27997387SRobert.Gordon@Sun.COM 				ibt_status =
28007387SRobert.Gordon@Sun.COM 				    ibt_post_send(qp->qp_hdl, &tx_wr, 1, NULL);
28017387SRobert.Gordon@Sun.COM 			}
28027387SRobert.Gordon@Sun.COM 			if (conn->c_state != C_CONNECTED ||
28037387SRobert.Gordon@Sun.COM 			    ibt_status != IBT_SUCCESS) {
28047387SRobert.Gordon@Sun.COM 				if (conn->c_state != C_DISCONN_PEND)
28057387SRobert.Gordon@Sun.COM 					conn->c_state = C_ERROR_CONN;
28067387SRobert.Gordon@Sun.COM 				mutex_exit(&conn->c_lock);
28079723SSiddheshwar.Mahesh@Sun.COM 				if (cv_sig) {
28089723SSiddheshwar.Mahesh@Sun.COM 					mutex_exit(&wdesc->sendwait_lock);
28099723SSiddheshwar.Mahesh@Sun.COM 					(void) rib_free_sendwait(wdesc);
28109723SSiddheshwar.Mahesh@Sun.COM 				}
28117387SRobert.Gordon@Sun.COM 				return (RDMA_CONNLOST);
28127387SRobert.Gordon@Sun.COM 			}
28139723SSiddheshwar.Mahesh@Sun.COM 
28147387SRobert.Gordon@Sun.COM 			mutex_exit(&conn->c_lock);
28157387SRobert.Gordon@Sun.COM 
28167387SRobert.Gordon@Sun.COM 			/*
28177387SRobert.Gordon@Sun.COM 			 * Wait for send to complete
28187387SRobert.Gordon@Sun.COM 			 */
28199723SSiddheshwar.Mahesh@Sun.COM 			if (cv_sig) {
28209723SSiddheshwar.Mahesh@Sun.COM 
28219723SSiddheshwar.Mahesh@Sun.COM 				rib_send_hold(qp);
28229723SSiddheshwar.Mahesh@Sun.COM 				mutex_exit(&wdesc->sendwait_lock);
28239723SSiddheshwar.Mahesh@Sun.COM 
28247387SRobert.Gordon@Sun.COM 				ret = rib_sendwait(qp, wdesc);
28259723SSiddheshwar.Mahesh@Sun.COM 				if (ret != 0)
28267387SRobert.Gordon@Sun.COM 					return (ret);
28277387SRobert.Gordon@Sun.COM 			}
28287387SRobert.Gordon@Sun.COM 			n_writes ++;
28290Sstevel@tonic-gate 		}
28300Sstevel@tonic-gate 		cl = cl->c_next;
28310Sstevel@tonic-gate 	}
28320Sstevel@tonic-gate 	return (RDMA_SUCCESS);
28330Sstevel@tonic-gate }
28340Sstevel@tonic-gate 
28350Sstevel@tonic-gate /*
28360Sstevel@tonic-gate  * RDMA Read a buffer from the remote address.
28370Sstevel@tonic-gate  */
28380Sstevel@tonic-gate rdma_stat
rib_read(CONN * conn,struct clist * cl,int wait)28390Sstevel@tonic-gate rib_read(CONN *conn, struct clist *cl, int wait)
28400Sstevel@tonic-gate {
28410Sstevel@tonic-gate 	ibt_send_wr_t	rx_wr;
28429723SSiddheshwar.Mahesh@Sun.COM 	int		cv_sig = 0;
28437387SRobert.Gordon@Sun.COM 	ibt_wr_ds_t	sgl;
28440Sstevel@tonic-gate 	struct send_wid	*wdesc;
28450Sstevel@tonic-gate 	ibt_status_t	ibt_status = IBT_SUCCESS;
28460Sstevel@tonic-gate 	rdma_stat	ret = RDMA_SUCCESS;
28470Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
28480Sstevel@tonic-gate 
28490Sstevel@tonic-gate 	if (cl == NULL) {
28500Sstevel@tonic-gate 		return (RDMA_FAILED);
28510Sstevel@tonic-gate 	}
28520Sstevel@tonic-gate 
28530Sstevel@tonic-gate 	while (cl != NULL) {
28547387SRobert.Gordon@Sun.COM 		bzero(&rx_wr, sizeof (ibt_send_wr_t));
28557387SRobert.Gordon@Sun.COM 		/*
28567387SRobert.Gordon@Sun.COM 		 * Remote address is at the head chunk item in list.
28577387SRobert.Gordon@Sun.COM 		 */
28587387SRobert.Gordon@Sun.COM 		rx_wr.wr.rc.rcwr.rdma.rdma_raddr = cl->w.c_saddr;
28597387SRobert.Gordon@Sun.COM 		rx_wr.wr.rc.rcwr.rdma.rdma_rkey = cl->c_smemhandle.mrc_rmr;
28607387SRobert.Gordon@Sun.COM 
28617387SRobert.Gordon@Sun.COM 		sgl.ds_va = cl->u.c_daddr;
28627387SRobert.Gordon@Sun.COM 		sgl.ds_key = cl->c_dmemhandle.mrc_lmr; /* lkey */
28637387SRobert.Gordon@Sun.COM 		sgl.ds_len = cl->c_len;
28647387SRobert.Gordon@Sun.COM 
28659723SSiddheshwar.Mahesh@Sun.COM 		/*
28669723SSiddheshwar.Mahesh@Sun.COM 		 * If there are multiple chunks to be read, and
28679723SSiddheshwar.Mahesh@Sun.COM 		 * wait is set, ask for signal only for the last chunk
28689723SSiddheshwar.Mahesh@Sun.COM 		 * and wait only on the last chunk. The completion of
28699723SSiddheshwar.Mahesh@Sun.COM 		 * RDMA_READ on last chunk ensures that reads on all
28709723SSiddheshwar.Mahesh@Sun.COM 		 * previous chunks are also completed.
28719723SSiddheshwar.Mahesh@Sun.COM 		 */
28729723SSiddheshwar.Mahesh@Sun.COM 		if (wait && (cl->c_next == NULL)) {
28739723SSiddheshwar.Mahesh@Sun.COM 			cv_sig = 1;
28749723SSiddheshwar.Mahesh@Sun.COM 			wdesc = rib_init_sendwait(0, cv_sig, qp);
28757387SRobert.Gordon@Sun.COM 			rx_wr.wr_flags = IBT_WR_SEND_SIGNAL;
28769723SSiddheshwar.Mahesh@Sun.COM 			rx_wr.wr_id = (ibt_wrid_t)(uintptr_t)wdesc;
28779723SSiddheshwar.Mahesh@Sun.COM 			mutex_enter(&wdesc->sendwait_lock);
28787387SRobert.Gordon@Sun.COM 		} else {
28797387SRobert.Gordon@Sun.COM 			rx_wr.wr_flags = IBT_WR_NO_FLAGS;
28809723SSiddheshwar.Mahesh@Sun.COM 			rx_wr.wr_id = (ibt_wrid_t)RDMA_DUMMY_WRID;
28810Sstevel@tonic-gate 		}
28827387SRobert.Gordon@Sun.COM 		rx_wr.wr_opcode = IBT_WRC_RDMAR;
28837387SRobert.Gordon@Sun.COM 		rx_wr.wr_trans = IBT_RC_SRV;
28847387SRobert.Gordon@Sun.COM 		rx_wr.wr_nds = 1;
28857387SRobert.Gordon@Sun.COM 		rx_wr.wr_sgl = &sgl;
28867387SRobert.Gordon@Sun.COM 
28877387SRobert.Gordon@Sun.COM 		mutex_enter(&conn->c_lock);
28887387SRobert.Gordon@Sun.COM 		if (conn->c_state == C_CONNECTED) {
28897387SRobert.Gordon@Sun.COM 			ibt_status = ibt_post_send(qp->qp_hdl, &rx_wr, 1, NULL);
28907387SRobert.Gordon@Sun.COM 		}
28917387SRobert.Gordon@Sun.COM 		if (conn->c_state != C_CONNECTED ||
28927387SRobert.Gordon@Sun.COM 		    ibt_status != IBT_SUCCESS) {
28937387SRobert.Gordon@Sun.COM 			if (conn->c_state != C_DISCONN_PEND)
28947387SRobert.Gordon@Sun.COM 				conn->c_state = C_ERROR_CONN;
28957387SRobert.Gordon@Sun.COM 			mutex_exit(&conn->c_lock);
28969723SSiddheshwar.Mahesh@Sun.COM 			if (wait && (cl->c_next == NULL)) {
28979723SSiddheshwar.Mahesh@Sun.COM 				mutex_exit(&wdesc->sendwait_lock);
28989723SSiddheshwar.Mahesh@Sun.COM 				(void) rib_free_sendwait(wdesc);
28999723SSiddheshwar.Mahesh@Sun.COM 			}
29007387SRobert.Gordon@Sun.COM 			return (RDMA_CONNLOST);
29017387SRobert.Gordon@Sun.COM 		}
29029723SSiddheshwar.Mahesh@Sun.COM 
29037387SRobert.Gordon@Sun.COM 		mutex_exit(&conn->c_lock);
29047387SRobert.Gordon@Sun.COM 
29057387SRobert.Gordon@Sun.COM 		/*
29067387SRobert.Gordon@Sun.COM 		 * Wait for send to complete if this is the
29077387SRobert.Gordon@Sun.COM 		 * last item in the list.
29087387SRobert.Gordon@Sun.COM 		 */
29097387SRobert.Gordon@Sun.COM 		if (wait && cl->c_next == NULL) {
29109723SSiddheshwar.Mahesh@Sun.COM 			rib_send_hold(qp);
29119723SSiddheshwar.Mahesh@Sun.COM 			mutex_exit(&wdesc->sendwait_lock);
29129723SSiddheshwar.Mahesh@Sun.COM 
29137387SRobert.Gordon@Sun.COM 			ret = rib_sendwait(qp, wdesc);
29149723SSiddheshwar.Mahesh@Sun.COM 
29159723SSiddheshwar.Mahesh@Sun.COM 			if (ret != 0)
29167387SRobert.Gordon@Sun.COM 				return (ret);
29177387SRobert.Gordon@Sun.COM 		}
29180Sstevel@tonic-gate 		cl = cl->c_next;
29190Sstevel@tonic-gate 	}
29200Sstevel@tonic-gate 	return (RDMA_SUCCESS);
29210Sstevel@tonic-gate }
29220Sstevel@tonic-gate 
29230Sstevel@tonic-gate /*
29240Sstevel@tonic-gate  * rib_srv_cm_handler()
29250Sstevel@tonic-gate  *    Connection Manager callback to handle RC connection requests.
29260Sstevel@tonic-gate  */
29270Sstevel@tonic-gate /* ARGSUSED */
29280Sstevel@tonic-gate static ibt_cm_status_t
rib_srv_cm_handler(void * any,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)29290Sstevel@tonic-gate rib_srv_cm_handler(void *any, ibt_cm_event_t *event,
29300Sstevel@tonic-gate 	ibt_cm_return_args_t *ret_args, void *priv_data,
29310Sstevel@tonic-gate 	ibt_priv_data_len_t len)
29320Sstevel@tonic-gate {
29330Sstevel@tonic-gate 	queue_t		*q;
29340Sstevel@tonic-gate 	rib_qp_t	*qp;
29350Sstevel@tonic-gate 	rib_hca_t	*hca;
29360Sstevel@tonic-gate 	rdma_stat	status = RDMA_SUCCESS;
29370Sstevel@tonic-gate 	int		i;
29380Sstevel@tonic-gate 	struct clist	cl;
29397387SRobert.Gordon@Sun.COM 	rdma_buf_t	rdbuf = {0};
29400Sstevel@tonic-gate 	void		*buf = NULL;
29410Sstevel@tonic-gate 	CONN		*conn;
29427387SRobert.Gordon@Sun.COM 	ibt_ip_cm_info_t	ipinfo;
29437387SRobert.Gordon@Sun.COM 	struct sockaddr_in *s;
29447387SRobert.Gordon@Sun.COM 	struct sockaddr_in6 *s6;
29457387SRobert.Gordon@Sun.COM 	int sin_size = sizeof (struct sockaddr_in);
29467387SRobert.Gordon@Sun.COM 	int in_size = sizeof (struct in_addr);
29477387SRobert.Gordon@Sun.COM 	int sin6_size = sizeof (struct sockaddr_in6);
29480Sstevel@tonic-gate 
29490Sstevel@tonic-gate 	ASSERT(any != NULL);
29500Sstevel@tonic-gate 	ASSERT(event != NULL);
29510Sstevel@tonic-gate 
29529733SFaramarz.Jalalian@Sun.COM 	hca = (rib_hca_t *)any;
29530Sstevel@tonic-gate 
29540Sstevel@tonic-gate 	/* got a connection request */
29550Sstevel@tonic-gate 	switch (event->cm_type) {
29560Sstevel@tonic-gate 	case IBT_CM_EVENT_REQ_RCV:
29570Sstevel@tonic-gate 		/*
29580Sstevel@tonic-gate 		 * If the plugin is in the NO_ACCEPT state, bail out.
29590Sstevel@tonic-gate 		 */
29600Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
29610Sstevel@tonic-gate 		if (plugin_state == NO_ACCEPT) {
29620Sstevel@tonic-gate 			mutex_exit(&plugin_state_lock);
29630Sstevel@tonic-gate 			return (IBT_CM_REJECT);
29640Sstevel@tonic-gate 		}
29650Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
29660Sstevel@tonic-gate 
29670Sstevel@tonic-gate 		/*
29680Sstevel@tonic-gate 		 * Need to send a MRA MAD to CM so that it does not
29690Sstevel@tonic-gate 		 * timeout on us.
29700Sstevel@tonic-gate 		 */
29710Sstevel@tonic-gate 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
29727387SRobert.Gordon@Sun.COM 		    event->cm_event.req.req_timeout * 8, NULL, 0);
29730Sstevel@tonic-gate 
29740Sstevel@tonic-gate 		mutex_enter(&rib_stat->open_hca_lock);
29750Sstevel@tonic-gate 		q = rib_stat->q;
29760Sstevel@tonic-gate 		mutex_exit(&rib_stat->open_hca_lock);
29777387SRobert.Gordon@Sun.COM 
29780Sstevel@tonic-gate 		status = rib_svc_create_chan(hca, (caddr_t)q,
29797387SRobert.Gordon@Sun.COM 		    event->cm_event.req.req_prim_hca_port, &qp);
29807387SRobert.Gordon@Sun.COM 
29810Sstevel@tonic-gate 		if (status) {
29820Sstevel@tonic-gate 			return (IBT_CM_REJECT);
29830Sstevel@tonic-gate 		}
29840Sstevel@tonic-gate 
29850Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_channel = qp->qp_hdl;
29867387SRobert.Gordon@Sun.COM 		ret_args->cm_ret.rep.cm_rdma_ra_out = 4;
29877387SRobert.Gordon@Sun.COM 		ret_args->cm_ret.rep.cm_rdma_ra_in = 4;
29880Sstevel@tonic-gate 		ret_args->cm_ret.rep.cm_rnr_retry_cnt = RNR_RETRIES;
29890Sstevel@tonic-gate 
29900Sstevel@tonic-gate 		/*
29910Sstevel@tonic-gate 		 * Pre-posts RECV buffers
29920Sstevel@tonic-gate 		 */
29930Sstevel@tonic-gate 		conn = qptoc(qp);
29940Sstevel@tonic-gate 		for (i = 0; i < preposted_rbufs; i++) {
29957387SRobert.Gordon@Sun.COM 			bzero(&rdbuf, sizeof (rdbuf));
29967387SRobert.Gordon@Sun.COM 			rdbuf.type = RECV_BUFFER;
29977387SRobert.Gordon@Sun.COM 			buf = rib_rbuf_alloc(conn, &rdbuf);
29987387SRobert.Gordon@Sun.COM 			if (buf == NULL) {
29999723SSiddheshwar.Mahesh@Sun.COM 				/*
30009723SSiddheshwar.Mahesh@Sun.COM 				 * A connection is not established yet.
30019723SSiddheshwar.Mahesh@Sun.COM 				 * Just flush the channel. Buffers
30029723SSiddheshwar.Mahesh@Sun.COM 				 * posted till now will error out with
30039723SSiddheshwar.Mahesh@Sun.COM 				 * IBT_WC_WR_FLUSHED_ERR.
30049723SSiddheshwar.Mahesh@Sun.COM 				 */
30059723SSiddheshwar.Mahesh@Sun.COM 				(void) ibt_flush_channel(qp->qp_hdl);
30067387SRobert.Gordon@Sun.COM 				(void) rib_disconnect_channel(conn, NULL);
30077387SRobert.Gordon@Sun.COM 				return (IBT_CM_REJECT);
30087387SRobert.Gordon@Sun.COM 			}
30097387SRobert.Gordon@Sun.COM 
30107387SRobert.Gordon@Sun.COM 			bzero(&cl, sizeof (cl));
30117387SRobert.Gordon@Sun.COM 			cl.w.c_saddr3 = (caddr_t)rdbuf.addr;
30127387SRobert.Gordon@Sun.COM 			cl.c_len = rdbuf.len;
30137387SRobert.Gordon@Sun.COM 			cl.c_smemhandle.mrc_lmr =
30147387SRobert.Gordon@Sun.COM 			    rdbuf.handle.mrc_lmr; /* lkey */
30157387SRobert.Gordon@Sun.COM 			cl.c_next = NULL;
30167387SRobert.Gordon@Sun.COM 			status = rib_post_recv(conn, &cl);
30177387SRobert.Gordon@Sun.COM 			if (status != RDMA_SUCCESS) {
30189723SSiddheshwar.Mahesh@Sun.COM 				/*
30199723SSiddheshwar.Mahesh@Sun.COM 				 * A connection is not established yet.
30209723SSiddheshwar.Mahesh@Sun.COM 				 * Just flush the channel. Buffers
30219723SSiddheshwar.Mahesh@Sun.COM 				 * posted till now will error out with
30229723SSiddheshwar.Mahesh@Sun.COM 				 * IBT_WC_WR_FLUSHED_ERR.
30239723SSiddheshwar.Mahesh@Sun.COM 				 */
30249723SSiddheshwar.Mahesh@Sun.COM 				(void) ibt_flush_channel(qp->qp_hdl);
30257387SRobert.Gordon@Sun.COM 				(void) rib_disconnect_channel(conn, NULL);
30267387SRobert.Gordon@Sun.COM 				return (IBT_CM_REJECT);
30277387SRobert.Gordon@Sun.COM 			}
30280Sstevel@tonic-gate 		}
30290Sstevel@tonic-gate 		(void) rib_add_connlist(conn, &hca->srv_conn_list);
30300Sstevel@tonic-gate 
30310Sstevel@tonic-gate 		/*
30327387SRobert.Gordon@Sun.COM 		 * Get the address translation
30330Sstevel@tonic-gate 		 */
30340Sstevel@tonic-gate 		rw_enter(&hca->state_lock, RW_READER);
30350Sstevel@tonic-gate 		if (hca->state == HCA_DETACHED) {
30367387SRobert.Gordon@Sun.COM 			rw_exit(&hca->state_lock);
30377387SRobert.Gordon@Sun.COM 			return (IBT_CM_REJECT);
30380Sstevel@tonic-gate 		}
30390Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
30400Sstevel@tonic-gate 
30417387SRobert.Gordon@Sun.COM 		bzero(&ipinfo, sizeof (ibt_ip_cm_info_t));
30427387SRobert.Gordon@Sun.COM 
30437387SRobert.Gordon@Sun.COM 		if (ibt_get_ip_data(event->cm_priv_data_len,
30447387SRobert.Gordon@Sun.COM 		    event->cm_priv_data,
30457387SRobert.Gordon@Sun.COM 		    &ipinfo) != IBT_SUCCESS) {
30467387SRobert.Gordon@Sun.COM 
30477387SRobert.Gordon@Sun.COM 			return (IBT_CM_REJECT);
30480Sstevel@tonic-gate 		}
30497387SRobert.Gordon@Sun.COM 
30507387SRobert.Gordon@Sun.COM 		switch (ipinfo.src_addr.family) {
30517387SRobert.Gordon@Sun.COM 		case AF_INET:
30527387SRobert.Gordon@Sun.COM 
305310326SSiddheshwar.Mahesh@Sun.COM 			conn->c_netid = kmem_zalloc(strlen(RIBNETID_TCP) + 1,
305410326SSiddheshwar.Mahesh@Sun.COM 			    KM_SLEEP);
305510326SSiddheshwar.Mahesh@Sun.COM 			(void) strcpy(conn->c_netid, RIBNETID_TCP);
305610326SSiddheshwar.Mahesh@Sun.COM 
30577387SRobert.Gordon@Sun.COM 			conn->c_raddr.maxlen =
30587387SRobert.Gordon@Sun.COM 			    conn->c_raddr.len = sin_size;
30597387SRobert.Gordon@Sun.COM 			conn->c_raddr.buf = kmem_zalloc(sin_size, KM_SLEEP);
30607387SRobert.Gordon@Sun.COM 
30617387SRobert.Gordon@Sun.COM 			s = (struct sockaddr_in *)conn->c_raddr.buf;
30627387SRobert.Gordon@Sun.COM 			s->sin_family = AF_INET;
30637387SRobert.Gordon@Sun.COM 			bcopy((void *)&ipinfo.src_addr.un.ip4addr,
30647387SRobert.Gordon@Sun.COM 			    &s->sin_addr, in_size);
30657387SRobert.Gordon@Sun.COM 
306610326SSiddheshwar.Mahesh@Sun.COM 			conn->c_laddr.maxlen =
306710326SSiddheshwar.Mahesh@Sun.COM 			    conn->c_laddr.len = sin_size;
306810326SSiddheshwar.Mahesh@Sun.COM 			conn->c_laddr.buf = kmem_zalloc(sin_size, KM_SLEEP);
306910326SSiddheshwar.Mahesh@Sun.COM 
307010326SSiddheshwar.Mahesh@Sun.COM 			s = (struct sockaddr_in *)conn->c_laddr.buf;
307110326SSiddheshwar.Mahesh@Sun.COM 			s->sin_family = AF_INET;
307210326SSiddheshwar.Mahesh@Sun.COM 			bcopy((void *)&ipinfo.dst_addr.un.ip4addr,
307310326SSiddheshwar.Mahesh@Sun.COM 			    &s->sin_addr, in_size);
307410326SSiddheshwar.Mahesh@Sun.COM 
307512553SKaren.Rochford@Sun.COM 			conn->c_addrmask.maxlen = conn->c_addrmask.len =
307612553SKaren.Rochford@Sun.COM 			    sizeof (struct sockaddr_in);
307712553SKaren.Rochford@Sun.COM 			conn->c_addrmask.buf =
307812553SKaren.Rochford@Sun.COM 			    kmem_zalloc(conn->c_addrmask.len, KM_SLEEP);
307912553SKaren.Rochford@Sun.COM 			((struct sockaddr_in *)
308012553SKaren.Rochford@Sun.COM 			    conn->c_addrmask.buf)->sin_addr.s_addr =
308112553SKaren.Rochford@Sun.COM 			    (uint32_t)~0;
308212553SKaren.Rochford@Sun.COM 			((struct sockaddr_in *)
308312553SKaren.Rochford@Sun.COM 			    conn->c_addrmask.buf)->sin_family =
308412553SKaren.Rochford@Sun.COM 			    (sa_family_t)~0;
30857387SRobert.Gordon@Sun.COM 			break;
30867387SRobert.Gordon@Sun.COM 
30877387SRobert.Gordon@Sun.COM 		case AF_INET6:
30887387SRobert.Gordon@Sun.COM 
308910326SSiddheshwar.Mahesh@Sun.COM 			conn->c_netid = kmem_zalloc(strlen(RIBNETID_TCP6) + 1,
309010326SSiddheshwar.Mahesh@Sun.COM 			    KM_SLEEP);
309110326SSiddheshwar.Mahesh@Sun.COM 			(void) strcpy(conn->c_netid, RIBNETID_TCP6);
309210326SSiddheshwar.Mahesh@Sun.COM 
30937387SRobert.Gordon@Sun.COM 			conn->c_raddr.maxlen =
30947387SRobert.Gordon@Sun.COM 			    conn->c_raddr.len = sin6_size;
30957387SRobert.Gordon@Sun.COM 			conn->c_raddr.buf = kmem_zalloc(sin6_size, KM_SLEEP);
30967387SRobert.Gordon@Sun.COM 
30977387SRobert.Gordon@Sun.COM 			s6 = (struct sockaddr_in6 *)conn->c_raddr.buf;
30987387SRobert.Gordon@Sun.COM 			s6->sin6_family = AF_INET6;
30997387SRobert.Gordon@Sun.COM 			bcopy((void *)&ipinfo.src_addr.un.ip6addr,
31007387SRobert.Gordon@Sun.COM 			    &s6->sin6_addr,
31017387SRobert.Gordon@Sun.COM 			    sizeof (struct in6_addr));
31027387SRobert.Gordon@Sun.COM 
310310326SSiddheshwar.Mahesh@Sun.COM 			conn->c_laddr.maxlen =
310410326SSiddheshwar.Mahesh@Sun.COM 			    conn->c_laddr.len = sin6_size;
310510326SSiddheshwar.Mahesh@Sun.COM 			conn->c_laddr.buf = kmem_zalloc(sin6_size, KM_SLEEP);
310610326SSiddheshwar.Mahesh@Sun.COM 
310710326SSiddheshwar.Mahesh@Sun.COM 			s6 = (struct sockaddr_in6 *)conn->c_laddr.buf;
310810326SSiddheshwar.Mahesh@Sun.COM 			s6->sin6_family = AF_INET6;
310910326SSiddheshwar.Mahesh@Sun.COM 			bcopy((void *)&ipinfo.dst_addr.un.ip6addr,
311010326SSiddheshwar.Mahesh@Sun.COM 			    &s6->sin6_addr,
311110326SSiddheshwar.Mahesh@Sun.COM 			    sizeof (struct in6_addr));
311210326SSiddheshwar.Mahesh@Sun.COM 
311312553SKaren.Rochford@Sun.COM 			conn->c_addrmask.maxlen = conn->c_addrmask.len =
311412553SKaren.Rochford@Sun.COM 			    sizeof (struct sockaddr_in6);
311512553SKaren.Rochford@Sun.COM 			conn->c_addrmask.buf =
311612553SKaren.Rochford@Sun.COM 			    kmem_zalloc(conn->c_addrmask.len, KM_SLEEP);
311712553SKaren.Rochford@Sun.COM 			(void) memset(&((struct sockaddr_in6 *)
311812553SKaren.Rochford@Sun.COM 			    conn->c_addrmask.buf)->sin6_addr, (uchar_t)~0,
311912553SKaren.Rochford@Sun.COM 			    sizeof (struct in6_addr));
312012553SKaren.Rochford@Sun.COM 			((struct sockaddr_in6 *)
312112553SKaren.Rochford@Sun.COM 			    conn->c_addrmask.buf)->sin6_family =
312212553SKaren.Rochford@Sun.COM 			    (sa_family_t)~0;
31237387SRobert.Gordon@Sun.COM 			break;
31247387SRobert.Gordon@Sun.COM 
31257387SRobert.Gordon@Sun.COM 		default:
31267387SRobert.Gordon@Sun.COM 			return (IBT_CM_REJECT);
31270Sstevel@tonic-gate 		}
31287387SRobert.Gordon@Sun.COM 
31290Sstevel@tonic-gate 		break;
31300Sstevel@tonic-gate 
31310Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_CLOSED:
31320Sstevel@tonic-gate 	{
31330Sstevel@tonic-gate 		CONN		*conn;
31340Sstevel@tonic-gate 		rib_qp_t	*qp;
31350Sstevel@tonic-gate 
31360Sstevel@tonic-gate 		switch (event->cm_event.closed) {
31370Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREP_RCVD:
31380Sstevel@tonic-gate 		case IBT_CM_CLOSED_DREQ_TIMEOUT:
31390Sstevel@tonic-gate 		case IBT_CM_CLOSED_DUP:
31400Sstevel@tonic-gate 		case IBT_CM_CLOSED_ABORT:
31410Sstevel@tonic-gate 		case IBT_CM_CLOSED_ALREADY:
31420Sstevel@tonic-gate 			/*
31430Sstevel@tonic-gate 			 * These cases indicate the local end initiated
31440Sstevel@tonic-gate 			 * the closing of the channel. Nothing to do here.
31450Sstevel@tonic-gate 			 */
31460Sstevel@tonic-gate 			break;
31470Sstevel@tonic-gate 		default:
31480Sstevel@tonic-gate 			/*
31490Sstevel@tonic-gate 			 * Reason for CONN_CLOSED event must be one of
31500Sstevel@tonic-gate 			 * IBT_CM_CLOSED_DREQ_RCVD or IBT_CM_CLOSED_REJ_RCVD
31510Sstevel@tonic-gate 			 * or IBT_CM_CLOSED_STALE. These indicate cases were
31520Sstevel@tonic-gate 			 * the remote end is closing the channel. In these
31530Sstevel@tonic-gate 			 * cases free the channel and transition to error
31540Sstevel@tonic-gate 			 * state
31550Sstevel@tonic-gate 			 */
31560Sstevel@tonic-gate 			qp = ibt_get_chan_private(event->cm_channel);
31570Sstevel@tonic-gate 			conn = qptoc(qp);
31580Sstevel@tonic-gate 			mutex_enter(&conn->c_lock);
31590Sstevel@tonic-gate 			if (conn->c_state == C_DISCONN_PEND) {
31600Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
31610Sstevel@tonic-gate 				break;
31620Sstevel@tonic-gate 			}
31637387SRobert.Gordon@Sun.COM 			conn->c_state = C_ERROR_CONN;
31640Sstevel@tonic-gate 
31650Sstevel@tonic-gate 			/*
31660Sstevel@tonic-gate 			 * Free the conn if c_ref goes down to 0
31670Sstevel@tonic-gate 			 */
31680Sstevel@tonic-gate 			if (conn->c_ref == 0) {
31690Sstevel@tonic-gate 				/*
31700Sstevel@tonic-gate 				 * Remove from list and free conn
31710Sstevel@tonic-gate 				 */
31720Sstevel@tonic-gate 				conn->c_state = C_DISCONN_PEND;
31730Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
31740Sstevel@tonic-gate 				(void) rib_disconnect_channel(conn,
31757387SRobert.Gordon@Sun.COM 				    &hca->srv_conn_list);
31760Sstevel@tonic-gate 			} else {
31779723SSiddheshwar.Mahesh@Sun.COM 				/*
31789723SSiddheshwar.Mahesh@Sun.COM 				 * conn will be freed when c_ref goes to 0.
31799723SSiddheshwar.Mahesh@Sun.COM 				 * Indicate to cleaning thread not to close
31809723SSiddheshwar.Mahesh@Sun.COM 				 * the connection, but just free the channel.
31819723SSiddheshwar.Mahesh@Sun.COM 				 */
31829723SSiddheshwar.Mahesh@Sun.COM 				conn->c_flags |= C_CLOSE_NOTNEEDED;
31830Sstevel@tonic-gate 				mutex_exit(&conn->c_lock);
31840Sstevel@tonic-gate 			}
31857387SRobert.Gordon@Sun.COM 			DTRACE_PROBE(rpcib__i__srvcm_chandisconnect);
31860Sstevel@tonic-gate 			break;
31870Sstevel@tonic-gate 		}
31880Sstevel@tonic-gate 		break;
31890Sstevel@tonic-gate 	}
31900Sstevel@tonic-gate 	case IBT_CM_EVENT_CONN_EST:
31917387SRobert.Gordon@Sun.COM 		/*
31927387SRobert.Gordon@Sun.COM 		 * RTU received, hence connection established.
31937387SRobert.Gordon@Sun.COM 		 */
31940Sstevel@tonic-gate 		if (rib_debug > 1)
31950Sstevel@tonic-gate 			cmn_err(CE_NOTE, "rib_srv_cm_handler: "
31967387SRobert.Gordon@Sun.COM 			    "(CONN_EST) channel established");
31970Sstevel@tonic-gate 		break;
31980Sstevel@tonic-gate 
31990Sstevel@tonic-gate 	default:
32007387SRobert.Gordon@Sun.COM 		if (rib_debug > 2) {
32017387SRobert.Gordon@Sun.COM 			/* Let CM handle the following events. */
32027387SRobert.Gordon@Sun.COM 			if (event->cm_type == IBT_CM_EVENT_REP_RCV) {
32037387SRobert.Gordon@Sun.COM 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32047387SRobert.Gordon@Sun.COM 				    "server recv'ed IBT_CM_EVENT_REP_RCV\n");
32057387SRobert.Gordon@Sun.COM 			} else if (event->cm_type == IBT_CM_EVENT_LAP_RCV) {
32067387SRobert.Gordon@Sun.COM 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32077387SRobert.Gordon@Sun.COM 				    "server recv'ed IBT_CM_EVENT_LAP_RCV\n");
32087387SRobert.Gordon@Sun.COM 			} else if (event->cm_type == IBT_CM_EVENT_MRA_RCV) {
32097387SRobert.Gordon@Sun.COM 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32107387SRobert.Gordon@Sun.COM 				    "server recv'ed IBT_CM_EVENT_MRA_RCV\n");
32117387SRobert.Gordon@Sun.COM 			} else if (event->cm_type == IBT_CM_EVENT_APR_RCV) {
32127387SRobert.Gordon@Sun.COM 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32137387SRobert.Gordon@Sun.COM 				    "server recv'ed IBT_CM_EVENT_APR_RCV\n");
32147387SRobert.Gordon@Sun.COM 			} else if (event->cm_type == IBT_CM_EVENT_FAILURE) {
32157387SRobert.Gordon@Sun.COM 				cmn_err(CE_NOTE, "rib_srv_cm_handler: "
32167387SRobert.Gordon@Sun.COM 				    "server recv'ed IBT_CM_EVENT_FAILURE\n");
32177387SRobert.Gordon@Sun.COM 			}
32180Sstevel@tonic-gate 		}
32197387SRobert.Gordon@Sun.COM 		return (IBT_CM_DEFAULT);
32200Sstevel@tonic-gate 	}
32210Sstevel@tonic-gate 
32220Sstevel@tonic-gate 	/* accept all other CM messages (i.e. let the CM handle them) */
32230Sstevel@tonic-gate 	return (IBT_CM_ACCEPT);
32240Sstevel@tonic-gate }
32250Sstevel@tonic-gate 
32260Sstevel@tonic-gate static rdma_stat
rib_register_service(rib_hca_t * hca,int service_type,uint8_t protocol_num,in_port_t dst_port)32279733SFaramarz.Jalalian@Sun.COM rib_register_service(rib_hca_t *hca, int service_type,
32289733SFaramarz.Jalalian@Sun.COM 	uint8_t protocol_num, in_port_t dst_port)
32290Sstevel@tonic-gate {
32300Sstevel@tonic-gate 	ibt_srv_desc_t		sdesc;
32310Sstevel@tonic-gate 	ibt_hca_portinfo_t	*port_infop;
32320Sstevel@tonic-gate 	ib_svc_id_t		srv_id;
32330Sstevel@tonic-gate 	ibt_srv_hdl_t		srv_hdl;
32340Sstevel@tonic-gate 	uint_t			port_size;
32357387SRobert.Gordon@Sun.COM 	uint_t			pki, i, num_ports, nbinds;
32360Sstevel@tonic-gate 	ibt_status_t		ibt_status;
32379733SFaramarz.Jalalian@Sun.COM 	rib_service_t		*service;
32380Sstevel@tonic-gate 	ib_pkey_t		pkey;
32390Sstevel@tonic-gate 
32400Sstevel@tonic-gate 	/*
32410Sstevel@tonic-gate 	 * Query all ports for the given HCA
32420Sstevel@tonic-gate 	 */
32430Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
32440Sstevel@tonic-gate 	if (hca->state != HCA_DETACHED) {
32450Sstevel@tonic-gate 		ibt_status = ibt_query_hca_ports(hca->hca_hdl, 0, &port_infop,
32460Sstevel@tonic-gate 		    &num_ports, &port_size);
32470Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
32480Sstevel@tonic-gate 	} else {
32490Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
32500Sstevel@tonic-gate 		return (RDMA_FAILED);
32510Sstevel@tonic-gate 	}
32520Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
32530Sstevel@tonic-gate 		return (RDMA_FAILED);
32540Sstevel@tonic-gate 	}
32550Sstevel@tonic-gate 
32567387SRobert.Gordon@Sun.COM 	DTRACE_PROBE1(rpcib__i__regservice_numports,
32577387SRobert.Gordon@Sun.COM 	    int, num_ports);
32587387SRobert.Gordon@Sun.COM 
32597387SRobert.Gordon@Sun.COM 	for (i = 0; i < num_ports; i++) {
32607387SRobert.Gordon@Sun.COM 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE) {
32617387SRobert.Gordon@Sun.COM 			DTRACE_PROBE1(rpcib__i__regservice__portinactive,
32627387SRobert.Gordon@Sun.COM 			    int, i+1);
32637387SRobert.Gordon@Sun.COM 		} else if (port_infop[i].p_linkstate == IBT_PORT_ACTIVE) {
32647387SRobert.Gordon@Sun.COM 			DTRACE_PROBE1(rpcib__i__regservice__portactive,
32657387SRobert.Gordon@Sun.COM 			    int, i+1);
32660Sstevel@tonic-gate 		}
32670Sstevel@tonic-gate 	}
32687387SRobert.Gordon@Sun.COM 
32690Sstevel@tonic-gate 	/*
32700Sstevel@tonic-gate 	 * Get all the IP addresses on this system to register the
32710Sstevel@tonic-gate 	 * given "service type" on all DNS recognized IP addrs.
32720Sstevel@tonic-gate 	 * Each service type such as NFS will have all the systems
32730Sstevel@tonic-gate 	 * IP addresses as its different names. For now the only
32740Sstevel@tonic-gate 	 * type of service we support in RPCIB is NFS.
32750Sstevel@tonic-gate 	 */
32769733SFaramarz.Jalalian@Sun.COM 	rw_enter(&rib_stat->service_list_lock, RW_WRITER);
32770Sstevel@tonic-gate 	/*
32780Sstevel@tonic-gate 	 * Start registering and binding service to active
32790Sstevel@tonic-gate 	 * on active ports on this HCA.
32800Sstevel@tonic-gate 	 */
32810Sstevel@tonic-gate 	nbinds = 0;
32829733SFaramarz.Jalalian@Sun.COM 	for (service = rib_stat->service_list;
32839733SFaramarz.Jalalian@Sun.COM 	    service && (service->srv_type != service_type);
32849733SFaramarz.Jalalian@Sun.COM 	    service = service->next)
32859733SFaramarz.Jalalian@Sun.COM 		;
32869733SFaramarz.Jalalian@Sun.COM 
32879733SFaramarz.Jalalian@Sun.COM 	if (service == NULL) {
32889733SFaramarz.Jalalian@Sun.COM 		/*
32899733SFaramarz.Jalalian@Sun.COM 		 * We use IP addresses as the service names for
32909733SFaramarz.Jalalian@Sun.COM 		 * service registration.  Register each of them
32919733SFaramarz.Jalalian@Sun.COM 		 * with CM to obtain a svc_id and svc_hdl.  We do not
32929733SFaramarz.Jalalian@Sun.COM 		 * register the service with machine's loopback address.
32939733SFaramarz.Jalalian@Sun.COM 		 */
32949733SFaramarz.Jalalian@Sun.COM 		(void) bzero(&srv_id, sizeof (ib_svc_id_t));
32959733SFaramarz.Jalalian@Sun.COM 		(void) bzero(&srv_hdl, sizeof (ibt_srv_hdl_t));
32969733SFaramarz.Jalalian@Sun.COM 		(void) bzero(&sdesc, sizeof (ibt_srv_desc_t));
32979733SFaramarz.Jalalian@Sun.COM 		sdesc.sd_handler = rib_srv_cm_handler;
32989733SFaramarz.Jalalian@Sun.COM 		sdesc.sd_flags = 0;
32999733SFaramarz.Jalalian@Sun.COM 		ibt_status = ibt_register_service(hca->ibt_clnt_hdl,
33009733SFaramarz.Jalalian@Sun.COM 		    &sdesc, ibt_get_ip_sid(protocol_num, dst_port),
33019733SFaramarz.Jalalian@Sun.COM 		    1, &srv_hdl, &srv_id);
33029733SFaramarz.Jalalian@Sun.COM 		if ((ibt_status != IBT_SUCCESS) &&
33039733SFaramarz.Jalalian@Sun.COM 		    (ibt_status != IBT_CM_SERVICE_EXISTS)) {
33049733SFaramarz.Jalalian@Sun.COM 			rw_exit(&rib_stat->service_list_lock);
33059733SFaramarz.Jalalian@Sun.COM 			DTRACE_PROBE1(rpcib__i__regservice__ibtres,
33069733SFaramarz.Jalalian@Sun.COM 			    int, ibt_status);
33079733SFaramarz.Jalalian@Sun.COM 			ibt_free_portinfo(port_infop, port_size);
33089733SFaramarz.Jalalian@Sun.COM 			return (RDMA_FAILED);
33099733SFaramarz.Jalalian@Sun.COM 		}
33109733SFaramarz.Jalalian@Sun.COM 
33119733SFaramarz.Jalalian@Sun.COM 		/*
33129733SFaramarz.Jalalian@Sun.COM 		 * Allocate and prepare a service entry
33139733SFaramarz.Jalalian@Sun.COM 		 */
33149733SFaramarz.Jalalian@Sun.COM 		service = kmem_zalloc(sizeof (rib_service_t), KM_SLEEP);
33159733SFaramarz.Jalalian@Sun.COM 
33169733SFaramarz.Jalalian@Sun.COM 		service->srv_type = service_type;
33179733SFaramarz.Jalalian@Sun.COM 		service->srv_hdl = srv_hdl;
33189733SFaramarz.Jalalian@Sun.COM 		service->srv_id = srv_id;
33199733SFaramarz.Jalalian@Sun.COM 
33209733SFaramarz.Jalalian@Sun.COM 		service->next = rib_stat->service_list;
33219733SFaramarz.Jalalian@Sun.COM 		rib_stat->service_list = service;
33229733SFaramarz.Jalalian@Sun.COM 		DTRACE_PROBE1(rpcib__i__regservice__new__service,
33239733SFaramarz.Jalalian@Sun.COM 		    int, service->srv_type);
33249733SFaramarz.Jalalian@Sun.COM 	} else {
33259733SFaramarz.Jalalian@Sun.COM 		srv_hdl = service->srv_hdl;
33269733SFaramarz.Jalalian@Sun.COM 		srv_id = service->srv_id;
33279733SFaramarz.Jalalian@Sun.COM 		DTRACE_PROBE1(rpcib__i__regservice__existing__service,
33289733SFaramarz.Jalalian@Sun.COM 		    int, service->srv_type);
33299733SFaramarz.Jalalian@Sun.COM 	}
33307387SRobert.Gordon@Sun.COM 
33317387SRobert.Gordon@Sun.COM 	for (i = 0; i < num_ports; i++) {
33329733SFaramarz.Jalalian@Sun.COM 		ibt_sbind_hdl_t		sbp;
33339733SFaramarz.Jalalian@Sun.COM 		rib_hca_service_t	*hca_srv;
33349733SFaramarz.Jalalian@Sun.COM 		ib_gid_t		gid;
33359733SFaramarz.Jalalian@Sun.COM 
33360Sstevel@tonic-gate 		if (port_infop[i].p_linkstate != IBT_PORT_ACTIVE)
33370Sstevel@tonic-gate 			continue;
33380Sstevel@tonic-gate 
33390Sstevel@tonic-gate 		for (pki = 0; pki < port_infop[i].p_pkey_tbl_sz; pki++) {
33407387SRobert.Gordon@Sun.COM 			pkey = port_infop[i].p_pkey_tbl[pki];
33419733SFaramarz.Jalalian@Sun.COM 
33429733SFaramarz.Jalalian@Sun.COM 			rw_enter(&hca->bound_services_lock, RW_READER);
33439733SFaramarz.Jalalian@Sun.COM 			gid = port_infop[i].p_sgid_tbl[0];
33449733SFaramarz.Jalalian@Sun.COM 			for (hca_srv = hca->bound_services; hca_srv;
33459733SFaramarz.Jalalian@Sun.COM 			    hca_srv = hca_srv->next) {
33469733SFaramarz.Jalalian@Sun.COM 				if ((hca_srv->srv_id == service->srv_id) &&
33479733SFaramarz.Jalalian@Sun.COM 				    (hca_srv->gid.gid_prefix ==
33489733SFaramarz.Jalalian@Sun.COM 				    gid.gid_prefix) &&
33499733SFaramarz.Jalalian@Sun.COM 				    (hca_srv->gid.gid_guid == gid.gid_guid))
33509733SFaramarz.Jalalian@Sun.COM 					break;
33519733SFaramarz.Jalalian@Sun.COM 			}
33529733SFaramarz.Jalalian@Sun.COM 			rw_exit(&hca->bound_services_lock);
33539733SFaramarz.Jalalian@Sun.COM 			if (hca_srv != NULL) {
33549733SFaramarz.Jalalian@Sun.COM 				/*
33559733SFaramarz.Jalalian@Sun.COM 				 * port is alreay bound the the service
33569733SFaramarz.Jalalian@Sun.COM 				 */
33579733SFaramarz.Jalalian@Sun.COM 				DTRACE_PROBE1(
33589733SFaramarz.Jalalian@Sun.COM 				    rpcib__i__regservice__already__bound,
33599733SFaramarz.Jalalian@Sun.COM 				    int, i+1);
33609733SFaramarz.Jalalian@Sun.COM 				nbinds++;
33619733SFaramarz.Jalalian@Sun.COM 				continue;
33629733SFaramarz.Jalalian@Sun.COM 			}
33639733SFaramarz.Jalalian@Sun.COM 
33647387SRobert.Gordon@Sun.COM 			if ((pkey & IBSRM_HB) &&
33657387SRobert.Gordon@Sun.COM 			    (pkey != IB_PKEY_INVALID_FULL)) {
33667387SRobert.Gordon@Sun.COM 
33679733SFaramarz.Jalalian@Sun.COM 				sbp = NULL;
33687387SRobert.Gordon@Sun.COM 				ibt_status = ibt_bind_service(srv_hdl,
33699733SFaramarz.Jalalian@Sun.COM 				    gid, NULL, hca, &sbp);
33709733SFaramarz.Jalalian@Sun.COM 
33719733SFaramarz.Jalalian@Sun.COM 				if (ibt_status == IBT_SUCCESS) {
33729733SFaramarz.Jalalian@Sun.COM 					hca_srv = kmem_zalloc(
33739733SFaramarz.Jalalian@Sun.COM 					    sizeof (rib_hca_service_t),
33749733SFaramarz.Jalalian@Sun.COM 					    KM_SLEEP);
33759733SFaramarz.Jalalian@Sun.COM 					hca_srv->srv_id = srv_id;
33769733SFaramarz.Jalalian@Sun.COM 					hca_srv->gid = gid;
33779733SFaramarz.Jalalian@Sun.COM 					hca_srv->sbind_hdl = sbp;
33789733SFaramarz.Jalalian@Sun.COM 
33799733SFaramarz.Jalalian@Sun.COM 					rw_enter(&hca->bound_services_lock,
33809733SFaramarz.Jalalian@Sun.COM 					    RW_WRITER);
33819733SFaramarz.Jalalian@Sun.COM 					hca_srv->next = hca->bound_services;
33829733SFaramarz.Jalalian@Sun.COM 					hca->bound_services = hca_srv;
33839733SFaramarz.Jalalian@Sun.COM 					rw_exit(&hca->bound_services_lock);
33849733SFaramarz.Jalalian@Sun.COM 					nbinds++;
33859733SFaramarz.Jalalian@Sun.COM 				}
33867387SRobert.Gordon@Sun.COM 
33877387SRobert.Gordon@Sun.COM 				DTRACE_PROBE1(rpcib__i__regservice__bindres,
33887387SRobert.Gordon@Sun.COM 				    int, ibt_status);
33890Sstevel@tonic-gate 			}
33900Sstevel@tonic-gate 		}
33910Sstevel@tonic-gate 	}
33929733SFaramarz.Jalalian@Sun.COM 	rw_exit(&rib_stat->service_list_lock);
33930Sstevel@tonic-gate 
33940Sstevel@tonic-gate 	ibt_free_portinfo(port_infop, port_size);
33950Sstevel@tonic-gate 
33960Sstevel@tonic-gate 	if (nbinds == 0) {
33970Sstevel@tonic-gate 		return (RDMA_FAILED);
33980Sstevel@tonic-gate 	} else {
33990Sstevel@tonic-gate 		/*
34000Sstevel@tonic-gate 		 * Put this plugin into accept state, since atleast
34010Sstevel@tonic-gate 		 * one registration was successful.
34020Sstevel@tonic-gate 		 */
34030Sstevel@tonic-gate 		mutex_enter(&plugin_state_lock);
34040Sstevel@tonic-gate 		plugin_state = ACCEPT;
34050Sstevel@tonic-gate 		mutex_exit(&plugin_state_lock);
34060Sstevel@tonic-gate 		return (RDMA_SUCCESS);
34070Sstevel@tonic-gate 	}
34080Sstevel@tonic-gate }
34090Sstevel@tonic-gate 
34100Sstevel@tonic-gate void
rib_listen(struct rdma_svc_data * rd)34110Sstevel@tonic-gate rib_listen(struct rdma_svc_data *rd)
34120Sstevel@tonic-gate {
34139733SFaramarz.Jalalian@Sun.COM 	rdma_stat status;
34149733SFaramarz.Jalalian@Sun.COM 	int n_listening = 0;
34159733SFaramarz.Jalalian@Sun.COM 	rib_hca_t *hca;
34169733SFaramarz.Jalalian@Sun.COM 
34179733SFaramarz.Jalalian@Sun.COM 	mutex_enter(&rib_stat->listen_lock);
34180Sstevel@tonic-gate 	/*
34199733SFaramarz.Jalalian@Sun.COM 	 * if rd parameter is NULL then it means that rib_stat->q is
34209733SFaramarz.Jalalian@Sun.COM 	 * already initialized by a call from RDMA and we just want to
34219733SFaramarz.Jalalian@Sun.COM 	 * add a newly attached HCA to the same listening state as other
34229733SFaramarz.Jalalian@Sun.COM 	 * HCAs.
34230Sstevel@tonic-gate 	 */
34249733SFaramarz.Jalalian@Sun.COM 	if (rd == NULL) {
34259733SFaramarz.Jalalian@Sun.COM 		if (rib_stat->q == NULL) {
34269733SFaramarz.Jalalian@Sun.COM 			mutex_exit(&rib_stat->listen_lock);
34279733SFaramarz.Jalalian@Sun.COM 			return;
34289733SFaramarz.Jalalian@Sun.COM 		}
34299733SFaramarz.Jalalian@Sun.COM 	} else {
34309733SFaramarz.Jalalian@Sun.COM 		rib_stat->q = &rd->q;
34310Sstevel@tonic-gate 	}
34329733SFaramarz.Jalalian@Sun.COM 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
34339733SFaramarz.Jalalian@Sun.COM 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
34349733SFaramarz.Jalalian@Sun.COM 		/*
34359733SFaramarz.Jalalian@Sun.COM 		 * First check if a hca is still attached
34369733SFaramarz.Jalalian@Sun.COM 		 */
34379733SFaramarz.Jalalian@Sun.COM 		rw_enter(&hca->state_lock, RW_READER);
34389733SFaramarz.Jalalian@Sun.COM 		if (hca->state != HCA_INITED) {
34399733SFaramarz.Jalalian@Sun.COM 			rw_exit(&hca->state_lock);
34409733SFaramarz.Jalalian@Sun.COM 			continue;
34419733SFaramarz.Jalalian@Sun.COM 		}
34429733SFaramarz.Jalalian@Sun.COM 		rw_exit(&hca->state_lock);
34439733SFaramarz.Jalalian@Sun.COM 
34449733SFaramarz.Jalalian@Sun.COM 		/*
34459733SFaramarz.Jalalian@Sun.COM 		 * Right now the only service type is NFS. Hence
34469733SFaramarz.Jalalian@Sun.COM 		 * force feed this value. Ideally to communicate
34479733SFaramarz.Jalalian@Sun.COM 		 * the service type it should be passed down in
34489733SFaramarz.Jalalian@Sun.COM 		 * rdma_svc_data.
34499733SFaramarz.Jalalian@Sun.COM 		 */
34509733SFaramarz.Jalalian@Sun.COM 		status = rib_register_service(hca, NFS,
34519733SFaramarz.Jalalian@Sun.COM 		    IPPROTO_TCP, nfs_rdma_port);
34529733SFaramarz.Jalalian@Sun.COM 		if (status == RDMA_SUCCESS)
34539733SFaramarz.Jalalian@Sun.COM 			n_listening++;
34540Sstevel@tonic-gate 	}
34559733SFaramarz.Jalalian@Sun.COM 	rw_exit(&rib_stat->hcas_list_lock);
34569733SFaramarz.Jalalian@Sun.COM 
34570Sstevel@tonic-gate 	/*
34580Sstevel@tonic-gate 	 * Service active on an HCA, check rd->err_code for more
34590Sstevel@tonic-gate 	 * explainable errors.
34600Sstevel@tonic-gate 	 */
34619733SFaramarz.Jalalian@Sun.COM 	if (rd) {
34629733SFaramarz.Jalalian@Sun.COM 		if (n_listening > 0) {
34639733SFaramarz.Jalalian@Sun.COM 			rd->active = 1;
34649733SFaramarz.Jalalian@Sun.COM 			rd->err_code = RDMA_SUCCESS;
34659733SFaramarz.Jalalian@Sun.COM 		} else {
34669733SFaramarz.Jalalian@Sun.COM 			rd->active = 0;
34679733SFaramarz.Jalalian@Sun.COM 			rd->err_code = RDMA_FAILED;
34689733SFaramarz.Jalalian@Sun.COM 		}
34699733SFaramarz.Jalalian@Sun.COM 	}
34709733SFaramarz.Jalalian@Sun.COM 	mutex_exit(&rib_stat->listen_lock);
34710Sstevel@tonic-gate }
34720Sstevel@tonic-gate 
34730Sstevel@tonic-gate /* XXXX */
34740Sstevel@tonic-gate /* ARGSUSED */
34750Sstevel@tonic-gate static void
rib_listen_stop(struct rdma_svc_data * svcdata)34760Sstevel@tonic-gate rib_listen_stop(struct rdma_svc_data *svcdata)
34770Sstevel@tonic-gate {
34780Sstevel@tonic-gate 	rib_hca_t		*hca;
34790Sstevel@tonic-gate 
34809733SFaramarz.Jalalian@Sun.COM 	mutex_enter(&rib_stat->listen_lock);
34810Sstevel@tonic-gate 	/*
34820Sstevel@tonic-gate 	 * KRPC called the RDMATF to stop the listeners, this means
34830Sstevel@tonic-gate 	 * stop sending incomming or recieved requests to KRPC master
34840Sstevel@tonic-gate 	 * transport handle for RDMA-IB. This is also means that the
34850Sstevel@tonic-gate 	 * master transport handle, responsible for us, is going away.
34860Sstevel@tonic-gate 	 */
34870Sstevel@tonic-gate 	mutex_enter(&plugin_state_lock);
34880Sstevel@tonic-gate 	plugin_state = NO_ACCEPT;
34890Sstevel@tonic-gate 	if (svcdata != NULL)
34900Sstevel@tonic-gate 		svcdata->active = 0;
34910Sstevel@tonic-gate 	mutex_exit(&plugin_state_lock);
34920Sstevel@tonic-gate 
34939733SFaramarz.Jalalian@Sun.COM 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
34949733SFaramarz.Jalalian@Sun.COM 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
34959733SFaramarz.Jalalian@Sun.COM 		/*
34969733SFaramarz.Jalalian@Sun.COM 		 * First check if a hca is still attached
34979733SFaramarz.Jalalian@Sun.COM 		 */
34989733SFaramarz.Jalalian@Sun.COM 		rw_enter(&hca->state_lock, RW_READER);
34999733SFaramarz.Jalalian@Sun.COM 		if (hca->state == HCA_DETACHED) {
35009733SFaramarz.Jalalian@Sun.COM 			rw_exit(&hca->state_lock);
35019733SFaramarz.Jalalian@Sun.COM 			continue;
35029733SFaramarz.Jalalian@Sun.COM 		}
35039733SFaramarz.Jalalian@Sun.COM 		rib_close_channels(&hca->srv_conn_list);
35049733SFaramarz.Jalalian@Sun.COM 		rib_stop_services(hca);
35059733SFaramarz.Jalalian@Sun.COM 		rw_exit(&hca->state_lock);
35069733SFaramarz.Jalalian@Sun.COM 	}
35079733SFaramarz.Jalalian@Sun.COM 	rw_exit(&rib_stat->hcas_list_lock);
35089733SFaramarz.Jalalian@Sun.COM 
35090Sstevel@tonic-gate 	/*
35109733SFaramarz.Jalalian@Sun.COM 	 * Avoid rib_listen() using the stale q field.
35119733SFaramarz.Jalalian@Sun.COM 	 * This could happen if a port goes up after all services
35129733SFaramarz.Jalalian@Sun.COM 	 * are already unregistered.
35130Sstevel@tonic-gate 	 */
35149733SFaramarz.Jalalian@Sun.COM 	rib_stat->q = NULL;
35159733SFaramarz.Jalalian@Sun.COM 	mutex_exit(&rib_stat->listen_lock);
35160Sstevel@tonic-gate }
35170Sstevel@tonic-gate 
35180Sstevel@tonic-gate /*
35190Sstevel@tonic-gate  * Traverse the HCA's service list to unbind and deregister services.
35209733SFaramarz.Jalalian@Sun.COM  * For each bound service of HCA to be removed, first find the corresponding
35219733SFaramarz.Jalalian@Sun.COM  * service handle (srv_hdl) and then unbind the service by calling
35229733SFaramarz.Jalalian@Sun.COM  * ibt_unbind_service().
35230Sstevel@tonic-gate  */
35240Sstevel@tonic-gate static void
rib_stop_services(rib_hca_t * hca)35250Sstevel@tonic-gate rib_stop_services(rib_hca_t *hca)
35260Sstevel@tonic-gate {
35279733SFaramarz.Jalalian@Sun.COM 	rib_hca_service_t *srv_list, *to_remove;
35280Sstevel@tonic-gate 
35290Sstevel@tonic-gate 	/*
35300Sstevel@tonic-gate 	 * unbind and deregister the services for this service type.
35310Sstevel@tonic-gate 	 * Right now there is only one service type. In future it will
35320Sstevel@tonic-gate 	 * be passed down to this function.
35330Sstevel@tonic-gate 	 */
35349733SFaramarz.Jalalian@Sun.COM 	rw_enter(&hca->bound_services_lock, RW_READER);
35359733SFaramarz.Jalalian@Sun.COM 	srv_list = hca->bound_services;
35369733SFaramarz.Jalalian@Sun.COM 	hca->bound_services = NULL;
35379733SFaramarz.Jalalian@Sun.COM 	rw_exit(&hca->bound_services_lock);
35389733SFaramarz.Jalalian@Sun.COM 
35390Sstevel@tonic-gate 	while (srv_list != NULL) {
35409733SFaramarz.Jalalian@Sun.COM 		rib_service_t *sc;
35419733SFaramarz.Jalalian@Sun.COM 
35420Sstevel@tonic-gate 		to_remove = srv_list;
35439733SFaramarz.Jalalian@Sun.COM 		srv_list = to_remove->next;
35449733SFaramarz.Jalalian@Sun.COM 		rw_enter(&rib_stat->service_list_lock, RW_READER);
35459733SFaramarz.Jalalian@Sun.COM 		for (sc = rib_stat->service_list;
35469733SFaramarz.Jalalian@Sun.COM 		    sc && (sc->srv_id != to_remove->srv_id);
35479733SFaramarz.Jalalian@Sun.COM 		    sc = sc->next)
35489733SFaramarz.Jalalian@Sun.COM 			;
35499733SFaramarz.Jalalian@Sun.COM 		/*
35509733SFaramarz.Jalalian@Sun.COM 		 * if sc is NULL then the service doesn't exist anymore,
35519733SFaramarz.Jalalian@Sun.COM 		 * probably just removed completely through rib_stat.
35529733SFaramarz.Jalalian@Sun.COM 		 */
35539733SFaramarz.Jalalian@Sun.COM 		if (sc != NULL)
35549733SFaramarz.Jalalian@Sun.COM 			(void) ibt_unbind_service(sc->srv_hdl,
35559733SFaramarz.Jalalian@Sun.COM 			    to_remove->sbind_hdl);
35569733SFaramarz.Jalalian@Sun.COM 		rw_exit(&rib_stat->service_list_lock);
35579733SFaramarz.Jalalian@Sun.COM 		kmem_free(to_remove, sizeof (rib_hca_service_t));
35580Sstevel@tonic-gate 	}
35590Sstevel@tonic-gate }
35600Sstevel@tonic-gate 
35610Sstevel@tonic-gate static struct svc_recv *
rib_init_svc_recv(rib_qp_t * qp,ibt_wr_ds_t * sgl)35620Sstevel@tonic-gate rib_init_svc_recv(rib_qp_t *qp, ibt_wr_ds_t *sgl)
35630Sstevel@tonic-gate {
35640Sstevel@tonic-gate 	struct svc_recv	*recvp;
35650Sstevel@tonic-gate 
35660Sstevel@tonic-gate 	recvp = kmem_zalloc(sizeof (struct svc_recv), KM_SLEEP);
35670Sstevel@tonic-gate 	recvp->vaddr = sgl->ds_va;
35680Sstevel@tonic-gate 	recvp->qp = qp;
35690Sstevel@tonic-gate 	recvp->bytes_xfer = 0;
35700Sstevel@tonic-gate 	return (recvp);
35710Sstevel@tonic-gate }
35720Sstevel@tonic-gate 
35730Sstevel@tonic-gate static int
rib_free_svc_recv(struct svc_recv * recvp)35740Sstevel@tonic-gate rib_free_svc_recv(struct svc_recv *recvp)
35750Sstevel@tonic-gate {
35760Sstevel@tonic-gate 	kmem_free(recvp, sizeof (*recvp));
35770Sstevel@tonic-gate 
35780Sstevel@tonic-gate 	return (0);
35790Sstevel@tonic-gate }
35800Sstevel@tonic-gate 
35810Sstevel@tonic-gate static struct reply *
rib_addreplylist(rib_qp_t * qp,uint32_t msgid)35820Sstevel@tonic-gate rib_addreplylist(rib_qp_t *qp, uint32_t msgid)
35830Sstevel@tonic-gate {
35840Sstevel@tonic-gate 	struct reply	*rep;
35850Sstevel@tonic-gate 
35860Sstevel@tonic-gate 
35870Sstevel@tonic-gate 	rep = kmem_zalloc(sizeof (struct reply), KM_NOSLEEP);
35880Sstevel@tonic-gate 	if (rep == NULL) {
35897387SRobert.Gordon@Sun.COM 		DTRACE_PROBE(rpcib__i__addrreply__nomem);
35900Sstevel@tonic-gate 		return (NULL);
35910Sstevel@tonic-gate 	}
35920Sstevel@tonic-gate 	rep->xid = msgid;
35930Sstevel@tonic-gate 	rep->vaddr_cq = NULL;
35940Sstevel@tonic-gate 	rep->bytes_xfer = 0;
35950Sstevel@tonic-gate 	rep->status = (uint_t)REPLY_WAIT;
35960Sstevel@tonic-gate 	rep->prev = NULL;
35970Sstevel@tonic-gate 	cv_init(&rep->wait_cv, NULL, CV_DEFAULT, NULL);
35980Sstevel@tonic-gate 
35990Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
36000Sstevel@tonic-gate 	if (qp->replylist) {
36010Sstevel@tonic-gate 		rep->next = qp->replylist;
36020Sstevel@tonic-gate 		qp->replylist->prev = rep;
36030Sstevel@tonic-gate 	}
36040Sstevel@tonic-gate 	qp->rep_list_size++;
36057387SRobert.Gordon@Sun.COM 
36067387SRobert.Gordon@Sun.COM 	DTRACE_PROBE1(rpcib__i__addrreply__listsize,
36077387SRobert.Gordon@Sun.COM 	    int, qp->rep_list_size);
36087387SRobert.Gordon@Sun.COM 
36090Sstevel@tonic-gate 	qp->replylist = rep;
36100Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
36110Sstevel@tonic-gate 
36120Sstevel@tonic-gate 	return (rep);
36130Sstevel@tonic-gate }
36140Sstevel@tonic-gate 
36150Sstevel@tonic-gate static rdma_stat
rib_rem_replylist(rib_qp_t * qp)36160Sstevel@tonic-gate rib_rem_replylist(rib_qp_t *qp)
36170Sstevel@tonic-gate {
36180Sstevel@tonic-gate 	struct reply	*r, *n;
36190Sstevel@tonic-gate 
36200Sstevel@tonic-gate 	mutex_enter(&qp->replylist_lock);
36210Sstevel@tonic-gate 	for (r = qp->replylist; r != NULL; r = n) {
36220Sstevel@tonic-gate 		n = r->next;
36230Sstevel@tonic-gate 		(void) rib_remreply(qp, r);
36240Sstevel@tonic-gate 	}
36250Sstevel@tonic-gate 	mutex_exit(&qp->replylist_lock);
36260Sstevel@tonic-gate 
36270Sstevel@tonic-gate 	return (RDMA_SUCCESS);
36280Sstevel@tonic-gate }
36290Sstevel@tonic-gate 
36300Sstevel@tonic-gate static int
rib_remreply(rib_qp_t * qp,struct reply * rep)36310Sstevel@tonic-gate rib_remreply(rib_qp_t *qp, struct reply *rep)
36320Sstevel@tonic-gate {
36330Sstevel@tonic-gate 
36340Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->replylist_lock));
36350Sstevel@tonic-gate 	if (rep->prev) {
36360Sstevel@tonic-gate 		rep->prev->next = rep->next;
36370Sstevel@tonic-gate 	}
36380Sstevel@tonic-gate 	if (rep->next) {
36390Sstevel@tonic-gate 		rep->next->prev = rep->prev;
36400Sstevel@tonic-gate 	}
36410Sstevel@tonic-gate 	if (qp->replylist == rep)
36420Sstevel@tonic-gate 		qp->replylist = rep->next;
36430Sstevel@tonic-gate 
36440Sstevel@tonic-gate 	cv_destroy(&rep->wait_cv);
36450Sstevel@tonic-gate 	qp->rep_list_size--;
36467387SRobert.Gordon@Sun.COM 
36477387SRobert.Gordon@Sun.COM 	DTRACE_PROBE1(rpcib__i__remreply__listsize,
36487387SRobert.Gordon@Sun.COM 	    int, qp->rep_list_size);
36490Sstevel@tonic-gate 
36500Sstevel@tonic-gate 	kmem_free(rep, sizeof (*rep));
36510Sstevel@tonic-gate 
36520Sstevel@tonic-gate 	return (0);
36530Sstevel@tonic-gate }
36540Sstevel@tonic-gate 
36550Sstevel@tonic-gate rdma_stat
rib_registermem(CONN * conn,caddr_t adsp,caddr_t buf,uint_t buflen,struct mrc * buf_handle)36567387SRobert.Gordon@Sun.COM rib_registermem(CONN *conn,  caddr_t adsp, caddr_t buf, uint_t buflen,
36570Sstevel@tonic-gate 	struct mrc *buf_handle)
36580Sstevel@tonic-gate {
36590Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
36600Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
36610Sstevel@tonic-gate 	rdma_stat	status;
36620Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
36630Sstevel@tonic-gate 
36640Sstevel@tonic-gate 	/*
36650Sstevel@tonic-gate 	 * Note: ALL buffer pools use the same memory type RDMARW.
36660Sstevel@tonic-gate 	 */
36677387SRobert.Gordon@Sun.COM 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
36680Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
3669249Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
36700Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
36710Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
36720Sstevel@tonic-gate 	} else {
36730Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
36740Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
36750Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
36760Sstevel@tonic-gate 	}
36770Sstevel@tonic-gate 	return (status);
36780Sstevel@tonic-gate }
36790Sstevel@tonic-gate 
36800Sstevel@tonic-gate static rdma_stat
rib_reg_mem(rib_hca_t * hca,caddr_t adsp,caddr_t buf,uint_t size,ibt_mr_flags_t spec,ibt_mr_hdl_t * mr_hdlp,ibt_mr_desc_t * mr_descp)36817387SRobert.Gordon@Sun.COM rib_reg_mem(rib_hca_t *hca, caddr_t adsp, caddr_t buf, uint_t size,
36827387SRobert.Gordon@Sun.COM 	ibt_mr_flags_t spec,
36830Sstevel@tonic-gate 	ibt_mr_hdl_t *mr_hdlp, ibt_mr_desc_t *mr_descp)
36840Sstevel@tonic-gate {
36850Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
36860Sstevel@tonic-gate 	ibt_status_t	ibt_status;
3687249Sjwahlig 	mem_attr.mr_vaddr = (uintptr_t)buf;
36880Sstevel@tonic-gate 	mem_attr.mr_len = (ib_msglen_t)size;
36897387SRobert.Gordon@Sun.COM 	mem_attr.mr_as = (struct as *)(caddr_t)adsp;
36900Sstevel@tonic-gate 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
36910Sstevel@tonic-gate 	    IBT_MR_ENABLE_REMOTE_READ | IBT_MR_ENABLE_REMOTE_WRITE |
36920Sstevel@tonic-gate 	    IBT_MR_ENABLE_WINDOW_BIND | spec;
36930Sstevel@tonic-gate 
36940Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
36959733SFaramarz.Jalalian@Sun.COM 	if (hca->state != HCA_DETACHED) {
36960Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl, hca->pd_hdl,
36977387SRobert.Gordon@Sun.COM 		    &mem_attr, mr_hdlp, mr_descp);
36980Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
36990Sstevel@tonic-gate 	} else {
37000Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
37010Sstevel@tonic-gate 		return (RDMA_FAILED);
37020Sstevel@tonic-gate 	}
37030Sstevel@tonic-gate 
37040Sstevel@tonic-gate 	if (ibt_status != IBT_SUCCESS) {
37050Sstevel@tonic-gate 		return (RDMA_FAILED);
37060Sstevel@tonic-gate 	}
37070Sstevel@tonic-gate 	return (RDMA_SUCCESS);
37080Sstevel@tonic-gate }
37090Sstevel@tonic-gate 
37100Sstevel@tonic-gate rdma_stat
rib_registermemsync(CONN * conn,caddr_t adsp,caddr_t buf,uint_t buflen,struct mrc * buf_handle,RIB_SYNCMEM_HANDLE * sync_handle,void * lrc)37117387SRobert.Gordon@Sun.COM rib_registermemsync(CONN *conn,  caddr_t adsp, caddr_t buf, uint_t buflen,
37127387SRobert.Gordon@Sun.COM 	struct mrc *buf_handle, RIB_SYNCMEM_HANDLE *sync_handle, void *lrc)
37130Sstevel@tonic-gate {
37140Sstevel@tonic-gate 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
37157387SRobert.Gordon@Sun.COM 	rib_lrc_entry_t *l;
37160Sstevel@tonic-gate 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
37170Sstevel@tonic-gate 	rdma_stat	status;
37180Sstevel@tonic-gate 	rib_hca_t	*hca = (ctoqp(conn))->hca;
37190Sstevel@tonic-gate 
37200Sstevel@tonic-gate 	/*
37210Sstevel@tonic-gate 	 * Non-coherent memory registration.
37220Sstevel@tonic-gate 	 */
37237387SRobert.Gordon@Sun.COM 	l = (rib_lrc_entry_t *)lrc;
37247387SRobert.Gordon@Sun.COM 	if (l) {
37257387SRobert.Gordon@Sun.COM 		if (l->registered) {
37267387SRobert.Gordon@Sun.COM 			buf_handle->mrc_linfo =
37277387SRobert.Gordon@Sun.COM 			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
37287387SRobert.Gordon@Sun.COM 			buf_handle->mrc_lmr =
37297387SRobert.Gordon@Sun.COM 			    (uint32_t)l->lrc_mhandle.mrc_lmr;
37307387SRobert.Gordon@Sun.COM 			buf_handle->mrc_rmr =
37317387SRobert.Gordon@Sun.COM 			    (uint32_t)l->lrc_mhandle.mrc_rmr;
37327387SRobert.Gordon@Sun.COM 			*sync_handle = (RIB_SYNCMEM_HANDLE)
37337387SRobert.Gordon@Sun.COM 			    (uintptr_t)l->lrc_mhandle.mrc_linfo;
37347387SRobert.Gordon@Sun.COM 			return (RDMA_SUCCESS);
37357387SRobert.Gordon@Sun.COM 		} else {
37367387SRobert.Gordon@Sun.COM 			/* Always register the whole buffer */
37377387SRobert.Gordon@Sun.COM 			buf = (caddr_t)l->lrc_buf;
37387387SRobert.Gordon@Sun.COM 			buflen = l->lrc_len;
37397387SRobert.Gordon@Sun.COM 		}
37407387SRobert.Gordon@Sun.COM 	}
37417387SRobert.Gordon@Sun.COM 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
37427387SRobert.Gordon@Sun.COM 
37430Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
37447387SRobert.Gordon@Sun.COM 		if (l) {
37457387SRobert.Gordon@Sun.COM 			l->lrc_mhandle.mrc_linfo = (uintptr_t)mr_hdl;
37467387SRobert.Gordon@Sun.COM 			l->lrc_mhandle.mrc_lmr   = (uint32_t)mr_desc.md_lkey;
37477387SRobert.Gordon@Sun.COM 			l->lrc_mhandle.mrc_rmr   = (uint32_t)mr_desc.md_rkey;
37487387SRobert.Gordon@Sun.COM 			l->registered		 = TRUE;
37497387SRobert.Gordon@Sun.COM 		}
3750249Sjwahlig 		buf_handle->mrc_linfo = (uintptr_t)mr_hdl;
37510Sstevel@tonic-gate 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
37520Sstevel@tonic-gate 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
37530Sstevel@tonic-gate 		*sync_handle = (RIB_SYNCMEM_HANDLE)mr_hdl;
37540Sstevel@tonic-gate 	} else {
37550Sstevel@tonic-gate 		buf_handle->mrc_linfo = NULL;
37560Sstevel@tonic-gate 		buf_handle->mrc_lmr = 0;
37570Sstevel@tonic-gate 		buf_handle->mrc_rmr = 0;
37580Sstevel@tonic-gate 	}
37590Sstevel@tonic-gate 	return (status);
37600Sstevel@tonic-gate }
37610Sstevel@tonic-gate 
37620Sstevel@tonic-gate /* ARGSUSED */
37630Sstevel@tonic-gate rdma_stat
rib_deregistermem(CONN * conn,caddr_t buf,struct mrc buf_handle)37640Sstevel@tonic-gate rib_deregistermem(CONN *conn, caddr_t buf, struct mrc buf_handle)
37650Sstevel@tonic-gate {
37660Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
37670Sstevel@tonic-gate 	/*
37680Sstevel@tonic-gate 	 * Allow memory deregistration even if HCA is
37690Sstevel@tonic-gate 	 * getting detached. Need all outstanding
37700Sstevel@tonic-gate 	 * memory registrations to be deregistered
37710Sstevel@tonic-gate 	 * before HCA_DETACH_EVENT can be accepted.
37720Sstevel@tonic-gate 	 */
37730Sstevel@tonic-gate 	(void) ibt_deregister_mr(hca->hca_hdl,
37747387SRobert.Gordon@Sun.COM 	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
37750Sstevel@tonic-gate 	return (RDMA_SUCCESS);
37760Sstevel@tonic-gate }
37770Sstevel@tonic-gate 
37780Sstevel@tonic-gate /* ARGSUSED */
37790Sstevel@tonic-gate rdma_stat
rib_deregistermemsync(CONN * conn,caddr_t buf,struct mrc buf_handle,RIB_SYNCMEM_HANDLE sync_handle,void * lrc)37800Sstevel@tonic-gate rib_deregistermemsync(CONN *conn, caddr_t buf, struct mrc buf_handle,
37817387SRobert.Gordon@Sun.COM 		RIB_SYNCMEM_HANDLE sync_handle, void *lrc)
37820Sstevel@tonic-gate {
37837387SRobert.Gordon@Sun.COM 	rib_lrc_entry_t *l;
37847387SRobert.Gordon@Sun.COM 	l = (rib_lrc_entry_t *)lrc;
37857387SRobert.Gordon@Sun.COM 	if (l)
37867387SRobert.Gordon@Sun.COM 		if (l->registered)
37877387SRobert.Gordon@Sun.COM 			return (RDMA_SUCCESS);
37887387SRobert.Gordon@Sun.COM 
37890Sstevel@tonic-gate 	(void) rib_deregistermem(conn, buf, buf_handle);
37900Sstevel@tonic-gate 
37910Sstevel@tonic-gate 	return (RDMA_SUCCESS);
37920Sstevel@tonic-gate }
37930Sstevel@tonic-gate 
37940Sstevel@tonic-gate /* ARGSUSED */
37950Sstevel@tonic-gate rdma_stat
rib_syncmem(CONN * conn,RIB_SYNCMEM_HANDLE shandle,caddr_t buf,int len,int cpu)37960Sstevel@tonic-gate rib_syncmem(CONN *conn, RIB_SYNCMEM_HANDLE shandle, caddr_t buf,
37970Sstevel@tonic-gate 		int len, int cpu)
37980Sstevel@tonic-gate {
37990Sstevel@tonic-gate 	ibt_status_t	status;
38000Sstevel@tonic-gate 	rib_hca_t *hca = (ctoqp(conn))->hca;
38010Sstevel@tonic-gate 	ibt_mr_sync_t	mr_segment;
38020Sstevel@tonic-gate 
38030Sstevel@tonic-gate 	mr_segment.ms_handle = (ibt_mr_hdl_t)shandle;
3804249Sjwahlig 	mr_segment.ms_vaddr = (ib_vaddr_t)(uintptr_t)buf;
38050Sstevel@tonic-gate 	mr_segment.ms_len = (ib_memlen_t)len;
38060Sstevel@tonic-gate 	if (cpu) {
38070Sstevel@tonic-gate 		/* make incoming data visible to memory */
38080Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_WRITE;
38090Sstevel@tonic-gate 	} else {
38100Sstevel@tonic-gate 		/* make memory changes visible to IO */
38110Sstevel@tonic-gate 		mr_segment.ms_flags = IBT_SYNC_READ;
38120Sstevel@tonic-gate 	}
38130Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
38149733SFaramarz.Jalalian@Sun.COM 	if (hca->state != HCA_DETACHED) {
38150Sstevel@tonic-gate 		status = ibt_sync_mr(hca->hca_hdl, &mr_segment, 1);
38160Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
38170Sstevel@tonic-gate 	} else {
38180Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
38190Sstevel@tonic-gate 		return (RDMA_FAILED);
38200Sstevel@tonic-gate 	}
38210Sstevel@tonic-gate 
38220Sstevel@tonic-gate 	if (status == IBT_SUCCESS)
38230Sstevel@tonic-gate 		return (RDMA_SUCCESS);
38240Sstevel@tonic-gate 	else {
38250Sstevel@tonic-gate 		return (RDMA_FAILED);
38260Sstevel@tonic-gate 	}
38270Sstevel@tonic-gate }
38280Sstevel@tonic-gate 
38290Sstevel@tonic-gate /*
38300Sstevel@tonic-gate  * XXXX	????
38310Sstevel@tonic-gate  */
38320Sstevel@tonic-gate static rdma_stat
rib_getinfo(rdma_info_t * info)38330Sstevel@tonic-gate rib_getinfo(rdma_info_t *info)
38340Sstevel@tonic-gate {
38350Sstevel@tonic-gate 	/*
38360Sstevel@tonic-gate 	 * XXXX	Hack!
38370Sstevel@tonic-gate 	 */
38380Sstevel@tonic-gate 	info->addrlen = 16;
38390Sstevel@tonic-gate 	info->mts = 1000000;
38400Sstevel@tonic-gate 	info->mtu = 1000000;
38410Sstevel@tonic-gate 
38420Sstevel@tonic-gate 	return (RDMA_SUCCESS);
38430Sstevel@tonic-gate }
38440Sstevel@tonic-gate 
38450Sstevel@tonic-gate rib_bufpool_t *
rib_rbufpool_create(rib_hca_t * hca,int ptype,int num)38460Sstevel@tonic-gate rib_rbufpool_create(rib_hca_t *hca, int ptype, int num)
38470Sstevel@tonic-gate {
38480Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
38490Sstevel@tonic-gate 	bufpool_t	*bp = NULL;
38500Sstevel@tonic-gate 	caddr_t		buf;
38510Sstevel@tonic-gate 	ibt_mr_attr_t	mem_attr;
38520Sstevel@tonic-gate 	ibt_status_t	ibt_status;
38530Sstevel@tonic-gate 	int		i, j;
38540Sstevel@tonic-gate 
38550Sstevel@tonic-gate 	rbp = (rib_bufpool_t *)kmem_zalloc(sizeof (rib_bufpool_t), KM_SLEEP);
38560Sstevel@tonic-gate 
38570Sstevel@tonic-gate 	bp = (bufpool_t *)kmem_zalloc(sizeof (bufpool_t) +
38587387SRobert.Gordon@Sun.COM 	    num * sizeof (void *), KM_SLEEP);
38590Sstevel@tonic-gate 
38600Sstevel@tonic-gate 	mutex_init(&bp->buflock, NULL, MUTEX_DRIVER, hca->iblock);
38610Sstevel@tonic-gate 	bp->numelems = num;
38620Sstevel@tonic-gate 
38637387SRobert.Gordon@Sun.COM 
38640Sstevel@tonic-gate 	switch (ptype) {
38657387SRobert.Gordon@Sun.COM 	case SEND_BUFFER:
38660Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
38670Sstevel@tonic-gate 		bp->rsize = RPC_MSG_SZ;
38680Sstevel@tonic-gate 		break;
38697387SRobert.Gordon@Sun.COM 	case RECV_BUFFER:
38700Sstevel@tonic-gate 		mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
38710Sstevel@tonic-gate 		bp->rsize = RPC_BUF_SIZE;
38720Sstevel@tonic-gate 		break;
38737387SRobert.Gordon@Sun.COM 	default:
38740Sstevel@tonic-gate 		goto fail;
38750Sstevel@tonic-gate 	}
38760Sstevel@tonic-gate 
38770Sstevel@tonic-gate 	/*
38780Sstevel@tonic-gate 	 * Register the pool.
38790Sstevel@tonic-gate 	 */
38800Sstevel@tonic-gate 	bp->bufsize = num * bp->rsize;
38810Sstevel@tonic-gate 	bp->buf = kmem_zalloc(bp->bufsize, KM_SLEEP);
38820Sstevel@tonic-gate 	rbp->mr_hdl = (ibt_mr_hdl_t *)kmem_zalloc(num *
38837387SRobert.Gordon@Sun.COM 	    sizeof (ibt_mr_hdl_t), KM_SLEEP);
38840Sstevel@tonic-gate 	rbp->mr_desc = (ibt_mr_desc_t *)kmem_zalloc(num *
38857387SRobert.Gordon@Sun.COM 	    sizeof (ibt_mr_desc_t), KM_SLEEP);
38860Sstevel@tonic-gate 	rw_enter(&hca->state_lock, RW_READER);
38877387SRobert.Gordon@Sun.COM 
38889733SFaramarz.Jalalian@Sun.COM 	if (hca->state == HCA_DETACHED) {
38890Sstevel@tonic-gate 		rw_exit(&hca->state_lock);
38900Sstevel@tonic-gate 		goto fail;
38910Sstevel@tonic-gate 	}
38927387SRobert.Gordon@Sun.COM 
38930Sstevel@tonic-gate 	for (i = 0, buf = bp->buf; i < num; i++, buf += bp->rsize) {
38940Sstevel@tonic-gate 		bzero(&rbp->mr_desc[i], sizeof (ibt_mr_desc_t));
3895249Sjwahlig 		mem_attr.mr_vaddr = (uintptr_t)buf;
38960Sstevel@tonic-gate 		mem_attr.mr_len = (ib_msglen_t)bp->rsize;
38970Sstevel@tonic-gate 		mem_attr.mr_as = NULL;
38980Sstevel@tonic-gate 		ibt_status = ibt_register_mr(hca->hca_hdl,
38997387SRobert.Gordon@Sun.COM 		    hca->pd_hdl, &mem_attr,
39007387SRobert.Gordon@Sun.COM 		    &rbp->mr_hdl[i],
39017387SRobert.Gordon@Sun.COM 		    &rbp->mr_desc[i]);
39020Sstevel@tonic-gate 		if (ibt_status != IBT_SUCCESS) {
39037387SRobert.Gordon@Sun.COM 			for (j = 0; j < i; j++) {
39047387SRobert.Gordon@Sun.COM 				(void) ibt_deregister_mr(hca->hca_hdl,
39057387SRobert.Gordon@Sun.COM 				    rbp->mr_hdl[j]);
39067387SRobert.Gordon@Sun.COM 			}
39077387SRobert.Gordon@Sun.COM 			rw_exit(&hca->state_lock);
39087387SRobert.Gordon@Sun.COM 			goto fail;
39090Sstevel@tonic-gate 		}
39100Sstevel@tonic-gate 	}
39110Sstevel@tonic-gate 	rw_exit(&hca->state_lock);
39120Sstevel@tonic-gate 	buf = (caddr_t)bp->buf;
39130Sstevel@tonic-gate 	for (i = 0; i < num; i++, buf += bp->rsize) {
39140Sstevel@tonic-gate 		bp->buflist[i] = (void *)buf;
39150Sstevel@tonic-gate 	}
39160Sstevel@tonic-gate 	bp->buffree = num - 1;	/* no. of free buffers */
39170Sstevel@tonic-gate 	rbp->bpool = bp;
39180Sstevel@tonic-gate 
39190Sstevel@tonic-gate 	return (rbp);
39200Sstevel@tonic-gate fail:
39210Sstevel@tonic-gate 	if (bp) {
39227387SRobert.Gordon@Sun.COM 		if (bp->buf)
39237387SRobert.Gordon@Sun.COM 			kmem_free(bp->buf, bp->bufsize);
39247387SRobert.Gordon@Sun.COM 		kmem_free(bp, sizeof (bufpool_t) + num*sizeof (void *));
39250Sstevel@tonic-gate 	}
39260Sstevel@tonic-gate 	if (rbp) {
39277387SRobert.Gordon@Sun.COM 		if (rbp->mr_hdl)
39287387SRobert.Gordon@Sun.COM 			kmem_free(rbp->mr_hdl, num*sizeof (ibt_mr_hdl_t));
39297387SRobert.Gordon@Sun.COM 		if (rbp->mr_desc)
39307387SRobert.Gordon@Sun.COM 			kmem_free(rbp->mr_desc, num*sizeof (ibt_mr_desc_t));
39317387SRobert.Gordon@Sun.COM 		kmem_free(rbp, sizeof (rib_bufpool_t));
39320Sstevel@tonic-gate 	}
39330Sstevel@tonic-gate 	return (NULL);
39340Sstevel@tonic-gate }
39350Sstevel@tonic-gate 
39360Sstevel@tonic-gate static void
rib_rbufpool_deregister(rib_hca_t * hca,int ptype)39370Sstevel@tonic-gate rib_rbufpool_deregister(rib_hca_t *hca, int ptype)
39380Sstevel@tonic-gate {
39390Sstevel@tonic-gate 	int i;
39400Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
39410Sstevel@tonic-gate 	bufpool_t *bp;
39420Sstevel@tonic-gate 
39430Sstevel@tonic-gate 	/*
39440Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
39450Sstevel@tonic-gate 	 */
39460Sstevel@tonic-gate 	switch (ptype) {
39470Sstevel@tonic-gate 		case SEND_BUFFER:
39480Sstevel@tonic-gate 			rbp = hca->send_pool;
39490Sstevel@tonic-gate 			break;
39500Sstevel@tonic-gate 		case RECV_BUFFER:
39510Sstevel@tonic-gate 			rbp = hca->recv_pool;
39520Sstevel@tonic-gate 			break;
39530Sstevel@tonic-gate 		default:
39540Sstevel@tonic-gate 			return;
39550Sstevel@tonic-gate 	}
39560Sstevel@tonic-gate 	if (rbp == NULL)
39570Sstevel@tonic-gate 		return;
39580Sstevel@tonic-gate 
39590Sstevel@tonic-gate 	bp = rbp->bpool;
39600Sstevel@tonic-gate 
39610Sstevel@tonic-gate 	/*
39620Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
39630Sstevel@tonic-gate 	 */
39640Sstevel@tonic-gate 	for (i = 0; i < bp->numelems; i++) {
39650Sstevel@tonic-gate 		(void) ibt_deregister_mr(hca->hca_hdl, rbp->mr_hdl[i]);
39660Sstevel@tonic-gate 	}
39670Sstevel@tonic-gate }
39680Sstevel@tonic-gate 
39690Sstevel@tonic-gate static void
rib_rbufpool_free(rib_hca_t * hca,int ptype)39700Sstevel@tonic-gate rib_rbufpool_free(rib_hca_t *hca, int ptype)
39710Sstevel@tonic-gate {
39720Sstevel@tonic-gate 
39730Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
39740Sstevel@tonic-gate 	bufpool_t *bp;
39750Sstevel@tonic-gate 
39760Sstevel@tonic-gate 	/*
39770Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
39780Sstevel@tonic-gate 	 */
39790Sstevel@tonic-gate 	switch (ptype) {
39800Sstevel@tonic-gate 		case SEND_BUFFER:
39810Sstevel@tonic-gate 			rbp = hca->send_pool;
39820Sstevel@tonic-gate 			break;
39830Sstevel@tonic-gate 		case RECV_BUFFER:
39840Sstevel@tonic-gate 			rbp = hca->recv_pool;
39850Sstevel@tonic-gate 			break;
39860Sstevel@tonic-gate 		default:
39870Sstevel@tonic-gate 			return;
39880Sstevel@tonic-gate 	}
39890Sstevel@tonic-gate 	if (rbp == NULL)
39900Sstevel@tonic-gate 		return;
39910Sstevel@tonic-gate 
39920Sstevel@tonic-gate 	bp = rbp->bpool;
39930Sstevel@tonic-gate 
39940Sstevel@tonic-gate 	/*
39950Sstevel@tonic-gate 	 * Free the pool memory.
39960Sstevel@tonic-gate 	 */
39970Sstevel@tonic-gate 	if (rbp->mr_hdl)
39980Sstevel@tonic-gate 		kmem_free(rbp->mr_hdl, bp->numelems*sizeof (ibt_mr_hdl_t));
39990Sstevel@tonic-gate 
40000Sstevel@tonic-gate 	if (rbp->mr_desc)
40010Sstevel@tonic-gate 		kmem_free(rbp->mr_desc, bp->numelems*sizeof (ibt_mr_desc_t));
40020Sstevel@tonic-gate 	if (bp->buf)
40030Sstevel@tonic-gate 		kmem_free(bp->buf, bp->bufsize);
40040Sstevel@tonic-gate 	mutex_destroy(&bp->buflock);
40050Sstevel@tonic-gate 	kmem_free(bp, sizeof (bufpool_t) + bp->numelems*sizeof (void *));
40060Sstevel@tonic-gate 	kmem_free(rbp, sizeof (rib_bufpool_t));
40070Sstevel@tonic-gate }
40080Sstevel@tonic-gate 
40090Sstevel@tonic-gate void
rib_rbufpool_destroy(rib_hca_t * hca,int ptype)40100Sstevel@tonic-gate rib_rbufpool_destroy(rib_hca_t *hca, int ptype)
40110Sstevel@tonic-gate {
40120Sstevel@tonic-gate 	/*
40130Sstevel@tonic-gate 	 * Deregister the pool memory and free it.
40140Sstevel@tonic-gate 	 */
40150Sstevel@tonic-gate 	rib_rbufpool_deregister(hca, ptype);
40160Sstevel@tonic-gate 	rib_rbufpool_free(hca, ptype);
40170Sstevel@tonic-gate }
40180Sstevel@tonic-gate 
40190Sstevel@tonic-gate /*
40200Sstevel@tonic-gate  * Fetch a buffer from the pool of type specified in rdbuf->type.
40210Sstevel@tonic-gate  */
40220Sstevel@tonic-gate static rdma_stat
rib_reg_buf_alloc(CONN * conn,rdma_buf_t * rdbuf)40230Sstevel@tonic-gate rib_reg_buf_alloc(CONN *conn, rdma_buf_t *rdbuf)
40240Sstevel@tonic-gate {
40257387SRobert.Gordon@Sun.COM 	rib_lrc_entry_t *rlep;
40267387SRobert.Gordon@Sun.COM 
40277387SRobert.Gordon@Sun.COM 	if (rdbuf->type ==  RDMA_LONG_BUFFER) {
40287387SRobert.Gordon@Sun.COM 		rlep = rib_get_cache_buf(conn, rdbuf->len);
40297387SRobert.Gordon@Sun.COM 		rdbuf->rb_private =  (caddr_t)rlep;
40307387SRobert.Gordon@Sun.COM 		rdbuf->addr = rlep->lrc_buf;
40317387SRobert.Gordon@Sun.COM 		rdbuf->handle = rlep->lrc_mhandle;
40327387SRobert.Gordon@Sun.COM 		return (RDMA_SUCCESS);
40337387SRobert.Gordon@Sun.COM 	}
40340Sstevel@tonic-gate 
40350Sstevel@tonic-gate 	rdbuf->addr = rib_rbuf_alloc(conn, rdbuf);
40360Sstevel@tonic-gate 	if (rdbuf->addr) {
40370Sstevel@tonic-gate 		switch (rdbuf->type) {
40380Sstevel@tonic-gate 		case SEND_BUFFER:
40390Sstevel@tonic-gate 			rdbuf->len = RPC_MSG_SZ;	/* 1K */
40400Sstevel@tonic-gate 			break;
40410Sstevel@tonic-gate 		case RECV_BUFFER:
40420Sstevel@tonic-gate 			rdbuf->len = RPC_BUF_SIZE; /* 2K */
40430Sstevel@tonic-gate 			break;
40440Sstevel@tonic-gate 		default:
40450Sstevel@tonic-gate 			rdbuf->len = 0;
40460Sstevel@tonic-gate 		}
40470Sstevel@tonic-gate 		return (RDMA_SUCCESS);
40480Sstevel@tonic-gate 	} else
40490Sstevel@tonic-gate 		return (RDMA_FAILED);
40500Sstevel@tonic-gate }
40510Sstevel@tonic-gate 
40520Sstevel@tonic-gate /*
40530Sstevel@tonic-gate  * Fetch a buffer of specified type.
40540Sstevel@tonic-gate  * Note that rdbuf->handle is mw's rkey.
40550Sstevel@tonic-gate  */
40560Sstevel@tonic-gate static void *
rib_rbuf_alloc(CONN * conn,rdma_buf_t * rdbuf)40570Sstevel@tonic-gate rib_rbuf_alloc(CONN *conn, rdma_buf_t *rdbuf)
40580Sstevel@tonic-gate {
40590Sstevel@tonic-gate 	rib_qp_t	*qp = ctoqp(conn);
40600Sstevel@tonic-gate 	rib_hca_t	*hca = qp->hca;
40610Sstevel@tonic-gate 	rdma_btype	ptype = rdbuf->type;
40620Sstevel@tonic-gate 	void		*buf;
40630Sstevel@tonic-gate 	rib_bufpool_t	*rbp = NULL;
40640Sstevel@tonic-gate 	bufpool_t	*bp;
40650Sstevel@tonic-gate 	int		i;
40660Sstevel@tonic-gate 
40670Sstevel@tonic-gate 	/*
40680Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
40690Sstevel@tonic-gate 	 */
40700Sstevel@tonic-gate 	switch (ptype) {
40717387SRobert.Gordon@Sun.COM 	case SEND_BUFFER:
40727387SRobert.Gordon@Sun.COM 		rbp = hca->send_pool;
40737387SRobert.Gordon@Sun.COM 		break;
40747387SRobert.Gordon@Sun.COM 	case RECV_BUFFER:
40757387SRobert.Gordon@Sun.COM 		rbp = hca->recv_pool;
40767387SRobert.Gordon@Sun.COM 		break;
40777387SRobert.Gordon@Sun.COM 	default:
40787387SRobert.Gordon@Sun.COM 		return (NULL);
40790Sstevel@tonic-gate 	}
40800Sstevel@tonic-gate 	if (rbp == NULL)
40810Sstevel@tonic-gate 		return (NULL);
40820Sstevel@tonic-gate 
40830Sstevel@tonic-gate 	bp = rbp->bpool;
40840Sstevel@tonic-gate 
40850Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
40860Sstevel@tonic-gate 	if (bp->buffree < 0) {
40870Sstevel@tonic-gate 		mutex_exit(&bp->buflock);
40880Sstevel@tonic-gate 		return (NULL);
40890Sstevel@tonic-gate 	}
40900Sstevel@tonic-gate 
40910Sstevel@tonic-gate 	/* XXXX put buf, rdbuf->handle.mrc_rmr, ... in one place. */
40920Sstevel@tonic-gate 	buf = bp->buflist[bp->buffree];
40930Sstevel@tonic-gate 	rdbuf->addr = buf;
40940Sstevel@tonic-gate 	rdbuf->len = bp->rsize;
40950Sstevel@tonic-gate 	for (i = bp->numelems - 1; i >= 0; i--) {
40967387SRobert.Gordon@Sun.COM 		if ((ib_vaddr_t)(uintptr_t)buf == rbp->mr_desc[i].md_vaddr) {
40977387SRobert.Gordon@Sun.COM 			rdbuf->handle.mrc_rmr =
40987387SRobert.Gordon@Sun.COM 			    (uint32_t)rbp->mr_desc[i].md_rkey;
40997387SRobert.Gordon@Sun.COM 			rdbuf->handle.mrc_linfo =
41007387SRobert.Gordon@Sun.COM 			    (uintptr_t)rbp->mr_hdl[i];
41017387SRobert.Gordon@Sun.COM 			rdbuf->handle.mrc_lmr =
41027387SRobert.Gordon@Sun.COM 			    (uint32_t)rbp->mr_desc[i].md_lkey;
41037387SRobert.Gordon@Sun.COM 			bp->buffree--;
41047387SRobert.Gordon@Sun.COM 
41057387SRobert.Gordon@Sun.COM 			mutex_exit(&bp->buflock);
41067387SRobert.Gordon@Sun.COM 
41077387SRobert.Gordon@Sun.COM 			return (buf);
41087387SRobert.Gordon@Sun.COM 		}
41090Sstevel@tonic-gate 	}
41107387SRobert.Gordon@Sun.COM 
41110Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
41120Sstevel@tonic-gate 
41130Sstevel@tonic-gate 	return (NULL);
41140Sstevel@tonic-gate }
41150Sstevel@tonic-gate 
41160Sstevel@tonic-gate static void
rib_reg_buf_free(CONN * conn,rdma_buf_t * rdbuf)41170Sstevel@tonic-gate rib_reg_buf_free(CONN *conn, rdma_buf_t *rdbuf)
41180Sstevel@tonic-gate {
41190Sstevel@tonic-gate 
41207387SRobert.Gordon@Sun.COM 	if (rdbuf->type == RDMA_LONG_BUFFER) {
41217387SRobert.Gordon@Sun.COM 		rib_free_cache_buf(conn, (rib_lrc_entry_t *)rdbuf->rb_private);
41227387SRobert.Gordon@Sun.COM 		rdbuf->rb_private = NULL;
41237387SRobert.Gordon@Sun.COM 		return;
41247387SRobert.Gordon@Sun.COM 	}
41250Sstevel@tonic-gate 	rib_rbuf_free(conn, rdbuf->type, rdbuf->addr);
41260Sstevel@tonic-gate }
41270Sstevel@tonic-gate 
41280Sstevel@tonic-gate static void
rib_rbuf_free(CONN * conn,int ptype,void * buf)41290Sstevel@tonic-gate rib_rbuf_free(CONN *conn, int ptype, void *buf)
41300Sstevel@tonic-gate {
41310Sstevel@tonic-gate 	rib_qp_t *qp = ctoqp(conn);
41320Sstevel@tonic-gate 	rib_hca_t *hca = qp->hca;
41330Sstevel@tonic-gate 	rib_bufpool_t *rbp = NULL;
41340Sstevel@tonic-gate 	bufpool_t *bp;
41350Sstevel@tonic-gate 
41360Sstevel@tonic-gate 	/*
41370Sstevel@tonic-gate 	 * Obtain pool address based on type of pool
41380Sstevel@tonic-gate 	 */
41390Sstevel@tonic-gate 	switch (ptype) {
41407387SRobert.Gordon@Sun.COM 	case SEND_BUFFER:
41417387SRobert.Gordon@Sun.COM 		rbp = hca->send_pool;
41427387SRobert.Gordon@Sun.COM 		break;
41437387SRobert.Gordon@Sun.COM 	case RECV_BUFFER:
41447387SRobert.Gordon@Sun.COM 		rbp = hca->recv_pool;
41457387SRobert.Gordon@Sun.COM 		break;
41467387SRobert.Gordon@Sun.COM 	default:
41477387SRobert.Gordon@Sun.COM 		return;
41480Sstevel@tonic-gate 	}
41490Sstevel@tonic-gate 	if (rbp == NULL)
41500Sstevel@tonic-gate 		return;
41510Sstevel@tonic-gate 
41520Sstevel@tonic-gate 	bp = rbp->bpool;
41530Sstevel@tonic-gate 
41540Sstevel@tonic-gate 	mutex_enter(&bp->buflock);
41550Sstevel@tonic-gate 	if (++bp->buffree >= bp->numelems) {
41560Sstevel@tonic-gate 		/*
41570Sstevel@tonic-gate 		 * Should never happen
41580Sstevel@tonic-gate 		 */
41590Sstevel@tonic-gate 		bp->buffree--;
41600Sstevel@tonic-gate 	} else {
41610Sstevel@tonic-gate 		bp->buflist[bp->buffree] = buf;
41620Sstevel@tonic-gate 	}
41630Sstevel@tonic-gate 	mutex_exit(&bp->buflock);
41640Sstevel@tonic-gate }
41650Sstevel@tonic-gate 
41660Sstevel@tonic-gate static rdma_stat
rib_add_connlist(CONN * cn,rib_conn_list_t * connlist)41670Sstevel@tonic-gate rib_add_connlist(CONN *cn, rib_conn_list_t *connlist)
41680Sstevel@tonic-gate {
41690Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
41700Sstevel@tonic-gate 	if (connlist->conn_hd) {
41710Sstevel@tonic-gate 		cn->c_next = connlist->conn_hd;
41720Sstevel@tonic-gate 		connlist->conn_hd->c_prev = cn;
41730Sstevel@tonic-gate 	}
41740Sstevel@tonic-gate 	connlist->conn_hd = cn;
41750Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
41760Sstevel@tonic-gate 
41770Sstevel@tonic-gate 	return (RDMA_SUCCESS);
41780Sstevel@tonic-gate }
41790Sstevel@tonic-gate 
41800Sstevel@tonic-gate static rdma_stat
rib_rm_conn(CONN * cn,rib_conn_list_t * connlist)41810Sstevel@tonic-gate rib_rm_conn(CONN *cn, rib_conn_list_t *connlist)
41820Sstevel@tonic-gate {
41830Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_WRITER);
41840Sstevel@tonic-gate 	if (cn->c_prev) {
41850Sstevel@tonic-gate 		cn->c_prev->c_next = cn->c_next;
41860Sstevel@tonic-gate 	}
41870Sstevel@tonic-gate 	if (cn->c_next) {
41880Sstevel@tonic-gate 		cn->c_next->c_prev = cn->c_prev;
41890Sstevel@tonic-gate 	}
41900Sstevel@tonic-gate 	if (connlist->conn_hd == cn)
41910Sstevel@tonic-gate 		connlist->conn_hd = cn->c_next;
41920Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
41930Sstevel@tonic-gate 
41940Sstevel@tonic-gate 	return (RDMA_SUCCESS);
41950Sstevel@tonic-gate }
41960Sstevel@tonic-gate 
41979803SSiddheshwar.Mahesh@Sun.COM /* ARGSUSED */
41989803SSiddheshwar.Mahesh@Sun.COM static rdma_stat
rib_conn_get(struct netbuf * s_svcaddr,struct netbuf * d_svcaddr,int addr_type,void * handle,CONN ** conn)41999803SSiddheshwar.Mahesh@Sun.COM rib_conn_get(struct netbuf *s_svcaddr, struct netbuf *d_svcaddr,
42009803SSiddheshwar.Mahesh@Sun.COM     int addr_type, void *handle, CONN **conn)
42019803SSiddheshwar.Mahesh@Sun.COM {
42029803SSiddheshwar.Mahesh@Sun.COM 	rdma_stat status;
42039803SSiddheshwar.Mahesh@Sun.COM 	rpcib_ping_t rpt;
42049803SSiddheshwar.Mahesh@Sun.COM 
42059803SSiddheshwar.Mahesh@Sun.COM 	status = rib_connect(s_svcaddr, d_svcaddr, addr_type, &rpt, conn);
42069803SSiddheshwar.Mahesh@Sun.COM 	return (status);
42079803SSiddheshwar.Mahesh@Sun.COM }
42089803SSiddheshwar.Mahesh@Sun.COM 
42090Sstevel@tonic-gate /*
42109733SFaramarz.Jalalian@Sun.COM  * rib_find_hca_connection
42119733SFaramarz.Jalalian@Sun.COM  *
42129733SFaramarz.Jalalian@Sun.COM  * if there is an existing connection to the specified address then
42139733SFaramarz.Jalalian@Sun.COM  * it will be returned in conn, otherwise conn will be set to NULL.
42149733SFaramarz.Jalalian@Sun.COM  * Also cleans up any connection that is in error state.
42150Sstevel@tonic-gate  */
42169733SFaramarz.Jalalian@Sun.COM static int
rib_find_hca_connection(rib_hca_t * hca,struct netbuf * s_svcaddr,struct netbuf * d_svcaddr,CONN ** conn)42179733SFaramarz.Jalalian@Sun.COM rib_find_hca_connection(rib_hca_t *hca, struct netbuf *s_svcaddr,
42189733SFaramarz.Jalalian@Sun.COM     struct netbuf *d_svcaddr, CONN **conn)
42190Sstevel@tonic-gate {
42200Sstevel@tonic-gate 	CONN *cn;
42210Sstevel@tonic-gate 	clock_t cv_stat, timout;
42229733SFaramarz.Jalalian@Sun.COM 
42239733SFaramarz.Jalalian@Sun.COM 	*conn = NULL;
42240Sstevel@tonic-gate again:
42250Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
42260Sstevel@tonic-gate 	cn = hca->cl_conn_list.conn_hd;
42270Sstevel@tonic-gate 	while (cn != NULL) {
42280Sstevel@tonic-gate 		/*
42290Sstevel@tonic-gate 		 * First, clear up any connection in the ERROR state
42300Sstevel@tonic-gate 		 */
42310Sstevel@tonic-gate 		mutex_enter(&cn->c_lock);
42327387SRobert.Gordon@Sun.COM 		if (cn->c_state == C_ERROR_CONN) {
42330Sstevel@tonic-gate 			if (cn->c_ref == 0) {
42340Sstevel@tonic-gate 				/*
42350Sstevel@tonic-gate 				 * Remove connection from list and destroy it.
42360Sstevel@tonic-gate 				 */
42370Sstevel@tonic-gate 				cn->c_state = C_DISCONN_PEND;
42380Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
42390Sstevel@tonic-gate 				rw_exit(&hca->cl_conn_list.conn_lock);
42409723SSiddheshwar.Mahesh@Sun.COM 				rib_conn_close((void *)cn);
42410Sstevel@tonic-gate 				goto again;
42420Sstevel@tonic-gate 			}
42430Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
42440Sstevel@tonic-gate 			cn = cn->c_next;
42450Sstevel@tonic-gate 			continue;
42467387SRobert.Gordon@Sun.COM 		}
42477387SRobert.Gordon@Sun.COM 		if (cn->c_state == C_DISCONN_PEND) {
42480Sstevel@tonic-gate 			mutex_exit(&cn->c_lock);
42490Sstevel@tonic-gate 			cn = cn->c_next;
42500Sstevel@tonic-gate 			continue;
42510Sstevel@tonic-gate 		}
42529733SFaramarz.Jalalian@Sun.COM 
42539733SFaramarz.Jalalian@Sun.COM 		/*
42549733SFaramarz.Jalalian@Sun.COM 		 * source address is only checked for if there is one,
42559733SFaramarz.Jalalian@Sun.COM 		 * this is the case for retries.
42569733SFaramarz.Jalalian@Sun.COM 		 */
42579733SFaramarz.Jalalian@Sun.COM 		if ((cn->c_raddr.len == d_svcaddr->len) &&
42589733SFaramarz.Jalalian@Sun.COM 		    (bcmp(d_svcaddr->buf, cn->c_raddr.buf,
42599733SFaramarz.Jalalian@Sun.COM 		    d_svcaddr->len) == 0) &&
42609733SFaramarz.Jalalian@Sun.COM 		    ((s_svcaddr->len == 0) ||
42619733SFaramarz.Jalalian@Sun.COM 		    ((cn->c_laddr.len == s_svcaddr->len) &&
42629733SFaramarz.Jalalian@Sun.COM 		    (bcmp(s_svcaddr->buf, cn->c_laddr.buf,
42639733SFaramarz.Jalalian@Sun.COM 		    s_svcaddr->len) == 0)))) {
42640Sstevel@tonic-gate 			/*
42650Sstevel@tonic-gate 			 * Our connection. Give up conn list lock
42660Sstevel@tonic-gate 			 * as we are done traversing the list.
42670Sstevel@tonic-gate 			 */
42680Sstevel@tonic-gate 			rw_exit(&hca->cl_conn_list.conn_lock);
42690Sstevel@tonic-gate 			if (cn->c_state == C_CONNECTED) {
42700Sstevel@tonic-gate 				cn->c_ref++;	/* sharing a conn */
42710Sstevel@tonic-gate 				mutex_exit(&cn->c_lock);
42720Sstevel@tonic-gate 				*conn = cn;
42739733SFaramarz.Jalalian@Sun.COM 				return (RDMA_SUCCESS);
42740Sstevel@tonic-gate 			}
42750Sstevel@tonic-gate 			if (cn->c_state == C_CONN_PEND) {
42760Sstevel@tonic-gate 				/*
42770Sstevel@tonic-gate 				 * Hold a reference to this conn before
42780Sstevel@tonic-gate 				 * we give up the lock.
42790Sstevel@tonic-gate 				 */
42800Sstevel@tonic-gate 				cn->c_ref++;
42810Sstevel@tonic-gate 				timout =  ddi_get_lbolt() +
42820Sstevel@tonic-gate 				    drv_usectohz(CONN_WAIT_TIME * 1000000);
42830Sstevel@tonic-gate 				while ((cv_stat = cv_timedwait_sig(&cn->c_cv,
42847387SRobert.Gordon@Sun.COM 				    &cn->c_lock, timout)) > 0 &&
42857387SRobert.Gordon@Sun.COM 				    cn->c_state == C_CONN_PEND)
42860Sstevel@tonic-gate 					;
42870Sstevel@tonic-gate 				if (cv_stat == 0) {
428811530SSiddheshwar.Mahesh@Sun.COM 					(void) rib_conn_release_locked(cn);
42890Sstevel@tonic-gate 					return (RDMA_INTR);
42900Sstevel@tonic-gate 				}
42910Sstevel@tonic-gate 				if (cv_stat < 0) {
429211530SSiddheshwar.Mahesh@Sun.COM 					(void) rib_conn_release_locked(cn);
42930Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
42940Sstevel@tonic-gate 				}
42950Sstevel@tonic-gate 				if (cn->c_state == C_CONNECTED) {
42960Sstevel@tonic-gate 					*conn = cn;
42970Sstevel@tonic-gate 					mutex_exit(&cn->c_lock);
42989733SFaramarz.Jalalian@Sun.COM 					return (RDMA_SUCCESS);
42990Sstevel@tonic-gate 				} else {
430011530SSiddheshwar.Mahesh@Sun.COM 					(void) rib_conn_release_locked(cn);
43010Sstevel@tonic-gate 					return (RDMA_TIMEDOUT);
43020Sstevel@tonic-gate 				}
43030Sstevel@tonic-gate 			}
43040Sstevel@tonic-gate 		}
43050Sstevel@tonic-gate 		mutex_exit(&cn->c_lock);
43060Sstevel@tonic-gate 		cn = cn->c_next;
43070Sstevel@tonic-gate 	}
43080Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
43099733SFaramarz.Jalalian@Sun.COM 	*conn = NULL;
43109733SFaramarz.Jalalian@Sun.COM 	return (RDMA_FAILED);
43119733SFaramarz.Jalalian@Sun.COM }
43129733SFaramarz.Jalalian@Sun.COM 
43139733SFaramarz.Jalalian@Sun.COM /*
43149733SFaramarz.Jalalian@Sun.COM  * Connection management.
43159733SFaramarz.Jalalian@Sun.COM  * IBTF does not support recycling of channels. So connections are only
43169733SFaramarz.Jalalian@Sun.COM  * in four states - C_CONN_PEND, or C_CONNECTED, or C_ERROR_CONN or
43179733SFaramarz.Jalalian@Sun.COM  * C_DISCONN_PEND state. No C_IDLE state.
43189733SFaramarz.Jalalian@Sun.COM  * C_CONN_PEND state: Connection establishment in progress to the server.
43199733SFaramarz.Jalalian@Sun.COM  * C_CONNECTED state: A connection when created is in C_CONNECTED state.
43209733SFaramarz.Jalalian@Sun.COM  * It has an RC channel associated with it. ibt_post_send/recv are allowed
43219733SFaramarz.Jalalian@Sun.COM  * only in this state.
43229733SFaramarz.Jalalian@Sun.COM  * C_ERROR_CONN state: A connection transitions to this state when WRs on the
43239733SFaramarz.Jalalian@Sun.COM  * channel are completed in error or an IBT_CM_EVENT_CONN_CLOSED event
43249733SFaramarz.Jalalian@Sun.COM  * happens on the channel or a IBT_HCA_DETACH_EVENT occurs on the HCA.
43259733SFaramarz.Jalalian@Sun.COM  * C_DISCONN_PEND state: When a connection is in C_ERROR_CONN state and when
43269733SFaramarz.Jalalian@Sun.COM  * c_ref drops to 0 (this indicates that RPC has no more references to this
43279733SFaramarz.Jalalian@Sun.COM  * connection), the connection should be destroyed. A connection transitions
43289733SFaramarz.Jalalian@Sun.COM  * into this state when it is being destroyed.
43299733SFaramarz.Jalalian@Sun.COM  */
43309733SFaramarz.Jalalian@Sun.COM /* ARGSUSED */
43319733SFaramarz.Jalalian@Sun.COM static rdma_stat
rib_connect(struct netbuf * s_svcaddr,struct netbuf * d_svcaddr,int addr_type,rpcib_ping_t * rpt,CONN ** conn)43329803SSiddheshwar.Mahesh@Sun.COM rib_connect(struct netbuf *s_svcaddr, struct netbuf *d_svcaddr,
43339803SSiddheshwar.Mahesh@Sun.COM     int addr_type, rpcib_ping_t *rpt, CONN **conn)
43349733SFaramarz.Jalalian@Sun.COM {
43359733SFaramarz.Jalalian@Sun.COM 	CONN *cn;
43369733SFaramarz.Jalalian@Sun.COM 	int status;
43379733SFaramarz.Jalalian@Sun.COM 	rib_hca_t *hca;
43389733SFaramarz.Jalalian@Sun.COM 	rib_qp_t *qp;
43399733SFaramarz.Jalalian@Sun.COM 	int s_addr_len;
43409733SFaramarz.Jalalian@Sun.COM 	char *s_addr_buf;
43419733SFaramarz.Jalalian@Sun.COM 
43429733SFaramarz.Jalalian@Sun.COM 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
43439733SFaramarz.Jalalian@Sun.COM 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
43449733SFaramarz.Jalalian@Sun.COM 		rw_enter(&hca->state_lock, RW_READER);
43459733SFaramarz.Jalalian@Sun.COM 		if (hca->state != HCA_DETACHED) {
43469733SFaramarz.Jalalian@Sun.COM 			status = rib_find_hca_connection(hca, s_svcaddr,
43479733SFaramarz.Jalalian@Sun.COM 			    d_svcaddr, conn);
43489733SFaramarz.Jalalian@Sun.COM 			rw_exit(&hca->state_lock);
43499733SFaramarz.Jalalian@Sun.COM 			if ((status == RDMA_INTR) || (status == RDMA_SUCCESS)) {
43509733SFaramarz.Jalalian@Sun.COM 				rw_exit(&rib_stat->hcas_list_lock);
43519733SFaramarz.Jalalian@Sun.COM 				return (status);
43529733SFaramarz.Jalalian@Sun.COM 			}
43539733SFaramarz.Jalalian@Sun.COM 		} else
43549733SFaramarz.Jalalian@Sun.COM 			rw_exit(&hca->state_lock);
43559733SFaramarz.Jalalian@Sun.COM 	}
43569733SFaramarz.Jalalian@Sun.COM 	rw_exit(&rib_stat->hcas_list_lock);
43579733SFaramarz.Jalalian@Sun.COM 
43589733SFaramarz.Jalalian@Sun.COM 	/*
43599733SFaramarz.Jalalian@Sun.COM 	 * No existing connection found, establish a new connection.
43609733SFaramarz.Jalalian@Sun.COM 	 */
43619803SSiddheshwar.Mahesh@Sun.COM 	bzero(rpt, sizeof (rpcib_ping_t));
43629803SSiddheshwar.Mahesh@Sun.COM 
43639803SSiddheshwar.Mahesh@Sun.COM 	status = rib_ping_srv(addr_type, d_svcaddr, rpt);
43640Sstevel@tonic-gate 	if (status != RDMA_SUCCESS) {
43650Sstevel@tonic-gate 		return (RDMA_FAILED);
43660Sstevel@tonic-gate 	}
43679803SSiddheshwar.Mahesh@Sun.COM 	hca = rpt->hca;
43689803SSiddheshwar.Mahesh@Sun.COM 
43699803SSiddheshwar.Mahesh@Sun.COM 	if (rpt->srcip.family == AF_INET) {
43709803SSiddheshwar.Mahesh@Sun.COM 		s_addr_len = sizeof (rpt->srcip.un.ip4addr);
43719803SSiddheshwar.Mahesh@Sun.COM 		s_addr_buf = (char *)&rpt->srcip.un.ip4addr;
43729803SSiddheshwar.Mahesh@Sun.COM 	} else if (rpt->srcip.family == AF_INET6) {
43739803SSiddheshwar.Mahesh@Sun.COM 		s_addr_len = sizeof (rpt->srcip.un.ip6addr);
43749803SSiddheshwar.Mahesh@Sun.COM 		s_addr_buf = (char *)&rpt->srcip.un.ip6addr;
43759803SSiddheshwar.Mahesh@Sun.COM 	} else {
43769733SFaramarz.Jalalian@Sun.COM 		return (RDMA_FAILED);
43779803SSiddheshwar.Mahesh@Sun.COM 	}
43780Sstevel@tonic-gate 
43790Sstevel@tonic-gate 	/*
43800Sstevel@tonic-gate 	 * Channel to server doesn't exist yet, create one.
43810Sstevel@tonic-gate 	 */
43829733SFaramarz.Jalalian@Sun.COM 	if (rib_clnt_create_chan(hca, d_svcaddr, &qp) != RDMA_SUCCESS) {
43830Sstevel@tonic-gate 		return (RDMA_FAILED);
43840Sstevel@tonic-gate 	}
43850Sstevel@tonic-gate 	cn = qptoc(qp);
43860Sstevel@tonic-gate 	cn->c_state = C_CONN_PEND;
43870Sstevel@tonic-gate 	cn->c_ref = 1;
43880Sstevel@tonic-gate 
43899733SFaramarz.Jalalian@Sun.COM 	cn->c_laddr.buf = kmem_alloc(s_addr_len, KM_SLEEP);
43909733SFaramarz.Jalalian@Sun.COM 	bcopy(s_addr_buf, cn->c_laddr.buf, s_addr_len);
43919733SFaramarz.Jalalian@Sun.COM 	cn->c_laddr.len = cn->c_laddr.maxlen = s_addr_len;
43929733SFaramarz.Jalalian@Sun.COM 
439310326SSiddheshwar.Mahesh@Sun.COM 	if (rpt->srcip.family == AF_INET) {
439410326SSiddheshwar.Mahesh@Sun.COM 		cn->c_netid = kmem_zalloc(strlen(RIBNETID_TCP) + 1, KM_SLEEP);
439510326SSiddheshwar.Mahesh@Sun.COM 		(void) strcpy(cn->c_netid, RIBNETID_TCP);
439612553SKaren.Rochford@Sun.COM 
439712553SKaren.Rochford@Sun.COM 		cn->c_addrmask.len = cn->c_addrmask.maxlen =
439812553SKaren.Rochford@Sun.COM 		    sizeof (struct sockaddr_in);
439912553SKaren.Rochford@Sun.COM 		cn->c_addrmask.buf = kmem_zalloc(cn->c_addrmask.len, KM_SLEEP);
440012553SKaren.Rochford@Sun.COM 
440112553SKaren.Rochford@Sun.COM 		((struct sockaddr_in *)cn->c_addrmask.buf)->sin_addr.s_addr =
440212553SKaren.Rochford@Sun.COM 		    (uint32_t)~0;
440312553SKaren.Rochford@Sun.COM 		((struct sockaddr_in *)cn->c_addrmask.buf)->sin_family =
440412553SKaren.Rochford@Sun.COM 		    (ushort_t)~0;
440512553SKaren.Rochford@Sun.COM 
440610326SSiddheshwar.Mahesh@Sun.COM 	} else {
440710326SSiddheshwar.Mahesh@Sun.COM 		cn->c_netid = kmem_zalloc(strlen(RIBNETID_TCP6) + 1, KM_SLEEP);
440810326SSiddheshwar.Mahesh@Sun.COM 		(void) strcpy(cn->c_netid, RIBNETID_TCP6);
440912553SKaren.Rochford@Sun.COM 
441012553SKaren.Rochford@Sun.COM 		cn->c_addrmask.len = cn->c_addrmask.maxlen =
441112553SKaren.Rochford@Sun.COM 		    sizeof (struct sockaddr_in6);
441212553SKaren.Rochford@Sun.COM 		cn->c_addrmask.buf = kmem_zalloc(cn->c_addrmask.len, KM_SLEEP);
441312553SKaren.Rochford@Sun.COM 
441412553SKaren.Rochford@Sun.COM 		(void) memset(
441512553SKaren.Rochford@Sun.COM 		    &((struct sockaddr_in6 *)cn->c_addrmask.buf)->sin6_addr,
441612553SKaren.Rochford@Sun.COM 		    (uchar_t)~0, sizeof (struct in6_addr));
441712553SKaren.Rochford@Sun.COM 		((struct sockaddr_in6 *)cn->c_addrmask.buf)->sin6_family =
441812553SKaren.Rochford@Sun.COM 		    (sa_family_t)~0;
441910326SSiddheshwar.Mahesh@Sun.COM 	}
442010326SSiddheshwar.Mahesh@Sun.COM 
44210Sstevel@tonic-gate 	/*
44220Sstevel@tonic-gate 	 * Add to conn list.
44230Sstevel@tonic-gate 	 * We had given up the READER lock. In the time since then,
44240Sstevel@tonic-gate 	 * another thread might have created the connection we are
44250Sstevel@tonic-gate 	 * trying here. But for now, that is quiet alright - there
44260Sstevel@tonic-gate 	 * might be two connections between a pair of hosts instead
44270Sstevel@tonic-gate 	 * of one. If we really want to close that window,
44280Sstevel@tonic-gate 	 * then need to check the list after acquiring the
44290Sstevel@tonic-gate 	 * WRITER lock.
44300Sstevel@tonic-gate 	 */
44310Sstevel@tonic-gate 	(void) rib_add_connlist(cn, &hca->cl_conn_list);
44329803SSiddheshwar.Mahesh@Sun.COM 	status = rib_conn_to_srv(hca, qp, rpt);
44330Sstevel@tonic-gate 	mutex_enter(&cn->c_lock);
443411530SSiddheshwar.Mahesh@Sun.COM 
443511530SSiddheshwar.Mahesh@Sun.COM 	if (cn->c_flags & C_CLOSE_PENDING) {
443611530SSiddheshwar.Mahesh@Sun.COM 		/*
443711530SSiddheshwar.Mahesh@Sun.COM 		 * This handles a case where the module or
443811530SSiddheshwar.Mahesh@Sun.COM 		 * HCA detached in the time a connection is
443911530SSiddheshwar.Mahesh@Sun.COM 		 * established. In such a case close the
444011530SSiddheshwar.Mahesh@Sun.COM 		 * connection immediately if this is the
444111530SSiddheshwar.Mahesh@Sun.COM 		 * only reference.
444211530SSiddheshwar.Mahesh@Sun.COM 		 */
444311530SSiddheshwar.Mahesh@Sun.COM 		if (cn->c_ref == 1) {
444411530SSiddheshwar.Mahesh@Sun.COM 			cn->c_ref--;
444511530SSiddheshwar.Mahesh@Sun.COM 			cn->c_state = C_DISCONN_PEND;
444611530SSiddheshwar.Mahesh@Sun.COM 			mutex_exit(&cn->c_lock);
444711530SSiddheshwar.Mahesh@Sun.COM 			rib_conn_close((void *)cn);
444811530SSiddheshwar.Mahesh@Sun.COM 			return (RDMA_FAILED);
444911530SSiddheshwar.Mahesh@Sun.COM 		}
445011530SSiddheshwar.Mahesh@Sun.COM 
445111530SSiddheshwar.Mahesh@Sun.COM 		/*
445211530SSiddheshwar.Mahesh@Sun.COM 		 * Connection to be closed later when c_ref = 0
445311530SSiddheshwar.Mahesh@Sun.COM 		 */
445411530SSiddheshwar.Mahesh@Sun.COM 		status = RDMA_FAILED;
445511530SSiddheshwar.Mahesh@Sun.COM 	}
445611530SSiddheshwar.Mahesh@Sun.COM 
44570Sstevel@tonic-gate 	if (status == RDMA_SUCCESS) {
44580Sstevel@tonic-gate 		cn->c_state = C_CONNECTED;
44590Sstevel@tonic-gate 		*conn = cn;
44600Sstevel@tonic-gate 	} else {
44617387SRobert.Gordon@Sun.COM 		cn->c_state = C_ERROR_CONN;
44620Sstevel@tonic-gate 		cn->c_ref--;
44630Sstevel@tonic-gate 	}
446411530SSiddheshwar.Mahesh@Sun.COM 	cv_signal(&cn->c_cv);
44650Sstevel@tonic-gate 	mutex_exit(&cn->c_lock);
44660Sstevel@tonic-gate 	return (status);
44670Sstevel@tonic-gate }
44680Sstevel@tonic-gate 
44699723SSiddheshwar.Mahesh@Sun.COM static void
rib_conn_close(void * rarg)44709723SSiddheshwar.Mahesh@Sun.COM rib_conn_close(void *rarg)
44719723SSiddheshwar.Mahesh@Sun.COM {
44729723SSiddheshwar.Mahesh@Sun.COM 	CONN *conn = (CONN *)rarg;
44739723SSiddheshwar.Mahesh@Sun.COM 	rib_qp_t *qp = ctoqp(conn);
44749723SSiddheshwar.Mahesh@Sun.COM 
44759723SSiddheshwar.Mahesh@Sun.COM 	mutex_enter(&conn->c_lock);
44769723SSiddheshwar.Mahesh@Sun.COM 	if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) {
44779723SSiddheshwar.Mahesh@Sun.COM 
44789723SSiddheshwar.Mahesh@Sun.COM 		conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING);
447911530SSiddheshwar.Mahesh@Sun.COM 
44809723SSiddheshwar.Mahesh@Sun.COM 		/*
44819723SSiddheshwar.Mahesh@Sun.COM 		 * Live connection in CONNECTED state.
44829723SSiddheshwar.Mahesh@Sun.COM 		 */
44839723SSiddheshwar.Mahesh@Sun.COM 		if (conn->c_state == C_CONNECTED) {
44849723SSiddheshwar.Mahesh@Sun.COM 			conn->c_state = C_ERROR_CONN;
44859723SSiddheshwar.Mahesh@Sun.COM 		}
44869723SSiddheshwar.Mahesh@Sun.COM 		mutex_exit(&conn->c_lock);
44879723SSiddheshwar.Mahesh@Sun.COM 
44889723SSiddheshwar.Mahesh@Sun.COM 		rib_close_a_channel(conn);
44899723SSiddheshwar.Mahesh@Sun.COM 
44909723SSiddheshwar.Mahesh@Sun.COM 		mutex_enter(&conn->c_lock);
44919723SSiddheshwar.Mahesh@Sun.COM 		conn->c_flags &= ~C_CLOSE_PENDING;
44929723SSiddheshwar.Mahesh@Sun.COM 	}
44939723SSiddheshwar.Mahesh@Sun.COM 
44949723SSiddheshwar.Mahesh@Sun.COM 	mutex_exit(&conn->c_lock);
44959723SSiddheshwar.Mahesh@Sun.COM 
44969723SSiddheshwar.Mahesh@Sun.COM 	if (qp->mode == RIB_SERVER)
44979723SSiddheshwar.Mahesh@Sun.COM 		(void) rib_disconnect_channel(conn,
44989723SSiddheshwar.Mahesh@Sun.COM 		    &qp->hca->srv_conn_list);
44999723SSiddheshwar.Mahesh@Sun.COM 	else
45009723SSiddheshwar.Mahesh@Sun.COM 		(void) rib_disconnect_channel(conn,
45019723SSiddheshwar.Mahesh@Sun.COM 		    &qp->hca->cl_conn_list);
45029723SSiddheshwar.Mahesh@Sun.COM }
45039723SSiddheshwar.Mahesh@Sun.COM 
45049723SSiddheshwar.Mahesh@Sun.COM static void
rib_conn_timeout_call(void * carg)45059723SSiddheshwar.Mahesh@Sun.COM rib_conn_timeout_call(void *carg)
45069723SSiddheshwar.Mahesh@Sun.COM {
45079723SSiddheshwar.Mahesh@Sun.COM 	time_t idle_time;
45089723SSiddheshwar.Mahesh@Sun.COM 	CONN *conn = (CONN *)carg;
45099723SSiddheshwar.Mahesh@Sun.COM 	rib_hca_t *hca = ctoqp(conn)->hca;
45109723SSiddheshwar.Mahesh@Sun.COM 	int error;
45119723SSiddheshwar.Mahesh@Sun.COM 
45129723SSiddheshwar.Mahesh@Sun.COM 	mutex_enter(&conn->c_lock);
45139723SSiddheshwar.Mahesh@Sun.COM 	if ((conn->c_ref > 0) ||
45149723SSiddheshwar.Mahesh@Sun.COM 	    (conn->c_state == C_DISCONN_PEND)) {
45159723SSiddheshwar.Mahesh@Sun.COM 		conn->c_timeout = NULL;
45169723SSiddheshwar.Mahesh@Sun.COM 		mutex_exit(&conn->c_lock);
45179723SSiddheshwar.Mahesh@Sun.COM 		return;
45189723SSiddheshwar.Mahesh@Sun.COM 	}
45199723SSiddheshwar.Mahesh@Sun.COM 
45209723SSiddheshwar.Mahesh@Sun.COM 	idle_time = (gethrestime_sec() - conn->c_last_used);
45219723SSiddheshwar.Mahesh@Sun.COM 
45229723SSiddheshwar.Mahesh@Sun.COM 	if ((idle_time <= rib_conn_timeout) &&
45239723SSiddheshwar.Mahesh@Sun.COM 	    (conn->c_state != C_ERROR_CONN)) {
45249723SSiddheshwar.Mahesh@Sun.COM 		/*
45259723SSiddheshwar.Mahesh@Sun.COM 		 * There was activity after the last timeout.
45269723SSiddheshwar.Mahesh@Sun.COM 		 * Extend the conn life. Unless the conn is
45279723SSiddheshwar.Mahesh@Sun.COM 		 * already in error state.
45289723SSiddheshwar.Mahesh@Sun.COM 		 */
45299723SSiddheshwar.Mahesh@Sun.COM 		conn->c_timeout = timeout(rib_conn_timeout_call, conn,
45309723SSiddheshwar.Mahesh@Sun.COM 		    SEC_TO_TICK(rib_conn_timeout - idle_time));
45319723SSiddheshwar.Mahesh@Sun.COM 		mutex_exit(&conn->c_lock);
45329723SSiddheshwar.Mahesh@Sun.COM 		return;
45339723SSiddheshwar.Mahesh@Sun.COM 	}
45349723SSiddheshwar.Mahesh@Sun.COM 
45359723SSiddheshwar.Mahesh@Sun.COM 	error = ddi_taskq_dispatch(hca->cleanup_helper, rib_conn_close,
45369723SSiddheshwar.Mahesh@Sun.COM 	    (void *)conn, DDI_NOSLEEP);
45379723SSiddheshwar.Mahesh@Sun.COM 
45389723SSiddheshwar.Mahesh@Sun.COM 	/*
45399723SSiddheshwar.Mahesh@Sun.COM 	 * If taskq dispatch fails above, then reset the timeout
45409723SSiddheshwar.Mahesh@Sun.COM 	 * to try again after 10 secs.
45419723SSiddheshwar.Mahesh@Sun.COM 	 */
45429723SSiddheshwar.Mahesh@Sun.COM 
45439723SSiddheshwar.Mahesh@Sun.COM 	if (error != DDI_SUCCESS) {
45449723SSiddheshwar.Mahesh@Sun.COM 		conn->c_timeout = timeout(rib_conn_timeout_call, conn,
45459723SSiddheshwar.Mahesh@Sun.COM 		    SEC_TO_TICK(RDMA_CONN_REAP_RETRY));
45469723SSiddheshwar.Mahesh@Sun.COM 		mutex_exit(&conn->c_lock);
45479723SSiddheshwar.Mahesh@Sun.COM 		return;
45489723SSiddheshwar.Mahesh@Sun.COM 	}
45499723SSiddheshwar.Mahesh@Sun.COM 
45509723SSiddheshwar.Mahesh@Sun.COM 	conn->c_state = C_DISCONN_PEND;
45519723SSiddheshwar.Mahesh@Sun.COM 	mutex_exit(&conn->c_lock);
45529723SSiddheshwar.Mahesh@Sun.COM }
45539723SSiddheshwar.Mahesh@Sun.COM 
45540Sstevel@tonic-gate static rdma_stat
rib_conn_release(CONN * conn)45550Sstevel@tonic-gate rib_conn_release(CONN *conn)
45560Sstevel@tonic-gate {
45570Sstevel@tonic-gate 	mutex_enter(&conn->c_lock);
455811530SSiddheshwar.Mahesh@Sun.COM 	return (rib_conn_release_locked(conn));
455911530SSiddheshwar.Mahesh@Sun.COM }
456011530SSiddheshwar.Mahesh@Sun.COM 
456111530SSiddheshwar.Mahesh@Sun.COM /*
456211530SSiddheshwar.Mahesh@Sun.COM  * Expects conn->c_lock to be held on entry.
456311530SSiddheshwar.Mahesh@Sun.COM  * c_lock released on return
456411530SSiddheshwar.Mahesh@Sun.COM  */
456511530SSiddheshwar.Mahesh@Sun.COM static rdma_stat
rib_conn_release_locked(CONN * conn)456611530SSiddheshwar.Mahesh@Sun.COM rib_conn_release_locked(CONN *conn)
456711530SSiddheshwar.Mahesh@Sun.COM {
45680Sstevel@tonic-gate 	conn->c_ref--;
45690Sstevel@tonic-gate 
45709723SSiddheshwar.Mahesh@Sun.COM 	conn->c_last_used = gethrestime_sec();
45719723SSiddheshwar.Mahesh@Sun.COM 	if (conn->c_ref > 0) {
45729723SSiddheshwar.Mahesh@Sun.COM 		mutex_exit(&conn->c_lock);
45739723SSiddheshwar.Mahesh@Sun.COM 		return (RDMA_SUCCESS);
45749723SSiddheshwar.Mahesh@Sun.COM 	}
45759723SSiddheshwar.Mahesh@Sun.COM 
45760Sstevel@tonic-gate 	/*
45777387SRobert.Gordon@Sun.COM 	 * If a conn is C_ERROR_CONN, close the channel.
45780Sstevel@tonic-gate 	 */
45797387SRobert.Gordon@Sun.COM 	if (conn->c_ref == 0 && conn->c_state == C_ERROR_CONN) {
45800Sstevel@tonic-gate 		conn->c_state = C_DISCONN_PEND;
45810Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
45829723SSiddheshwar.Mahesh@Sun.COM 		rib_conn_close((void *)conn);
45830Sstevel@tonic-gate 		return (RDMA_SUCCESS);
45840Sstevel@tonic-gate 	}
45859723SSiddheshwar.Mahesh@Sun.COM 
45869723SSiddheshwar.Mahesh@Sun.COM 	/*
45879723SSiddheshwar.Mahesh@Sun.COM 	 * c_ref == 0, set a timeout for conn release
45889723SSiddheshwar.Mahesh@Sun.COM 	 */
45899723SSiddheshwar.Mahesh@Sun.COM 
45909723SSiddheshwar.Mahesh@Sun.COM 	if (conn->c_timeout == NULL) {
45919723SSiddheshwar.Mahesh@Sun.COM 		conn->c_timeout = timeout(rib_conn_timeout_call, conn,
45929723SSiddheshwar.Mahesh@Sun.COM 		    SEC_TO_TICK(rib_conn_timeout));
45939723SSiddheshwar.Mahesh@Sun.COM 	}
45949723SSiddheshwar.Mahesh@Sun.COM 
45950Sstevel@tonic-gate 	mutex_exit(&conn->c_lock);
45960Sstevel@tonic-gate 	return (RDMA_SUCCESS);
45970Sstevel@tonic-gate }
45980Sstevel@tonic-gate 
45990Sstevel@tonic-gate /*
46000Sstevel@tonic-gate  * Add at front of list
46010Sstevel@tonic-gate  */
46020Sstevel@tonic-gate static struct rdma_done_list *
rdma_done_add(rib_qp_t * qp,uint32_t xid)46030Sstevel@tonic-gate rdma_done_add(rib_qp_t *qp, uint32_t xid)
46040Sstevel@tonic-gate {
46050Sstevel@tonic-gate 	struct rdma_done_list *rd;
46060Sstevel@tonic-gate 
46070Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
46080Sstevel@tonic-gate 
46090Sstevel@tonic-gate 	rd = kmem_alloc(sizeof (*rd), KM_SLEEP);
46100Sstevel@tonic-gate 	rd->xid = xid;
46110Sstevel@tonic-gate 	cv_init(&rd->rdma_done_cv, NULL, CV_DEFAULT, NULL);
46120Sstevel@tonic-gate 
46130Sstevel@tonic-gate 	rd->prev = NULL;
46140Sstevel@tonic-gate 	rd->next = qp->rdlist;
46150Sstevel@tonic-gate 	if (qp->rdlist != NULL)
46160Sstevel@tonic-gate 		qp->rdlist->prev = rd;
46170Sstevel@tonic-gate 	qp->rdlist = rd;
46180Sstevel@tonic-gate 
46190Sstevel@tonic-gate 	return (rd);
46200Sstevel@tonic-gate }
46210Sstevel@tonic-gate 
46220Sstevel@tonic-gate static void
rdma_done_rm(rib_qp_t * qp,struct rdma_done_list * rd)46230Sstevel@tonic-gate rdma_done_rm(rib_qp_t *qp, struct rdma_done_list *rd)
46240Sstevel@tonic-gate {
46250Sstevel@tonic-gate 	struct rdma_done_list *r;
46260Sstevel@tonic-gate 
46270Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
46280Sstevel@tonic-gate 
46290Sstevel@tonic-gate 	r = rd->next;
46300Sstevel@tonic-gate 	if (r != NULL) {
46310Sstevel@tonic-gate 		r->prev = rd->prev;
46320Sstevel@tonic-gate 	}
46330Sstevel@tonic-gate 
46340Sstevel@tonic-gate 	r = rd->prev;
46350Sstevel@tonic-gate 	if (r != NULL) {
46360Sstevel@tonic-gate 		r->next = rd->next;
46370Sstevel@tonic-gate 	} else {
46380Sstevel@tonic-gate 		qp->rdlist = rd->next;
46390Sstevel@tonic-gate 	}
46400Sstevel@tonic-gate 
46410Sstevel@tonic-gate 	cv_destroy(&rd->rdma_done_cv);
46420Sstevel@tonic-gate 	kmem_free(rd, sizeof (*rd));
46430Sstevel@tonic-gate }
46440Sstevel@tonic-gate 
46450Sstevel@tonic-gate static void
rdma_done_rem_list(rib_qp_t * qp)46460Sstevel@tonic-gate rdma_done_rem_list(rib_qp_t *qp)
46470Sstevel@tonic-gate {
46480Sstevel@tonic-gate 	struct rdma_done_list	*r, *n;
46490Sstevel@tonic-gate 
46500Sstevel@tonic-gate 	mutex_enter(&qp->rdlist_lock);
46510Sstevel@tonic-gate 	for (r = qp->rdlist; r != NULL; r = n) {
46520Sstevel@tonic-gate 		n = r->next;
46530Sstevel@tonic-gate 		rdma_done_rm(qp, r);
46540Sstevel@tonic-gate 	}
46550Sstevel@tonic-gate 	mutex_exit(&qp->rdlist_lock);
46560Sstevel@tonic-gate }
46570Sstevel@tonic-gate 
46580Sstevel@tonic-gate static void
rdma_done_notify(rib_qp_t * qp,uint32_t xid)46590Sstevel@tonic-gate rdma_done_notify(rib_qp_t *qp, uint32_t xid)
46600Sstevel@tonic-gate {
46610Sstevel@tonic-gate 	struct rdma_done_list *r = qp->rdlist;
46620Sstevel@tonic-gate 
46630Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&qp->rdlist_lock));
46640Sstevel@tonic-gate 
46650Sstevel@tonic-gate 	while (r) {
46660Sstevel@tonic-gate 		if (r->xid == xid) {
46670Sstevel@tonic-gate 			cv_signal(&r->rdma_done_cv);
46680Sstevel@tonic-gate 			return;
46690Sstevel@tonic-gate 		} else {
46700Sstevel@tonic-gate 			r = r->next;
46710Sstevel@tonic-gate 		}
46720Sstevel@tonic-gate 	}
46737387SRobert.Gordon@Sun.COM 	DTRACE_PROBE1(rpcib__i__donenotify__nomatchxid,
46747387SRobert.Gordon@Sun.COM 	    int, xid);
46750Sstevel@tonic-gate }
46760Sstevel@tonic-gate 
46779723SSiddheshwar.Mahesh@Sun.COM /*
46789723SSiddheshwar.Mahesh@Sun.COM  * Expects conn->c_lock to be held by the caller.
46799723SSiddheshwar.Mahesh@Sun.COM  */
46809723SSiddheshwar.Mahesh@Sun.COM 
46819723SSiddheshwar.Mahesh@Sun.COM static void
rib_close_a_channel(CONN * conn)46829723SSiddheshwar.Mahesh@Sun.COM rib_close_a_channel(CONN *conn)
46839723SSiddheshwar.Mahesh@Sun.COM {
46849723SSiddheshwar.Mahesh@Sun.COM 	rib_qp_t	*qp;
46859723SSiddheshwar.Mahesh@Sun.COM 	qp = ctoqp(conn);
46869723SSiddheshwar.Mahesh@Sun.COM 
46879723SSiddheshwar.Mahesh@Sun.COM 	if (qp->qp_hdl == NULL) {
46889723SSiddheshwar.Mahesh@Sun.COM 		/* channel already freed */
46899723SSiddheshwar.Mahesh@Sun.COM 		return;
46909723SSiddheshwar.Mahesh@Sun.COM 	}
46919723SSiddheshwar.Mahesh@Sun.COM 
46929723SSiddheshwar.Mahesh@Sun.COM 	/*
46939723SSiddheshwar.Mahesh@Sun.COM 	 * Call ibt_close_rc_channel in blocking mode
46949723SSiddheshwar.Mahesh@Sun.COM 	 * with no callbacks.
46959723SSiddheshwar.Mahesh@Sun.COM 	 */
46969723SSiddheshwar.Mahesh@Sun.COM 	(void) ibt_close_rc_channel(qp->qp_hdl, IBT_NOCALLBACKS,
46979723SSiddheshwar.Mahesh@Sun.COM 	    NULL, 0, NULL, NULL, 0);
46989723SSiddheshwar.Mahesh@Sun.COM }
46990Sstevel@tonic-gate 
47000Sstevel@tonic-gate /*
47010Sstevel@tonic-gate  * Goes through all connections and closes the channel
47020Sstevel@tonic-gate  * This will cause all the WRs on those channels to be
47030Sstevel@tonic-gate  * flushed.
47040Sstevel@tonic-gate  */
47050Sstevel@tonic-gate static void
rib_close_channels(rib_conn_list_t * connlist)47060Sstevel@tonic-gate rib_close_channels(rib_conn_list_t *connlist)
47070Sstevel@tonic-gate {
47089723SSiddheshwar.Mahesh@Sun.COM 	CONN 		*conn, *tmp;
47090Sstevel@tonic-gate 
47100Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
47110Sstevel@tonic-gate 	conn = connlist->conn_hd;
47120Sstevel@tonic-gate 	while (conn != NULL) {
47130Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
47149723SSiddheshwar.Mahesh@Sun.COM 		tmp = conn->c_next;
47159723SSiddheshwar.Mahesh@Sun.COM 		if (!(conn->c_flags & C_CLOSE_NOTNEEDED)) {
47169723SSiddheshwar.Mahesh@Sun.COM 
471711530SSiddheshwar.Mahesh@Sun.COM 			if (conn->c_state == C_CONN_PEND) {
471811530SSiddheshwar.Mahesh@Sun.COM 				conn->c_flags |= C_CLOSE_PENDING;
471911530SSiddheshwar.Mahesh@Sun.COM 				goto next;
472011530SSiddheshwar.Mahesh@Sun.COM 			}
472111530SSiddheshwar.Mahesh@Sun.COM 
47229723SSiddheshwar.Mahesh@Sun.COM 			conn->c_flags |= (C_CLOSE_NOTNEEDED | C_CLOSE_PENDING);
47239723SSiddheshwar.Mahesh@Sun.COM 
47240Sstevel@tonic-gate 			/*
47250Sstevel@tonic-gate 			 * Live connection in CONNECTED state.
47260Sstevel@tonic-gate 			 */
47279723SSiddheshwar.Mahesh@Sun.COM 			if (conn->c_state == C_CONNECTED)
47289723SSiddheshwar.Mahesh@Sun.COM 				conn->c_state = C_ERROR_CONN;
47299723SSiddheshwar.Mahesh@Sun.COM 			mutex_exit(&conn->c_lock);
47309723SSiddheshwar.Mahesh@Sun.COM 
47319723SSiddheshwar.Mahesh@Sun.COM 			rib_close_a_channel(conn);
47329723SSiddheshwar.Mahesh@Sun.COM 
47339723SSiddheshwar.Mahesh@Sun.COM 			mutex_enter(&conn->c_lock);
47349723SSiddheshwar.Mahesh@Sun.COM 			conn->c_flags &= ~C_CLOSE_PENDING;
47359723SSiddheshwar.Mahesh@Sun.COM 			/* Signal a pending rib_disconnect_channel() */
47369723SSiddheshwar.Mahesh@Sun.COM 			cv_signal(&conn->c_cv);
47370Sstevel@tonic-gate 		}
473811530SSiddheshwar.Mahesh@Sun.COM next:
47390Sstevel@tonic-gate 		mutex_exit(&conn->c_lock);
47409723SSiddheshwar.Mahesh@Sun.COM 		conn = tmp;
47410Sstevel@tonic-gate 	}
47420Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
47430Sstevel@tonic-gate }
47440Sstevel@tonic-gate 
47450Sstevel@tonic-gate /*
47460Sstevel@tonic-gate  * Frees up all connections that are no longer being referenced
47470Sstevel@tonic-gate  */
47480Sstevel@tonic-gate static void
rib_purge_connlist(rib_conn_list_t * connlist)47490Sstevel@tonic-gate rib_purge_connlist(rib_conn_list_t *connlist)
47500Sstevel@tonic-gate {
47510Sstevel@tonic-gate 	CONN 		*conn;
47520Sstevel@tonic-gate 
47530Sstevel@tonic-gate top:
47540Sstevel@tonic-gate 	rw_enter(&connlist->conn_lock, RW_READER);
47550Sstevel@tonic-gate 	conn = connlist->conn_hd;
47560Sstevel@tonic-gate 	while (conn != NULL) {
47570Sstevel@tonic-gate 		mutex_enter(&conn->c_lock);
47580Sstevel@tonic-gate 
47590Sstevel@tonic-gate 		/*
47600Sstevel@tonic-gate 		 * At this point connection is either in ERROR
47610Sstevel@tonic-gate 		 * or DISCONN_PEND state. If in DISCONN_PEND state
47620Sstevel@tonic-gate 		 * then some other thread is culling that connection.
47630Sstevel@tonic-gate 		 * If not and if c_ref is 0, then destroy the connection.
47640Sstevel@tonic-gate 		 */
47650Sstevel@tonic-gate 		if (conn->c_ref == 0 &&
47667387SRobert.Gordon@Sun.COM 		    conn->c_state != C_DISCONN_PEND) {
47670Sstevel@tonic-gate 			/*
47680Sstevel@tonic-gate 			 * Cull the connection
47690Sstevel@tonic-gate 			 */
47700Sstevel@tonic-gate 			conn->c_state = C_DISCONN_PEND;
47710Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
47720Sstevel@tonic-gate 			rw_exit(&connlist->conn_lock);
47730Sstevel@tonic-gate 			(void) rib_disconnect_channel(conn, connlist);
47740Sstevel@tonic-gate 			goto top;
47750Sstevel@tonic-gate 		} else {
47760Sstevel@tonic-gate 			/*
47770Sstevel@tonic-gate 			 * conn disconnect already scheduled or will
47780Sstevel@tonic-gate 			 * happen from conn_release when c_ref drops to 0.
47790Sstevel@tonic-gate 			 */
47800Sstevel@tonic-gate 			mutex_exit(&conn->c_lock);
47810Sstevel@tonic-gate 		}
47820Sstevel@tonic-gate 		conn = conn->c_next;
47830Sstevel@tonic-gate 	}
47840Sstevel@tonic-gate 	rw_exit(&connlist->conn_lock);
47850Sstevel@tonic-gate 
47860Sstevel@tonic-gate 	/*
47870Sstevel@tonic-gate 	 * At this point, only connections with c_ref != 0 are on the list
47880Sstevel@tonic-gate 	 */
47890Sstevel@tonic-gate }
47900Sstevel@tonic-gate 
47910Sstevel@tonic-gate /*
47929723SSiddheshwar.Mahesh@Sun.COM  * Free all the HCA resources and close
47939723SSiddheshwar.Mahesh@Sun.COM  * the hca.
47949723SSiddheshwar.Mahesh@Sun.COM  */
47959723SSiddheshwar.Mahesh@Sun.COM 
47969723SSiddheshwar.Mahesh@Sun.COM static void
rib_free_hca(rib_hca_t * hca)47979723SSiddheshwar.Mahesh@Sun.COM rib_free_hca(rib_hca_t *hca)
47989723SSiddheshwar.Mahesh@Sun.COM {
47999723SSiddheshwar.Mahesh@Sun.COM 	(void) ibt_free_cq(hca->clnt_rcq->rib_cq_hdl);
48009723SSiddheshwar.Mahesh@Sun.COM 	(void) ibt_free_cq(hca->clnt_scq->rib_cq_hdl);
48019723SSiddheshwar.Mahesh@Sun.COM 	(void) ibt_free_cq(hca->svc_rcq->rib_cq_hdl);
48029723SSiddheshwar.Mahesh@Sun.COM 	(void) ibt_free_cq(hca->svc_scq->rib_cq_hdl);
48039723SSiddheshwar.Mahesh@Sun.COM 
48049723SSiddheshwar.Mahesh@Sun.COM 	kmem_free(hca->clnt_rcq, sizeof (rib_cq_t));
48059723SSiddheshwar.Mahesh@Sun.COM 	kmem_free(hca->clnt_scq, sizeof (rib_cq_t));
48069723SSiddheshwar.Mahesh@Sun.COM 	kmem_free(hca->svc_rcq, sizeof (rib_cq_t));
48079723SSiddheshwar.Mahesh@Sun.COM 	kmem_free(hca->svc_scq, sizeof (rib_cq_t));
48089723SSiddheshwar.Mahesh@Sun.COM 
48099723SSiddheshwar.Mahesh@Sun.COM 	rib_rbufpool_destroy(hca, RECV_BUFFER);
48109723SSiddheshwar.Mahesh@Sun.COM 	rib_rbufpool_destroy(hca, SEND_BUFFER);
48119723SSiddheshwar.Mahesh@Sun.COM 	rib_destroy_cache(hca);
48129723SSiddheshwar.Mahesh@Sun.COM 	if (rib_mod.rdma_count == 0)
481311387SSurya.Prakki@Sun.COM 		(void) rdma_unregister_mod(&rib_mod);
48149723SSiddheshwar.Mahesh@Sun.COM 	(void) ibt_free_pd(hca->hca_hdl, hca->pd_hdl);
48159723SSiddheshwar.Mahesh@Sun.COM 	(void) ibt_close_hca(hca->hca_hdl);
48169723SSiddheshwar.Mahesh@Sun.COM 	hca->hca_hdl = NULL;
48179723SSiddheshwar.Mahesh@Sun.COM }
48189723SSiddheshwar.Mahesh@Sun.COM 
48199733SFaramarz.Jalalian@Sun.COM 
48200Sstevel@tonic-gate static void
rib_stop_hca_services(rib_hca_t * hca)48219733SFaramarz.Jalalian@Sun.COM rib_stop_hca_services(rib_hca_t *hca)
48220Sstevel@tonic-gate {
48230Sstevel@tonic-gate 	rib_stop_services(hca);
48240Sstevel@tonic-gate 	rib_close_channels(&hca->cl_conn_list);
48250Sstevel@tonic-gate 	rib_close_channels(&hca->srv_conn_list);
48268695SRajkumar.Sivaprakasam@Sun.COM 
48278695SRajkumar.Sivaprakasam@Sun.COM 	rib_purge_connlist(&hca->cl_conn_list);
48288695SRajkumar.Sivaprakasam@Sun.COM 	rib_purge_connlist(&hca->srv_conn_list);
48298695SRajkumar.Sivaprakasam@Sun.COM 
48309733SFaramarz.Jalalian@Sun.COM 	if ((rib_stat->hcas_list == NULL) && stats_enabled) {
48318695SRajkumar.Sivaprakasam@Sun.COM 		kstat_delete_byname_zone("unix", 0, "rpcib_cache",
48328695SRajkumar.Sivaprakasam@Sun.COM 		    GLOBAL_ZONEID);
48339733SFaramarz.Jalalian@Sun.COM 		stats_enabled = FALSE;
48348695SRajkumar.Sivaprakasam@Sun.COM 	}
48350Sstevel@tonic-gate 
48360Sstevel@tonic-gate 	rw_enter(&hca->srv_conn_list.conn_lock, RW_READER);
48370Sstevel@tonic-gate 	rw_enter(&hca->cl_conn_list.conn_lock, RW_READER);
48380Sstevel@tonic-gate 	if (hca->srv_conn_list.conn_hd == NULL &&
48397387SRobert.Gordon@Sun.COM 	    hca->cl_conn_list.conn_hd == NULL) {
48400Sstevel@tonic-gate 		/*
48410Sstevel@tonic-gate 		 * conn_lists are NULL, so destroy
48420Sstevel@tonic-gate 		 * buffers, close hca and be done.
48430Sstevel@tonic-gate 		 */
48449723SSiddheshwar.Mahesh@Sun.COM 		rib_free_hca(hca);
48450Sstevel@tonic-gate 	}
48460Sstevel@tonic-gate 	rw_exit(&hca->cl_conn_list.conn_lock);
48470Sstevel@tonic-gate 	rw_exit(&hca->srv_conn_list.conn_lock);
48480Sstevel@tonic-gate 
48490Sstevel@tonic-gate 	if (hca->hca_hdl != NULL) {
48500Sstevel@tonic-gate 		mutex_enter(&hca->inuse_lock);
48510Sstevel@tonic-gate 		while (hca->inuse)
48520Sstevel@tonic-gate 			cv_wait(&hca->cb_cv, &hca->inuse_lock);
48530Sstevel@tonic-gate 		mutex_exit(&hca->inuse_lock);
48548695SRajkumar.Sivaprakasam@Sun.COM 
48559723SSiddheshwar.Mahesh@Sun.COM 		rib_free_hca(hca);
48569723SSiddheshwar.Mahesh@Sun.COM 	}
48579733SFaramarz.Jalalian@Sun.COM 	rw_destroy(&hca->bound_services_lock);
48589723SSiddheshwar.Mahesh@Sun.COM 
48599723SSiddheshwar.Mahesh@Sun.COM 	if (hca->cleanup_helper != NULL) {
48609723SSiddheshwar.Mahesh@Sun.COM 		ddi_taskq_destroy(hca->cleanup_helper);
48619723SSiddheshwar.Mahesh@Sun.COM 		hca->cleanup_helper = NULL;
48620Sstevel@tonic-gate 	}
48630Sstevel@tonic-gate }
48647387SRobert.Gordon@Sun.COM 
48659733SFaramarz.Jalalian@Sun.COM /*
48669733SFaramarz.Jalalian@Sun.COM  * Cleans and closes up all uses of the HCA
48679733SFaramarz.Jalalian@Sun.COM  */
48689733SFaramarz.Jalalian@Sun.COM static void
rib_detach_hca(ibt_hca_hdl_t hca_hdl)486911530SSiddheshwar.Mahesh@Sun.COM rib_detach_hca(ibt_hca_hdl_t hca_hdl)
48709733SFaramarz.Jalalian@Sun.COM {
487111530SSiddheshwar.Mahesh@Sun.COM 	rib_hca_t *hca = NULL;
48729733SFaramarz.Jalalian@Sun.COM 	rib_hca_t **hcap;
48739733SFaramarz.Jalalian@Sun.COM 
487411530SSiddheshwar.Mahesh@Sun.COM 	rw_enter(&rib_stat->hcas_list_lock, RW_WRITER);
487511530SSiddheshwar.Mahesh@Sun.COM 	for (hcap = &rib_stat->hcas_list; *hcap; hcap = &(*hcap)->next) {
487611530SSiddheshwar.Mahesh@Sun.COM 		hca = *hcap;
487711530SSiddheshwar.Mahesh@Sun.COM 		rw_enter(&hca->state_lock, RW_WRITER);
487811530SSiddheshwar.Mahesh@Sun.COM 		if (hca->hca_hdl == hca_hdl) {
487911530SSiddheshwar.Mahesh@Sun.COM 			/*
488011530SSiddheshwar.Mahesh@Sun.COM 			 * Mark as detached and remove from
488111530SSiddheshwar.Mahesh@Sun.COM 			 * hca list.
488211530SSiddheshwar.Mahesh@Sun.COM 			 */
488311530SSiddheshwar.Mahesh@Sun.COM 			hca->state = HCA_DETACHED;
488411530SSiddheshwar.Mahesh@Sun.COM 			*hcap = hca->next;
488511530SSiddheshwar.Mahesh@Sun.COM 			rib_stat->nhca_inited--;
488611530SSiddheshwar.Mahesh@Sun.COM 			rib_mod.rdma_count--;
488711530SSiddheshwar.Mahesh@Sun.COM 			rw_exit(&hca->state_lock);
488811530SSiddheshwar.Mahesh@Sun.COM 			break;
488911530SSiddheshwar.Mahesh@Sun.COM 		}
489011530SSiddheshwar.Mahesh@Sun.COM 		rw_exit(&hca->state_lock);
489111530SSiddheshwar.Mahesh@Sun.COM 	}
489211530SSiddheshwar.Mahesh@Sun.COM 	rw_exit(&rib_stat->hcas_list_lock);
489311530SSiddheshwar.Mahesh@Sun.COM 
489411530SSiddheshwar.Mahesh@Sun.COM 	if (hca == NULL)
489511530SSiddheshwar.Mahesh@Sun.COM 		return;
489611530SSiddheshwar.Mahesh@Sun.COM 	ASSERT(hca->hca_hdl == hca_hdl);
489711530SSiddheshwar.Mahesh@Sun.COM 
48989733SFaramarz.Jalalian@Sun.COM 	/*
48999733SFaramarz.Jalalian@Sun.COM 	 * Stop all services on the HCA
49009733SFaramarz.Jalalian@Sun.COM 	 * Go through cl_conn_list and close all rc_channels
49019733SFaramarz.Jalalian@Sun.COM 	 * Go through svr_conn_list and close all rc_channels
49029733SFaramarz.Jalalian@Sun.COM 	 * Free connections whose c_ref has dropped to 0
49039733SFaramarz.Jalalian@Sun.COM 	 * Destroy all CQs
49049733SFaramarz.Jalalian@Sun.COM 	 * Deregister and released all buffer pool memory after all
49059733SFaramarz.Jalalian@Sun.COM 	 * connections are destroyed
49069733SFaramarz.Jalalian@Sun.COM 	 * Free the protection domain
49079733SFaramarz.Jalalian@Sun.COM 	 * ibt_close_hca()
49089733SFaramarz.Jalalian@Sun.COM 	 */
49099733SFaramarz.Jalalian@Sun.COM 	rib_stop_hca_services(hca);
49109733SFaramarz.Jalalian@Sun.COM 
49119733SFaramarz.Jalalian@Sun.COM 	kmem_free(hca, sizeof (*hca));
49129733SFaramarz.Jalalian@Sun.COM }
49139733SFaramarz.Jalalian@Sun.COM 
49147387SRobert.Gordon@Sun.COM static void
rib_server_side_cache_reclaim(void * argp)49157387SRobert.Gordon@Sun.COM rib_server_side_cache_reclaim(void *argp)
49167387SRobert.Gordon@Sun.COM {
49177387SRobert.Gordon@Sun.COM 	cache_avl_struct_t    *rcas;
49187387SRobert.Gordon@Sun.COM 	rib_lrc_entry_t		*rb;
49197387SRobert.Gordon@Sun.COM 	rib_hca_t *hca = (rib_hca_t *)argp;
49207387SRobert.Gordon@Sun.COM 
49217387SRobert.Gordon@Sun.COM 	rw_enter(&hca->avl_rw_lock, RW_WRITER);
49227387SRobert.Gordon@Sun.COM 	rcas = avl_first(&hca->avl_tree);
49237387SRobert.Gordon@Sun.COM 	if (rcas != NULL)
49247387SRobert.Gordon@Sun.COM 		avl_remove(&hca->avl_tree, rcas);
49257387SRobert.Gordon@Sun.COM 
49267387SRobert.Gordon@Sun.COM 	while (rcas != NULL) {
49277387SRobert.Gordon@Sun.COM 		while (rcas->r.forw != &rcas->r) {
49287387SRobert.Gordon@Sun.COM 			rcas->elements--;
49297387SRobert.Gordon@Sun.COM 			rb = rcas->r.forw;
49307387SRobert.Gordon@Sun.COM 			remque(rb);
49317387SRobert.Gordon@Sun.COM 			if (rb->registered)
49327387SRobert.Gordon@Sun.COM 				(void) rib_deregistermem_via_hca(hca,
49337387SRobert.Gordon@Sun.COM 				    rb->lrc_buf, rb->lrc_mhandle);
49349733SFaramarz.Jalalian@Sun.COM 
49359733SFaramarz.Jalalian@Sun.COM 			hca->cache_allocation -= rb->lrc_len;
49367387SRobert.Gordon@Sun.COM 			kmem_free(rb->lrc_buf, rb->lrc_len);
49377387SRobert.Gordon@Sun.COM 			kmem_free(rb, sizeof (rib_lrc_entry_t));
49387387SRobert.Gordon@Sun.COM 		}
49397387SRobert.Gordon@Sun.COM 		mutex_destroy(&rcas->node_lock);
49407387SRobert.Gordon@Sun.COM 		kmem_cache_free(hca->server_side_cache, rcas);
49417387SRobert.Gordon@Sun.COM 		rcas = avl_first(&hca->avl_tree);
49427387SRobert.Gordon@Sun.COM 		if (rcas != NULL)
49437387SRobert.Gordon@Sun.COM 			avl_remove(&hca->avl_tree, rcas);
49447387SRobert.Gordon@Sun.COM 	}
49457387SRobert.Gordon@Sun.COM 	rw_exit(&hca->avl_rw_lock);
49467387SRobert.Gordon@Sun.COM }
49477387SRobert.Gordon@Sun.COM 
49487387SRobert.Gordon@Sun.COM static void
rib_server_side_cache_cleanup(void * argp)49497387SRobert.Gordon@Sun.COM rib_server_side_cache_cleanup(void *argp)
49507387SRobert.Gordon@Sun.COM {
49517387SRobert.Gordon@Sun.COM 	cache_avl_struct_t    *rcas;
49527387SRobert.Gordon@Sun.COM 	rib_lrc_entry_t		*rb;
49537387SRobert.Gordon@Sun.COM 	rib_hca_t *hca = (rib_hca_t *)argp;
49547387SRobert.Gordon@Sun.COM 
49559733SFaramarz.Jalalian@Sun.COM 	mutex_enter(&hca->cache_allocation_lock);
49569733SFaramarz.Jalalian@Sun.COM 	if (hca->cache_allocation < cache_limit) {
49579733SFaramarz.Jalalian@Sun.COM 		mutex_exit(&hca->cache_allocation_lock);
49587387SRobert.Gordon@Sun.COM 		return;
49597387SRobert.Gordon@Sun.COM 	}
49609733SFaramarz.Jalalian@Sun.COM 	mutex_exit(&hca->cache_allocation_lock);
49617387SRobert.Gordon@Sun.COM 
49627387SRobert.Gordon@Sun.COM 	rw_enter(&hca->avl_rw_lock, RW_WRITER);
49637387SRobert.Gordon@Sun.COM 	rcas = avl_last(&hca->avl_tree);
49647387SRobert.Gordon@Sun.COM 	if (rcas != NULL)
49657387SRobert.Gordon@Sun.COM 		avl_remove(&hca->avl_tree, rcas);
49667387SRobert.Gordon@Sun.COM 
49677387SRobert.Gordon@Sun.COM 	while (rcas != NULL) {
49687387SRobert.Gordon@Sun.COM 		while (rcas->r.forw != &rcas->r) {
49697387SRobert.Gordon@Sun.COM 			rcas->elements--;
49707387SRobert.Gordon@Sun.COM 			rb = rcas->r.forw;
49717387SRobert.Gordon@Sun.COM 			remque(rb);
49727387SRobert.Gordon@Sun.COM 			if (rb->registered)
49737387SRobert.Gordon@Sun.COM 				(void) rib_deregistermem_via_hca(hca,
49747387SRobert.Gordon@Sun.COM 				    rb->lrc_buf, rb->lrc_mhandle);
49759733SFaramarz.Jalalian@Sun.COM 
49769733SFaramarz.Jalalian@Sun.COM 			hca->cache_allocation -= rb->lrc_len;
49779733SFaramarz.Jalalian@Sun.COM 
49787387SRobert.Gordon@Sun.COM 			kmem_free(rb->lrc_buf, rb->lrc_len);
49797387SRobert.Gordon@Sun.COM 			kmem_free(rb, sizeof (rib_lrc_entry_t));
49807387SRobert.Gordon@Sun.COM 		}
49817387SRobert.Gordon@Sun.COM 		mutex_destroy(&rcas->node_lock);
49828695SRajkumar.Sivaprakasam@Sun.COM 		if (hca->server_side_cache) {
49838695SRajkumar.Sivaprakasam@Sun.COM 			kmem_cache_free(hca->server_side_cache, rcas);
49848695SRajkumar.Sivaprakasam@Sun.COM 		}
49859733SFaramarz.Jalalian@Sun.COM 
49869733SFaramarz.Jalalian@Sun.COM 		if (hca->cache_allocation < cache_limit) {
49877387SRobert.Gordon@Sun.COM 			rw_exit(&hca->avl_rw_lock);
49887387SRobert.Gordon@Sun.COM 			return;
49897387SRobert.Gordon@Sun.COM 		}
49907387SRobert.Gordon@Sun.COM 
49917387SRobert.Gordon@Sun.COM 		rcas = avl_last(&hca->avl_tree);
49927387SRobert.Gordon@Sun.COM 		if (rcas != NULL)
49937387SRobert.Gordon@Sun.COM 			avl_remove(&hca->avl_tree, rcas);
49947387SRobert.Gordon@Sun.COM 	}
49957387SRobert.Gordon@Sun.COM 	rw_exit(&hca->avl_rw_lock);
49967387SRobert.Gordon@Sun.COM }
49977387SRobert.Gordon@Sun.COM 
49987387SRobert.Gordon@Sun.COM static int
avl_compare(const void * t1,const void * t2)49997387SRobert.Gordon@Sun.COM avl_compare(const void *t1, const void *t2)
50007387SRobert.Gordon@Sun.COM {
50017387SRobert.Gordon@Sun.COM 	if (((cache_avl_struct_t *)t1)->len == ((cache_avl_struct_t *)t2)->len)
50027387SRobert.Gordon@Sun.COM 		return (0);
50037387SRobert.Gordon@Sun.COM 
50047387SRobert.Gordon@Sun.COM 	if (((cache_avl_struct_t *)t1)->len < ((cache_avl_struct_t *)t2)->len)
50057387SRobert.Gordon@Sun.COM 		return (-1);
50067387SRobert.Gordon@Sun.COM 
50077387SRobert.Gordon@Sun.COM 	return (1);
50087387SRobert.Gordon@Sun.COM }
50097387SRobert.Gordon@Sun.COM 
50107387SRobert.Gordon@Sun.COM static void
rib_destroy_cache(rib_hca_t * hca)50117387SRobert.Gordon@Sun.COM rib_destroy_cache(rib_hca_t *hca)
50127387SRobert.Gordon@Sun.COM {
50138695SRajkumar.Sivaprakasam@Sun.COM 	if (hca->avl_init) {
50148695SRajkumar.Sivaprakasam@Sun.COM 		rib_server_side_cache_reclaim((void *)hca);
50158695SRajkumar.Sivaprakasam@Sun.COM 		if (hca->server_side_cache) {
50168695SRajkumar.Sivaprakasam@Sun.COM 			kmem_cache_destroy(hca->server_side_cache);
50178695SRajkumar.Sivaprakasam@Sun.COM 			hca->server_side_cache = NULL;
50188695SRajkumar.Sivaprakasam@Sun.COM 		}
50197387SRobert.Gordon@Sun.COM 		avl_destroy(&hca->avl_tree);
50209733SFaramarz.Jalalian@Sun.COM 		mutex_destroy(&hca->cache_allocation_lock);
50217387SRobert.Gordon@Sun.COM 		rw_destroy(&hca->avl_rw_lock);
50227387SRobert.Gordon@Sun.COM 	}
50237387SRobert.Gordon@Sun.COM 	hca->avl_init = FALSE;
50247387SRobert.Gordon@Sun.COM }
50257387SRobert.Gordon@Sun.COM 
50267387SRobert.Gordon@Sun.COM static void
rib_force_cleanup(void * hca)50277387SRobert.Gordon@Sun.COM rib_force_cleanup(void *hca)
50287387SRobert.Gordon@Sun.COM {
50299723SSiddheshwar.Mahesh@Sun.COM 	if (((rib_hca_t *)hca)->cleanup_helper != NULL)
50307387SRobert.Gordon@Sun.COM 		(void) ddi_taskq_dispatch(
50319723SSiddheshwar.Mahesh@Sun.COM 		    ((rib_hca_t *)hca)->cleanup_helper,
50327387SRobert.Gordon@Sun.COM 		    rib_server_side_cache_cleanup,
50337387SRobert.Gordon@Sun.COM 		    (void *)hca, DDI_NOSLEEP);
50347387SRobert.Gordon@Sun.COM }
50357387SRobert.Gordon@Sun.COM 
50367387SRobert.Gordon@Sun.COM static rib_lrc_entry_t *
rib_get_cache_buf(CONN * conn,uint32_t len)50377387SRobert.Gordon@Sun.COM rib_get_cache_buf(CONN *conn, uint32_t len)
50387387SRobert.Gordon@Sun.COM {
50397387SRobert.Gordon@Sun.COM 	cache_avl_struct_t	cas, *rcas;
50407387SRobert.Gordon@Sun.COM 	rib_hca_t	*hca = (ctoqp(conn))->hca;
50417387SRobert.Gordon@Sun.COM 	rib_lrc_entry_t *reply_buf;
50427387SRobert.Gordon@Sun.COM 	avl_index_t where = NULL;
50437387SRobert.Gordon@Sun.COM 	uint64_t c_alloc = 0;
50447387SRobert.Gordon@Sun.COM 
50457387SRobert.Gordon@Sun.COM 	if (!hca->avl_init)
50467387SRobert.Gordon@Sun.COM 		goto  error_alloc;
50477387SRobert.Gordon@Sun.COM 
50487387SRobert.Gordon@Sun.COM 	cas.len = len;
50497387SRobert.Gordon@Sun.COM 
50507387SRobert.Gordon@Sun.COM 	rw_enter(&hca->avl_rw_lock, RW_READER);
50517387SRobert.Gordon@Sun.COM 
50529733SFaramarz.Jalalian@Sun.COM 	mutex_enter(&hca->cache_allocation_lock);
50539733SFaramarz.Jalalian@Sun.COM 	c_alloc = hca->cache_allocation;
50549733SFaramarz.Jalalian@Sun.COM 	mutex_exit(&hca->cache_allocation_lock);
50557387SRobert.Gordon@Sun.COM 
50567387SRobert.Gordon@Sun.COM 	if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree, &cas,
50577387SRobert.Gordon@Sun.COM 	    &where)) == NULL) {
50587387SRobert.Gordon@Sun.COM 		/* Am I above the cache limit */
50597387SRobert.Gordon@Sun.COM 		if ((c_alloc + len) >= cache_limit) {
50607387SRobert.Gordon@Sun.COM 			rib_force_cleanup((void *)hca);
50617387SRobert.Gordon@Sun.COM 			rw_exit(&hca->avl_rw_lock);
50629733SFaramarz.Jalalian@Sun.COM 			mutex_enter(&hca->cache_allocation_lock);
50639733SFaramarz.Jalalian@Sun.COM 			hca->cache_misses_above_the_limit ++;
50649733SFaramarz.Jalalian@Sun.COM 			mutex_exit(&hca->cache_allocation_lock);
50657387SRobert.Gordon@Sun.COM 
50667387SRobert.Gordon@Sun.COM 			/* Allocate and register the buffer directly */
50677387SRobert.Gordon@Sun.COM 			goto error_alloc;
50687387SRobert.Gordon@Sun.COM 		}
50697387SRobert.Gordon@Sun.COM 
50707387SRobert.Gordon@Sun.COM 		rw_exit(&hca->avl_rw_lock);
50717387SRobert.Gordon@Sun.COM 		rw_enter(&hca->avl_rw_lock, RW_WRITER);
50727387SRobert.Gordon@Sun.COM 
50737387SRobert.Gordon@Sun.COM 		/* Recheck to make sure no other thread added the entry in */
50747387SRobert.Gordon@Sun.COM 		if ((rcas = (cache_avl_struct_t *)avl_find(&hca->avl_tree,
50757387SRobert.Gordon@Sun.COM 		    &cas, &where)) == NULL) {
50767387SRobert.Gordon@Sun.COM 			/* Allocate an avl tree entry */
50777387SRobert.Gordon@Sun.COM 			rcas = (cache_avl_struct_t *)
50787387SRobert.Gordon@Sun.COM 			    kmem_cache_alloc(hca->server_side_cache, KM_SLEEP);
50797387SRobert.Gordon@Sun.COM 
50807387SRobert.Gordon@Sun.COM 			bzero(rcas, sizeof (cache_avl_struct_t));
50817387SRobert.Gordon@Sun.COM 			rcas->elements = 0;
50827387SRobert.Gordon@Sun.COM 			rcas->r.forw = &rcas->r;
50837387SRobert.Gordon@Sun.COM 			rcas->r.back = &rcas->r;
50847387SRobert.Gordon@Sun.COM 			rcas->len = len;
50857387SRobert.Gordon@Sun.COM 			mutex_init(&rcas->node_lock, NULL, MUTEX_DEFAULT, NULL);
50867387SRobert.Gordon@Sun.COM 			avl_insert(&hca->avl_tree, rcas, where);
50877387SRobert.Gordon@Sun.COM 		}
50887387SRobert.Gordon@Sun.COM 	}
50897387SRobert.Gordon@Sun.COM 
50907387SRobert.Gordon@Sun.COM 	mutex_enter(&rcas->node_lock);
50917387SRobert.Gordon@Sun.COM 
50927387SRobert.Gordon@Sun.COM 	if (rcas->r.forw != &rcas->r && rcas->elements > 0) {
50937387SRobert.Gordon@Sun.COM 		reply_buf = rcas->r.forw;
50947387SRobert.Gordon@Sun.COM 		remque(reply_buf);
50957387SRobert.Gordon@Sun.COM 		rcas->elements--;
50967387SRobert.Gordon@Sun.COM 		mutex_exit(&rcas->node_lock);
50977387SRobert.Gordon@Sun.COM 		rw_exit(&hca->avl_rw_lock);
50989733SFaramarz.Jalalian@Sun.COM 
50999733SFaramarz.Jalalian@Sun.COM 		mutex_enter(&hca->cache_allocation_lock);
51009733SFaramarz.Jalalian@Sun.COM 		hca->cache_hits++;
51019733SFaramarz.Jalalian@Sun.COM 		hca->cache_allocation -= len;
51029733SFaramarz.Jalalian@Sun.COM 		mutex_exit(&hca->cache_allocation_lock);
51037387SRobert.Gordon@Sun.COM 	} else {
51047387SRobert.Gordon@Sun.COM 		/* Am I above the cache limit */
51057387SRobert.Gordon@Sun.COM 		mutex_exit(&rcas->node_lock);
51067387SRobert.Gordon@Sun.COM 		if ((c_alloc + len) >= cache_limit) {
51077387SRobert.Gordon@Sun.COM 			rib_force_cleanup((void *)hca);
51087387SRobert.Gordon@Sun.COM 			rw_exit(&hca->avl_rw_lock);
51099733SFaramarz.Jalalian@Sun.COM 
51109733SFaramarz.Jalalian@Sun.COM 			mutex_enter(&hca->cache_allocation_lock);
51119733SFaramarz.Jalalian@Sun.COM 			hca->cache_misses_above_the_limit++;
51129733SFaramarz.Jalalian@Sun.COM 			mutex_exit(&hca->cache_allocation_lock);
51137387SRobert.Gordon@Sun.COM 			/* Allocate and register the buffer directly */
51147387SRobert.Gordon@Sun.COM 			goto error_alloc;
51157387SRobert.Gordon@Sun.COM 		}
51167387SRobert.Gordon@Sun.COM 		rw_exit(&hca->avl_rw_lock);
51179733SFaramarz.Jalalian@Sun.COM 		mutex_enter(&hca->cache_allocation_lock);
51189733SFaramarz.Jalalian@Sun.COM 		hca->cache_misses++;
51199733SFaramarz.Jalalian@Sun.COM 		mutex_exit(&hca->cache_allocation_lock);
51207387SRobert.Gordon@Sun.COM 		/* Allocate a reply_buf entry */
51217387SRobert.Gordon@Sun.COM 		reply_buf = (rib_lrc_entry_t *)
51227387SRobert.Gordon@Sun.COM 		    kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
51237387SRobert.Gordon@Sun.COM 		bzero(reply_buf, sizeof (rib_lrc_entry_t));
51247387SRobert.Gordon@Sun.COM 		reply_buf->lrc_buf  = kmem_alloc(len, KM_SLEEP);
51257387SRobert.Gordon@Sun.COM 		reply_buf->lrc_len  = len;
51267387SRobert.Gordon@Sun.COM 		reply_buf->registered = FALSE;
51277387SRobert.Gordon@Sun.COM 		reply_buf->avl_node = (void *)rcas;
51287387SRobert.Gordon@Sun.COM 	}
51297387SRobert.Gordon@Sun.COM 
51307387SRobert.Gordon@Sun.COM 	return (reply_buf);
51317387SRobert.Gordon@Sun.COM 
51327387SRobert.Gordon@Sun.COM error_alloc:
51337387SRobert.Gordon@Sun.COM 	reply_buf = (rib_lrc_entry_t *)
51347387SRobert.Gordon@Sun.COM 	    kmem_zalloc(sizeof (rib_lrc_entry_t), KM_SLEEP);
51357387SRobert.Gordon@Sun.COM 	bzero(reply_buf, sizeof (rib_lrc_entry_t));
51367387SRobert.Gordon@Sun.COM 	reply_buf->lrc_buf = kmem_alloc(len, KM_SLEEP);
51377387SRobert.Gordon@Sun.COM 	reply_buf->lrc_len = len;
51387387SRobert.Gordon@Sun.COM 	reply_buf->registered = FALSE;
51397387SRobert.Gordon@Sun.COM 	reply_buf->avl_node = NULL;
51407387SRobert.Gordon@Sun.COM 
51417387SRobert.Gordon@Sun.COM 	return (reply_buf);
51427387SRobert.Gordon@Sun.COM }
51437387SRobert.Gordon@Sun.COM 
51447387SRobert.Gordon@Sun.COM /*
51457387SRobert.Gordon@Sun.COM  * Return a pre-registered back to the cache (without
51467387SRobert.Gordon@Sun.COM  * unregistering the buffer)..
51477387SRobert.Gordon@Sun.COM  */
51487387SRobert.Gordon@Sun.COM 
51497387SRobert.Gordon@Sun.COM static void
rib_free_cache_buf(CONN * conn,rib_lrc_entry_t * reg_buf)51507387SRobert.Gordon@Sun.COM rib_free_cache_buf(CONN *conn, rib_lrc_entry_t *reg_buf)
51517387SRobert.Gordon@Sun.COM {
51527387SRobert.Gordon@Sun.COM 	cache_avl_struct_t    cas, *rcas;
51537387SRobert.Gordon@Sun.COM 	avl_index_t where = NULL;
51547387SRobert.Gordon@Sun.COM 	rib_hca_t	*hca = (ctoqp(conn))->hca;
51557387SRobert.Gordon@Sun.COM 
51567387SRobert.Gordon@Sun.COM 	if (!hca->avl_init)
51577387SRobert.Gordon@Sun.COM 		goto  error_free;
51587387SRobert.Gordon@Sun.COM 
51597387SRobert.Gordon@Sun.COM 	cas.len = reg_buf->lrc_len;
51607387SRobert.Gordon@Sun.COM 	rw_enter(&hca->avl_rw_lock, RW_READER);
51617387SRobert.Gordon@Sun.COM 	if ((rcas = (cache_avl_struct_t *)
51627387SRobert.Gordon@Sun.COM 	    avl_find(&hca->avl_tree, &cas, &where)) == NULL) {
51637387SRobert.Gordon@Sun.COM 		rw_exit(&hca->avl_rw_lock);
51647387SRobert.Gordon@Sun.COM 		goto error_free;
51657387SRobert.Gordon@Sun.COM 	} else {
51667387SRobert.Gordon@Sun.COM 		cas.len = reg_buf->lrc_len;
51677387SRobert.Gordon@Sun.COM 		mutex_enter(&rcas->node_lock);
51687387SRobert.Gordon@Sun.COM 		insque(reg_buf, &rcas->r);
51697387SRobert.Gordon@Sun.COM 		rcas->elements ++;
51707387SRobert.Gordon@Sun.COM 		mutex_exit(&rcas->node_lock);
51717387SRobert.Gordon@Sun.COM 		rw_exit(&hca->avl_rw_lock);
51729733SFaramarz.Jalalian@Sun.COM 		mutex_enter(&hca->cache_allocation_lock);
51739733SFaramarz.Jalalian@Sun.COM 		hca->cache_allocation += cas.len;
51749733SFaramarz.Jalalian@Sun.COM 		mutex_exit(&hca->cache_allocation_lock);
51757387SRobert.Gordon@Sun.COM 	}
51767387SRobert.Gordon@Sun.COM 
51777387SRobert.Gordon@Sun.COM 	return;
51787387SRobert.Gordon@Sun.COM 
51797387SRobert.Gordon@Sun.COM error_free:
51807387SRobert.Gordon@Sun.COM 
51817387SRobert.Gordon@Sun.COM 	if (reg_buf->registered)
51827387SRobert.Gordon@Sun.COM 		(void) rib_deregistermem_via_hca(hca,
51837387SRobert.Gordon@Sun.COM 		    reg_buf->lrc_buf, reg_buf->lrc_mhandle);
51847387SRobert.Gordon@Sun.COM 	kmem_free(reg_buf->lrc_buf, reg_buf->lrc_len);
51857387SRobert.Gordon@Sun.COM 	kmem_free(reg_buf, sizeof (rib_lrc_entry_t));
51867387SRobert.Gordon@Sun.COM }
51877387SRobert.Gordon@Sun.COM 
51887387SRobert.Gordon@Sun.COM static rdma_stat
rib_registermem_via_hca(rib_hca_t * hca,caddr_t adsp,caddr_t buf,uint_t buflen,struct mrc * buf_handle)51897387SRobert.Gordon@Sun.COM rib_registermem_via_hca(rib_hca_t *hca, caddr_t adsp, caddr_t buf,
51907387SRobert.Gordon@Sun.COM 	uint_t buflen, struct mrc *buf_handle)
51917387SRobert.Gordon@Sun.COM {
51927387SRobert.Gordon@Sun.COM 	ibt_mr_hdl_t	mr_hdl = NULL;	/* memory region handle */
51937387SRobert.Gordon@Sun.COM 	ibt_mr_desc_t	mr_desc;	/* vaddr, lkey, rkey */
51947387SRobert.Gordon@Sun.COM 	rdma_stat	status;
51957387SRobert.Gordon@Sun.COM 
51967387SRobert.Gordon@Sun.COM 
51977387SRobert.Gordon@Sun.COM 	/*
51987387SRobert.Gordon@Sun.COM 	 * Note: ALL buffer pools use the same memory type RDMARW.
51997387SRobert.Gordon@Sun.COM 	 */
52007387SRobert.Gordon@Sun.COM 	status = rib_reg_mem(hca, adsp, buf, buflen, 0, &mr_hdl, &mr_desc);
52017387SRobert.Gordon@Sun.COM 	if (status == RDMA_SUCCESS) {
52027387SRobert.Gordon@Sun.COM 		buf_handle->mrc_linfo = (uint64_t)(uintptr_t)mr_hdl;
52037387SRobert.Gordon@Sun.COM 		buf_handle->mrc_lmr = (uint32_t)mr_desc.md_lkey;
52047387SRobert.Gordon@Sun.COM 		buf_handle->mrc_rmr = (uint32_t)mr_desc.md_rkey;
52057387SRobert.Gordon@Sun.COM 	} else {
52067387SRobert.Gordon@Sun.COM 		buf_handle->mrc_linfo = NULL;
52077387SRobert.Gordon@Sun.COM 		buf_handle->mrc_lmr = 0;
52087387SRobert.Gordon@Sun.COM 		buf_handle->mrc_rmr = 0;
52097387SRobert.Gordon@Sun.COM 	}
52107387SRobert.Gordon@Sun.COM 	return (status);
52117387SRobert.Gordon@Sun.COM }
52127387SRobert.Gordon@Sun.COM 
52137387SRobert.Gordon@Sun.COM /* ARGSUSED */
52147387SRobert.Gordon@Sun.COM static rdma_stat
rib_deregistermemsync_via_hca(rib_hca_t * hca,caddr_t buf,struct mrc buf_handle,RIB_SYNCMEM_HANDLE sync_handle)52157387SRobert.Gordon@Sun.COM rib_deregistermemsync_via_hca(rib_hca_t *hca, caddr_t buf,
52167387SRobert.Gordon@Sun.COM     struct mrc buf_handle, RIB_SYNCMEM_HANDLE sync_handle)
52177387SRobert.Gordon@Sun.COM {
52187387SRobert.Gordon@Sun.COM 
52197387SRobert.Gordon@Sun.COM 	(void) rib_deregistermem_via_hca(hca, buf, buf_handle);
52207387SRobert.Gordon@Sun.COM 	return (RDMA_SUCCESS);
52217387SRobert.Gordon@Sun.COM }
52227387SRobert.Gordon@Sun.COM 
52237387SRobert.Gordon@Sun.COM /* ARGSUSED */
52247387SRobert.Gordon@Sun.COM static rdma_stat
rib_deregistermem_via_hca(rib_hca_t * hca,caddr_t buf,struct mrc buf_handle)52257387SRobert.Gordon@Sun.COM rib_deregistermem_via_hca(rib_hca_t *hca, caddr_t buf, struct mrc buf_handle)
52267387SRobert.Gordon@Sun.COM {
52277387SRobert.Gordon@Sun.COM 
52287387SRobert.Gordon@Sun.COM 	(void) ibt_deregister_mr(hca->hca_hdl,
52297387SRobert.Gordon@Sun.COM 	    (ibt_mr_hdl_t)(uintptr_t)buf_handle.mrc_linfo);
52307387SRobert.Gordon@Sun.COM 	return (RDMA_SUCCESS);
52317387SRobert.Gordon@Sun.COM }
52327387SRobert.Gordon@Sun.COM 
52337387SRobert.Gordon@Sun.COM /*
52348485SPeter.Memishian@Sun.COM  * Check if the IP interface named by `lifrp' is RDMA-capable.
52357387SRobert.Gordon@Sun.COM  */
52368485SPeter.Memishian@Sun.COM static boolean_t
rpcib_rdma_capable_interface(struct lifreq * lifrp)52378485SPeter.Memishian@Sun.COM rpcib_rdma_capable_interface(struct lifreq *lifrp)
52387387SRobert.Gordon@Sun.COM {
52398485SPeter.Memishian@Sun.COM 	char ifname[LIFNAMSIZ];
52408485SPeter.Memishian@Sun.COM 	char *cp;
52418485SPeter.Memishian@Sun.COM 
52428485SPeter.Memishian@Sun.COM 	if (lifrp->lifr_type == IFT_IB)
52438485SPeter.Memishian@Sun.COM 		return (B_TRUE);
52447387SRobert.Gordon@Sun.COM 
52457387SRobert.Gordon@Sun.COM 	/*
52468485SPeter.Memishian@Sun.COM 	 * Strip off the logical interface portion before getting
52478485SPeter.Memishian@Sun.COM 	 * intimate with the name.
52487387SRobert.Gordon@Sun.COM 	 */
52498485SPeter.Memishian@Sun.COM 	(void) strlcpy(ifname, lifrp->lifr_name, LIFNAMSIZ);
52508485SPeter.Memishian@Sun.COM 	if ((cp = strchr(ifname, ':')) != NULL)
52518485SPeter.Memishian@Sun.COM 		*cp = '\0';
52528485SPeter.Memishian@Sun.COM 
52538485SPeter.Memishian@Sun.COM 	return (strcmp("lo0", ifname) == 0);
52547387SRobert.Gordon@Sun.COM }
52557387SRobert.Gordon@Sun.COM 
52567387SRobert.Gordon@Sun.COM static int
rpcib_do_ip_ioctl(int cmd,int len,void * arg)52578485SPeter.Memishian@Sun.COM rpcib_do_ip_ioctl(int cmd, int len, void *arg)
52587387SRobert.Gordon@Sun.COM {
525911185SSean.McEnroe@Sun.COM 	vnode_t *kkvp, *vp;
52607387SRobert.Gordon@Sun.COM 	TIUSER  *tiptr;
52617387SRobert.Gordon@Sun.COM 	struct  strioctl iocb;
52627387SRobert.Gordon@Sun.COM 	k_sigset_t smask;
52637387SRobert.Gordon@Sun.COM 	int	err = 0;
52647387SRobert.Gordon@Sun.COM 
526511185SSean.McEnroe@Sun.COM 	if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &kkvp) == 0) {
526611185SSean.McEnroe@Sun.COM 		if (t_kopen(NULL, kkvp->v_rdev, FREAD|FWRITE,
52677387SRobert.Gordon@Sun.COM 		    &tiptr, CRED()) == 0) {
52688485SPeter.Memishian@Sun.COM 			vp = tiptr->fp->f_vnode;
52698485SPeter.Memishian@Sun.COM 		} else {
527011185SSean.McEnroe@Sun.COM 			VN_RELE(kkvp);
52718485SPeter.Memishian@Sun.COM 			return (EPROTO);
52727387SRobert.Gordon@Sun.COM 		}
52737387SRobert.Gordon@Sun.COM 	} else {
52748485SPeter.Memishian@Sun.COM 		return (EPROTO);
52757387SRobert.Gordon@Sun.COM 	}
52767387SRobert.Gordon@Sun.COM 
52777387SRobert.Gordon@Sun.COM 	iocb.ic_cmd = cmd;
52787387SRobert.Gordon@Sun.COM 	iocb.ic_timout = 0;
52797387SRobert.Gordon@Sun.COM 	iocb.ic_len = len;
52808485SPeter.Memishian@Sun.COM 	iocb.ic_dp = (caddr_t)arg;
52817387SRobert.Gordon@Sun.COM 	sigintr(&smask, 0);
52827387SRobert.Gordon@Sun.COM 	err = kstr_ioctl(vp, I_STR, (intptr_t)&iocb);
52837387SRobert.Gordon@Sun.COM 	sigunintr(&smask);
52847387SRobert.Gordon@Sun.COM 	(void) t_kclose(tiptr, 0);
528511185SSean.McEnroe@Sun.COM 	VN_RELE(kkvp);
52867387SRobert.Gordon@Sun.COM 	return (err);
52877387SRobert.Gordon@Sun.COM }
52887387SRobert.Gordon@Sun.COM 
52898485SPeter.Memishian@Sun.COM /*
52908485SPeter.Memishian@Sun.COM  * Issue an SIOCGLIFCONF down to IP and return the result in `lifcp'.
52918485SPeter.Memishian@Sun.COM  * lifcp->lifc_buf is dynamically allocated to be *bufsizep bytes.
52928485SPeter.Memishian@Sun.COM  */
52938485SPeter.Memishian@Sun.COM static int
rpcib_do_lifconf(struct lifconf * lifcp,uint_t * bufsizep)52948485SPeter.Memishian@Sun.COM rpcib_do_lifconf(struct lifconf *lifcp, uint_t *bufsizep)
52958485SPeter.Memishian@Sun.COM {
52968485SPeter.Memishian@Sun.COM 	int err;
52978485SPeter.Memishian@Sun.COM 	struct lifnum lifn;
52988485SPeter.Memishian@Sun.COM 
52998485SPeter.Memishian@Sun.COM 	bzero(&lifn, sizeof (struct lifnum));
53008485SPeter.Memishian@Sun.COM 	lifn.lifn_family = AF_UNSPEC;
53018485SPeter.Memishian@Sun.COM 
53028485SPeter.Memishian@Sun.COM 	err = rpcib_do_ip_ioctl(SIOCGLIFNUM, sizeof (struct lifnum), &lifn);
53038485SPeter.Memishian@Sun.COM 	if (err != 0)
53048485SPeter.Memishian@Sun.COM 		return (err);
53058485SPeter.Memishian@Sun.COM 
53068485SPeter.Memishian@Sun.COM 	/*
53078485SPeter.Memishian@Sun.COM 	 * Pad the interface count to account for additional interfaces that
53088485SPeter.Memishian@Sun.COM 	 * may have been configured between the SIOCGLIFNUM and SIOCGLIFCONF.
53098485SPeter.Memishian@Sun.COM 	 */
53108485SPeter.Memishian@Sun.COM 	lifn.lifn_count += 4;
53118485SPeter.Memishian@Sun.COM 
53128485SPeter.Memishian@Sun.COM 	bzero(lifcp, sizeof (struct lifconf));
53138485SPeter.Memishian@Sun.COM 	lifcp->lifc_family = AF_UNSPEC;
53148485SPeter.Memishian@Sun.COM 	lifcp->lifc_len = *bufsizep = lifn.lifn_count * sizeof (struct lifreq);
53158485SPeter.Memishian@Sun.COM 	lifcp->lifc_buf = kmem_zalloc(*bufsizep, KM_SLEEP);
53168485SPeter.Memishian@Sun.COM 
53178485SPeter.Memishian@Sun.COM 	err = rpcib_do_ip_ioctl(SIOCGLIFCONF, sizeof (struct lifconf), lifcp);
53188485SPeter.Memishian@Sun.COM 	if (err != 0) {
53198485SPeter.Memishian@Sun.COM 		kmem_free(lifcp->lifc_buf, *bufsizep);
53208485SPeter.Memishian@Sun.COM 		return (err);
53217387SRobert.Gordon@Sun.COM 	}
53228485SPeter.Memishian@Sun.COM 	return (0);
53237387SRobert.Gordon@Sun.COM }
53247387SRobert.Gordon@Sun.COM 
53257387SRobert.Gordon@Sun.COM static boolean_t
rpcib_get_ib_addresses(rpcib_ipaddrs_t * addrs4,rpcib_ipaddrs_t * addrs6)53268485SPeter.Memishian@Sun.COM rpcib_get_ib_addresses(rpcib_ipaddrs_t *addrs4, rpcib_ipaddrs_t *addrs6)
53277387SRobert.Gordon@Sun.COM {
53288485SPeter.Memishian@Sun.COM 	uint_t i, nifs;
53298485SPeter.Memishian@Sun.COM 	uint_t bufsize;
53308485SPeter.Memishian@Sun.COM 	struct lifconf lifc;
53318485SPeter.Memishian@Sun.COM 	struct lifreq *lifrp;
53328485SPeter.Memishian@Sun.COM 	struct sockaddr_in *sinp;
53338485SPeter.Memishian@Sun.COM 	struct sockaddr_in6 *sin6p;
53348485SPeter.Memishian@Sun.COM 
53358485SPeter.Memishian@Sun.COM 	bzero(addrs4, sizeof (rpcib_ipaddrs_t));
53368485SPeter.Memishian@Sun.COM 	bzero(addrs6, sizeof (rpcib_ipaddrs_t));
53378485SPeter.Memishian@Sun.COM 
53388485SPeter.Memishian@Sun.COM 	if (rpcib_do_lifconf(&lifc, &bufsize) != 0)
53398485SPeter.Memishian@Sun.COM 		return (B_FALSE);
53408485SPeter.Memishian@Sun.COM 
53418485SPeter.Memishian@Sun.COM 	if ((nifs = lifc.lifc_len / sizeof (struct lifreq)) == 0) {
53428485SPeter.Memishian@Sun.COM 		kmem_free(lifc.lifc_buf, bufsize);
53438485SPeter.Memishian@Sun.COM 		return (B_FALSE);
53447387SRobert.Gordon@Sun.COM 	}
53457387SRobert.Gordon@Sun.COM 
53468485SPeter.Memishian@Sun.COM 	/*
53478485SPeter.Memishian@Sun.COM 	 * Worst case is that all of the addresses are IB-capable and have
53488485SPeter.Memishian@Sun.COM 	 * the same address family, so size our buffers accordingly.
53498485SPeter.Memishian@Sun.COM 	 */
53508485SPeter.Memishian@Sun.COM 	addrs4->ri_size = nifs * sizeof (struct sockaddr_in);
53518485SPeter.Memishian@Sun.COM 	addrs4->ri_list = kmem_zalloc(addrs4->ri_size, KM_SLEEP);
53528485SPeter.Memishian@Sun.COM 	addrs6->ri_size = nifs * sizeof (struct sockaddr_in6);
53538485SPeter.Memishian@Sun.COM 	addrs6->ri_list = kmem_zalloc(addrs6->ri_size, KM_SLEEP);
53548485SPeter.Memishian@Sun.COM 
53558485SPeter.Memishian@Sun.COM 	for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) {
53568485SPeter.Memishian@Sun.COM 		if (!rpcib_rdma_capable_interface(lifrp))
53578485SPeter.Memishian@Sun.COM 			continue;
53588485SPeter.Memishian@Sun.COM 
53598485SPeter.Memishian@Sun.COM 		if (lifrp->lifr_addr.ss_family == AF_INET) {
53608485SPeter.Memishian@Sun.COM 			sinp = addrs4->ri_list;
53618485SPeter.Memishian@Sun.COM 			bcopy(&lifrp->lifr_addr, &sinp[addrs4->ri_count++],
53628485SPeter.Memishian@Sun.COM 			    sizeof (struct sockaddr_in));
53638485SPeter.Memishian@Sun.COM 		} else if (lifrp->lifr_addr.ss_family == AF_INET6) {
53648485SPeter.Memishian@Sun.COM 			sin6p = addrs6->ri_list;
53658485SPeter.Memishian@Sun.COM 			bcopy(&lifrp->lifr_addr, &sin6p[addrs6->ri_count++],
53668485SPeter.Memishian@Sun.COM 			    sizeof (struct sockaddr_in6));
53677387SRobert.Gordon@Sun.COM 		}
53687387SRobert.Gordon@Sun.COM 	}
53698485SPeter.Memishian@Sun.COM 
53708485SPeter.Memishian@Sun.COM 	kmem_free(lifc.lifc_buf, bufsize);
53718485SPeter.Memishian@Sun.COM 	return (B_TRUE);
53727387SRobert.Gordon@Sun.COM }
53737387SRobert.Gordon@Sun.COM 
53747387SRobert.Gordon@Sun.COM /* ARGSUSED */
53759733SFaramarz.Jalalian@Sun.COM static int
rpcib_cache_kstat_update(kstat_t * ksp,int rw)53769733SFaramarz.Jalalian@Sun.COM rpcib_cache_kstat_update(kstat_t *ksp, int rw)
53779733SFaramarz.Jalalian@Sun.COM {
53789733SFaramarz.Jalalian@Sun.COM 	rib_hca_t *hca;
53797387SRobert.Gordon@Sun.COM 
53807387SRobert.Gordon@Sun.COM 	if (KSTAT_WRITE == rw) {
53817387SRobert.Gordon@Sun.COM 		return (EACCES);
53827387SRobert.Gordon@Sun.COM 	}
53839733SFaramarz.Jalalian@Sun.COM 
53847387SRobert.Gordon@Sun.COM 	rpcib_kstat.cache_limit.value.ui64 =
53857387SRobert.Gordon@Sun.COM 	    (uint64_t)cache_limit;
53869733SFaramarz.Jalalian@Sun.COM 	rw_enter(&rib_stat->hcas_list_lock, RW_READER);
53879733SFaramarz.Jalalian@Sun.COM 	for (hca = rib_stat->hcas_list; hca; hca = hca->next) {
53889733SFaramarz.Jalalian@Sun.COM 		rpcib_kstat.cache_allocation.value.ui64 +=
53899733SFaramarz.Jalalian@Sun.COM 		    (uint64_t)hca->cache_allocation;
53909733SFaramarz.Jalalian@Sun.COM 		rpcib_kstat.cache_hits.value.ui64 +=
53919733SFaramarz.Jalalian@Sun.COM 		    (uint64_t)hca->cache_hits;
53929733SFaramarz.Jalalian@Sun.COM 		rpcib_kstat.cache_misses.value.ui64 +=
53939733SFaramarz.Jalalian@Sun.COM 		    (uint64_t)hca->cache_misses;
53949733SFaramarz.Jalalian@Sun.COM 		rpcib_kstat.cache_misses_above_the_limit.value.ui64 +=
53959733SFaramarz.Jalalian@Sun.COM 		    (uint64_t)hca->cache_misses_above_the_limit;
53969733SFaramarz.Jalalian@Sun.COM 	}
53979733SFaramarz.Jalalian@Sun.COM 	rw_exit(&rib_stat->hcas_list_lock);
53987387SRobert.Gordon@Sun.COM 	return (0);
53997387SRobert.Gordon@Sun.COM }
5400