/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#ifndef _SYS_IB_CLIENTS_IBD_H
#define	_SYS_IB_CLIENTS_IBD_H

#ifdef __cplusplus
extern "C" {
#endif

/* The following macros are used in both ibd.c and ibd_cm.c */

/*
 * Completion queue polling control
 */
#define	IBD_CQ_POLLING			0x1
#define	IBD_REDO_CQ_POLLING		0x2

/*
 * Maximum length for returning chained mps back to crossbow.
 * Also used as the maximum number of rx wc's polled at a time.
 */
#define	IBD_MAX_RX_MP_LEN		16

/*
 * When doing multiple-send-wr, this value determines how many to do at
 * a time (in a single ibt_post_send).
 */
#define	IBD_MAX_TX_POST_MULTIPLE	4

/*
 * Flag bits for resources to reap
 */
#define	IBD_RSRC_SWQE			0x1
#define	IBD_RSRC_LSOBUF			0x2
#define	IBD_RSRC_RC_SWQE		0x4
#define	IBD_RSRC_RC_TX_LARGEBUF		0x8

/*
 * Async operation types
 */
#define	IBD_ASYNC_GETAH			1
#define	IBD_ASYNC_JOIN			2
#define	IBD_ASYNC_LEAVE			3
#define	IBD_ASYNC_PROMON		4
#define	IBD_ASYNC_PROMOFF		5
#define	IBD_ASYNC_REAP			6
#define	IBD_ASYNC_TRAP			7
#define	IBD_ASYNC_SCHED			8
#define	IBD_ASYNC_LINK			9
#define	IBD_ASYNC_EXIT			10
#define	IBD_ASYNC_RC_TOO_BIG		11
#define	IBD_ASYNC_RC_CLOSE_ACT_CHAN	12
#define	IBD_ASYNC_RC_RECYCLE_ACE	13
#define	IBD_ASYNC_RC_CLOSE_PAS_CHAN	14

/*
 * State of IBD driver initialization during attach/m_start
 */
#define	IBD_DRV_STATE_INITIALIZED	0x000001
#define	IBD_DRV_RXINTR_ADDED		0x000002
#define	IBD_DRV_TXINTR_ADDED		0x000004
#define	IBD_DRV_IBTL_ATTACH_DONE	0x000008
#define	IBD_DRV_HCA_OPENED		0x000010
#define	IBD_DRV_PD_ALLOCD		0x000020
#define	IBD_DRV_MAC_REGISTERED		0x000040
#define	IBD_DRV_PORT_DETAILS_OBTAINED	0x000080
#define	IBD_DRV_BCAST_GROUP_FOUND	0x000100
#define	IBD_DRV_ACACHE_INITIALIZED	0x000200
#define	IBD_DRV_CQS_ALLOCD		0x000400
#define	IBD_DRV_UD_CHANNEL_SETUP	0x000800
#define	IBD_DRV_TXLIST_ALLOCD		0x001000
#define	IBD_DRV_SCQ_NOTIFY_ENABLED	0x002000
#define	IBD_DRV_RXLIST_ALLOCD		0x004000
#define	IBD_DRV_BCAST_GROUP_JOINED	0x008000
#define	IBD_DRV_ASYNC_THR_CREATED	0x010000
#define	IBD_DRV_RCQ_NOTIFY_ENABLED	0x020000
#define	IBD_DRV_SM_NOTICES_REGISTERED	0x040000
#define	IBD_DRV_STARTED			0x080000
#define	IBD_DRV_RC_SRQ_ALLOCD		0x100000
#define	IBD_DRV_RC_LARGEBUF_ALLOCD	0x200000
#define	IBD_DRV_RC_LISTEN		0x400000
#ifdef DEBUG
#define	IBD_DRV_RC_PRIVATE_STATE	0x800000
#endif
#define	IBD_DRV_IN_DELETION		0x1000000
#define	IBD_DRV_IN_LATE_HCA_INIT	0x2000000
#define	IBD_DRV_REQ_LIST_INITED		0x4000000
#define	IBD_DRV_RC_TIMEOUT		0x8000000

/*
 * Miscellaneous constants
 */
#define	IBD_SEND			0
#define	IBD_RECV			1

/* Tunable defaults and limits */
#define	IBD_LINK_MODE_UD		0
#define	IBD_LINK_MODE_RC		1

#define	IBD_DEF_LINK_MODE		IBD_LINK_MODE_RC
#define	IBD_DEF_LSO_POLICY		B_TRUE
#define	IBD_DEF_NUM_LSO_BUFS		1024
#define	IBD_DEF_CREATE_BCAST_GROUP	B_TRUE
#define	IBD_DEF_COALESCE_COMPLETIONS	B_TRUE
#define	IBD_DEF_UD_RX_COMP_COUNT	4
#define	IBD_DEF_UD_RX_COMP_USEC		10
#define	IBD_DEF_UD_TX_COMP_COUNT	16
#define	IBD_DEF_UD_TX_COMP_USEC		300
#define	IBD_DEF_RC_RX_COMP_COUNT	4
#define	IBD_DEF_RC_RX_COMP_USEC		10
#define	IBD_DEF_RC_TX_COMP_COUNT	10
#define	IBD_DEF_RC_TX_COMP_USEC		300
#define	IBD_DEF_UD_TX_COPY_THRESH	4096
#define	IBD_DEF_RC_RX_COPY_THRESH	4096
#define	IBD_DEF_RC_TX_COPY_THRESH	4096
#define	IBD_DEF_UD_NUM_RWQE		4000
#define	IBD_DEF_UD_NUM_SWQE		4000
#define	IBD_DEF_RC_ENABLE_SRQ		B_TRUE
#if defined(__i386)
#define	IBD_DEF_RC_NUM_RWQE		511
#define	IBD_DEF_RC_NUM_SWQE		255
#else
#define	IBD_DEF_RC_NUM_RWQE		2047
#define	IBD_DEF_RC_NUM_SWQE		511
#endif
#define	IBD_DEF_NUM_AH			256
#define	IBD_DEF_HASH_SIZE		32
#define	IBD_DEF_RC_NUM_SRQ		(IBD_DEF_RC_NUM_RWQE - 1)
#define	IBD_DEF_RC_RX_RWQE_THRESH	(IBD_DEF_RC_NUM_RWQE >> 2)

/* Tunable limits */
#define	IBD_MIN_NUM_LSO_BUFS		512
#define	IBD_MAX_NUM_LSO_BUFS		4096
#define	IBD_MIN_UD_TX_COPY_THRESH	2048
#define	IBD_MAX_UD_TX_COPY_THRESH	65536
#define	IBD_MIN_UD_NUM_SWQE		512
#define	IBD_MAX_UD_NUM_SWQE		8000
#define	IBD_MIN_UD_NUM_RWQE		512
#define	IBD_MAX_UD_NUM_RWQE		8000
#define	IBD_MIN_NUM_AH			32
#define	IBD_MAX_NUM_AH			8192
#define	IBD_MIN_HASH_SIZE		32
#define	IBD_MAX_HASH_SIZE		1024

#if defined(__i386)
#define	IBD_MIN_RC_NUM_SWQE		255
#else
#define	IBD_MIN_RC_NUM_SWQE		511
#endif
#define	IBD_MAX_RC_NUM_SWQE		8000
#define	IBD_MIN_RC_NUM_RWQE		511
#define	IBD_MAX_RC_NUM_RWQE		8000
#define	IBD_MIN_RC_RX_COPY_THRESH	1500
#define	IBD_MAX_RC_RX_COPY_THRESH	65520
#define	IBD_MIN_RC_TX_COPY_THRESH	1500
#define	IBD_MAX_RC_TX_COPY_THRESH	65520
#define	IBD_MIN_RC_NUM_SRQ		(IBD_MIN_RC_NUM_RWQE - 1)
#define	IBD_MIN_RC_RX_RWQE_THRESH	(IBD_MIN_RC_NUM_RWQE >> 2)

/*
 * Thresholds
 *
 * When waiting for resources (swqes or lso buffers) to become available,
 * the first two thresholds below determine how many must become free
 * before the network layer is told that it can start sending packets
 * again. The IBD_TX_POLL_THRESH determines how low the available swqes
 * should go before we start polling the completion queue.
 */
#define	IBD_FREE_LSOS_THRESH		8
#define	IBD_FREE_SWQES_THRESH		20
#define	IBD_TX_POLL_THRESH		80

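/*
 * Illustrative sketch (not part of the driver): the Tx completion path
 * can use these thresholds roughly as below, where id_tx_list, id_mh and
 * id_sched_needed are fields of ibd_state_t declared later in this file
 * and mac_tx_update() is the GLDv3 flow-control entry point. The
 * authoritative logic lives in ibd.c; this is only a sketch.
 *
 *	if ((state->id_sched_needed & IBD_RSRC_SWQE) &&
 *	    (state->id_tx_list.dl_cnt >= IBD_FREE_SWQES_THRESH)) {
 *		state->id_sched_needed &= ~IBD_RSRC_SWQE;
 *		mac_tx_update(state->id_mh);
 *	}
 */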
#ifdef DEBUG
void debug_print(int l, char *fmt, ...);
#define	DPRINT		debug_print
#else
#define	DPRINT		0 &&
#endif

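/*
 * Note on the non-DEBUG definition of DPRINT above: a call such as
 * DPRINT(10, "ibd: ...") expands to 0 && (10, "ibd: ..."), so the
 * arguments must still compile but are never evaluated, and the whole
 * statement is optimized away in non-DEBUG builds.
 */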
/*
 * AH and MCE active list manipulation:
 *
 * Multicast disable requests and MCG delete traps are two cases
 * where the active AH entry for the mcg (if any unreferenced one exists)
 * will be moved to the free list (to force the next Tx to the mcg to
 * join the MCG in SendOnly mode). Port up handling will also move AHs
 * from active to free list.
 *
 * In the case when some transmits are still pending on an entry
 * for an mcg, but a multicast disable has already been issued on the
 * mcg, there are some options to consider to preserve the join state
 * to ensure the emitted packet is properly routed on the IBA fabric.
 * For the AH, we can
 * 1. take out of active list at multicast disable time.
 * 2. take out of active list only when last pending Tx completes.
 * For the MCE, we can
 * 3. take out of active list at multicast disable time.
 * 4. take out of active list only when last pending Tx completes.
 * 5. move from active list to stale list at multicast disable time.
 * We choose to use 2,4. We use option 4 so that if a multicast enable
 * is tried before the pending Tx completes, the enable code finds the
 * mce in the active list and just has to make sure it will not be reaped
 * (i.e., the mcg leave is done) when the pending Tx does complete.
 * Alternatively, a stale list (#5) that would be checked in the enable
 * code would need to be implemented. Option 2 is used because otherwise
 * a Tx attempt after the multicast disable would try to put an AH in the
 * active list, and associate the mce it finds in the active list to this
 * new AH, whereas the mce is already associated with the previous AH
 * (taken off the active list), and will be removed once the pending Tx's
 * complete (unless a reference count on mce's is implemented). One
 * implication of using 2,4 is that new Tx's posted before the pending
 * Tx's complete will grab new references on the AH, further delaying
 * the leave.
 *
 * In the case of mcg delete (or create) trap when the port is sendonly
 * joined, the AH and MCE handling is different: the AH and MCE have to be
 * immediately taken off the active lists (forcing a join and path lookup
 * at the next Tx is the only guaranteed means of ensuring a proper Tx
 * to an mcg as it is repeatedly created and deleted and goes through
 * reincarnations).
 *
 * When a port is already sendonly joined, and a multicast enable is
 * attempted, the same mce structure is promoted; this ensures only a
 * single mce on the active list tracks the most powerful join state.
 *
 * In the case of port up event handling, the MCE for sendonly membership
 * is freed up, and the ACE is put into the free list as soon as possible
 * (depending on whether posted Tx's have completed). For fullmembership
 * MCE's though, the ACE is similarly handled; but the MCE is kept around
 * (a re-JOIN is attempted) only if the DLPI leave has not already been
 * done; else the mce is deconstructed (mc_fullreap case).
 *
 * MCG creation and deletion trap handling:
 *
 * These traps are unreliable (meaning sometimes the trap might never
 * be delivered to the subscribed nodes) and may arrive out-of-order
 * since they use UD transport. An alternative to relying on these
 * unreliable traps is to poll for mcg presence every so often, but
 * instead of doing that, we try to be as conservative as possible
 * while handling the traps, and hope that the traps do arrive at
 * the subscribed nodes soon. Note that if a node is fullmember
 * joined to an mcg, it cannot possibly receive an mcg create/delete
 * trap for that mcg (by fullmember definition); if it does, it is
 * an old trap from a previous incarnation of the mcg.
 *
 * Whenever a trap is received, the driver cleans up its sendonly
 * membership to the group; we choose to do a sendonly leave even
 * on a creation trap to handle the case of a prior deletion of the mcg
 * having gone unnoticed. Consider an example scenario:
 * T1: MCG M is deleted, and fires off deletion trap D1.
 * T2: MCG M is recreated, fires off creation trap C1, which is lost.
 * T3: Node N tries to transmit to M, joining in sendonly mode.
 * T4: MCG M is deleted, and fires off deletion trap D2.
 * T5: N receives a deletion trap, but cannot distinguish D1 from D2.
 *     If the trap is D2, then a LEAVE is not required, since the mcg
 *     is already deleted; but if it is D1, a LEAVE is required. A safe
 *     approach is to always LEAVE, but the SM may be confused if it
 *     receives a LEAVE without a prior JOIN.
 *
 * Management of the non-membership to an mcg is similar to the above,
 * except that if the interface is in promiscuous mode, it is required
 * to attempt to re-join the mcg after receiving a trap. Unfortunately,
 * if the re-join attempt fails (in which case a warning message needs
 * to be printed), it is not clear whether it failed due to the mcg not
 * existing, or some fabric/hca issues, due to the delayed nature of
 * trap delivery. Querying the SA to establish presence/absence of the
 * mcg is also racy at best. Thus, the driver just prints a warning
 * message when it cannot rejoin after receiving a create trap, although
 * this might be (on rare occasions) a mis-warning if the create trap is
 * received after the mcg was deleted.
 */

/*
 * Implementation of atomic "recycle" bits and reference count
 * on address handles. This utilizes the fact that the max reference
 * count on any handle is limited by the number of send wqes, thus
 * high bits in the ac_ref field can be used as the recycle bits,
 * and only the low bits hold the number of pending Tx requests.
 * This atomic AH reference counting allows the Tx completion
 * handler to avoid acquiring the id_ac_mutex to process every
 * completion, thus reducing lock contention problems between
 * completion and the Tx path.
 */
#define	CYCLEVAL		0x80000
#define	CLEAR_REFCYCLE(ace)	(ace)->ac_ref = 0
#define	CYCLE_SET(ace)		(((ace)->ac_ref & CYCLEVAL) == CYCLEVAL)
#define	GET_REF(ace)		((ace)->ac_ref)
#define	GET_REF_CYCLE(ace) (				\
	/*						\
	 * Make sure "cycle" bit is set.		\
	 */						\
	ASSERT(CYCLE_SET(ace)),				\
	((ace)->ac_ref & ~(CYCLEVAL))			\
)
#define	INC_REF(ace, num) {				\
	atomic_add_32(&(ace)->ac_ref, num);		\
}
#define	SET_CYCLE_IF_REF(ace) (				\
	CYCLE_SET(ace) ? B_TRUE :			\
	    atomic_add_32_nv(&ace->ac_ref, CYCLEVAL) ==	\
		CYCLEVAL ?				\
		/*					\
		 * Clear the "cycle" bit we just set;	\
		 * ref count known to be 0 from above.	\
		 */					\
		CLEAR_REFCYCLE(ace), B_FALSE :		\
		/*					\
		 * We set "cycle" bit; let caller know.	\
		 */					\
		B_TRUE					\
)
#define	DEC_REF_DO_CYCLE(ace) (				\
	atomic_dec_32_nv(&ace->ac_ref) == CYCLEVAL ?	\
		/*					\
		 * Ref count known to be 0 from above.	\
		 */					\
		B_TRUE :				\
		B_FALSE					\
)

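/*
 * Illustrative sketch (not part of the driver) of how the macros above
 * are meant to cooperate; the authoritative users are in ibd.c:
 *
 *	Tx path, after looking up the ace for the destination:
 *		INC_REF(ace, 1);	one more pending Tx
 *
 *	Async thread wanting to recycle an active entry:
 *		if (SET_CYCLE_IF_REF(ace))
 *			Tx's are still pending; the recycle is deferred and
 *			the last completion will notice the "cycle" bit
 *		else
 *			ref count was 0, the entry can be recycled now
 *
 *	Tx completion handler, when done with the ace:
 *		if (DEC_REF_DO_CYCLE(ace))
 *			the pending count just hit 0 with the "cycle" bit
 *			set, so this completion finishes the deferred recycle
 */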
/*
 * Address handle entries maintained by the driver are kept in the
 * free and active lists. Each entry starts out in the free list;
 * it migrates to the active list when primed using ibt_get_paths()
 * and ibt_modify_ud_dest() for transmission to a specific destination.
 * In the active list, the entry has a reference count indicating the
 * number of ongoing/uncompleted transmits that reference it. The
 * entry is left in the active list even after the reference count
 * goes to 0, since successive transmits can find it there and do
 * not need to set up another entry (i.e., the path information is
 * cached using the active list). Entries on the active list are
 * also hashed using the destination link address as a key for faster
 * lookups during transmits.
 *
 * For any destination address (unicast or multicast, whatever the
 * join states), there will be at most one entry in the active list.
 * Entries with a 0 reference count on the active list can be reused
 * for a transmit to a new destination, if the free list is empty.
 *
 * The AH free list insertion/deletion is protected with the id_ac_mutex,
 * since the async thread and Tx callback handlers insert/delete. The
 * active list does not need a lock (all operations are done by the
 * async thread) but updates to the reference count are done atomically
 * (increments by the Tx path, decrements by the Tx callback handler).
 */
#define	IBD_ACACHE_INSERT_FREE(state, ce) \
	list_insert_head(&state->id_ah_free, ce)
#define	IBD_ACACHE_GET_FREE(state) \
	list_get_head(&state->id_ah_free)
#define	IBD_ACACHE_INSERT_ACTIVE(state, ce) {			\
	int _ret_;						\
	list_insert_head(&state->id_ah_active, ce);		\
	_ret_ = mod_hash_insert(state->id_ah_active_hash,	\
	    (mod_hash_key_t)&ce->ac_mac, (mod_hash_val_t)ce);	\
	ASSERT(_ret_ == 0);					\
	state->id_ac_hot_ace = ce;				\
}
#define	IBD_ACACHE_PULLOUT_ACTIVE(state, ce) {			\
	list_remove(&state->id_ah_active, ce);			\
	if (state->id_ac_hot_ace == ce)				\
		state->id_ac_hot_ace = NULL;			\
	(void) mod_hash_remove(state->id_ah_active_hash,	\
	    (mod_hash_key_t)&ce->ac_mac, (mod_hash_val_t)ce);	\
}
#define	IBD_ACACHE_GET_ACTIVE(state) \
	list_get_head(&state->id_ah_active)

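/*
 * Illustrative sketch (not part of the driver): a Tx-path lookup against
 * this cache, assuming mod_hash_find() from <sys/modhash.h> and the
 * id_ac_hot_ace/id_ah_active_hash fields of ibd_state_t declared later
 * in this file. The authoritative lookup code is in ibd.c.
 *
 *	ibd_ace_t *ace;
 *	mod_hash_val_t hval;
 *
 *	if ((ace = state->id_ac_hot_ace) != NULL &&
 *	    bcmp(&ace->ac_mac, mac, IPOIB_ADDRL) == 0) {
 *		INC_REF(ace, 1);	fast path: last used entry
 *	} else if (mod_hash_find(state->id_ah_active_hash,
 *	    (mod_hash_key_t)mac, &hval) == 0) {
 *		ace = (ibd_ace_t *)hval;
 *		INC_REF(ace, 1);	hashed active entry
 *	} else {
 *		ace = NULL;		miss: queue an IBD_ASYNC_GETAH request
 *	}
 */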
/*
 * Padding for nd6 Neighbor Solicitation and Advertisement needs to be at
 * the front of the optional src/tgt link layer address. Right now Solaris
 * inserts the padding by default at the end. The routine that does this is
 * nce_xmit() in ip_ndp.c. It copies the nd_lla_addr after the nd_opt_hdr_t.
 * So when the packet comes down from the IP layer to the IBD driver, it is
 * in the following format: [IPoIB_PTXHDR_T][INET6 packet][ICMP6][OPT_ND_HDR_T]
 * of size 2 bytes, followed by [22 bytes of ipoib_machdr]. As a result the
 * machdr is not 4 byte aligned and has 2 bytes of padding at the end.
 *
 * The send routine in the IBD driver changes this packet as follows:
 * [IPoIB_HDR_T][INET6 packet][ICMP6][OPT_ND_HDR_T + 2 bytes of padding]
 * followed by [22 bytes of ipoib_machdr], resulting in a machdr that is
 * 4 byte aligned.
 *
 * At the receiving side, ibd_process_rx takes the above packet, removes
 * the two bytes of front padding and inserts them at the end. This is
 * because the IP layer does not understand padding at the front.
 */
#define	IBD_PAD_NSNA(ip6h, len, type) {					\
	uchar_t 	*nd_lla_ptr;					\
	icmp6_t 	*icmp6;						\
	nd_opt_hdr_t	*opt;						\
	int 		i;						\
									\
	icmp6 = (icmp6_t *)&ip6h[1];					\
	len -= sizeof (nd_neighbor_advert_t);				\
	if (((icmp6->icmp6_type == ND_NEIGHBOR_SOLICIT) ||		\
	    (icmp6->icmp6_type == ND_NEIGHBOR_ADVERT)) &&		\
	    (len != 0)) {						\
		opt = (nd_opt_hdr_t *)((uint8_t *)ip6h			\
		    + IPV6_HDR_LEN + sizeof (nd_neighbor_advert_t));	\
		ASSERT(opt != NULL);					\
		nd_lla_ptr = (uchar_t *)&opt[1];			\
		if (type == IBD_SEND) {					\
			for (i = IPOIB_ADDRL; i > 0; i--)		\
				*(nd_lla_ptr + i + 1) =			\
				    *(nd_lla_ptr + i - 1);		\
		} else {						\
			for (i = 0; i < IPOIB_ADDRL; i++)		\
				*(nd_lla_ptr + i) =			\
				    *(nd_lla_ptr + i + 2);		\
		}							\
		*(nd_lla_ptr + i) = 0;					\
		*(nd_lla_ptr + i + 1) = 0;				\
	}								\
}

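/*
 * Worked example of the shuffle done by IBD_PAD_NSNA on the link-layer
 * address (lla) that follows the option header, with B0..B19 denoting
 * the IPOIB_ADDRL (20) lla bytes and PP the 2 bytes of padding:
 *
 *	IBD_SEND:	[B0 B1 ... B19 PP]  becomes  [PP B0 B1 ... B19]
 *			(lla shifted up by 2, the first two bytes zeroed)
 *	IBD_RECV:	[PP B0 B1 ... B19]  becomes  [B0 B1 ... B19 PP]
 *			(lla shifted down by 2, the last two bytes zeroed)
 */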

/*
 * IETF defined IPoIB encapsulation header, with 2 bytes of ethertype
 * followed by 2 reserved bytes. This is at the start of the
 * datagram sent to and received over the wire by the driver.
 */
typedef struct ipoib_header {
	ushort_t	ipoib_type;
	ushort_t	ipoib_mbz;
} ipoib_hdr_t;

#define	IPOIB_HDRSIZE	sizeof (struct ipoib_header)

/*
 * IETF defined IPoIB link address; IBA QPN, followed by GID,
 * which has a prefix and suffix, as reported via ARP.
 */
typedef struct ipoib_mac {
	uint32_t	ipoib_qpn;
	uint32_t	ipoib_gidpref[2];
	uint32_t	ipoib_gidsuff[2];
} ipoib_mac_t;

#define	IPOIB_ADDRL	sizeof (struct ipoib_mac)

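/*
 * For reference, IPOIB_ADDRL works out to 20 bytes: a 4-byte QPN plus an
 * 8-byte GID prefix and an 8-byte GID suffix. This is the link-layer
 * address length the driver uses throughout (e.g., for the ac_mac
 * hashing described above).
 */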
/*
 * Pseudo header prepended to datagram in DLIOCRAW transmit path
 * and when GLD hands the datagram to the gldm_send entry point.
 */
typedef struct ipoib_ptxhdr {
	ipoib_mac_t	ipoib_dest;
	ipoib_hdr_t	ipoib_rhdr;
} ipoib_ptxhdr_t;

#define	IPOIBDLSAP(p, offset)	((ipoib_ptxhdr_t *)((caddr_t)(p)+offset))

/*
 * The pseudo-GRH structure that sits before the data in the
 * receive buffer, and is overlaid on top of the real GRH.
 * The driver sets the ipoib_vertcflow to 0 if the pseudo-GRH
 * does not hold valid information. If it is indicated valid,
 * the driver must additionally provide the sender's qpn in
 * network byte order in ipoib_sqpn, and not touch the
 * remaining parts which were DMA'ed in by the IBA hardware.
 */
typedef struct ipoib_pgrh {
	uint32_t	ipoib_vertcflow;
	uint32_t	ipoib_sqpn;
	uint32_t	ipoib_sgid_pref[2];
	uint32_t	ipoib_sgid_suff[2];
	uint32_t	ipoib_dgid_pref[2];
	uint32_t	ipoib_dgid_suff[2];
} ipoib_pgrh_t;

/*
 * The GRH is also dma'ed into recv buffers, thus space needs
 * to be allocated for them.
 */
#define	IPOIB_GRH_SIZE	sizeof (ipoib_pgrh_t)

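/*
 * For reference, sizeof (ipoib_pgrh_t) is 40 bytes (ten uint32_t fields),
 * the same size as a real IB GRH, so each receive buffer sets aside this
 * much space ahead of the IPoIB payload.
 */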
/* support the RC (reliable connected) mode */
#define	IBD_MAC_ADDR_RC		0x80000000
/* support the UC (unreliable connected) mode */
#define	IBD_MAC_ADDR_UC		0x40000000

#define	IBD_RC_SERVICE_ID 0x100000000000000ULL

/*
 * Legacy OFED used an incorrect service ID (with one additional zero
 * digit) for many years. To interoperate with legacy OFED, we also
 * support this incorrect service ID here.
 */
#define	IBD_RC_SERVICE_ID_OFED_INTEROP 0x1000000000000000ULL

#define	IBD_RC_MIN_CQ_SIZE	0x7f

/* Number of ibt_wc_t provided for each RC channel */
#define	IBD_RC_MAX_CQ_WC	0x3f

#if defined(_KERNEL) && !defined(_BOOT)

#include <sys/ib/ibtl/ibti.h>
#include <sys/ib/ib_pkt_hdrs.h>
#include <sys/list.h>
#include <sys/mac_provider.h>
#include <sys/mac_ib.h>
#include <sys/modhash.h>

/* State of a reliable connected channel (ibd_rc_chan_t->chan_state) */
typedef enum {
	IBD_RC_STATE_INIT = 0,

	/* Active side */
	IBD_RC_STATE_ACT_REP_RECV,	/* reply received */
	IBD_RC_STATE_ACT_ESTAB,		/* established, ready to send */
	IBD_RC_STATE_ACT_REJECT,	/* rejected */
	/* Someone else is closing this channel, please don't re-close it */
	IBD_RC_STATE_ACT_CLOSING,
	IBD_RC_STATE_ACT_CLOSED,
	IBD_RC_STATE_ACT_ERROR,

	/* Passive side */
	IBD_RC_STATE_PAS_REQ_RECV,	/* request received */
	IBD_RC_STATE_PAS_ESTAB,		/* established, ready to receive */
	IBD_RC_STATE_PAS_REJECT,	/* rejected */

	IBD_RC_STATE_PAS_CLOSED
} ibd_rc_chan_state_t;

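/*
 * Typical progressions through the states above (informational sketch;
 * the actual transitions are driven by the connection handling code in
 * ibd_cm.c):
 *
 *	Active (connecting) side:
 *	    INIT -> ACT_REP_RECV -> ACT_ESTAB -> ACT_CLOSING -> ACT_CLOSED
 *	    (ACT_REJECT or ACT_ERROR if the connect is rejected or fails)
 *
 *	Passive (listening) side:
 *	    INIT -> PAS_REQ_RECV -> PAS_ESTAB -> PAS_CLOSED
 *	    (PAS_REJECT if the incoming request is rejected)
 */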
/*
 * Structure to encapsulate various types of async requests.
 */
typedef struct ibd_acache_rq {
	struct list_node 	rq_list; 	/* list of pending work */
	int			rq_op;		/* what operation */
	ipoib_mac_t		rq_mac;
	ib_gid_t		rq_gid;
	void			*rq_ptr;
	void			*rq_ptr2;
} ibd_req_t;

typedef struct ibd_mcache {
	struct list_node	mc_list;	/* full/non list */
	uint8_t			mc_jstate;
	boolean_t		mc_fullreap;
	ibt_mcg_info_t		mc_info;
	ibd_req_t		mc_req;		/* to queue LEAVE req */
} ibd_mce_t;

typedef struct ibd_acache_s {
	struct list_node	ac_list;	/* free/active list */
	ibt_ud_dest_hdl_t	ac_dest;
	ipoib_mac_t		ac_mac;
	uint32_t		ac_ref;
	ibd_mce_t		*ac_mce;	/* for MCG AHs */

	/* For Reliable Connected mode */
	struct ibd_rc_chan_s	*ac_chan;
	/* protect tx_too_big_ongoing */
	kmutex_t		tx_too_big_mutex;
	/* Deal with too big packet */
	boolean_t		tx_too_big_ongoing;
} ibd_ace_t;

#define	IBD_MAX_SQSEG	59
#define	IBD_MAX_RQSEG	1

typedef enum {
	IBD_WQE_SEND,
	IBD_WQE_RECV
} ibd_wqe_type_t;

typedef enum {
	IBD_WQE_TXBUF = 1,
	IBD_WQE_LSOBUF = 2,
	IBD_WQE_MAPPED = 3,
	IBD_WQE_RC_COPYBUF = 4
} ibd_wqe_buftype_t;

#ifdef DEBUG
typedef struct ibd_rc_stat_s {
	kstat_named_t		rc_rcv_trans_byte;
	kstat_named_t		rc_rcv_trans_pkt;
	kstat_named_t		rc_rcv_copy_byte;
	kstat_named_t		rc_rcv_copy_pkt;
	kstat_named_t		rc_rcv_alloc_fail;

	kstat_named_t		rc_rcq_err;	/* fail in rcq handler */

	kstat_named_t		rc_rwqe_short;	/* short rwqe */

	kstat_named_t		rc_xmt_bytes;
	/* pkt size <= state->id_rc_tx_copy_thresh */
	kstat_named_t		rc_xmt_small_pkt;
	kstat_named_t		rc_xmt_fragmented_pkt;
	/* fail in ibt_map_mem_iov() */
	kstat_named_t		rc_xmt_map_fail_pkt;
	/* succ in ibt_map_mem_iov() */
	kstat_named_t		rc_xmt_map_succ_pkt;

	kstat_named_t		rc_ace_not_found;	/* ace not found */
	/* no swqe even after recycle */
	kstat_named_t		rc_scq_no_swqe;
	/* no tx large buf even after recycle */
	kstat_named_t		rc_scq_no_largebuf;

	/* short swqe in ibd_send() */
	kstat_named_t		rc_swqe_short;
	/* call mac_tx_update() when there are enough swqes */
	kstat_named_t		rc_swqe_mac_update;
	/* short large buf in ibd_send() */
	kstat_named_t		rc_xmt_buf_short;
	/* call mac_tx_update() when there are enough Tx large buffers */
	kstat_named_t		rc_xmt_buf_mac_update;

	kstat_named_t		rc_conn_succ;	/* # of successful connects */
	kstat_named_t		rc_conn_fail;	/* # of failed connects */
	/* ace->ac_chan == NULL for unicast packet */
	kstat_named_t		rc_null_conn;
	/* not in active established state */
	kstat_named_t		rc_no_estab_conn;

	kstat_named_t		rc_act_close;	/* call ibd_rc_act_close() */
	kstat_named_t		rc_pas_close;	/* call ibd_rc_pas_close() */
	kstat_named_t		rc_delay_ace_recycle;
	kstat_named_t		rc_act_close_simultaneous;

	kstat_named_t		rc_reset_cnt;	/* # of RC channel resets */
	kstat_named_t		rc_timeout_act;
	kstat_named_t		rc_timeout_pas;
} ibd_rc_stat_t;
#endif

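/*
 * Illustrative sketch (not part of the driver) of how a named-kstat set
 * such as ibd_rc_stat_t is typically published via the DDI kstat
 * interfaces; the module/name strings below are hypothetical and the
 * real DEBUG-only setup is in ibd.c:
 *
 *	ibd_rc_stat_t *sp;
 *	kstat_t *ksp = kstat_create("ibd", instance, "statistics_rc",
 *	    "net", KSTAT_TYPE_NAMED,
 *	    sizeof (ibd_rc_stat_t) / sizeof (kstat_named_t), 0);
 *
 *	if (ksp != NULL) {
 *		sp = (ibd_rc_stat_t *)ksp->ks_data;
 *		kstat_named_init(&sp->rc_rcv_trans_byte,
 *		    "rc_rcv_trans_byte", KSTAT_DATA_UINT64);
 *		(one kstat_named_init() per field)
 *		kstat_install(ksp);
 *	}
 */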
typedef struct ibd_rc_chan_list_s {
	/* This mutex protects chan_list and ibd_rc_chan_t.next */
	kmutex_t		chan_list_mutex;
	struct ibd_rc_chan_s	*chan_list;
} ibd_rc_chan_list_t;

typedef struct ibd_rc_tx_largebuf_s {
	struct ibd_rc_tx_largebuf_s	*lb_next;
	uint8_t				*lb_buf;
} ibd_rc_tx_largebuf_t;

/*
 * Pre-registered copybuf used for send and receive
 */
typedef struct ibd_copybuf_s {
	ibt_wr_ds_t		ic_sgl;
	uint8_t			*ic_bufaddr;
} ibd_copybuf_t;

typedef struct ibd_wqe_s {
	struct ibd_wqe_s	*w_next;
	ibd_copybuf_t		w_copybuf;
	mblk_t			*im_mblk;
} ibd_wqe_t;

/*
 * Send WQE
 */
typedef struct ibd_swqe_s {
	ibd_wqe_t		w_ibd_swqe;
	ibd_wqe_buftype_t	w_buftype;
	ibt_send_wr_t		w_swr;
	ibd_ace_t		*w_ahandle;
	ibt_mi_hdl_t		w_mi_hdl;
	ibt_wr_ds_t		w_sgl[IBD_MAX_SQSEG];
	ibd_rc_tx_largebuf_t	*w_rc_tx_largebuf;
} ibd_swqe_t;

#define	swqe_next		w_ibd_swqe.w_next
#define	swqe_copybuf		w_ibd_swqe.w_copybuf
#define	swqe_im_mblk		w_ibd_swqe.im_mblk
#define	SWQE_TO_WQE(swqe)	(ibd_wqe_t *)&((swqe)->w_ibd_swqe)
#define	WQE_TO_SWQE(wqe)	(ibd_swqe_t *)wqe

/*
 * Receive WQE
 */
typedef struct ibd_rwqe_s {
	ibd_wqe_t		w_ibd_rwqe;
	struct ibd_state_s	*w_state;
	ibt_recv_wr_t		w_rwr;
	frtn_t			w_freemsg_cb;
	boolean_t		w_freeing_wqe;
	struct ibd_rc_chan_s	*w_chan;
} ibd_rwqe_t;

#define	rwqe_next		w_ibd_rwqe.w_next
#define	rwqe_copybuf		w_ibd_rwqe.w_copybuf
#define	rwqe_im_mblk		w_ibd_rwqe.im_mblk
#define	RWQE_TO_WQE(rwqe)	(ibd_wqe_t *)&((rwqe)->w_ibd_rwqe)
#define	WQE_TO_RWQE(wqe)	(ibd_rwqe_t *)wqe

typedef struct ibd_list_s {
	kmutex_t		dl_mutex;
	ibd_wqe_t		*dl_head;
	union {
		boolean_t	pending_sends;
		uint32_t	bufs_outstanding;
	} ustat;
	uint32_t		dl_cnt;
} ibd_list_t;

#define	dl_pending_sends	ustat.pending_sends
#define	dl_bufs_outstanding	ustat.bufs_outstanding

7260Sstevel@tonic-gate /*
7278917SVenkatakrishnan.Rajagopalan@Sun.COM  * LSO buffers
7288917SVenkatakrishnan.Rajagopalan@Sun.COM  *
7298917SVenkatakrishnan.Rajagopalan@Sun.COM  * Under normal circumstances we should never need to use any buffer
7308917SVenkatakrishnan.Rajagopalan@Sun.COM  * that's larger than MTU.  Unfortunately, IB HCA has limitations
7318917SVenkatakrishnan.Rajagopalan@Sun.COM  * on the length of SGL that are much smaller than those for regular
7328917SVenkatakrishnan.Rajagopalan@Sun.COM  * ethernet NICs.  Since the network layer doesn't care to limit the
7338917SVenkatakrishnan.Rajagopalan@Sun.COM  * number of mblk fragments in any send mp chain, we end up having to
7348917SVenkatakrishnan.Rajagopalan@Sun.COM  * use these larger-than-MTU sized (larger than id_tx_buf_sz actually)
7358917SVenkatakrishnan.Rajagopalan@Sun.COM  * buffers occasionally.
7368917SVenkatakrishnan.Rajagopalan@Sun.COM  */
7378917SVenkatakrishnan.Rajagopalan@Sun.COM typedef struct ibd_lsobuf_s {
7388917SVenkatakrishnan.Rajagopalan@Sun.COM 	struct ibd_lsobuf_s *lb_next;
7398917SVenkatakrishnan.Rajagopalan@Sun.COM 	uint8_t		*lb_buf;
7408917SVenkatakrishnan.Rajagopalan@Sun.COM 	int		lb_isfree;
7418917SVenkatakrishnan.Rajagopalan@Sun.COM } ibd_lsobuf_t;
7428917SVenkatakrishnan.Rajagopalan@Sun.COM 
7438917SVenkatakrishnan.Rajagopalan@Sun.COM typedef struct ibd_lsobkt_s {
7448917SVenkatakrishnan.Rajagopalan@Sun.COM 	uint8_t		*bkt_mem;
7458917SVenkatakrishnan.Rajagopalan@Sun.COM 	ibd_lsobuf_t	*bkt_bufl;
7468917SVenkatakrishnan.Rajagopalan@Sun.COM 	ibd_lsobuf_t	*bkt_free_head;
7478917SVenkatakrishnan.Rajagopalan@Sun.COM 	ibt_mr_hdl_t	bkt_mr_hdl;
7488917SVenkatakrishnan.Rajagopalan@Sun.COM 	ibt_mr_desc_t	bkt_mr_desc;
7498917SVenkatakrishnan.Rajagopalan@Sun.COM 	uint_t		bkt_nelem;
7508917SVenkatakrishnan.Rajagopalan@Sun.COM 	uint_t		bkt_nfree;
7518917SVenkatakrishnan.Rajagopalan@Sun.COM } ibd_lsobkt_t;
7528917SVenkatakrishnan.Rajagopalan@Sun.COM 
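/*
 * Informational note (the authoritative logic is in ibd.c): when the
 * number of mblk fragments in a send chain exceeds id_max_sqseg, the
 * send path has to fall back to copying. A small packet fits in the
 * swqe's pre-mapped copybuf (id_tx_buf_sz bytes); an LSO packet larger
 * than that is copied into a single buffer taken from the LSO bucket
 * (ibd_lsobkt_t above), so it can be posted with one SGL entry.
 */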
#define	IBD_PORT_DRIVER		0x1
#define	IBD_PARTITION_OBJ	0x2

/*
 * Posting to a single software rx post queue is contentious,
 * so it is broken out into an array of multiple queues.
 *
 * Try to ensure rx_queue structs fall in different cache lines using a filler.
 * Note: the RX_QUEUE_CACHE_LINE needs to change if the struct changes.
 */
#define	RX_QUEUE_CACHE_LINE \
	(64 - (sizeof (kmutex_t) + sizeof (ibd_wqe_t *) + sizeof (uint_t)))
typedef struct ibd_rx_queue_s {
	kmutex_t		rx_post_lock;
	ibd_wqe_t		*rx_head;
	uint_t			rx_cnt;
	uint8_t			rx_pad[RX_QUEUE_CACHE_LINE];
} ibd_rx_queue_t;

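/*
 * For reference, on a 64-bit kernel (8-byte kmutex_t and pointer,
 * 4-byte uint_t) RX_QUEUE_CACHE_LINE works out to 64 - (8 + 8 + 4) = 44
 * bytes of filler, padding each ibd_rx_queue_t out to a full 64-byte
 * cache line.
 */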
/*
 * This structure maintains information per port per HCA
 * (per network interface).
 */
typedef struct ibd_state_s {
	uint_t			id_type;
	dev_info_t		*id_dip;
	ibt_clnt_hdl_t		id_ibt_hdl;
	ibt_hca_hdl_t		id_hca_hdl;
	ibt_pd_hdl_t		id_pd_hdl;
	kmem_cache_t		*id_req_kmc;

	ibd_list_t		id_tx_rel_list;

	uint32_t		id_running;

	uint32_t		id_max_sqseg;
	uint32_t		id_max_sqseg_hiwm;
	ibd_list_t		id_tx_list;
	ddi_softintr_t		id_tx;
	uint32_t		id_tx_sends;

	kmutex_t		id_txpost_lock;
	ibd_swqe_t		*id_tx_head;
	ibd_swqe_t		*id_tx_tail;
	int			id_tx_busy;

	uint_t			id_tx_buf_sz;
	uint8_t			*id_tx_bufs;
	ibd_swqe_t		*id_tx_wqes;
	ibt_mr_hdl_t		id_tx_mr_hdl;
	ibt_mr_desc_t		id_tx_mr_desc;

	kmutex_t		id_lso_lock;
	ibd_lsobkt_t		*id_lso;

	kmutex_t		id_scq_poll_lock;
	int			id_scq_poll_busy;

	ibt_cq_hdl_t		id_scq_hdl;
	ibt_wc_t		*id_txwcs;
	uint32_t		id_txwcs_size;

	int			id_rx_nqueues;
	ibd_rx_queue_t		*id_rx_queues;
	int			id_rx_post_queue_index;
	uint32_t		id_rx_post_active;

	ibd_rwqe_t		*id_rx_wqes;
	uint8_t			*id_rx_bufs;
	ibt_mr_hdl_t		id_rx_mr_hdl;
	ibt_mr_desc_t		id_rx_mr_desc;
	uint_t			id_rx_buf_sz;
	/*
	 * id_ud_num_rwqe
	 * Number of "receive WQE" elements that will be allocated and used
	 * by ibd. This parameter is limited by the maximum channel size of
	 * the HCA. Each buffer in the receive wqe will be of MTU size.
	 */
	uint32_t		id_ud_num_rwqe;
	ibd_list_t		id_rx_list;
	ddi_softintr_t		id_rx;
	uint32_t		id_rx_bufs_outstanding_limit;
	uint32_t		id_rx_allocb;
	uint32_t		id_rx_allocb_failed;
	ibd_list_t		id_rx_free_list;

	kmutex_t		id_rcq_poll_lock;
	int			id_rcq_poll_busy;
	uint32_t		id_rxwcs_size;
	ibt_wc_t		*id_rxwcs;
	ibt_cq_hdl_t		id_rcq_hdl;

	ibt_channel_hdl_t	id_chnl_hdl;
	ib_pkey_t		id_pkey;
	uint16_t		id_pkix;
	uint8_t			id_port;
	ibt_mcg_info_t		*id_mcinfo;

	mac_handle_t		id_mh;
	mac_resource_handle_t	id_rh;
	ib_gid_t		id_sgid;
	ib_qpn_t		id_qpnum;
	ipoib_mac_t		id_macaddr;
	ib_gid_t		id_mgid;
	ipoib_mac_t		id_bcaddr;

	int			id_mtu;
	uchar_t			id_scope;

	kmutex_t		id_acache_req_lock;
	kcondvar_t		id_acache_req_cv;
	struct list		id_req_list;
	kt_did_t		id_async_thrid;

	kmutex_t		id_ac_mutex;
	ibd_ace_t		*id_ac_hot_ace;
	struct list		id_ah_active;
	struct list		id_ah_free;
	ipoib_mac_t		id_ah_addr;
	ibd_req_t		id_ah_req;
	char			id_ah_op;
	uint64_t		id_ah_error;
	ibd_ace_t		*id_ac_list;
	mod_hash_t		*id_ah_active_hash;

	kmutex_t		id_mc_mutex;
	struct list		id_mc_full;
	struct list		id_mc_non;

	kmutex_t		id_trap_lock;
	kcondvar_t		id_trap_cv;
	boolean_t		id_trap_stop;
	uint32_t		id_trap_inprog;

	char			id_prom_op;

	kmutex_t		id_sched_lock;
	int			id_sched_needed;
	int			id_sched_cnt;
	int			id_sched_lso_cnt;

	kmutex_t		id_link_mutex;
	link_state_t		id_link_state;
	uint64_t		id_link_speed;

	uint64_t		id_num_intrs;
	uint64_t		id_tx_short;
	/*
	 * id_ud_num_swqe
	 * Number of "send WQE" elements that will be allocated and used by
	 * ibd. When tuning this parameter, the size of pre-allocated, pre-
	 * mapped copy buffer in each of these send wqes must be taken into
	 * account. This copy buffer size is determined by the value of
	 * IBD_TX_BUF_SZ (this is currently set to the same value of
	 * ibd_tx_copy_thresh, but may be changed independently if needed).
	 */
	uint32_t		id_ud_num_swqe;

	uint64_t		id_xmt_bytes;
	uint64_t		id_rcv_bytes;
	uint64_t		id_multi_xmt;
	uint64_t		id_brd_xmt;
	uint64_t		id_multi_rcv;
	uint64_t		id_brd_rcv;
	uint64_t		id_xmt_pkt;
	uint64_t		id_rcv_pkt;

	uint32_t		id_hwcksum_capab;
	boolean_t		id_lso_policy;
	boolean_t		id_lso_capable;
	uint_t			id_lso_maxlen;
	int			id_hca_res_lkey_capab;
	ibt_lkey_t		id_res_lkey;

	boolean_t		id_bgroup_created;
	kmutex_t		id_macst_lock;
	kcondvar_t		id_macst_cv;
	uint32_t		id_mac_state;

	/* For Reliable Connected Mode */
	boolean_t		id_enable_rc;
	boolean_t		rc_enable_srq;

	int			rc_mtu;
	uint32_t		rc_tx_max_sqseg;
	/*
	 * In IPoIB over Reliable Connected mode, the mac address has an
	 * "IBD_MAC_ADDR_RC" prefix added to it. But for the loopback filter
	 * in ibd_process_rx(), the input mac address should not include the
	 * "IBD_MAC_ADDR_RC" prefix.
	 *
	 * So, we introduce rc_macaddr_loopback for the loopback filter in
	 * IPoIB over Reliable Connected mode.
	 *
	 * rc_macaddr_loopback is id_macaddr with the "IBD_MAC_ADDR_RC"
	 * prefix excluded.
	 */
	ipoib_mac_t		rc_macaddr_loopback;
95011534SKevin.Ge@Sun.COM 
95111534SKevin.Ge@Sun.COM 	ibt_srv_hdl_t		rc_listen_hdl;
95211534SKevin.Ge@Sun.COM 	ibt_sbind_hdl_t		rc_listen_bind;
95311534SKevin.Ge@Sun.COM 	ibt_srv_hdl_t		rc_listen_hdl_OFED_interop;
95411534SKevin.Ge@Sun.COM 	ibt_sbind_hdl_t		rc_listen_bind_OFED_interop;
95511534SKevin.Ge@Sun.COM 
95611534SKevin.Ge@Sun.COM 	ibd_rc_chan_list_t	rc_pass_chan_list;
95711534SKevin.Ge@Sun.COM 	/* obsolete active channel list */
95811534SKevin.Ge@Sun.COM 	ibd_rc_chan_list_t	rc_obs_act_chan_list;
95911534SKevin.Ge@Sun.COM 
96011534SKevin.Ge@Sun.COM 	kmutex_t		rc_ace_recycle_lock;
96111534SKevin.Ge@Sun.COM 	ibd_ace_t		*rc_ace_recycle;
96211534SKevin.Ge@Sun.COM 
96311534SKevin.Ge@Sun.COM 	/* Send */
96411534SKevin.Ge@Sun.COM 	/*
96511534SKevin.Ge@Sun.COM 	 * This mutex protects rc_tx_largebuf_free_head, rc_tx_largebuf_nfree
96611534SKevin.Ge@Sun.COM 	 * and ibd_rc_tx_largebuf_t->lb_next; see the free-list sketch below
96711534SKevin.Ge@Sun.COM 	 */
96811534SKevin.Ge@Sun.COM 	kmutex_t		rc_tx_large_bufs_lock;
96911534SKevin.Ge@Sun.COM 	ibd_rc_tx_largebuf_t	*rc_tx_largebuf_free_head;
97011534SKevin.Ge@Sun.COM 	uint_t			rc_tx_largebuf_nfree;
97111534SKevin.Ge@Sun.COM 	/* The chunk of memory holding all Tx large buffers */
97211534SKevin.Ge@Sun.COM 	uint8_t			*rc_tx_mr_bufs;
97311534SKevin.Ge@Sun.COM 	ibt_mr_hdl_t		rc_tx_mr_hdl;
97411534SKevin.Ge@Sun.COM 	ibt_mr_desc_t		rc_tx_mr_desc;
97511534SKevin.Ge@Sun.COM 	ibd_rc_tx_largebuf_t	*rc_tx_largebuf_desc_base;	/* base addr */
97611534SKevin.Ge@Sun.COM 
97711534SKevin.Ge@Sun.COM 	boolean_t		rc_enable_iov_map;
97811534SKevin.Ge@Sun.COM 	uint_t			rc_max_sqseg_hiwm;
97911534SKevin.Ge@Sun.COM 
98011534SKevin.Ge@Sun.COM 	/* For SRQ */
98111534SKevin.Ge@Sun.COM 	uint32_t 		rc_srq_size;
98211534SKevin.Ge@Sun.COM 	ibt_srq_hdl_t		rc_srq_hdl;
98311534SKevin.Ge@Sun.COM 	ibd_list_t		rc_srq_rwqe_list;
98411534SKevin.Ge@Sun.COM 	ibd_list_t		rc_srq_free_list;
98511534SKevin.Ge@Sun.COM 	ibd_rwqe_t		*rc_srq_rwqes;
98611534SKevin.Ge@Sun.COM 	uint8_t			*rc_srq_rx_bufs;
98711534SKevin.Ge@Sun.COM 	ibt_mr_hdl_t		rc_srq_rx_mr_hdl;
98811534SKevin.Ge@Sun.COM 	ibt_mr_desc_t		rc_srq_rx_mr_desc;
98911534SKevin.Ge@Sun.COM 
99011534SKevin.Ge@Sun.COM 	/* For chained receive */
99111534SKevin.Ge@Sun.COM 	kmutex_t		rc_rx_lock;
99211534SKevin.Ge@Sun.COM 	mblk_t			*rc_rx_mp;
99311534SKevin.Ge@Sun.COM 	mblk_t			*rc_rx_mp_tail;
99411534SKevin.Ge@Sun.COM 	uint32_t		rc_rx_mp_len;
99511534SKevin.Ge@Sun.COM 
996*13030SKevin.Ge@Sun.COM 	uint32_t		rc_num_tx_chan;
997*13030SKevin.Ge@Sun.COM 	uint32_t		rc_num_rx_chan;
998*13030SKevin.Ge@Sun.COM 
999*13030SKevin.Ge@Sun.COM 	/* Protects rc_timeout_start and rc_timeout; see timer sketch below */
1000*13030SKevin.Ge@Sun.COM 	kmutex_t		rc_timeout_lock;
1001*13030SKevin.Ge@Sun.COM 	boolean_t		rc_timeout_start;
1002*13030SKevin.Ge@Sun.COM 	timeout_id_t		rc_timeout;
1003*13030SKevin.Ge@Sun.COM 
100411534SKevin.Ge@Sun.COM 	/* Counters for RC mode */
100511534SKevin.Ge@Sun.COM 	/* RX */
100611534SKevin.Ge@Sun.COM 	/*
100711534SKevin.Ge@Sun.COM 	 * # of received packets that are handed to GLD directly, without
100811534SKevin.Ge@Sun.COM 	 * copying them
100911534SKevin.Ge@Sun.COM 	 */
101011534SKevin.Ge@Sun.COM 	uint64_t		rc_rcv_trans_byte;
101111534SKevin.Ge@Sun.COM 	uint64_t		rc_rcv_trans_pkt;
101211534SKevin.Ge@Sun.COM 	/*
101311534SKevin.Ge@Sun.COM 	 * # of received packets for which we allocate new buffers, copy the
101411534SKevin.Ge@Sun.COM 	 * packet contents into them, and then hand them to GLD
101511534SKevin.Ge@Sun.COM 	 */
101611534SKevin.Ge@Sun.COM 	uint64_t		rc_rcv_copy_byte;
101711534SKevin.Ge@Sun.COM 	uint64_t		rc_rcv_copy_pkt;
101811534SKevin.Ge@Sun.COM 	uint64_t		rc_rcv_alloc_fail;
101911534SKevin.Ge@Sun.COM 
102011534SKevin.Ge@Sun.COM #ifdef DEBUG
102111534SKevin.Ge@Sun.COM 	uint64_t		rc_rwqe_short;	/* short rwqe */
102211534SKevin.Ge@Sun.COM #endif
102311534SKevin.Ge@Sun.COM 
102411534SKevin.Ge@Sun.COM 	/* wc->wc_status != IBT_WC_SUCCESS */
102511534SKevin.Ge@Sun.COM 	uint64_t		rc_rcq_err;
102611534SKevin.Ge@Sun.COM 
102711534SKevin.Ge@Sun.COM 	/* Tx */
102811534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_bytes;
102911534SKevin.Ge@Sun.COM 
103011534SKevin.Ge@Sun.COM 	/* pkt size <= ibd_rc_tx_copy_thresh */
103111534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_small_pkt;
103211534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_fragmented_pkt;
103311534SKevin.Ge@Sun.COM 	/* fail in ibt_map_mem_iov() */
103411534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_map_fail_pkt;
103511534SKevin.Ge@Sun.COM 	/* succ in ibt_map_mem_iov() */
103611534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_map_succ_pkt;
103711534SKevin.Ge@Sun.COM 
103811534SKevin.Ge@Sun.COM 	uint64_t		rc_ace_not_found;
103911534SKevin.Ge@Sun.COM 
104011534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_drop_too_long_pkt;
104111534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_icmp_too_long_pkt;
104211534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_reenter_too_long_pkt;
104311534SKevin.Ge@Sun.COM 
104411534SKevin.Ge@Sun.COM 	/* short swqe in ibd_send() */
104511534SKevin.Ge@Sun.COM 	uint64_t		rc_swqe_short;
104611534SKevin.Ge@Sun.COM 	/* calls to mac_tx_update() once enough swqes are available again */
104711534SKevin.Ge@Sun.COM 	uint64_t		rc_swqe_mac_update;
104811534SKevin.Ge@Sun.COM 	/* short tx large copy buf in ibd_send() */
104911534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_buf_short;
105011534SKevin.Ge@Sun.COM 	/* calls to mac_tx_update() once enough Tx copy bufs are available again */
105111534SKevin.Ge@Sun.COM 	uint64_t		rc_xmt_buf_mac_update;
105211534SKevin.Ge@Sun.COM 
105311534SKevin.Ge@Sun.COM 	/* No swqe available even after calling the swqe recycle function */
105411534SKevin.Ge@Sun.COM 	uint64_t		rc_scq_no_swqe;
105511534SKevin.Ge@Sun.COM 	/* No large Tx buf available even after calling the swqe recycle function */
105611534SKevin.Ge@Sun.COM 	uint64_t		rc_scq_no_largebuf;
105711534SKevin.Ge@Sun.COM 
105811534SKevin.Ge@Sun.COM 	/* Connection setup and close */
105911534SKevin.Ge@Sun.COM 	uint64_t		rc_conn_succ;	/* # of successful connections */
106011534SKevin.Ge@Sun.COM 	uint64_t		rc_conn_fail;	/* # of failed connection attempts */
106111534SKevin.Ge@Sun.COM 	/* ace->ac_chan == NULL for unicast packet */
106211534SKevin.Ge@Sun.COM 	uint64_t		rc_null_conn;
106311534SKevin.Ge@Sun.COM 	/* not in active established state */
106411534SKevin.Ge@Sun.COM 	uint64_t		rc_no_estab_conn;
106511534SKevin.Ge@Sun.COM 
106611534SKevin.Ge@Sun.COM 	uint64_t		rc_act_close;	/* call ibd_rc_act_close() */
106711534SKevin.Ge@Sun.COM 	uint64_t		rc_pas_close;	/* call ibd_rc_pas_close() */
106811534SKevin.Ge@Sun.COM 	uint64_t		rc_delay_ace_recycle;
106911534SKevin.Ge@Sun.COM 	uint64_t		rc_act_close_simultaneous;
1070*13030SKevin.Ge@Sun.COM 	/* Failed to close a channel cleanly because it was still in use */
1071*13030SKevin.Ge@Sun.COM 	uint64_t		rc_act_close_not_clean;
1072*13030SKevin.Ge@Sun.COM 	/* The RCQ handler was still running while the RC channel was closing */
1073*13030SKevin.Ge@Sun.COM 	uint64_t		rc_pas_close_rcq_invoking;
107411534SKevin.Ge@Sun.COM 
107511534SKevin.Ge@Sun.COM 	/* # of RC channel resets */
107611534SKevin.Ge@Sun.COM 	uint64_t		rc_reset_cnt;
107711534SKevin.Ge@Sun.COM 
1078*13030SKevin.Ge@Sun.COM 	uint64_t		rc_timeout_act;
1079*13030SKevin.Ge@Sun.COM 	uint64_t		rc_timeout_pas;
1080*13030SKevin.Ge@Sun.COM 
1081*13030SKevin.Ge@Sun.COM 	/*
1082*13030SKevin.Ge@Sun.COM 	 * Failed to stop this port because it is still connecting to a
1083*13030SKevin.Ge@Sun.COM 	 * remote port
1084*13030SKevin.Ge@Sun.COM 	 */
1085*13030SKevin.Ge@Sun.COM 	uint64_t		rc_stop_connect;
1086*13030SKevin.Ge@Sun.COM 
108711534SKevin.Ge@Sun.COM #ifdef DEBUG
108811534SKevin.Ge@Sun.COM 	kstat_t 		*rc_ksp;
108911534SKevin.Ge@Sun.COM #endif
109012163SRamaswamy.Tummala@Sun.COM 	ib_guid_t		id_hca_guid;
109112163SRamaswamy.Tummala@Sun.COM 	ib_guid_t		id_port_guid;
109212163SRamaswamy.Tummala@Sun.COM 	datalink_id_t		id_dlinkid;
109312163SRamaswamy.Tummala@Sun.COM 	datalink_id_t		id_plinkid;
109412163SRamaswamy.Tummala@Sun.COM 	int			id_port_inst;
109512163SRamaswamy.Tummala@Sun.COM 	struct ibd_state_s	*id_next;
109612163SRamaswamy.Tummala@Sun.COM 	boolean_t		id_force_create;
109712163SRamaswamy.Tummala@Sun.COM 	boolean_t		id_bgroup_present;
109812163SRamaswamy.Tummala@Sun.COM 	uint_t			id_hca_max_chan_sz;
109912163SRamaswamy.Tummala@Sun.COM 
110012163SRamaswamy.Tummala@Sun.COM 	/*
110112163SRamaswamy.Tummala@Sun.COM 	 * UD Mode Tunables
110212163SRamaswamy.Tummala@Sun.COM 	 *
110312163SRamaswamy.Tummala@Sun.COM 	 * id_ud_tx_copy_thresh
110412163SRamaswamy.Tummala@Sun.COM 	 * This sets the threshold at which ibd will attempt to do a bcopy
110512163SRamaswamy.Tummala@Sun.COM 	 * of the outgoing data into a pre-mapped buffer. IPoIB driver's
110612163SRamaswamy.Tummala@Sun.COM 	 * send behavior is restricted by various parameters, so this value
110712163SRamaswamy.Tummala@Sun.COM 	 * should be changed only after careful consideration. For
110812163SRamaswamy.Tummala@Sun.COM 	 * instance, IB HCAs currently impose a relatively small limit
110912163SRamaswamy.Tummala@Sun.COM 	 * (when compared to ethernet NICs) on the length of the SGL for
111012163SRamaswamy.Tummala@Sun.COM 	 * transmit. On the other hand, the ip stack could send down mp
111112163SRamaswamy.Tummala@Sun.COM 	 * chains that are quite long when LSO is enabled.
111212163SRamaswamy.Tummala@Sun.COM 	 *
111312163SRamaswamy.Tummala@Sun.COM 	 * id_num_lso_bufs
111412163SRamaswamy.Tummala@Sun.COM 	 * Number of "larger-than-MTU" copy buffers to use for cases when the
111512163SRamaswamy.Tummala@Sun.COM 	 * outgoing mblk chain is too fragmented to be used with
111612163SRamaswamy.Tummala@Sun.COM 	 * ibt_map_mem_iov() and too large to be used with regular MTU-sized
111712163SRamaswamy.Tummala@Sun.COM 	 * copy buffers. It is not recommended to tune this variable without
111812163SRamaswamy.Tummala@Sun.COM 	 * understanding the application environment and/or memory resources.
111912163SRamaswamy.Tummala@Sun.COM 	 * The size of each of these lso buffers is determined by the value of
112012163SRamaswamy.Tummala@Sun.COM 	 * IBD_LSO_BUFSZ.
112112163SRamaswamy.Tummala@Sun.COM 	 *
112212163SRamaswamy.Tummala@Sun.COM 	 * id_num_ah
112312163SRamaswamy.Tummala@Sun.COM 	 * Number of AH cache entries to allocate
112412163SRamaswamy.Tummala@Sun.COM 	 *
112512163SRamaswamy.Tummala@Sun.COM 	 * id_hash_size
112612163SRamaswamy.Tummala@Sun.COM 	 * Hash table size for the active AH list
112712163SRamaswamy.Tummala@Sun.COM 	 *
112812163SRamaswamy.Tummala@Sun.COM 	 */
112912163SRamaswamy.Tummala@Sun.COM 	uint_t id_ud_tx_copy_thresh;
113012163SRamaswamy.Tummala@Sun.COM 	uint_t id_num_lso_bufs;
113112163SRamaswamy.Tummala@Sun.COM 	uint_t id_num_ah;
113212163SRamaswamy.Tummala@Sun.COM 	uint_t id_hash_size;
113312163SRamaswamy.Tummala@Sun.COM 
113412163SRamaswamy.Tummala@Sun.COM 	boolean_t id_create_broadcast_group;
113512163SRamaswamy.Tummala@Sun.COM 
113612163SRamaswamy.Tummala@Sun.COM 	boolean_t id_allow_coalesce_comp_tuning;
113712163SRamaswamy.Tummala@Sun.COM 	uint_t id_ud_rx_comp_count;
113812163SRamaswamy.Tummala@Sun.COM 	uint_t id_ud_rx_comp_usec;
113912163SRamaswamy.Tummala@Sun.COM 	uint_t id_ud_tx_comp_count;
114012163SRamaswamy.Tummala@Sun.COM 	uint_t id_ud_tx_comp_usec;
114112163SRamaswamy.Tummala@Sun.COM 
114212163SRamaswamy.Tummala@Sun.COM 	/* RC Mode Tunables */
114312163SRamaswamy.Tummala@Sun.COM 
114412163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_rx_comp_count;
114512163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_rx_comp_usec;
114612163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_tx_comp_count;
114712163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_tx_comp_usec;
114812163SRamaswamy.Tummala@Sun.COM 	/*
114912163SRamaswamy.Tummala@Sun.COM 	 * id_rc_tx_copy_thresh
115012163SRamaswamy.Tummala@Sun.COM 	 * This sets the threshold at which ibd will attempt to do a bcopy
115112163SRamaswamy.Tummala@Sun.COM 	 * of the outgoing data into a pre-mapped buffer.
115212163SRamaswamy.Tummala@Sun.COM 	 *
115312163SRamaswamy.Tummala@Sun.COM 	 * id_rc_rx_copy_thresh
115412163SRamaswamy.Tummala@Sun.COM 	 * If (the size of incoming buffer <= id_rc_rx_copy_thresh), ibd
115512163SRamaswamy.Tummala@Sun.COM 	 * will attempt to allocate a buffer and do a bcopy of the incoming
115612163SRamaswamy.Tummala@Sun.COM 	 * data into the allocated buffer.
115712163SRamaswamy.Tummala@Sun.COM 	 *
115812163SRamaswamy.Tummala@Sun.COM 	 * id_rc_rx_rwqe_thresh
115912163SRamaswamy.Tummala@Sun.COM 	 * If (the number of available rwqe < ibd_rc_rx_rwqe_thresh), ibd
116012163SRamaswamy.Tummala@Sun.COM 	 * will attempt to allocate a buffer and do a bcopy of the incoming
116112163SRamaswamy.Tummala@Sun.COM 	 * data into the allocated buffer.
116212163SRamaswamy.Tummala@Sun.COM 	 *
116312163SRamaswamy.Tummala@Sun.COM 	 * id_rc_num_swqe
116412163SRamaswamy.Tummala@Sun.COM 	 * 1) Send CQ size = ibd_rc_num_swqe
116512163SRamaswamy.Tummala@Sun.COM 	 * 2) The send queue size = ibd_rc_num_swqe - 1
116612163SRamaswamy.Tummala@Sun.COM 	 * 3) Number of pre-allocated Tx buffers for ibt_post_send() =
116712163SRamaswamy.Tummala@Sun.COM 	 * ibd_rc_num_swqe - 1.
116812163SRamaswamy.Tummala@Sun.COM 	 *
116912163SRamaswamy.Tummala@Sun.COM 	 * id_rc_num_rwqe
1170*13030SKevin.Ge@Sun.COM 	 * 1) For non-SRQ, we pre-post id_rc_num_rwqe WRs via
117112163SRamaswamy.Tummala@Sun.COM 	 * ibt_post_receive() on the receive queue of each RC channel.
1172*13030SKevin.Ge@Sun.COM 	 * 2) For SRQ and non-SRQ, receive CQ size = id_rc_num_rwqe
117312163SRamaswamy.Tummala@Sun.COM 	 *
117412163SRamaswamy.Tummala@Sun.COM 	 * For SRQ:
117512163SRamaswamy.Tummala@Sun.COM 	 * If using SRQ, we allocate id_rc_num_srq buffers (each of size equal
117612163SRamaswamy.Tummala@Sun.COM 	 * to the RC mtu) and post them via ibt_post_srq().
117812163SRamaswamy.Tummala@Sun.COM 	 *
117912163SRamaswamy.Tummala@Sun.COM 	 * id_rc_num_srq
1180*13030SKevin.Ge@Sun.COM 	 * id_rc_num_srq should not be larger than id_rc_num_rwqe,
118112163SRamaswamy.Tummala@Sun.COM 	 * otherwise the CQ can be overrun, producing warnings such as:
118212163SRamaswamy.Tummala@Sun.COM 	 * NOTICE: hermon0: Device Error: EQE cq overrun or protection error
118312163SRamaswamy.Tummala@Sun.COM 	 * NOTICE: hermon0: Device Error: EQE local work queue catastrophic
118412163SRamaswamy.Tummala@Sun.COM 	 * error
118512163SRamaswamy.Tummala@Sun.COM 	 * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff
118612163SRamaswamy.Tummala@Sun.COM 	 * catastrophic channel error
118712163SRamaswamy.Tummala@Sun.COM 	 * NOTICE: ibd0: HCA GUID 0003ba0001008984 port 1 PKEY ffff
118812163SRamaswamy.Tummala@Sun.COM 	 * completion queue error
118912163SRamaswamy.Tummala@Sun.COM 	 *
	 * (A sizing sketch based on these tunables follows this structure
	 * definition.)
	 */
119012163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_tx_copy_thresh;
119112163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_rx_copy_thresh;
119212163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_rx_rwqe_thresh;
119312163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_num_swqe;
119412163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_num_rwqe;
119512163SRamaswamy.Tummala@Sun.COM 	uint_t id_rc_num_srq;
11960Sstevel@tonic-gate } ibd_state_t;
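/*
 * A minimal sizing sketch, not part of the driver itself: it only restates,
 * as code, the relationships documented in the tunable comments above (the
 * UD copy-buffer pool, the LSO buffer pool, and the RC CQ/QP depths).
 * IBD_TX_BUF_SZ and IBD_LSO_BUFSZ are assumed to be macros defined earlier
 * in this header; the IBD_SIZING_SKETCH guard is purely illustrative and
 * keeps this code out of normal builds.
 */
#ifdef IBD_SIZING_SKETCH
static size_t
ibd_ud_copybuf_pool_bytes(const ibd_state_t *state)
{
	/* One pre-mapped copy buffer of IBD_TX_BUF_SZ bytes per send wqe */
	return ((size_t)state->id_ud_num_swqe * IBD_TX_BUF_SZ);
}

static size_t
ibd_lso_pool_bytes(const ibd_state_t *state)
{
	/* One larger-than-MTU buffer of IBD_LSO_BUFSZ bytes per LSO slot */
	return ((size_t)state->id_num_lso_bufs * IBD_LSO_BUFSZ);
}

static void
ibd_rc_derived_sizes(const ibd_state_t *state, uint_t *scq_size,
    uint_t *sq_size, uint_t *rcq_size)
{
	*scq_size = state->id_rc_num_swqe;		/* send CQ size */
	*sq_size = state->id_rc_num_swqe - 1;		/* send queue size */
	*rcq_size = state->id_rc_num_rwqe;		/* receive CQ size */
}
#endif	/* IBD_SIZING_SKETCH */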
11970Sstevel@tonic-gate 
119811642SKevin.Ge@Sun.COM /*
119911642SKevin.Ge@Sun.COM  * Structures to track global IBTF data, data that is shared
120011642SKevin.Ge@Sun.COM  * among the IBD device instances.  This includes the one ibt_hdl
120111642SKevin.Ge@Sun.COM  * and the list of service registrations.
120211642SKevin.Ge@Sun.COM  */
120311642SKevin.Ge@Sun.COM typedef struct ibd_service_s {
120411642SKevin.Ge@Sun.COM 	struct ibd_service_s	*is_link;
120511642SKevin.Ge@Sun.COM 	ibt_srv_hdl_t		is_srv_hdl;
120611642SKevin.Ge@Sun.COM 	ib_svc_id_t		is_sid;
120711642SKevin.Ge@Sun.COM 	uint_t			is_ref_cnt;
120811642SKevin.Ge@Sun.COM } ibd_service_t;
120911642SKevin.Ge@Sun.COM 
121011642SKevin.Ge@Sun.COM typedef struct ibd_global_state_s {
121111642SKevin.Ge@Sun.COM 	kmutex_t	ig_mutex;
121211642SKevin.Ge@Sun.COM 	ibt_clnt_hdl_t	ig_ibt_hdl;
121311642SKevin.Ge@Sun.COM 	uint_t		ig_ibt_hdl_ref_cnt;
121411642SKevin.Ge@Sun.COM 	ibd_service_t	*ig_service_list;
121511642SKevin.Ge@Sun.COM } ibd_global_state_t;
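/*
 * A minimal sketch, not the driver's actual code, of how the shared
 * ibt_clnt_hdl_t in ibd_global_state_t could be reference counted across
 * ibd instances.  "ibd_gstate" and "ibd_clnt_modinfo" are assumed names
 * for the single global state object and the IBTF client modinfo; the
 * IBD_GSTATE_SKETCH guard is purely illustrative.
 */
#ifdef IBD_GSTATE_SKETCH
extern ibd_global_state_t	ibd_gstate;
extern ibt_clnt_modinfo_t	ibd_clnt_modinfo;

static ibt_status_t
ibd_gstate_hold_ibt_hdl(dev_info_t *dip, ibt_clnt_hdl_t *hdlp)
{
	ibt_status_t ret = IBT_SUCCESS;

	mutex_enter(&ibd_gstate.ig_mutex);
	if (ibd_gstate.ig_ibt_hdl_ref_cnt == 0)
		ret = ibt_attach(&ibd_clnt_modinfo, dip, NULL,
		    &ibd_gstate.ig_ibt_hdl);
	if (ret == IBT_SUCCESS) {
		ibd_gstate.ig_ibt_hdl_ref_cnt++;
		*hdlp = ibd_gstate.ig_ibt_hdl;
	}
	mutex_exit(&ibd_gstate.ig_mutex);
	return (ret);
}

static void
ibd_gstate_rele_ibt_hdl(void)
{
	mutex_enter(&ibd_gstate.ig_mutex);
	if (--ibd_gstate.ig_ibt_hdl_ref_cnt == 0) {
		(void) ibt_detach(ibd_gstate.ig_ibt_hdl);
		ibd_gstate.ig_ibt_hdl = NULL;
	}
	mutex_exit(&ibd_gstate.ig_mutex);
}
#endif	/* IBD_GSTATE_SKETCH */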
121611642SKevin.Ge@Sun.COM 
121711534SKevin.Ge@Sun.COM typedef struct ibd_rc_msg_hello_s {
121811534SKevin.Ge@Sun.COM 	uint32_t reserved_qpn;
121911534SKevin.Ge@Sun.COM 	uint32_t rx_mtu;
122011534SKevin.Ge@Sun.COM } ibd_rc_msg_hello_t;
122111534SKevin.Ge@Sun.COM 
122211534SKevin.Ge@Sun.COM typedef struct ibd_rc_chan_s {
122311534SKevin.Ge@Sun.COM 	struct ibd_rc_chan_s	*next;
122411534SKevin.Ge@Sun.COM 	/* channel hdl that we'll be using for Reliable Connected Mode */
122511534SKevin.Ge@Sun.COM 	ibt_channel_hdl_t	chan_hdl;
122611534SKevin.Ge@Sun.COM 	struct ibd_state_s	*state;
122711534SKevin.Ge@Sun.COM 	ibd_ace_t		*ace;
122811534SKevin.Ge@Sun.COM 	ibd_rc_chan_state_t	chan_state;
122911534SKevin.Ge@Sun.COM 
123011534SKevin.Ge@Sun.COM 	ibd_list_t		tx_wqe_list;	/* free wqe list */
123111534SKevin.Ge@Sun.COM 	ibd_list_t		tx_rel_list;	/* for swqe recycle */
123211534SKevin.Ge@Sun.COM 
123311534SKevin.Ge@Sun.COM 	ibd_swqe_t		*tx_wqes;
123411534SKevin.Ge@Sun.COM 
123511534SKevin.Ge@Sun.COM 	/* start address of Tx Buffers */
123611534SKevin.Ge@Sun.COM 	uint8_t			*tx_mr_bufs;
123711534SKevin.Ge@Sun.COM 	ibt_mr_hdl_t		tx_mr_hdl;
123811534SKevin.Ge@Sun.COM 	ibt_mr_desc_t		tx_mr_desc;
123911534SKevin.Ge@Sun.COM 
124011534SKevin.Ge@Sun.COM 	ibt_cq_hdl_t		scq_hdl;	/* Tx completion queue */
124111534SKevin.Ge@Sun.COM 	ibt_wc_t		tx_wc[IBD_RC_MAX_CQ_WC];
124211534SKevin.Ge@Sun.COM 	ddi_softintr_t		scq_softintr;
124311534SKevin.Ge@Sun.COM 
124411534SKevin.Ge@Sun.COM 	/* For chained send */
124511534SKevin.Ge@Sun.COM 	kmutex_t		tx_post_lock;
124611534SKevin.Ge@Sun.COM 	ibd_swqe_t		*tx_head;
124711534SKevin.Ge@Sun.COM 	ibd_swqe_t		*tx_tail;
124811534SKevin.Ge@Sun.COM 	int			tx_busy;
124911534SKevin.Ge@Sun.COM 
125011534SKevin.Ge@Sun.COM 	/* For tx buffer recycle */
125111534SKevin.Ge@Sun.COM 	kmutex_t		tx_poll_lock;
125211534SKevin.Ge@Sun.COM 	int			tx_poll_busy;
125311534SKevin.Ge@Sun.COM 
125411534SKevin.Ge@Sun.COM 	/* Rx */
125511534SKevin.Ge@Sun.COM 	ibd_list_t		rx_wqe_list;	/* used by ibt_post_recv */
125611534SKevin.Ge@Sun.COM 	ibd_list_t		rx_free_list;	/* free rwqe list */
125711534SKevin.Ge@Sun.COM 
125811534SKevin.Ge@Sun.COM 	ibt_cq_hdl_t		rcq_hdl;	/* Rx completion queue */
125911534SKevin.Ge@Sun.COM 	ibt_wc_t		rx_wc[IBD_RC_MAX_CQ_WC];
126011534SKevin.Ge@Sun.COM 
126111534SKevin.Ge@Sun.COM 	ibd_rwqe_t		*rx_rwqes;	/* the chunk of all rwqes */
126211534SKevin.Ge@Sun.COM 	uint8_t			*rx_bufs;	/* the chunk of all Rx bufs */
126311534SKevin.Ge@Sun.COM 	ibt_mr_hdl_t		rx_mr_hdl;	/* ibt_mr_hdl_t for rx_bufs */
126411534SKevin.Ge@Sun.COM 	ibt_mr_desc_t		rx_mr_desc;	/* ibt_mr_desc_t for rx_bufs */
126511534SKevin.Ge@Sun.COM 
126611534SKevin.Ge@Sun.COM 	/* For chained receive */
126711534SKevin.Ge@Sun.COM 	kmutex_t		rx_lock;
126811534SKevin.Ge@Sun.COM 	mblk_t			*rx_mp;
126911534SKevin.Ge@Sun.COM 	mblk_t			*rx_mp_tail;
127011534SKevin.Ge@Sun.COM 	uint32_t		rx_mp_len;
127111534SKevin.Ge@Sun.COM 
127211534SKevin.Ge@Sun.COM 	uint32_t 		rcq_size;
127311534SKevin.Ge@Sun.COM 	uint32_t 		scq_size;
127411534SKevin.Ge@Sun.COM 	/*
127511534SKevin.Ge@Sun.COM 	 * We need two channels for each connection.
127611534SKevin.Ge@Sun.COM 	 * One channel for Tx; another channel for Rx.
127711534SKevin.Ge@Sun.COM 	 * If "is_tx_chan == B_TRUE", this is a Tx channel.
127811534SKevin.Ge@Sun.COM 	 */
127911534SKevin.Ge@Sun.COM 	boolean_t		is_tx_chan;
1280*13030SKevin.Ge@Sun.COM 
1281*13030SKevin.Ge@Sun.COM 	/*
1282*13030SKevin.Ge@Sun.COM 	 * For the connection reaper routine ibd_rc_conn_timeout_call():
1283*13030SKevin.Ge@Sun.COM 	 * "is_used == B_FALSE" indicates that this RC channel has not been
1284*13030SKevin.Ge@Sun.COM 	 * used for a long time (ibd_rc_conn_timeout).
1285*13030SKevin.Ge@Sun.COM 	 */
1286*13030SKevin.Ge@Sun.COM 	boolean_t		is_used;
1287*13030SKevin.Ge@Sun.COM 	/*
1288*13030SKevin.Ge@Sun.COM 	 * When closing this channel, we need to make sure
1289*13030SKevin.Ge@Sun.COM 	 * "chan->rcq_invoking == 0".
1290*13030SKevin.Ge@Sun.COM 	 */
1291*13030SKevin.Ge@Sun.COM 	uint32_t		rcq_invoking;
129211534SKevin.Ge@Sun.COM } ibd_rc_chan_t;
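/*
 * A minimal sketch, not the driver's actual code, of the chained-receive
 * bookkeeping above: completed rx mblks are linked through b_next onto
 * rx_mp/rx_mp_tail under rx_lock, and once "max_len" mblks have been
 * collected the chain is detached and returned so the caller can hand it
 * up the stack in a single call.  "max_len" is a caller-supplied limit,
 * and the IBD_RC_RX_CHAIN_SKETCH guard is purely illustrative.
 */
#ifdef IBD_RC_RX_CHAIN_SKETCH
static mblk_t *
ibd_rc_chain_rx_mp(ibd_rc_chan_t *chan, mblk_t *mp, uint32_t max_len)
{
	mblk_t *chain = NULL;

	mutex_enter(&chan->rx_lock);
	if (chan->rx_mp == NULL)
		chan->rx_mp = mp;
	else
		chan->rx_mp_tail->b_next = mp;
	chan->rx_mp_tail = mp;
	chan->rx_mp_len++;

	if (chan->rx_mp_len >= max_len) {
		/* Detach the whole chain; the caller delivers it to GLD */
		chain = chan->rx_mp;
		chan->rx_mp = NULL;
		chan->rx_mp_tail = NULL;
		chan->rx_mp_len = 0;
	}
	mutex_exit(&chan->rx_lock);
	return (chain);
}
#endif	/* IBD_RC_RX_CHAIN_SKETCH */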
129311534SKevin.Ge@Sun.COM 
129411534SKevin.Ge@Sun.COM /*
129511534SKevin.Ge@Sun.COM  * The following functions are defined in "ibd.c".
129611534SKevin.Ge@Sun.COM  * They are also used by "ibd_cm.c"
129711534SKevin.Ge@Sun.COM  */
129811534SKevin.Ge@Sun.COM void ibd_print_warn(ibd_state_t *, char *, ...);
129911534SKevin.Ge@Sun.COM void ibd_unmap_mem(ibd_state_t *, ibd_swqe_t *);
130011534SKevin.Ge@Sun.COM void ibd_queue_work_slot(ibd_state_t *, ibd_req_t *, int);
130111534SKevin.Ge@Sun.COM boolean_t ibd_acache_recycle(ibd_state_t *, ipoib_mac_t *, boolean_t);
130211534SKevin.Ge@Sun.COM void ibd_dec_ref_ace(ibd_state_t *, ibd_ace_t *);
130311534SKevin.Ge@Sun.COM ibd_ace_t *ibd_acache_find(ibd_state_t *, ipoib_mac_t *, boolean_t, int);
130411534SKevin.Ge@Sun.COM 
130511534SKevin.Ge@Sun.COM /*
130611534SKevin.Ge@Sun.COM  * The following functions are defined in "ibd_cm.c".
130711534SKevin.Ge@Sun.COM  * They are also used in "ibd.c".
130811534SKevin.Ge@Sun.COM  */
130911534SKevin.Ge@Sun.COM void ibd_async_rc_process_too_big(ibd_state_t *, ibd_req_t *);
131011534SKevin.Ge@Sun.COM void ibd_async_rc_close_act_chan(ibd_state_t *, ibd_req_t *);
131111534SKevin.Ge@Sun.COM void ibd_async_rc_recycle_ace(ibd_state_t *, ibd_req_t *);
131211534SKevin.Ge@Sun.COM 
131311534SKevin.Ge@Sun.COM /* Connection Setup/Close Functions */
131411534SKevin.Ge@Sun.COM ibt_status_t ibd_rc_listen(ibd_state_t *);
131511534SKevin.Ge@Sun.COM void ibd_rc_stop_listen(ibd_state_t *);
131611534SKevin.Ge@Sun.COM ibt_status_t ibd_rc_connect(ibd_state_t *, ibd_ace_t *, ibt_path_info_t *,
131711534SKevin.Ge@Sun.COM     uint64_t);
131811534SKevin.Ge@Sun.COM void ibd_rc_try_connect(ibd_state_t *, ibd_ace_t *,  ibt_path_info_t *);
131911534SKevin.Ge@Sun.COM void ibd_rc_signal_act_close(ibd_state_t *, ibd_ace_t *);
132011534SKevin.Ge@Sun.COM void ibd_rc_signal_ace_recycle(ibd_state_t *, ibd_ace_t *);
1321*13030SKevin.Ge@Sun.COM int ibd_rc_pas_close(ibd_rc_chan_t *, boolean_t, boolean_t);
132211642SKevin.Ge@Sun.COM void ibd_rc_close_all_chan(ibd_state_t *);
1323*13030SKevin.Ge@Sun.COM void ibd_rc_conn_timeout_call(void *carg);
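/*
 * A minimal sketch, not the driver's actual code, of how the connection
 * reaper timer could be armed and disarmed with rc_timeout_lock,
 * rc_timeout_start and rc_timeout, using ibd_rc_conn_timeout_call() as the
 * timeout(9F) handler.  "interval_usec" is an assumed parameter (the real
 * reaping interval is a tunable not shown here), and the
 * IBD_RC_TIMEOUT_SKETCH guard is purely illustrative.
 */
#ifdef IBD_RC_TIMEOUT_SKETCH
static void
ibd_rc_arm_conn_timeout(ibd_state_t *state, clock_t interval_usec)
{
	mutex_enter(&state->rc_timeout_lock);
	if (!state->rc_timeout_start) {
		state->rc_timeout_start = B_TRUE;
		state->rc_timeout = timeout(ibd_rc_conn_timeout_call, state,
		    drv_usectohz(interval_usec));
	}
	mutex_exit(&state->rc_timeout_lock);
}

static void
ibd_rc_disarm_conn_timeout(ibd_state_t *state)
{
	timeout_id_t tid = 0;

	mutex_enter(&state->rc_timeout_lock);
	if (state->rc_timeout_start) {
		state->rc_timeout_start = B_FALSE;
		tid = state->rc_timeout;
		state->rc_timeout = 0;
	}
	mutex_exit(&state->rc_timeout_lock);

	/*
	 * Call untimeout() only after dropping the lock, so a concurrently
	 * running handler that takes rc_timeout_lock cannot deadlock us.
	 */
	if (tid != 0)
		(void) untimeout(tid);
}
#endif	/* IBD_RC_TIMEOUT_SKETCH */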
132411534SKevin.Ge@Sun.COM 
132511534SKevin.Ge@Sun.COM /* Receive Functions */
132611534SKevin.Ge@Sun.COM int ibd_rc_init_srq_list(ibd_state_t *);
132711534SKevin.Ge@Sun.COM void ibd_rc_fini_srq_list(ibd_state_t *);
132811642SKevin.Ge@Sun.COM int ibd_rc_repost_srq_free_list(ibd_state_t *);
132911534SKevin.Ge@Sun.COM 
133011534SKevin.Ge@Sun.COM /* Send Functions */
133111534SKevin.Ge@Sun.COM int ibd_rc_init_tx_largebuf_list(ibd_state_t *);
133211534SKevin.Ge@Sun.COM void ibd_rc_fini_tx_largebuf_list(ibd_state_t *);
133311534SKevin.Ge@Sun.COM ibd_swqe_t *ibd_rc_acquire_swqes(ibd_rc_chan_t *);
133411534SKevin.Ge@Sun.COM void ibd_rc_post_send(ibd_rc_chan_t *, ibd_swqe_t *);
133511534SKevin.Ge@Sun.COM void ibd_rc_drain_scq(ibd_rc_chan_t *, ibt_cq_hdl_t);
133611534SKevin.Ge@Sun.COM void ibd_rc_tx_cleanup(ibd_swqe_t *);
133711534SKevin.Ge@Sun.COM 
133811534SKevin.Ge@Sun.COM /* Others */
133911534SKevin.Ge@Sun.COM void ibd_rc_get_conf(ibd_state_t *);
134011534SKevin.Ge@Sun.COM int ibd_rc_init_stats(ibd_state_t *);
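/*
 * A minimal sketch, not the driver's actual code, of taking a large Tx copy
 * buffer from, and returning it to, the free list that
 * rc_tx_large_bufs_lock protects (rc_tx_largebuf_free_head,
 * rc_tx_largebuf_nfree and ibd_rc_tx_largebuf_t->lb_next).  The
 * IBD_RC_LARGEBUF_SKETCH guard is purely illustrative.
 */
#ifdef IBD_RC_LARGEBUF_SKETCH
static ibd_rc_tx_largebuf_t *
ibd_rc_get_tx_largebuf(ibd_state_t *state)
{
	ibd_rc_tx_largebuf_t *lb;

	mutex_enter(&state->rc_tx_large_bufs_lock);
	lb = state->rc_tx_largebuf_free_head;
	if (lb != NULL) {
		state->rc_tx_largebuf_free_head = lb->lb_next;
		lb->lb_next = NULL;
		state->rc_tx_largebuf_nfree--;
	}
	mutex_exit(&state->rc_tx_large_bufs_lock);
	return (lb);
}

static void
ibd_rc_put_tx_largebuf(ibd_state_t *state, ibd_rc_tx_largebuf_t *lb)
{
	mutex_enter(&state->rc_tx_large_bufs_lock);
	lb->lb_next = state->rc_tx_largebuf_free_head;
	state->rc_tx_largebuf_free_head = lb;
	state->rc_tx_largebuf_nfree++;
	mutex_exit(&state->rc_tx_large_bufs_lock);
}
#endif	/* IBD_RC_LARGEBUF_SKETCH */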
134111534SKevin.Ge@Sun.COM 
13420Sstevel@tonic-gate #endif /* _KERNEL && !_BOOT */
13430Sstevel@tonic-gate 
13440Sstevel@tonic-gate #ifdef __cplusplus
13450Sstevel@tonic-gate }
13460Sstevel@tonic-gate #endif
13470Sstevel@tonic-gate 
13480Sstevel@tonic-gate #endif	/* _SYS_IB_CLIENTS_IBD_H */
1349