xref: /onnv-gate/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h (revision 8082:f52c9d98dc8c)
13302Sagiri /*
23302Sagiri  * CDDL HEADER START
33302Sagiri  *
43302Sagiri  * The contents of this file are subject to the terms of the
53302Sagiri  * Common Development and Distribution License (the "License").
63302Sagiri  * You may not use this file except in compliance with the License.
73302Sagiri  *
83302Sagiri  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93302Sagiri  * or http://www.opensolaris.org/os/licensing.
103302Sagiri  * See the License for the specific language governing permissions
113302Sagiri  * and limitations under the License.
123302Sagiri  *
133302Sagiri  * When distributing Covered Code, include this CDDL HEADER in each
143302Sagiri  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153302Sagiri  * If applicable, add the following below this CDDL HEADER, with the
163302Sagiri  * fields enclosed by brackets "[]" replaced with your own identifying
173302Sagiri  * information: Portions Copyright [yyyy] [name of copyright owner]
183302Sagiri  *
193302Sagiri  * CDDL HEADER END
203302Sagiri  */
213302Sagiri /*
22*8082SRamaswamy.Tummala@Sun.COM  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
233302Sagiri  * Use is subject to license terms.
243302Sagiri  */
253302Sagiri /*
263302Sagiri  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
273302Sagiri  *
283302Sagiri  * This software is available to you under a choice of one of two
293302Sagiri  * licenses.  You may choose to be licensed under the terms of the GNU
303302Sagiri  * General Public License (GPL) Version 2, available from the file
313302Sagiri  * COPYING in the main directory of this source tree, or the
323302Sagiri  * OpenIB.org BSD license below:
333302Sagiri  *
343302Sagiri  *     Redistribution and use in source and binary forms, with or
353302Sagiri  *     without modification, are permitted provided that the following
363302Sagiri  *     conditions are met:
373302Sagiri  *
383302Sagiri  *	- Redistributions of source code must retain the above
393302Sagiri  *	  copyright notice, this list of conditions and the following
403302Sagiri  *	  disclaimer.
413302Sagiri  *
423302Sagiri  *	- Redistributions in binary form must reproduce the above
433302Sagiri  *	  copyright notice, this list of conditions and the following
443302Sagiri  *	  disclaimer in the documentation and/or other materials
453302Sagiri  *	  provided with the distribution.
463302Sagiri  *
473302Sagiri  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
483302Sagiri  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
493302Sagiri  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
503302Sagiri  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
513302Sagiri  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
523302Sagiri  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
533302Sagiri  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
543302Sagiri  * SOFTWARE.
553302Sagiri  *
563302Sagiri  */
573302Sagiri /*
583302Sagiri  * Sun elects to include this software in Sun product
593302Sagiri  * under the OpenIB BSD license.
603302Sagiri  *
613302Sagiri  *
623302Sagiri  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
633302Sagiri  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
643302Sagiri  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
653302Sagiri  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
663302Sagiri  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
673302Sagiri  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
683302Sagiri  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
693302Sagiri  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
703302Sagiri  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
713302Sagiri  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
723302Sagiri  * POSSIBILITY OF SUCH DAMAGE.
733302Sagiri  */
743302Sagiri 
753302Sagiri #ifndef _RDSIB_EP_H
763302Sagiri #define	_RDSIB_EP_H
773302Sagiri 
783302Sagiri #ifdef __cplusplus
793302Sagiri extern "C" {
803302Sagiri #endif
813302Sagiri 
823302Sagiri #include <netinet/in.h>
833302Sagiri 
/*
 * Endpoint (channel) type: each endpoint is either the control channel
 * or the data channel.
 */
typedef enum rds_ep_type_s {
	RDS_EP_TYPE_CTRL		= 1,	/* control channel */
	RDS_EP_TYPE_DATA		= 2	/* data channel */
} rds_ep_type_t;
913302Sagiri 
/*
 * Channel States
 *
 * RDS_EP_STATE_UNCONNECTED - Initial state when rds_ep_t is created
 * RDS_EP_STATE_ACTIVE_PENDING - Active side connection in progress
 * RDS_EP_STATE_PASSIVE_PENDING - Passive side connection in progress
 * RDS_EP_STATE_CONNECTED - Channel is connected
 * RDS_EP_STATE_CLOSING - Presumably: channel close is in progress
 * RDS_EP_STATE_CLOSED - Channel is closed
 * RDS_EP_STATE_ERROR - Presumably: an error was detected on the channel
 *
 * NOTE(review): an earlier revision of this comment described
 * "RDS_EP_STATE_DESTROY_TIMEWAIT - Channel is closed", which is not an
 * enumerator of this type; its description has been attached to
 * RDS_EP_STATE_CLOSED.  The CLOSING and ERROR descriptions are inferred
 * from the names only -- confirm against the users of ep_state.
 */
typedef enum rds_ep_state_s {
	RDS_EP_STATE_UNCONNECTED		= 0,
	RDS_EP_STATE_ACTIVE_PENDING		= 1,
	RDS_EP_STATE_PASSIVE_PENDING		= 2,
	RDS_EP_STATE_CONNECTED			= 3,
	RDS_EP_STATE_CLOSING			= 4,
	RDS_EP_STATE_CLOSED			= 5,
	RDS_EP_STATE_ERROR			= 6
} rds_ep_state_t;
1103302Sagiri 
/*
 * Session State Machine Diagram
 *
 *                     -----------------
 *                    |       (6)       |
 *                    |                 |
 *                    v                 |
 *             --> (Created)-------->(Failed)
 *            |     |         (5)       ^
 *            |     |(1)                |
 *            |     |                   |(9)
 *            |     v                   |
 *            |    (Init)<--------------|
 *            |     | |       (8)       |
 *            |     | |                 |
 *            |  (2)|  --------------   |
 *        (11)|     |         (7)    |  |
 *            |     v                v  |
 *            |    (Connected)------>(Error)
 *            |     |         (10)
 *            |     |(3)
 *            |     |
 *            |     v
 *            |    (Closed)
 *            |     |
 *            |     |(4)
 *            |     |
 *            |     v
 *             --- (Fini) ------->(Destroy)
 *                         (12)
 *
 *	(1) rds_session_init()
 *	(2) rds_session_open()
 *	(3) rds_session_close()
 *	(4) rds_session_fini()
 *	(4) rds_passive_session_fini()
 *	(5) Failure in rds_session_init()
 *	(6) rds_sendmsg(3SOCKET)/Incoming CM REQ
 *	(7) Failure in rds_session_open()
 *	(8) rds_session_close(), rds_get_ibaddr() and rds_session_reinit()
 *	(9) rds_session_close() and rds_session_fini()
 *	(9) rds_cleanup_passive_session() and rds_passive_session_fini()
 *	(10) Connection Error/Incoming REQ
 *	(11) rds_sendmsg(3SOCKET)/Incoming REQ
 *
 *
 * Created   - Session is allocated and inserted into the sessionlist but
 *             not all members are initialized.
 * Init      - All members are initialized, send buffer pool is allocated.
 * Connected - Data and ctrl RC channels are opened.
 * Closed    - Data and ctrl RC channels are closed.
 * Fini      - Send buffer pool and buffers in the receive pool are freed.
 * Destroy   - Session is removed from the session list and is ready to be
 *             freed.
 * Failed    - Session initialization has failed (send buffer pool allocation).
 * Error     - (1) Failed to open the RC channels.
 *             (2) An error occurred on the RC channels while sending.
 *             (3) Received a new CM REQ message on the existing connection.
 *
 * NOTE(review): the diagram and the descriptions above do not cover
 * RDS_SESSION_STATE_HCA_CLOSING, RDS_SESSION_STATE_ACTIVE_CLOSING or
 * RDS_SESSION_STATE_PASSIVE_CLOSING, and transition (12) has no legend
 * entry.  Update this comment once their semantics are confirmed against
 * the implementation.
 *
 * The numeric values are stored in rds_session_t:session_state (a uint8_t),
 * so every enumerator must continue to fit in 8 bits.
 */
typedef enum rds_session_state_s {
	RDS_SESSION_STATE_CREATED		= 0,
	RDS_SESSION_STATE_FAILED		= 1,
	RDS_SESSION_STATE_INIT			= 2,
	RDS_SESSION_STATE_CONNECTED		= 3,
	RDS_SESSION_STATE_HCA_CLOSING		= 4,
	RDS_SESSION_STATE_ERROR			= 5,
	RDS_SESSION_STATE_ACTIVE_CLOSING	= 6,
	RDS_SESSION_STATE_PASSIVE_CLOSING	= 7,
	RDS_SESSION_STATE_CLOSED		= 8,
	RDS_SESSION_STATE_FINI			= 9,
	RDS_SESSION_STATE_DESTROY		= 10
} rds_session_state_t;
1833302Sagiri 
/*
 * Set sp->session_state to 'state' while holding sp->session_lock as a
 * writer.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: without it, using the macro as the body of an unbraced
 * if/else would make only the rw_enter() conditional and would run the
 * assignment and rw_exit() unconditionally.  Both arguments are
 * parenthesized so that expression arguments (e.g. &session) expand
 * correctly.
 *
 * 'sp' is evaluated more than once; do not pass an expression with side
 * effects.
 */
#define	RDS_SESSION_TRANSITION(sp, state)			\
	do {							\
		rw_enter(&(sp)->session_lock, RW_WRITER);	\
		(sp)->session_state = (state);			\
		rw_exit(&(sp)->session_lock);			\
	} while (0)
1883302Sagiri 
/* Session type: which end of the session this is (active or passive) */
#define	RDS_SESSION_ACTIVE	1
#define	RDS_SESSION_PASSIVE	2
1923302Sagiri 
1933302Sagiri /*
1943302Sagiri  * RDS QP Information
1953302Sagiri  *
1963302Sagiri  * lock  - Synchronize access
1973302Sagiri  * depth - Max number of WRs that can be posted.
1983302Sagiri  * level - Number of outstanding WRs in the QP
1993302Sagiri  * lwm   - Water mark at which to post more receive WRs.
2003302Sagiri  * taskqpending - Indicates if a taskq thread is dispatched to post receive
2013302Sagiri  *		WRs in the RQ
2023302Sagiri  */
2033302Sagiri typedef struct rds_qp_s {
2043302Sagiri 	kmutex_t		qp_lock;
2053302Sagiri 	uint32_t		qp_depth;
2063302Sagiri 	uint32_t		qp_level;
2073302Sagiri 	uint32_t		qp_lwm;
2083302Sagiri 	boolean_t		qp_taskqpending;
2093302Sagiri } rds_qp_t;
2103302Sagiri 
2113302Sagiri /*
2123302Sagiri  * RDS EndPoint(One end of RC connection)
2133302Sagiri  *
2143302Sagiri  * sp        - Parent Session
2153302Sagiri  * type      - Control or Data Channel
2163302Sagiri  * remip     - Same as session_remip
2173302Sagiri  * myip      - Same as session_myip
2183302Sagiri  * snd_lkey  - LKey for the send buffer pool
2193302Sagiri  * hca_guid  - HCA guid
2203302Sagiri  * snd_mrhdl - Memory handle for the send buffer pool
2213302Sagiri  * lock      - Protects the members
2223302Sagiri  * state     - See rds_ep_state_t
2233302Sagiri  * chanhdl   - RC channel handle
2243302Sagiri  * sendcq    - Send CQ handle
2253302Sagiri  * recvcq    - Recv CQ handle
2263302Sagiri  * sndpool   - Send buffer Pool
2273302Sagiri  * rcvpool   - Recv buffer Pool
2283302Sagiri  * segfbp    - First packet of a segmented message.
2293302Sagiri  * seglbp    - Last packet of a segmented message.
2303302Sagiri  * lbufid    - Last successful buffer that was received by the remote.
2313302Sagiri  *             Valid only during session failover/reconnect.
2323302Sagiri  * rbufid    - Last buffer (remote buffer) that was received successfully
2333302Sagiri  *             from the remote node.
2343302Sagiri  * ds        - SGL used for send acknowledgement.
2353302Sagiri  * ackwr     - WR to send acknowledgement.
2363302Sagiri  * ackhdl    - Memory handle for 'ack_addr'.
2373302Sagiri  * ack_rkey  - RKey for 'ack_addr'.
2383302Sagiri  * ack_addr  - Memory region to receive RDMA acknowledgement from remote.
2393302Sagiri  */
2403302Sagiri typedef struct rds_ep_s {
2413302Sagiri 	struct rds_session_s	*ep_sp;
2423302Sagiri 	rds_ep_type_t		ep_type;
2433302Sagiri 	ipaddr_t		ep_remip;
2443302Sagiri 	ipaddr_t		ep_myip;
2453302Sagiri 	ibt_lkey_t		ep_snd_lkey;
2463302Sagiri 	ib_guid_t		ep_hca_guid;
2473302Sagiri 	ibt_mr_hdl_t		ep_snd_mrhdl;
2483302Sagiri 	kmutex_t		ep_lock;
2493302Sagiri 	rds_ep_state_t		ep_state;
2503302Sagiri 	ibt_channel_hdl_t	ep_chanhdl;
2513302Sagiri 	ibt_cq_hdl_t		ep_sendcq;
2523302Sagiri 	ibt_cq_hdl_t		ep_recvcq;
2533302Sagiri 	rds_bufpool_t		ep_sndpool;
2543302Sagiri 	rds_bufpool_t		ep_rcvpool;
2553302Sagiri 	rds_qp_t		ep_recvqp;
2563302Sagiri 	uint_t			ep_rdmacnt;
2573302Sagiri 	rds_buf_t		*ep_segfbp;
2583302Sagiri 	rds_buf_t		*ep_seglbp;
2593302Sagiri 	uintptr_t		ep_lbufid;
2603302Sagiri 	uintptr_t		ep_rbufid;
2613302Sagiri 	ibt_wr_ds_t		ep_ackds;
2623302Sagiri 	ibt_send_wr_t		ep_ackwr;
2633302Sagiri 	ibt_mr_hdl_t		ep_ackhdl;
2643302Sagiri 	ibt_rkey_t		ep_ack_rkey;
2653302Sagiri 	uintptr_t		ep_ack_addr;
2663302Sagiri } rds_ep_t;
2673302Sagiri 
2683302Sagiri /*
2693302Sagiri  * One end of an RDS session
2703302Sagiri  *
2713302Sagiri  * nextp   - Pointer to the next session in the session list.
2723302Sagiri  *           This is protected by rds_state_t:rds_sessionlock.
2733302Sagiri  * remip   - IP address of the node having the remote end of the session.
2743302Sagiri  * myip    - IP address of this end of the session.
2753302Sagiri  * lgid    - IB local (source) gid, hosting "myip".
2763302Sagiri  * rgid    - IB remote (destination) gid, hosting "remip".
2773302Sagiri  * lock    - Provides read/write access to members of the session.
2783302Sagiri  * type    - Identifies which end of session (active or passive).
2793302Sagiri  * state   - State of session (rds_session_state_t).
2803302Sagiri  * dataep  - Data endpoint
2813302Sagiri  * ctrlep  - Control endpoint
2823302Sagiri  * failover- Flag to indicate that an error occured and the session is
2833302Sagiri  *           re-connecting.
2843302Sagiri  * portmap_lock - To serialize access to portmap.
2853302Sagiri  * portmap - Bitmap of sockets.
2863302Sagiri  *           The maximum number of sockets seem to be 65536, the portmap has
2873302Sagiri  *           1 bit for each remote socket. A set bit indicates that the
2883302Sagiri  *           corresponding remote socket is stalled and vice versa.
2893302Sagiri  */
2903302Sagiri typedef struct rds_session_s {
2913302Sagiri 	struct rds_session_s	*session_nextp;
2923302Sagiri 	ipaddr_t		session_remip;
2933302Sagiri 	ipaddr_t		session_myip;
294*8082SRamaswamy.Tummala@Sun.COM 	ib_guid_t		session_hca_guid;
2953302Sagiri 	ib_gid_t		session_lgid;
2963302Sagiri 	ib_gid_t		session_rgid;
2973302Sagiri 	krwlock_t		session_lock;
2983302Sagiri 	uint8_t			session_type;
2993302Sagiri 	uint8_t			session_state;
3003302Sagiri 	struct rds_ep_s		session_dataep;
3013302Sagiri 	struct rds_ep_s		session_ctrlep;
3023302Sagiri 	uint_t			session_failover;
3034467Sagiri 	krwlock_t		session_local_portmap_lock;
3044467Sagiri 	krwlock_t		session_remote_portmap_lock;
3054467Sagiri 	uint8_t			session_local_portmap[RDS_PORT_MAP_SIZE];
3064467Sagiri 	uint8_t			session_remote_portmap[RDS_PORT_MAP_SIZE];
3074703Shiremath 	ibt_path_info_t		session_pinfo;
3083302Sagiri } rds_session_t;
3093302Sagiri 
3103302Sagiri /* defined in rds_ep.c */
3114467Sagiri int rds_ep_init(rds_ep_t *ep, ib_guid_t hca_guid);
3123302Sagiri rds_session_t *rds_session_create(rds_state_t *statep, ipaddr_t destip,
3133302Sagiri     ipaddr_t srcip, ibt_cm_req_rcv_t *reqp, uint8_t type);
3143302Sagiri int rds_session_init(rds_session_t *sp);
3154154Sagiri int rds_session_reinit(rds_session_t *sp, ib_gid_t lgid);
3163302Sagiri void rds_session_open(rds_session_t *sp);
3173302Sagiri void rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode,
3183302Sagiri     uint_t wait);
3193302Sagiri rds_session_t *rds_session_lkup(rds_state_t *statep, ipaddr_t destip,
3203302Sagiri     ib_guid_t node_guid);
3213302Sagiri void rds_recycle_session(rds_session_t *sp);
3223302Sagiri void rds_session_active(rds_session_t *sp);
3233302Sagiri void rds_close_sessions(void *arg);
3243302Sagiri void rds_received_msg(rds_ep_t *ep, rds_buf_t *bp);
3253302Sagiri void rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cp);
3263302Sagiri void rds_handle_send_error(rds_ep_t *ep);
3274154Sagiri void rds_session_fini(rds_session_t *sp);
3283302Sagiri void rds_passive_session_fini(rds_session_t *sp);
3293302Sagiri void rds_cleanup_passive_session(void *arg);
3303302Sagiri 
3313302Sagiri /* defined in rds_ib.c */
3323302Sagiri ibt_channel_hdl_t rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port);
3333302Sagiri void rds_ep_free_rc_channel(rds_ep_t *ep);
3343302Sagiri void rds_post_recv_buf(void *arg);
3353302Sagiri void rds_poll_send_completions(ibt_cq_hdl_t cq, struct rds_ep_s *ep,
3363302Sagiri     boolean_t lock);
3373302Sagiri 
3383302Sagiri /* defined in rds_cm.c */
3393302Sagiri int rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
3403302Sagiri     ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl);
3413302Sagiri int rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode);
3423302Sagiri 
3433302Sagiri int rds_deliver_new_msg(mblk_t *mp, ipaddr_t local_addr, ipaddr_t rem_addr,
3443302Sagiri     in_port_t local_port, in_port_t rem_port, zoneid_t zoneid);
3453302Sagiri 
3463302Sagiri /* defined in rds_sc.c */
3473302Sagiri int rds_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip);
3483302Sagiri 
3493302Sagiri #ifdef __cplusplus
3503302Sagiri }
3513302Sagiri #endif
3523302Sagiri 
3533302Sagiri #endif	/* _RDSIB_EP_H */
354