xref: /onnv-gate/usr/src/uts/common/io/ib/clients/rdsv3/connection.c (revision 13118:e192495818d4)
112198SEiji.Ota@Sun.COM /*
212198SEiji.Ota@Sun.COM  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
312198SEiji.Ota@Sun.COM  */
412198SEiji.Ota@Sun.COM 
512198SEiji.Ota@Sun.COM /*
612763SGiri.Adari@Sun.COM  * This file contains code imported from the OFED rds source file connection.c
712763SGiri.Adari@Sun.COM  * Oracle elects to have and use the contents of connection.c under and governed
812763SGiri.Adari@Sun.COM  * by the OpenIB.org BSD license (see below for full license text). However,
912763SGiri.Adari@Sun.COM  * the following notice accompanied the original version of this file:
1012763SGiri.Adari@Sun.COM  */
1112763SGiri.Adari@Sun.COM 
1212763SGiri.Adari@Sun.COM /*
1312198SEiji.Ota@Sun.COM  * Copyright (c) 2006 Oracle.  All rights reserved.
1412198SEiji.Ota@Sun.COM  *
1512198SEiji.Ota@Sun.COM  * This software is available to you under a choice of one of two
1612198SEiji.Ota@Sun.COM  * licenses.  You may choose to be licensed under the terms of the GNU
1712198SEiji.Ota@Sun.COM  * General Public License (GPL) Version 2, available from the file
1812198SEiji.Ota@Sun.COM  * COPYING in the main directory of this source tree, or the
1912198SEiji.Ota@Sun.COM  * OpenIB.org BSD license below:
2012198SEiji.Ota@Sun.COM  *
2112198SEiji.Ota@Sun.COM  *     Redistribution and use in source and binary forms, with or
2212198SEiji.Ota@Sun.COM  *     without modification, are permitted provided that the following
2312198SEiji.Ota@Sun.COM  *     conditions are met:
2412198SEiji.Ota@Sun.COM  *
2512198SEiji.Ota@Sun.COM  *      - Redistributions of source code must retain the above
2612198SEiji.Ota@Sun.COM  *        copyright notice, this list of conditions and the following
2712198SEiji.Ota@Sun.COM  *        disclaimer.
2812198SEiji.Ota@Sun.COM  *
2912198SEiji.Ota@Sun.COM  *      - Redistributions in binary form must reproduce the above
3012198SEiji.Ota@Sun.COM  *        copyright notice, this list of conditions and the following
3112198SEiji.Ota@Sun.COM  *        disclaimer in the documentation and/or other materials
3212198SEiji.Ota@Sun.COM  *        provided with the distribution.
3312198SEiji.Ota@Sun.COM  *
3412198SEiji.Ota@Sun.COM  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
3512198SEiji.Ota@Sun.COM  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3612198SEiji.Ota@Sun.COM  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
3712198SEiji.Ota@Sun.COM  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
3812198SEiji.Ota@Sun.COM  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
3912198SEiji.Ota@Sun.COM  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
4012198SEiji.Ota@Sun.COM  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4112198SEiji.Ota@Sun.COM  * SOFTWARE.
4212198SEiji.Ota@Sun.COM  *
4312198SEiji.Ota@Sun.COM  */
4412198SEiji.Ota@Sun.COM #include <sys/types.h>
4512198SEiji.Ota@Sun.COM #include <sys/kmem.h>
4612198SEiji.Ota@Sun.COM #include <sys/rds.h>
4712198SEiji.Ota@Sun.COM 
4812198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/rdsv3.h>
4912198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/loop.h>
5012198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
5112198SEiji.Ota@Sun.COM 
5212198SEiji.Ota@Sun.COM /* converting this to RCU is a chore for another day.. */
5312198SEiji.Ota@Sun.COM static krwlock_t rdsv3_conn_lock;
5412198SEiji.Ota@Sun.COM struct avl_tree rdsv3_conn_hash;
5512198SEiji.Ota@Sun.COM static struct kmem_cache *rdsv3_conn_slab = NULL;
5612198SEiji.Ota@Sun.COM 
/*
 * OR RDS_INFO_CONNECTION_FLAG_<suffix> into 'var' when 'test' is true;
 * leave 'var' untouched otherwise.  Wrapped in do/while (0) so that it
 * behaves as a single statement.
 */
#define	rdsv3_conn_info_set(var, test, suffix) do {			\
	if (!(test))							\
		break;							\
	(var) |= RDS_INFO_CONNECTION_FLAG_##suffix;			\
} while (0)
6112198SEiji.Ota@Sun.COM 
6212198SEiji.Ota@Sun.COM 
6312198SEiji.Ota@Sun.COM static struct rdsv3_connection *
rdsv3_conn_lookup(uint32_be_t laddr,uint32_be_t faddr,avl_index_t * pos)6412198SEiji.Ota@Sun.COM rdsv3_conn_lookup(uint32_be_t laddr, uint32_be_t faddr, avl_index_t *pos)
6512198SEiji.Ota@Sun.COM {
6612198SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn;
6712198SEiji.Ota@Sun.COM 	struct rdsv3_conn_info_s conn_info;
6812198SEiji.Ota@Sun.COM 	avl_index_t place = 0;
6912198SEiji.Ota@Sun.COM 
7012198SEiji.Ota@Sun.COM 	conn_info.c_laddr = laddr;
7112198SEiji.Ota@Sun.COM 	conn_info.c_faddr = faddr;
7212198SEiji.Ota@Sun.COM 
7312198SEiji.Ota@Sun.COM 	conn = avl_find(&rdsv3_conn_hash, &conn_info, &place);
7412198SEiji.Ota@Sun.COM 
7512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF5("rdsv3_conn_lookup",
7612198SEiji.Ota@Sun.COM 	    "returning conn %p for %u.%u.%u.%u -> %u.%u.%u.%u",
7712198SEiji.Ota@Sun.COM 	    conn, NIPQUAD(laddr), NIPQUAD(faddr));
7812198SEiji.Ota@Sun.COM 
7912198SEiji.Ota@Sun.COM 	if (pos != NULL)
8012198SEiji.Ota@Sun.COM 		*pos = place;
8112198SEiji.Ota@Sun.COM 
8212198SEiji.Ota@Sun.COM 	return (conn);
8312198SEiji.Ota@Sun.COM }
8412198SEiji.Ota@Sun.COM 
8512198SEiji.Ota@Sun.COM /*
8612198SEiji.Ota@Sun.COM  * This is called by transports as they're bringing down a connection.
8712198SEiji.Ota@Sun.COM  * It clears partial message state so that the transport can start sending
8812198SEiji.Ota@Sun.COM  * and receiving over this connection again in the future.  It is up to
8912198SEiji.Ota@Sun.COM  * the transport to have serialized this call with its send and recv.
9012198SEiji.Ota@Sun.COM  */
9112198SEiji.Ota@Sun.COM void
rdsv3_conn_reset(struct rdsv3_connection * conn)9212198SEiji.Ota@Sun.COM rdsv3_conn_reset(struct rdsv3_connection *conn)
9312198SEiji.Ota@Sun.COM {
9412198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_conn_reset",
9512198SEiji.Ota@Sun.COM 	    "connection %u.%u.%u.%u to %u.%u.%u.%u reset",
9612198SEiji.Ota@Sun.COM 	    NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr));
9712198SEiji.Ota@Sun.COM 
9812198SEiji.Ota@Sun.COM 	rdsv3_stats_inc(s_conn_reset);
9912198SEiji.Ota@Sun.COM 	rdsv3_send_reset(conn);
10012198SEiji.Ota@Sun.COM 	conn->c_flags = 0;
10112198SEiji.Ota@Sun.COM 
10212198SEiji.Ota@Sun.COM 	/*
10312198SEiji.Ota@Sun.COM 	 * Do not clear next_rx_seq here, else we cannot distinguish
10412198SEiji.Ota@Sun.COM 	 * retransmitted packets from new packets, and will hand all
10512198SEiji.Ota@Sun.COM 	 * of them to the application. That is not consistent with the
10612198SEiji.Ota@Sun.COM 	 * reliability guarantees of RDS.
10712198SEiji.Ota@Sun.COM 	 */
10812198SEiji.Ota@Sun.COM }
10912198SEiji.Ota@Sun.COM 
11012198SEiji.Ota@Sun.COM /*
11112198SEiji.Ota@Sun.COM  * There is only every one 'conn' for a given pair of addresses in the
11212198SEiji.Ota@Sun.COM  * system at a time.  They contain messages to be retransmitted and so
11312198SEiji.Ota@Sun.COM  * span the lifetime of the actual underlying transport connections.
11412198SEiji.Ota@Sun.COM  *
11512198SEiji.Ota@Sun.COM  * For now they are not garbage collected once they're created.  They
11612198SEiji.Ota@Sun.COM  * are torn down as the module is removed, if ever.
11712198SEiji.Ota@Sun.COM  */
11812198SEiji.Ota@Sun.COM static struct rdsv3_connection *
__rdsv3_conn_create(uint32_be_t laddr,uint32_be_t faddr,struct rdsv3_transport * trans,int gfp,int is_outgoing)11912198SEiji.Ota@Sun.COM __rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr,
12012676SEiji.Ota@Sun.COM     struct rdsv3_transport *trans, int gfp, int is_outgoing)
12112198SEiji.Ota@Sun.COM {
12212198SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn, *parent = NULL;
12312198SEiji.Ota@Sun.COM 	avl_index_t pos;
12412198SEiji.Ota@Sun.COM 	int ret;
12512198SEiji.Ota@Sun.COM 
12612198SEiji.Ota@Sun.COM 	rw_enter(&rdsv3_conn_lock, RW_READER);
12712198SEiji.Ota@Sun.COM 	conn = rdsv3_conn_lookup(laddr, faddr, &pos);
12812198SEiji.Ota@Sun.COM 	if (conn &&
12912198SEiji.Ota@Sun.COM 	    conn->c_loopback &&
13012198SEiji.Ota@Sun.COM 	    conn->c_trans != &rdsv3_loop_transport &&
13112198SEiji.Ota@Sun.COM 	    !is_outgoing) {
13212198SEiji.Ota@Sun.COM 		/*
13312198SEiji.Ota@Sun.COM 		 * This is a looped back IB connection, and we're
13412198SEiji.Ota@Sun.COM 		 * called by the code handling the incoming connect.
13512198SEiji.Ota@Sun.COM 		 * We need a second connection object into which we
13612198SEiji.Ota@Sun.COM 		 * can stick the other QP.
13712198SEiji.Ota@Sun.COM 		 */
13812198SEiji.Ota@Sun.COM 		parent = conn;
13912198SEiji.Ota@Sun.COM 		conn = parent->c_passive;
14012198SEiji.Ota@Sun.COM 	}
14112198SEiji.Ota@Sun.COM 	rw_exit(&rdsv3_conn_lock);
14212198SEiji.Ota@Sun.COM 	if (conn)
14312198SEiji.Ota@Sun.COM 		goto out;
14412198SEiji.Ota@Sun.COM 
14512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("__rdsv3_conn_create", "Enter(%x -> %x)",
14612198SEiji.Ota@Sun.COM 	    ntohl(laddr), ntohl(faddr));
14712198SEiji.Ota@Sun.COM 
14812198SEiji.Ota@Sun.COM 	conn = kmem_cache_alloc(rdsv3_conn_slab, gfp);
14912676SEiji.Ota@Sun.COM 	if (!conn) {
15012198SEiji.Ota@Sun.COM 		conn = ERR_PTR(-ENOMEM);
15112198SEiji.Ota@Sun.COM 		goto out;
15212198SEiji.Ota@Sun.COM 	}
15312198SEiji.Ota@Sun.COM 
15412198SEiji.Ota@Sun.COM 	/* see rdsv3_conn_constructor */
15512198SEiji.Ota@Sun.COM 	conn->c_laddr = laddr;
15612198SEiji.Ota@Sun.COM 	conn->c_faddr = faddr;
15712198SEiji.Ota@Sun.COM 
158*12895SGiri.Adari@Sun.COM 	/*
159*12895SGiri.Adari@Sun.COM 	 * We don't allow sockets to send messages without binding.
160*12895SGiri.Adari@Sun.COM 	 * So, the IP address will already be there in the bind array.
161*12895SGiri.Adari@Sun.COM 	 * Mostly, this is a readonly operation.
162*12895SGiri.Adari@Sun.COM 	 * For now, passing GLOBAL_ZONEID.
163*12895SGiri.Adari@Sun.COM 	 */
164*12895SGiri.Adari@Sun.COM 	conn->c_bucketp = rdsv3_find_ip_bucket(ntohl(laddr), GLOBAL_ZONEID);
165*12895SGiri.Adari@Sun.COM 
16612198SEiji.Ota@Sun.COM 	ret = rdsv3_cong_get_maps(conn);
16712198SEiji.Ota@Sun.COM 	if (ret) {
16812198SEiji.Ota@Sun.COM 		kmem_cache_free(rdsv3_conn_slab, conn);
16912198SEiji.Ota@Sun.COM 		conn = ERR_PTR(ret);
17012198SEiji.Ota@Sun.COM 		goto out;
17112198SEiji.Ota@Sun.COM 	}
17212198SEiji.Ota@Sun.COM 
17312198SEiji.Ota@Sun.COM 	/*
17412198SEiji.Ota@Sun.COM 	 * This is where a connection becomes loopback.  If *any* RDS sockets
17512198SEiji.Ota@Sun.COM 	 * can bind to the destination address then we'd rather the messages
17612198SEiji.Ota@Sun.COM 	 * flow through loopback rather than either transport.
17712198SEiji.Ota@Sun.COM 	 */
17812198SEiji.Ota@Sun.COM 	if (rdsv3_trans_get_preferred(faddr)) {
17912198SEiji.Ota@Sun.COM 		conn->c_loopback = 1;
18012198SEiji.Ota@Sun.COM 		if (is_outgoing && trans->t_prefer_loopback) {
18112198SEiji.Ota@Sun.COM 			/*
18212198SEiji.Ota@Sun.COM 			 * "outgoing" connection - and the transport
18312198SEiji.Ota@Sun.COM 			 * says it wants the connection handled by the
18412198SEiji.Ota@Sun.COM 			 * loopback transport. This is what TCP does.
18512198SEiji.Ota@Sun.COM 			 */
18612198SEiji.Ota@Sun.COM 			trans = &rdsv3_loop_transport;
18712198SEiji.Ota@Sun.COM 		}
18812198SEiji.Ota@Sun.COM 	}
18912198SEiji.Ota@Sun.COM 
19012198SEiji.Ota@Sun.COM 	conn->c_trans = trans;
19112198SEiji.Ota@Sun.COM 
19212198SEiji.Ota@Sun.COM 	ret = trans->conn_alloc(conn, gfp);
19312198SEiji.Ota@Sun.COM 	if (ret) {
19412198SEiji.Ota@Sun.COM 		kmem_cache_free(rdsv3_conn_slab, conn);
19512198SEiji.Ota@Sun.COM 		conn = ERR_PTR(ret);
19612198SEiji.Ota@Sun.COM 		goto out;
19712198SEiji.Ota@Sun.COM 	}
19812198SEiji.Ota@Sun.COM 
19912198SEiji.Ota@Sun.COM 	conn->c_state = RDSV3_CONN_DOWN;
20012198SEiji.Ota@Sun.COM 	conn->c_reconnect_jiffies = 0;
20112198SEiji.Ota@Sun.COM 	RDSV3_INIT_DELAYED_WORK(&conn->c_send_w, rdsv3_send_worker);
20212198SEiji.Ota@Sun.COM 	RDSV3_INIT_DELAYED_WORK(&conn->c_recv_w, rdsv3_recv_worker);
20312198SEiji.Ota@Sun.COM 	RDSV3_INIT_DELAYED_WORK(&conn->c_conn_w, rdsv3_connect_worker);
20412676SEiji.Ota@Sun.COM 	RDSV3_INIT_DELAYED_WORK(&conn->c_reap_w, rdsv3_reaper_worker);
20512198SEiji.Ota@Sun.COM 	RDSV3_INIT_WORK(&conn->c_down_w, rdsv3_shutdown_worker);
20612198SEiji.Ota@Sun.COM 	mutex_init(&conn->c_cm_lock, NULL, MUTEX_DRIVER, NULL);
20712198SEiji.Ota@Sun.COM 	conn->c_flags = 0;
20812198SEiji.Ota@Sun.COM 
20912198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("__rdsv3_conn_create",
21012198SEiji.Ota@Sun.COM 	    "allocated conn %p for %u.%u.%u.%u -> %u.%u.%u.%u over %s %s",
21112198SEiji.Ota@Sun.COM 	    conn, NIPQUAD(laddr), NIPQUAD(faddr),
21212198SEiji.Ota@Sun.COM 	    trans->t_name ? trans->t_name : "[unknown]",
21312198SEiji.Ota@Sun.COM 	    is_outgoing ? "(outgoing)" : "");
21412198SEiji.Ota@Sun.COM 
21512198SEiji.Ota@Sun.COM 	/*
21612198SEiji.Ota@Sun.COM 	 * Since we ran without holding the conn lock, someone could
21712198SEiji.Ota@Sun.COM 	 * have created the same conn (either normal or passive) in the
21812198SEiji.Ota@Sun.COM 	 * interim. We check while holding the lock. If we won, we complete
21912198SEiji.Ota@Sun.COM 	 * init and return our conn. If we lost, we rollback and return the
22012198SEiji.Ota@Sun.COM 	 * other one.
22112198SEiji.Ota@Sun.COM 	 */
22212198SEiji.Ota@Sun.COM 	rw_enter(&rdsv3_conn_lock, RW_WRITER);
22312198SEiji.Ota@Sun.COM 	if (parent) {
22412198SEiji.Ota@Sun.COM 		/* Creating passive conn */
22512198SEiji.Ota@Sun.COM 		if (parent->c_passive) {
22612198SEiji.Ota@Sun.COM 			trans->conn_free(conn->c_transport_data);
22712198SEiji.Ota@Sun.COM 			kmem_cache_free(rdsv3_conn_slab, conn);
22812198SEiji.Ota@Sun.COM 			conn = parent->c_passive;
22912198SEiji.Ota@Sun.COM 		} else {
23012198SEiji.Ota@Sun.COM 			parent->c_passive = conn;
23112198SEiji.Ota@Sun.COM 			rdsv3_cong_add_conn(conn);
23212198SEiji.Ota@Sun.COM 		}
23312198SEiji.Ota@Sun.COM 	} else {
23412198SEiji.Ota@Sun.COM 		/* Creating normal conn */
23512198SEiji.Ota@Sun.COM 		struct rdsv3_connection *found;
23612198SEiji.Ota@Sun.COM 
23712198SEiji.Ota@Sun.COM 		found = rdsv3_conn_lookup(laddr, faddr, &pos);
23812198SEiji.Ota@Sun.COM 		if (found) {
23912198SEiji.Ota@Sun.COM 			trans->conn_free(conn->c_transport_data);
24012198SEiji.Ota@Sun.COM 			kmem_cache_free(rdsv3_conn_slab, conn);
24112198SEiji.Ota@Sun.COM 			conn = found;
24212198SEiji.Ota@Sun.COM 		} else {
24312198SEiji.Ota@Sun.COM 			avl_insert(&rdsv3_conn_hash, conn, pos);
24412198SEiji.Ota@Sun.COM 			rdsv3_cong_add_conn(conn);
24512676SEiji.Ota@Sun.COM 			rdsv3_queue_delayed_work(rdsv3_wq, &conn->c_reap_w,
24612676SEiji.Ota@Sun.COM 			    RDSV3_REAPER_WAIT_JIFFIES);
24712198SEiji.Ota@Sun.COM 		}
24812198SEiji.Ota@Sun.COM 	}
24912198SEiji.Ota@Sun.COM 
25012198SEiji.Ota@Sun.COM 	rw_exit(&rdsv3_conn_lock);
25112198SEiji.Ota@Sun.COM 
25212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("__rdsv3_conn_create", "Return(conn: %p)", conn);
25312198SEiji.Ota@Sun.COM 
25412198SEiji.Ota@Sun.COM out:
25512198SEiji.Ota@Sun.COM 	return (conn);
25612198SEiji.Ota@Sun.COM }
25712198SEiji.Ota@Sun.COM 
25812198SEiji.Ota@Sun.COM struct rdsv3_connection *
rdsv3_conn_create(uint32_be_t laddr,uint32_be_t faddr,struct rdsv3_transport * trans,int gfp)25912198SEiji.Ota@Sun.COM rdsv3_conn_create(uint32_be_t laddr, uint32_be_t faddr,
26012198SEiji.Ota@Sun.COM     struct rdsv3_transport *trans, int gfp)
26112198SEiji.Ota@Sun.COM {
26212198SEiji.Ota@Sun.COM 	return (__rdsv3_conn_create(laddr, faddr, trans, gfp, 0));
26312198SEiji.Ota@Sun.COM }
26412198SEiji.Ota@Sun.COM 
26512198SEiji.Ota@Sun.COM struct rdsv3_connection *
rdsv3_conn_create_outgoing(uint32_be_t laddr,uint32_be_t faddr,struct rdsv3_transport * trans,int gfp)26612198SEiji.Ota@Sun.COM rdsv3_conn_create_outgoing(uint32_be_t laddr, uint32_be_t faddr,
26712198SEiji.Ota@Sun.COM     struct rdsv3_transport *trans, int gfp)
26812198SEiji.Ota@Sun.COM {
26912198SEiji.Ota@Sun.COM 	return (__rdsv3_conn_create(laddr, faddr, trans, gfp, 1));
27012198SEiji.Ota@Sun.COM }
27112198SEiji.Ota@Sun.COM 
27212676SEiji.Ota@Sun.COM extern struct avl_tree	rdsv3_conn_hash;
27312676SEiji.Ota@Sun.COM 
27412676SEiji.Ota@Sun.COM void
rdsv3_conn_shutdown(struct rdsv3_connection * conn)27512676SEiji.Ota@Sun.COM rdsv3_conn_shutdown(struct rdsv3_connection *conn)
27612676SEiji.Ota@Sun.COM {
27712676SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_conn_shutdown", "Enter(conn: %p)", conn);
27812676SEiji.Ota@Sun.COM 
27912676SEiji.Ota@Sun.COM 	/* shut it down unless it's down already */
28012676SEiji.Ota@Sun.COM 	if (!rdsv3_conn_transition(conn, RDSV3_CONN_DOWN, RDSV3_CONN_DOWN)) {
28112676SEiji.Ota@Sun.COM 		/*
28212676SEiji.Ota@Sun.COM 		 * Quiesce the connection mgmt handlers before we start tearing
28312676SEiji.Ota@Sun.COM 		 * things down. We don't hold the mutex for the entire
28412676SEiji.Ota@Sun.COM 		 * duration of the shutdown operation, else we may be
28512676SEiji.Ota@Sun.COM 		 * deadlocking with the CM handler. Instead, the CM event
28612676SEiji.Ota@Sun.COM 		 * handler is supposed to check for state DISCONNECTING
28712676SEiji.Ota@Sun.COM 		 */
28812676SEiji.Ota@Sun.COM 		mutex_enter(&conn->c_cm_lock);
28912676SEiji.Ota@Sun.COM 		if (!rdsv3_conn_transition(conn, RDSV3_CONN_UP,
29012676SEiji.Ota@Sun.COM 		    RDSV3_CONN_DISCONNECTING) &&
29112676SEiji.Ota@Sun.COM 		    !rdsv3_conn_transition(conn, RDSV3_CONN_ERROR,
29212676SEiji.Ota@Sun.COM 		    RDSV3_CONN_DISCONNECTING)) {
29312676SEiji.Ota@Sun.COM 			RDSV3_DPRINTF2("rdsv3_conn_shutdown",
29412676SEiji.Ota@Sun.COM 			    "shutdown called in state %d",
29512676SEiji.Ota@Sun.COM 			    atomic_get(&conn->c_state));
29612676SEiji.Ota@Sun.COM 			rdsv3_conn_drop(conn);
29712676SEiji.Ota@Sun.COM 			mutex_exit(&conn->c_cm_lock);
29812676SEiji.Ota@Sun.COM 			return;
29912676SEiji.Ota@Sun.COM 		}
30012676SEiji.Ota@Sun.COM 		mutex_exit(&conn->c_cm_lock);
30112676SEiji.Ota@Sun.COM 
30212676SEiji.Ota@Sun.COM 		/* verify everybody's out of rds_send_xmit() */
30312676SEiji.Ota@Sun.COM 		mutex_enter(&conn->c_send_lock);
30412676SEiji.Ota@Sun.COM 		while (atomic_get(&conn->c_senders)) {
30512676SEiji.Ota@Sun.COM 			mutex_exit(&conn->c_send_lock);
30612676SEiji.Ota@Sun.COM 			delay(1);
30712676SEiji.Ota@Sun.COM 			mutex_enter(&conn->c_send_lock);
30812676SEiji.Ota@Sun.COM 		}
30912676SEiji.Ota@Sun.COM 
31012676SEiji.Ota@Sun.COM 		conn->c_trans->conn_shutdown(conn);
31112676SEiji.Ota@Sun.COM 		rdsv3_conn_reset(conn);
31212676SEiji.Ota@Sun.COM 		mutex_exit(&conn->c_send_lock);
31312676SEiji.Ota@Sun.COM 
31412676SEiji.Ota@Sun.COM 		if (!rdsv3_conn_transition(conn, RDSV3_CONN_DISCONNECTING,
31512676SEiji.Ota@Sun.COM 		    RDSV3_CONN_DOWN)) {
31612676SEiji.Ota@Sun.COM 			/*
31712676SEiji.Ota@Sun.COM 			 * This can happen - eg when we're in the middle of
31812676SEiji.Ota@Sun.COM 			 * tearing down the connection, and someone unloads
31912676SEiji.Ota@Sun.COM 			 * the rds module.
32012676SEiji.Ota@Sun.COM 			 * Quite reproduceable with loopback connections.
32112676SEiji.Ota@Sun.COM 			 * Mostly harmless.
32212676SEiji.Ota@Sun.COM 			 */
32312676SEiji.Ota@Sun.COM #ifndef __lock_lint
32412676SEiji.Ota@Sun.COM 			RDSV3_DPRINTF2("rdsv3_conn_shutdown",
32512676SEiji.Ota@Sun.COM 			    "failed to transition to state DOWN, "
32612676SEiji.Ota@Sun.COM 			    "current statis is: %d",
32712676SEiji.Ota@Sun.COM 			    atomic_get(&conn->c_state));
32812676SEiji.Ota@Sun.COM 			rdsv3_conn_drop(conn);
32912676SEiji.Ota@Sun.COM #endif
33012676SEiji.Ota@Sun.COM 			return;
33112676SEiji.Ota@Sun.COM 		}
33212676SEiji.Ota@Sun.COM 	}
33312676SEiji.Ota@Sun.COM 
33412676SEiji.Ota@Sun.COM 	/*
33512676SEiji.Ota@Sun.COM 	 * Then reconnect if it's still live.
33612676SEiji.Ota@Sun.COM 	 * The passive side of an IB loopback connection is never added
33712676SEiji.Ota@Sun.COM 	 * to the conn hash, so we never trigger a reconnect on this
33812676SEiji.Ota@Sun.COM 	 * conn - the reconnect is always triggered by the active peer.
33912676SEiji.Ota@Sun.COM 	 */
34012676SEiji.Ota@Sun.COM 	rdsv3_cancel_delayed_work(&conn->c_conn_w);
34112676SEiji.Ota@Sun.COM 
34212676SEiji.Ota@Sun.COM 	{
34312676SEiji.Ota@Sun.COM 		struct rdsv3_conn_info_s conn_info;
34412676SEiji.Ota@Sun.COM 
34512676SEiji.Ota@Sun.COM 		conn_info.c_laddr = conn->c_laddr;
34612676SEiji.Ota@Sun.COM 		conn_info.c_faddr = conn->c_faddr;
34712676SEiji.Ota@Sun.COM 		if (avl_find(&rdsv3_conn_hash, &conn_info, NULL) == conn)
34812676SEiji.Ota@Sun.COM 			rdsv3_queue_reconnect(conn);
34912676SEiji.Ota@Sun.COM 	}
35012676SEiji.Ota@Sun.COM 	RDSV3_DPRINTF2("rdsv3_conn_shutdown", "Exit");
35112676SEiji.Ota@Sun.COM }
35212676SEiji.Ota@Sun.COM 
35312676SEiji.Ota@Sun.COM /*
35412676SEiji.Ota@Sun.COM  * Stop and free a connection.
35512676SEiji.Ota@Sun.COM  */
35612198SEiji.Ota@Sun.COM void
rdsv3_conn_destroy(struct rdsv3_connection * conn)35712198SEiji.Ota@Sun.COM rdsv3_conn_destroy(struct rdsv3_connection *conn)
35812198SEiji.Ota@Sun.COM {
35912198SEiji.Ota@Sun.COM 	struct rdsv3_message *rm, *rtmp;
36012676SEiji.Ota@Sun.COM 	list_t to_be_dropped;
36112198SEiji.Ota@Sun.COM 
36212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_conn_destroy",
36312198SEiji.Ota@Sun.COM 	    "freeing conn %p for %u.%u.%u.%u -> %u.%u.%u.%u",
36412198SEiji.Ota@Sun.COM 	    conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr));
36512198SEiji.Ota@Sun.COM 
36612198SEiji.Ota@Sun.COM 	avl_remove(&rdsv3_conn_hash, conn);
36712198SEiji.Ota@Sun.COM 
36812676SEiji.Ota@Sun.COM 	rdsv3_cancel_delayed_work(&conn->c_reap_w);
36912198SEiji.Ota@Sun.COM 	rdsv3_cancel_delayed_work(&conn->c_send_w);
37012198SEiji.Ota@Sun.COM 	rdsv3_cancel_delayed_work(&conn->c_recv_w);
37112676SEiji.Ota@Sun.COM 
37212676SEiji.Ota@Sun.COM 	rdsv3_conn_shutdown(conn);
37312198SEiji.Ota@Sun.COM 
37412198SEiji.Ota@Sun.COM 	/* tear down queued messages */
37512676SEiji.Ota@Sun.COM 
37612676SEiji.Ota@Sun.COM 	list_create(&to_be_dropped, sizeof (struct rdsv3_message),
37712676SEiji.Ota@Sun.COM 	    offsetof(struct rdsv3_message, m_conn_item));
37812676SEiji.Ota@Sun.COM 
37912676SEiji.Ota@Sun.COM 	RDSV3_FOR_EACH_LIST_NODE_SAFE(rm, rtmp, &conn->c_retrans, m_conn_item) {
38012676SEiji.Ota@Sun.COM 		list_remove_node(&rm->m_conn_item);
38112676SEiji.Ota@Sun.COM 		list_insert_tail(&to_be_dropped, rm);
38212676SEiji.Ota@Sun.COM 	}
38312676SEiji.Ota@Sun.COM 
38412676SEiji.Ota@Sun.COM 	RDSV3_FOR_EACH_LIST_NODE_SAFE(rm, rtmp, &conn->c_send_queue,
38512198SEiji.Ota@Sun.COM 	    m_conn_item) {
38612198SEiji.Ota@Sun.COM 		list_remove_node(&rm->m_conn_item);
38712676SEiji.Ota@Sun.COM 		list_insert_tail(&to_be_dropped, rm);
38812676SEiji.Ota@Sun.COM 	}
38912676SEiji.Ota@Sun.COM 
39012676SEiji.Ota@Sun.COM 	RDSV3_FOR_EACH_LIST_NODE_SAFE(rm, rtmp, &to_be_dropped, m_conn_item) {
39112676SEiji.Ota@Sun.COM 		clear_bit(RDSV3_MSG_ON_CONN, &rm->m_flags);
39212676SEiji.Ota@Sun.COM 		list_remove_node(&rm->m_conn_item);
39312198SEiji.Ota@Sun.COM 		rdsv3_message_put(rm);
39412198SEiji.Ota@Sun.COM 	}
39512676SEiji.Ota@Sun.COM 
39612198SEiji.Ota@Sun.COM 	if (conn->c_xmit_rm)
39712198SEiji.Ota@Sun.COM 		rdsv3_message_put(conn->c_xmit_rm);
39812198SEiji.Ota@Sun.COM 
39912198SEiji.Ota@Sun.COM 	conn->c_trans->conn_free(conn->c_transport_data);
40012198SEiji.Ota@Sun.COM 
40112198SEiji.Ota@Sun.COM 	/*
40212198SEiji.Ota@Sun.COM 	 * The congestion maps aren't freed up here.  They're
40312198SEiji.Ota@Sun.COM 	 * freed by rdsv3_cong_exit() after all the connections
40412198SEiji.Ota@Sun.COM 	 * have been freed.
40512198SEiji.Ota@Sun.COM 	 */
40612198SEiji.Ota@Sun.COM 	rdsv3_cong_remove_conn(conn);
40712198SEiji.Ota@Sun.COM 
40812198SEiji.Ota@Sun.COM 	ASSERT(list_is_empty(&conn->c_retrans));
40912198SEiji.Ota@Sun.COM 	kmem_cache_free(rdsv3_conn_slab, conn);
41012198SEiji.Ota@Sun.COM 
41112198SEiji.Ota@Sun.COM }
41212198SEiji.Ota@Sun.COM 
41312198SEiji.Ota@Sun.COM /* ARGSUSED */
41412198SEiji.Ota@Sun.COM static void
rdsv3_conn_message_info(struct rsock * sock,unsigned int len,struct rdsv3_info_iterator * iter,struct rdsv3_info_lengths * lens,int want_send)41512198SEiji.Ota@Sun.COM rdsv3_conn_message_info(struct rsock *sock, unsigned int len,
41612198SEiji.Ota@Sun.COM     struct rdsv3_info_iterator *iter,
41712198SEiji.Ota@Sun.COM     struct rdsv3_info_lengths *lens,
41812198SEiji.Ota@Sun.COM     int want_send)
41912198SEiji.Ota@Sun.COM {
42012198SEiji.Ota@Sun.COM 	struct list *list;
42112198SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn;
42212198SEiji.Ota@Sun.COM 	struct rdsv3_message *rm;
42312198SEiji.Ota@Sun.COM 	unsigned int total = 0;
42412198SEiji.Ota@Sun.COM 
42512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_conn_message_info", "Enter");
42612198SEiji.Ota@Sun.COM 
42712863SEiji.Ota@Sun.COM 	len /= sizeof (struct rds_info_message);
42812198SEiji.Ota@Sun.COM 
42912198SEiji.Ota@Sun.COM 	rw_enter(&rdsv3_conn_lock, RW_READER);
43012198SEiji.Ota@Sun.COM 
43112198SEiji.Ota@Sun.COM 	if (avl_is_empty(&rdsv3_conn_hash)) {
43212198SEiji.Ota@Sun.COM 		/* no connections */
43312198SEiji.Ota@Sun.COM 		rw_exit(&rdsv3_conn_lock);
43412198SEiji.Ota@Sun.COM 		return;
43512198SEiji.Ota@Sun.COM 	}
43612198SEiji.Ota@Sun.COM 
43712198SEiji.Ota@Sun.COM 	conn = (struct rdsv3_connection *)avl_first(&rdsv3_conn_hash);
43812198SEiji.Ota@Sun.COM 
43912198SEiji.Ota@Sun.COM 	do {
44012198SEiji.Ota@Sun.COM 		if (want_send)
44112198SEiji.Ota@Sun.COM 			list = &conn->c_send_queue;
44212198SEiji.Ota@Sun.COM 		else
44312198SEiji.Ota@Sun.COM 			list = &conn->c_retrans;
44412198SEiji.Ota@Sun.COM 
44512198SEiji.Ota@Sun.COM 		mutex_enter(&conn->c_lock);
44612198SEiji.Ota@Sun.COM 
44712198SEiji.Ota@Sun.COM 		/* XXX too lazy to maintain counts.. */
44812198SEiji.Ota@Sun.COM 		RDSV3_FOR_EACH_LIST_NODE(rm, list, m_conn_item) {
44912198SEiji.Ota@Sun.COM 			total++;
45012198SEiji.Ota@Sun.COM 			if (total <= len)
45112198SEiji.Ota@Sun.COM 				rdsv3_inc_info_copy(&rm->m_inc, iter,
45212198SEiji.Ota@Sun.COM 				    conn->c_laddr, conn->c_faddr, 0);
45312198SEiji.Ota@Sun.COM 		}
45412198SEiji.Ota@Sun.COM 
45512198SEiji.Ota@Sun.COM 		mutex_exit(&conn->c_lock);
45612198SEiji.Ota@Sun.COM 
45712198SEiji.Ota@Sun.COM 		conn = AVL_NEXT(&rdsv3_conn_hash, conn);
45812198SEiji.Ota@Sun.COM 	} while (conn != NULL);
45912198SEiji.Ota@Sun.COM 	rw_exit(&rdsv3_conn_lock);
46012198SEiji.Ota@Sun.COM 
46112198SEiji.Ota@Sun.COM 	lens->nr = total;
46212863SEiji.Ota@Sun.COM 	lens->each = sizeof (struct rds_info_message);
46312198SEiji.Ota@Sun.COM 
46412198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_conn_message_info", "Return");
46512198SEiji.Ota@Sun.COM }
46612198SEiji.Ota@Sun.COM 
/*
 * Info callback registered for RDS_INFO_SEND_MESSAGES: reports the send
 * queue of every connection via rdsv3_conn_message_info().
 */
static void
rdsv3_conn_message_info_send(struct rsock *sock, unsigned int len,
    struct rdsv3_info_iterator *iter,
    struct rdsv3_info_lengths *lens)
{
	const int want_send = 1;

	rdsv3_conn_message_info(sock, len, iter, lens, want_send);
}
47412198SEiji.Ota@Sun.COM 
/*
 * Info callback registered for RDS_INFO_RETRANS_MESSAGES: reports the
 * retransmit queue of every connection via rdsv3_conn_message_info().
 */
static void
rdsv3_conn_message_info_retrans(struct rsock *sock,
    unsigned int len,
    struct rdsv3_info_iterator *iter,
    struct rdsv3_info_lengths *lens)
{
	const int want_send = 0;

	rdsv3_conn_message_info(sock, len, iter, lens, want_send);
}
48312198SEiji.Ota@Sun.COM 
48412198SEiji.Ota@Sun.COM /* ARGSUSED */
48512198SEiji.Ota@Sun.COM void
rdsv3_for_each_conn_info(struct rsock * sock,unsigned int len,struct rdsv3_info_iterator * iter,struct rdsv3_info_lengths * lens,int (* visitor)(struct rdsv3_connection *,void *),size_t item_len)48612198SEiji.Ota@Sun.COM rdsv3_for_each_conn_info(struct rsock *sock, unsigned int len,
48712198SEiji.Ota@Sun.COM     struct rdsv3_info_iterator *iter,
48812198SEiji.Ota@Sun.COM     struct rdsv3_info_lengths *lens,
48912198SEiji.Ota@Sun.COM     int (*visitor)(struct rdsv3_connection *, void *),
49012198SEiji.Ota@Sun.COM     size_t item_len)
49112198SEiji.Ota@Sun.COM {
49212580SGiri.Adari@Sun.COM 	uint8_t *buffer;
49312198SEiji.Ota@Sun.COM 	struct rdsv3_connection *conn;
49412198SEiji.Ota@Sun.COM 
49512198SEiji.Ota@Sun.COM 	rw_enter(&rdsv3_conn_lock, RW_READER);
49612198SEiji.Ota@Sun.COM 
49712198SEiji.Ota@Sun.COM 	lens->nr = 0;
49812198SEiji.Ota@Sun.COM 	lens->each = item_len;
49912198SEiji.Ota@Sun.COM 
50012198SEiji.Ota@Sun.COM 	if (avl_is_empty(&rdsv3_conn_hash)) {
50112198SEiji.Ota@Sun.COM 		/* no connections */
50212198SEiji.Ota@Sun.COM 		rw_exit(&rdsv3_conn_lock);
50312198SEiji.Ota@Sun.COM 		return;
50412198SEiji.Ota@Sun.COM 	}
50512198SEiji.Ota@Sun.COM 
50612580SGiri.Adari@Sun.COM 	/* allocate a little extra as this can get cast to a uint64_t */
50712580SGiri.Adari@Sun.COM 	buffer = kmem_zalloc(item_len + 8, KM_SLEEP);
50812580SGiri.Adari@Sun.COM 
50912198SEiji.Ota@Sun.COM 	conn = (struct rdsv3_connection *)avl_first(&rdsv3_conn_hash);
51012198SEiji.Ota@Sun.COM 
51112198SEiji.Ota@Sun.COM 	do {
51212198SEiji.Ota@Sun.COM 		/* XXX no c_lock usage.. */
51312580SGiri.Adari@Sun.COM 		if (visitor(conn, buffer)) {
51412580SGiri.Adari@Sun.COM 			/*
51512580SGiri.Adari@Sun.COM 			 * We copy as much as we can fit in the buffer,
51612580SGiri.Adari@Sun.COM 			 * but we count all items so that the caller
51712580SGiri.Adari@Sun.COM 			 * can resize the buffer.
51812580SGiri.Adari@Sun.COM 			 */
51912580SGiri.Adari@Sun.COM 			if (len >= item_len) {
52012580SGiri.Adari@Sun.COM 				RDSV3_DPRINTF4("rdsv3_for_each_conn_info",
52112580SGiri.Adari@Sun.COM 				    "buffer: %p iter: %p bytes: %d", buffer,
52212580SGiri.Adari@Sun.COM 				    iter->addr + iter->offset, item_len);
52312580SGiri.Adari@Sun.COM 				rdsv3_info_copy(iter, buffer, item_len);
52412580SGiri.Adari@Sun.COM 				len -= item_len;
52512580SGiri.Adari@Sun.COM 			}
52612580SGiri.Adari@Sun.COM 			lens->nr++;
52712198SEiji.Ota@Sun.COM 		}
52812198SEiji.Ota@Sun.COM 		conn = AVL_NEXT(&rdsv3_conn_hash, conn);
52912198SEiji.Ota@Sun.COM 	} while (conn != NULL);
53012198SEiji.Ota@Sun.COM 	rw_exit(&rdsv3_conn_lock);
53112580SGiri.Adari@Sun.COM 
53212580SGiri.Adari@Sun.COM 	kmem_free(buffer, item_len + 8);
53312198SEiji.Ota@Sun.COM }
53412198SEiji.Ota@Sun.COM 
53512198SEiji.Ota@Sun.COM static int
rdsv3_conn_info_visitor(struct rdsv3_connection * conn,void * buffer)53612198SEiji.Ota@Sun.COM rdsv3_conn_info_visitor(struct rdsv3_connection *conn, void *buffer)
53712198SEiji.Ota@Sun.COM {
53812863SEiji.Ota@Sun.COM 	struct rds_info_connection *cinfo = buffer;
53912198SEiji.Ota@Sun.COM 
54012198SEiji.Ota@Sun.COM 	cinfo->next_tx_seq = conn->c_next_tx_seq;
54112198SEiji.Ota@Sun.COM 	cinfo->next_rx_seq = conn->c_next_rx_seq;
54212198SEiji.Ota@Sun.COM 	cinfo->laddr = conn->c_laddr;
54312198SEiji.Ota@Sun.COM 	cinfo->faddr = conn->c_faddr;
54412198SEiji.Ota@Sun.COM 	(void) strncpy((char *)cinfo->transport, conn->c_trans->t_name,
54512198SEiji.Ota@Sun.COM 	    sizeof (cinfo->transport));
54612198SEiji.Ota@Sun.COM 	cinfo->flags = 0;
54712198SEiji.Ota@Sun.COM 
54812198SEiji.Ota@Sun.COM 	rdsv3_conn_info_set(cinfo->flags,
54912676SEiji.Ota@Sun.COM 	    MUTEX_HELD(&conn->c_send_lock), SENDING);
55012676SEiji.Ota@Sun.COM 
55112198SEiji.Ota@Sun.COM 	/* XXX Future: return the state rather than these funky bits */
55212198SEiji.Ota@Sun.COM 	rdsv3_conn_info_set(cinfo->flags,
55312198SEiji.Ota@Sun.COM 	    atomic_get(&conn->c_state) == RDSV3_CONN_CONNECTING,
55412198SEiji.Ota@Sun.COM 	    CONNECTING);
55512198SEiji.Ota@Sun.COM 	rdsv3_conn_info_set(cinfo->flags,
55612198SEiji.Ota@Sun.COM 	    atomic_get(&conn->c_state) == RDSV3_CONN_UP,
55712198SEiji.Ota@Sun.COM 	    CONNECTED);
55812198SEiji.Ota@Sun.COM 	return (1);
55912198SEiji.Ota@Sun.COM }
56012198SEiji.Ota@Sun.COM 
56112198SEiji.Ota@Sun.COM static void
rdsv3_conn_info(struct rsock * sock,unsigned int len,struct rdsv3_info_iterator * iter,struct rdsv3_info_lengths * lens)56212198SEiji.Ota@Sun.COM rdsv3_conn_info(struct rsock *sock, unsigned int len,
56312198SEiji.Ota@Sun.COM     struct rdsv3_info_iterator *iter, struct rdsv3_info_lengths *lens)
56412198SEiji.Ota@Sun.COM {
56512198SEiji.Ota@Sun.COM 	rdsv3_for_each_conn_info(sock, len, iter, lens,
56612863SEiji.Ota@Sun.COM 	    rdsv3_conn_info_visitor, sizeof (struct rds_info_connection));
56712198SEiji.Ota@Sun.COM }
56812198SEiji.Ota@Sun.COM 
56912198SEiji.Ota@Sun.COM int
rdsv3_conn_init()57012198SEiji.Ota@Sun.COM rdsv3_conn_init()
57112198SEiji.Ota@Sun.COM {
57212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_conn_init", "Enter");
57312198SEiji.Ota@Sun.COM 
57412198SEiji.Ota@Sun.COM 	rdsv3_conn_slab = kmem_cache_create("rdsv3_connection",
57512198SEiji.Ota@Sun.COM 	    sizeof (struct rdsv3_connection), 0, rdsv3_conn_constructor,
57612198SEiji.Ota@Sun.COM 	    rdsv3_conn_destructor, NULL, NULL, NULL, 0);
57712676SEiji.Ota@Sun.COM 	if (!rdsv3_conn_slab) {
57812320SGiri.Adari@Sun.COM 		RDSV3_DPRINTF2("rdsv3_conn_init",
57912198SEiji.Ota@Sun.COM 		    "kmem_cache_create(rdsv3_conn_slab) failed");
58012676SEiji.Ota@Sun.COM 		return (-ENOMEM);
58112198SEiji.Ota@Sun.COM 	}
58212198SEiji.Ota@Sun.COM 
58312198SEiji.Ota@Sun.COM 	avl_create(&rdsv3_conn_hash, rdsv3_conn_compare,
58412198SEiji.Ota@Sun.COM 	    sizeof (struct rdsv3_connection), offsetof(struct rdsv3_connection,
58512198SEiji.Ota@Sun.COM 	    c_hash_node));
58612198SEiji.Ota@Sun.COM 
58712198SEiji.Ota@Sun.COM 	rw_init(&rdsv3_conn_lock, NULL, RW_DRIVER, NULL);
58812198SEiji.Ota@Sun.COM 
58912198SEiji.Ota@Sun.COM 	rdsv3_loop_init();
59012198SEiji.Ota@Sun.COM 
59112863SEiji.Ota@Sun.COM 	rdsv3_info_register_func(RDS_INFO_CONNECTIONS, rdsv3_conn_info);
59212863SEiji.Ota@Sun.COM 	rdsv3_info_register_func(RDS_INFO_SEND_MESSAGES,
59312198SEiji.Ota@Sun.COM 	    rdsv3_conn_message_info_send);
59412863SEiji.Ota@Sun.COM 	rdsv3_info_register_func(RDS_INFO_RETRANS_MESSAGES,
59512198SEiji.Ota@Sun.COM 	    rdsv3_conn_message_info_retrans);
59612198SEiji.Ota@Sun.COM 
59712198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_conn_init", "Return");
59812198SEiji.Ota@Sun.COM 
59912198SEiji.Ota@Sun.COM 	return (0);
60012198SEiji.Ota@Sun.COM }
60112198SEiji.Ota@Sun.COM 
60212198SEiji.Ota@Sun.COM void
rdsv3_conn_exit()60312198SEiji.Ota@Sun.COM rdsv3_conn_exit()
60412198SEiji.Ota@Sun.COM {
60512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_conn_exit", "Enter");
60612198SEiji.Ota@Sun.COM 
60712198SEiji.Ota@Sun.COM 	rdsv3_loop_exit();
60812198SEiji.Ota@Sun.COM 
60912198SEiji.Ota@Sun.COM 	rw_destroy(&rdsv3_conn_lock);
61012198SEiji.Ota@Sun.COM 	avl_destroy(&rdsv3_conn_hash);
61112198SEiji.Ota@Sun.COM 
61212198SEiji.Ota@Sun.COM 	ASSERT(rdsv3_conn_slab);
61312198SEiji.Ota@Sun.COM 	kmem_cache_destroy(rdsv3_conn_slab);
61412198SEiji.Ota@Sun.COM 
61512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_conn_exit", "Return");
61612198SEiji.Ota@Sun.COM }
61712198SEiji.Ota@Sun.COM 
61812198SEiji.Ota@Sun.COM /*
61912198SEiji.Ota@Sun.COM  * Force a disconnect
62012198SEiji.Ota@Sun.COM  */
62112198SEiji.Ota@Sun.COM void
rdsv3_conn_drop(struct rdsv3_connection * conn)62212198SEiji.Ota@Sun.COM rdsv3_conn_drop(struct rdsv3_connection *conn)
62312198SEiji.Ota@Sun.COM {
62412198SEiji.Ota@Sun.COM 	conn->c_state = RDSV3_CONN_ERROR;
62512198SEiji.Ota@Sun.COM 	rdsv3_queue_work(rdsv3_wq, &conn->c_down_w);
62612198SEiji.Ota@Sun.COM }
627