xref: /onnv-gate/usr/src/uts/common/io/ib/clients/eoib/eib_ibt.c (revision 13101:da7b13ec3a28)
1*13101SVenki.Rajagopalan@Sun.COM /*
2*13101SVenki.Rajagopalan@Sun.COM  * CDDL HEADER START
3*13101SVenki.Rajagopalan@Sun.COM  *
4*13101SVenki.Rajagopalan@Sun.COM  * The contents of this file are subject to the terms of the
5*13101SVenki.Rajagopalan@Sun.COM  * Common Development and Distribution License (the "License").
6*13101SVenki.Rajagopalan@Sun.COM  * You may not use this file except in compliance with the License.
7*13101SVenki.Rajagopalan@Sun.COM  *
8*13101SVenki.Rajagopalan@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*13101SVenki.Rajagopalan@Sun.COM  * or http://www.opensolaris.org/os/licensing.
10*13101SVenki.Rajagopalan@Sun.COM  * See the License for the specific language governing permissions
11*13101SVenki.Rajagopalan@Sun.COM  * and limitations under the License.
12*13101SVenki.Rajagopalan@Sun.COM  *
13*13101SVenki.Rajagopalan@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
14*13101SVenki.Rajagopalan@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*13101SVenki.Rajagopalan@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
16*13101SVenki.Rajagopalan@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
17*13101SVenki.Rajagopalan@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
18*13101SVenki.Rajagopalan@Sun.COM  *
19*13101SVenki.Rajagopalan@Sun.COM  * CDDL HEADER END
20*13101SVenki.Rajagopalan@Sun.COM  */
21*13101SVenki.Rajagopalan@Sun.COM 
22*13101SVenki.Rajagopalan@Sun.COM /*
23*13101SVenki.Rajagopalan@Sun.COM  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24*13101SVenki.Rajagopalan@Sun.COM  */
25*13101SVenki.Rajagopalan@Sun.COM 
26*13101SVenki.Rajagopalan@Sun.COM #include <sys/types.h>
27*13101SVenki.Rajagopalan@Sun.COM #include <sys/kmem.h>
28*13101SVenki.Rajagopalan@Sun.COM #include <sys/conf.h>
29*13101SVenki.Rajagopalan@Sun.COM #include <sys/ddi.h>
30*13101SVenki.Rajagopalan@Sun.COM #include <sys/sunddi.h>
31*13101SVenki.Rajagopalan@Sun.COM #include <sys/ksynch.h>
32*13101SVenki.Rajagopalan@Sun.COM #include <sys/dlpi.h>			/* HCKSUM_INET_FULL_V4 */
33*13101SVenki.Rajagopalan@Sun.COM #include <sys/pattr.h>			/* HCK_FULLCKSUM */
34*13101SVenki.Rajagopalan@Sun.COM #include <sys/ib/mgt/sm_attr.h>		/* SM_INIT_TYPE_REPLY_... */
35*13101SVenki.Rajagopalan@Sun.COM 
36*13101SVenki.Rajagopalan@Sun.COM #include <sys/ib/clients/eoib/eib_impl.h>
37*13101SVenki.Rajagopalan@Sun.COM 
38*13101SVenki.Rajagopalan@Sun.COM /*
39*13101SVenki.Rajagopalan@Sun.COM  * Declarations private to this file
40*13101SVenki.Rajagopalan@Sun.COM  */
41*13101SVenki.Rajagopalan@Sun.COM static void eib_ibt_reset_partitions(eib_t *);
42*13101SVenki.Rajagopalan@Sun.COM static void eib_ibt_wakeup_sqd_waiters(eib_t *, ibt_channel_hdl_t);
43*13101SVenki.Rajagopalan@Sun.COM static int eib_ibt_chan_pkey(eib_t *, eib_chan_t *, ib_pkey_t, boolean_t,
44*13101SVenki.Rajagopalan@Sun.COM     boolean_t *);
45*13101SVenki.Rajagopalan@Sun.COM static boolean_t eib_ibt_has_chan_pkey_changed(eib_t *, eib_chan_t *);
46*13101SVenki.Rajagopalan@Sun.COM static boolean_t eib_ibt_has_any_pkey_changed(eib_t *);
47*13101SVenki.Rajagopalan@Sun.COM static int eib_ibt_fill_avect(eib_t *, eib_avect_t *, ib_lid_t);
48*13101SVenki.Rajagopalan@Sun.COM static void eib_ibt_record_srate(eib_t *);
49*13101SVenki.Rajagopalan@Sun.COM 
50*13101SVenki.Rajagopalan@Sun.COM /*
51*13101SVenki.Rajagopalan@Sun.COM  * Definitions private to this file
52*13101SVenki.Rajagopalan@Sun.COM  */
53*13101SVenki.Rajagopalan@Sun.COM 
/*
 * Predicates over the SM's init type reply flags.  Each one tests a
 * single flag bit in 'itr' and evaluates to 1 when that bit is clear,
 * 0 when it is set.
 */
#define	EIB_PORT_ATTR_LOADED(itr)				\
	(!((itr) & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY))
#define	EIB_PORT_ATTR_NOT_PRESERVED(itr)			\
	(!((itr) & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY))
#define	EIB_PORT_PRES_NOT_PRESERVED(itr)			\
	(!((itr) & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY))
63*13101SVenki.Rajagopalan@Sun.COM 
/*
 * Progress flags for eib_ibt_hca_init().  Each completed step sets one
 * bit; the accumulated mask is what gets passed to the rollback routine
 * when a later step fails.
 */
#define	EIB_HCAINIT_HCA_OPENED		(1 << 0)
#define	EIB_HCAINIT_ATTRS_ALLOCD	(1 << 1)
#define	EIB_HCAINIT_HCA_PORTS_QUERIED	(1 << 2)
#define	EIB_HCAINIT_PD_ALLOCD		(1 << 3)
#define	EIB_HCAINIT_CAPAB_RECORDED	(1 << 4)
72*13101SVenki.Rajagopalan@Sun.COM 
73*13101SVenki.Rajagopalan@Sun.COM int
eib_ibt_hca_init(eib_t * ss)74*13101SVenki.Rajagopalan@Sun.COM eib_ibt_hca_init(eib_t *ss)
75*13101SVenki.Rajagopalan@Sun.COM {
76*13101SVenki.Rajagopalan@Sun.COM 	ibt_status_t ret;
77*13101SVenki.Rajagopalan@Sun.COM 	ibt_hca_portinfo_t *pi;
78*13101SVenki.Rajagopalan@Sun.COM 	uint_t num_pi;
79*13101SVenki.Rajagopalan@Sun.COM 	uint_t sz_pi;
80*13101SVenki.Rajagopalan@Sun.COM 	uint_t progress = 0;
81*13101SVenki.Rajagopalan@Sun.COM 
82*13101SVenki.Rajagopalan@Sun.COM 	if (ss->ei_hca_hdl)
83*13101SVenki.Rajagopalan@Sun.COM 		return (EIB_E_SUCCESS);
84*13101SVenki.Rajagopalan@Sun.COM 
85*13101SVenki.Rajagopalan@Sun.COM 	/*
86*13101SVenki.Rajagopalan@Sun.COM 	 * Open the HCA
87*13101SVenki.Rajagopalan@Sun.COM 	 */
88*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_open_hca(ss->ei_ibt_hdl, ss->ei_props->ep_hca_guid,
89*13101SVenki.Rajagopalan@Sun.COM 	    &ss->ei_hca_hdl);
90*13101SVenki.Rajagopalan@Sun.COM 	if (ret != IBT_SUCCESS) {
91*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_ERR(ss->ei_instance,
92*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_open_hca(hca_guid=0x%llx) "
93*13101SVenki.Rajagopalan@Sun.COM 		    "failed, ret=%d", ss->ei_props->ep_hca_guid, ret);
94*13101SVenki.Rajagopalan@Sun.COM 		goto ibt_hca_init_fail;
95*13101SVenki.Rajagopalan@Sun.COM 	}
96*13101SVenki.Rajagopalan@Sun.COM 	progress |= EIB_HCAINIT_HCA_OPENED;
97*13101SVenki.Rajagopalan@Sun.COM 
98*13101SVenki.Rajagopalan@Sun.COM 	/*
99*13101SVenki.Rajagopalan@Sun.COM 	 * Query and store HCA attributes
100*13101SVenki.Rajagopalan@Sun.COM 	 */
101*13101SVenki.Rajagopalan@Sun.COM 	ss->ei_hca_attrs = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
102*13101SVenki.Rajagopalan@Sun.COM 	progress |= EIB_HCAINIT_ATTRS_ALLOCD;
103*13101SVenki.Rajagopalan@Sun.COM 
104*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_query_hca(ss->ei_hca_hdl, ss->ei_hca_attrs);
105*13101SVenki.Rajagopalan@Sun.COM 	if (ret != IBT_SUCCESS) {
106*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_ERR(ss->ei_instance,
107*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_query_hca(hca_hdl=0x%llx, "
108*13101SVenki.Rajagopalan@Sun.COM 		    "hca_guid=0x%llx) failed, ret=%d",
109*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_hca_hdl, ss->ei_props->ep_hca_guid, ret);
110*13101SVenki.Rajagopalan@Sun.COM 		goto ibt_hca_init_fail;
111*13101SVenki.Rajagopalan@Sun.COM 	}
112*13101SVenki.Rajagopalan@Sun.COM 
113*13101SVenki.Rajagopalan@Sun.COM 	/*
114*13101SVenki.Rajagopalan@Sun.COM 	 * At this point, we don't even care about the linkstate, we only want
115*13101SVenki.Rajagopalan@Sun.COM 	 * to record our invariant base port guid and mtu
116*13101SVenki.Rajagopalan@Sun.COM 	 */
117*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_query_hca_ports(ss->ei_hca_hdl, ss->ei_props->ep_port_num,
118*13101SVenki.Rajagopalan@Sun.COM 	    &pi, &num_pi, &sz_pi);
119*13101SVenki.Rajagopalan@Sun.COM 	if (ret != IBT_SUCCESS) {
120*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_ERR(ss->ei_instance,
121*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_query_hca_ports(hca_hdl=0x%llx, "
122*13101SVenki.Rajagopalan@Sun.COM 		    "port=0x%x) failed, ret=%d", ss->ei_hca_hdl,
123*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_props->ep_port_num, ret);
124*13101SVenki.Rajagopalan@Sun.COM 		goto ibt_hca_init_fail;
125*13101SVenki.Rajagopalan@Sun.COM 	}
126*13101SVenki.Rajagopalan@Sun.COM 	if (num_pi != 1) {
127*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_ERR(ss->ei_instance,
128*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_query_hca_ports(hca_hdl=0x%llx, "
129*13101SVenki.Rajagopalan@Sun.COM 		    "port=0x%x) returned num_pi=%d", ss->ei_hca_hdl,
130*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_props->ep_port_num, num_pi);
131*13101SVenki.Rajagopalan@Sun.COM 		ibt_free_portinfo(pi, sz_pi);
132*13101SVenki.Rajagopalan@Sun.COM 		goto ibt_hca_init_fail;
133*13101SVenki.Rajagopalan@Sun.COM 	}
134*13101SVenki.Rajagopalan@Sun.COM 
135*13101SVenki.Rajagopalan@Sun.COM 	ss->ei_props->ep_sgid = pi->p_sgid_tbl[0];
136*13101SVenki.Rajagopalan@Sun.COM 	ss->ei_props->ep_mtu = (128 << pi->p_mtu);
137*13101SVenki.Rajagopalan@Sun.COM 	ibt_free_portinfo(pi, sz_pi);
138*13101SVenki.Rajagopalan@Sun.COM 
139*13101SVenki.Rajagopalan@Sun.COM 	progress |= EIB_HCAINIT_HCA_PORTS_QUERIED;
140*13101SVenki.Rajagopalan@Sun.COM 
141*13101SVenki.Rajagopalan@Sun.COM 	/*
142*13101SVenki.Rajagopalan@Sun.COM 	 * Allocate a protection domain for all our transactions
143*13101SVenki.Rajagopalan@Sun.COM 	 */
144*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_alloc_pd(ss->ei_hca_hdl, IBT_PD_NO_FLAGS, &ss->ei_pd_hdl);
145*13101SVenki.Rajagopalan@Sun.COM 	if (ret != IBT_SUCCESS) {
146*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_ERR(ss->ei_instance,
147*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_alloc_pd(hca_hdl=0x%llx, "
148*13101SVenki.Rajagopalan@Sun.COM 		    "hca_guid=0x%llx) failed, ret=%d",
149*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_hca_hdl, ss->ei_props->ep_hca_guid, ret);
150*13101SVenki.Rajagopalan@Sun.COM 		goto ibt_hca_init_fail;
151*13101SVenki.Rajagopalan@Sun.COM 	}
152*13101SVenki.Rajagopalan@Sun.COM 	progress |= EIB_HCAINIT_PD_ALLOCD;
153*13101SVenki.Rajagopalan@Sun.COM 
154*13101SVenki.Rajagopalan@Sun.COM 	/*
155*13101SVenki.Rajagopalan@Sun.COM 	 * Finally, record the capabilities
156*13101SVenki.Rajagopalan@Sun.COM 	 */
157*13101SVenki.Rajagopalan@Sun.COM 	ss->ei_caps = kmem_zalloc(sizeof (eib_caps_t), KM_SLEEP);
158*13101SVenki.Rajagopalan@Sun.COM 	eib_ibt_record_capab(ss, ss->ei_hca_attrs, ss->ei_caps);
159*13101SVenki.Rajagopalan@Sun.COM 	eib_ibt_record_srate(ss);
160*13101SVenki.Rajagopalan@Sun.COM 
161*13101SVenki.Rajagopalan@Sun.COM 	progress |= EIB_HCAINIT_CAPAB_RECORDED;
162*13101SVenki.Rajagopalan@Sun.COM 
163*13101SVenki.Rajagopalan@Sun.COM 	return (EIB_E_SUCCESS);
164*13101SVenki.Rajagopalan@Sun.COM 
165*13101SVenki.Rajagopalan@Sun.COM ibt_hca_init_fail:
166*13101SVenki.Rajagopalan@Sun.COM 	eib_rb_ibt_hca_init(ss, progress);
167*13101SVenki.Rajagopalan@Sun.COM 	return (EIB_E_FAILURE);
168*13101SVenki.Rajagopalan@Sun.COM }
169*13101SVenki.Rajagopalan@Sun.COM 
170*13101SVenki.Rajagopalan@Sun.COM void
eib_ibt_link_mod(eib_t * ss)171*13101SVenki.Rajagopalan@Sun.COM eib_ibt_link_mod(eib_t *ss)
172*13101SVenki.Rajagopalan@Sun.COM {
173*13101SVenki.Rajagopalan@Sun.COM 	eib_node_state_t *ns = ss->ei_node_state;
174*13101SVenki.Rajagopalan@Sun.COM 	ibt_hca_portinfo_t *pi;
175*13101SVenki.Rajagopalan@Sun.COM 	ibt_status_t ret;
176*13101SVenki.Rajagopalan@Sun.COM 	uint8_t vn0_mac[ETHERADDRL];
177*13101SVenki.Rajagopalan@Sun.COM 	boolean_t all_zombies = B_FALSE;
178*13101SVenki.Rajagopalan@Sun.COM 	boolean_t all_need_rejoin = B_FALSE;
179*13101SVenki.Rajagopalan@Sun.COM 	uint_t num_pi;
180*13101SVenki.Rajagopalan@Sun.COM 	uint_t sz_pi;
181*13101SVenki.Rajagopalan@Sun.COM 	uint8_t itr;
182*13101SVenki.Rajagopalan@Sun.COM 
183*13101SVenki.Rajagopalan@Sun.COM 	if (ns->ns_link_state == LINK_STATE_UNKNOWN)
184*13101SVenki.Rajagopalan@Sun.COM 		return;
185*13101SVenki.Rajagopalan@Sun.COM 
186*13101SVenki.Rajagopalan@Sun.COM 	/*
187*13101SVenki.Rajagopalan@Sun.COM 	 * See if we can get the port attributes or we're as good as down.
188*13101SVenki.Rajagopalan@Sun.COM 	 */
189*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_query_hca_ports(ss->ei_hca_hdl, ss->ei_props->ep_port_num,
190*13101SVenki.Rajagopalan@Sun.COM 	    &pi, &num_pi, &sz_pi);
191*13101SVenki.Rajagopalan@Sun.COM 	if ((ret != IBT_SUCCESS) || (pi->p_linkstate != IBT_PORT_ACTIVE)) {
192*13101SVenki.Rajagopalan@Sun.COM 		ibt_free_portinfo(pi, sz_pi);
193*13101SVenki.Rajagopalan@Sun.COM 		eib_mac_link_down(ss, B_FALSE);
194*13101SVenki.Rajagopalan@Sun.COM 		return;
195*13101SVenki.Rajagopalan@Sun.COM 	}
196*13101SVenki.Rajagopalan@Sun.COM 
197*13101SVenki.Rajagopalan@Sun.COM 	/*
198*13101SVenki.Rajagopalan@Sun.COM 	 * If the SM re-initialized the port attributes, but did not preserve
199*13101SVenki.Rajagopalan@Sun.COM 	 * the old attributes, we need to check more.
200*13101SVenki.Rajagopalan@Sun.COM 	 */
201*13101SVenki.Rajagopalan@Sun.COM 	itr = pi->p_init_type_reply;
202*13101SVenki.Rajagopalan@Sun.COM 	if (EIB_PORT_ATTR_LOADED(itr) && EIB_PORT_ATTR_NOT_PRESERVED(itr)) {
203*13101SVenki.Rajagopalan@Sun.COM 		/*
204*13101SVenki.Rajagopalan@Sun.COM 		 * We're just coming back up; if we see that our base lid
205*13101SVenki.Rajagopalan@Sun.COM 		 * or sgid table has changed, we'll update these and try to
206*13101SVenki.Rajagopalan@Sun.COM 		 * restart all active vnics. If any of the vnic pkeys have
207*13101SVenki.Rajagopalan@Sun.COM 		 * changed, we'll reset the affected channels to the new pkey.
208*13101SVenki.Rajagopalan@Sun.COM 		 */
209*13101SVenki.Rajagopalan@Sun.COM 		if (bcmp(pi->p_sgid_tbl, &ss->ei_props->ep_sgid,
210*13101SVenki.Rajagopalan@Sun.COM 		    sizeof (ib_gid_t)) != 0) {
211*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
212*13101SVenki.Rajagopalan@Sun.COM 			    "eib_ibt_link_mod: port sgid table changed "
213*13101SVenki.Rajagopalan@Sun.COM 			    "(old %llx.%llx != new %llx.%llx), "
214*13101SVenki.Rajagopalan@Sun.COM 			    "all vnics are zombies now.",
215*13101SVenki.Rajagopalan@Sun.COM 			    ss->ei_props->ep_sgid.gid_prefix,
216*13101SVenki.Rajagopalan@Sun.COM 			    ss->ei_props->ep_sgid.gid_guid,
217*13101SVenki.Rajagopalan@Sun.COM 			    pi->p_sgid_tbl[0].gid_prefix,
218*13101SVenki.Rajagopalan@Sun.COM 			    pi->p_sgid_tbl[0].gid_guid);
219*13101SVenki.Rajagopalan@Sun.COM 
220*13101SVenki.Rajagopalan@Sun.COM 			ss->ei_props->ep_sgid = pi->p_sgid_tbl[0];
221*13101SVenki.Rajagopalan@Sun.COM 			all_zombies = B_TRUE;
222*13101SVenki.Rajagopalan@Sun.COM 
223*13101SVenki.Rajagopalan@Sun.COM 		} else if (ss->ei_props->ep_blid != pi->p_base_lid) {
224*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
225*13101SVenki.Rajagopalan@Sun.COM 			    "eib_ibt_link_mod: port base lid changed "
226*13101SVenki.Rajagopalan@Sun.COM 			    "(old 0x%x != new 0x%x), "
227*13101SVenki.Rajagopalan@Sun.COM 			    "all vnics are zombies now.",
228*13101SVenki.Rajagopalan@Sun.COM 			    ss->ei_props->ep_blid, pi->p_base_lid);
229*13101SVenki.Rajagopalan@Sun.COM 
230*13101SVenki.Rajagopalan@Sun.COM 			ss->ei_props->ep_blid = pi->p_base_lid;
231*13101SVenki.Rajagopalan@Sun.COM 			all_zombies = B_TRUE;
232*13101SVenki.Rajagopalan@Sun.COM 
233*13101SVenki.Rajagopalan@Sun.COM 		} else if (eib_ibt_has_any_pkey_changed(ss)) {
234*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
235*13101SVenki.Rajagopalan@Sun.COM 			    "eib_ibt_link_mod: pkey has changed for vnic(s), "
236*13101SVenki.Rajagopalan@Sun.COM 			    "resetting all partitions");
237*13101SVenki.Rajagopalan@Sun.COM 
238*13101SVenki.Rajagopalan@Sun.COM 			eib_ibt_reset_partitions(ss);
239*13101SVenki.Rajagopalan@Sun.COM 		}
240*13101SVenki.Rajagopalan@Sun.COM 	}
241*13101SVenki.Rajagopalan@Sun.COM 
242*13101SVenki.Rajagopalan@Sun.COM 	if (pi) {
243*13101SVenki.Rajagopalan@Sun.COM 		ibt_free_portinfo(pi, sz_pi);
244*13101SVenki.Rajagopalan@Sun.COM 	}
245*13101SVenki.Rajagopalan@Sun.COM 
246*13101SVenki.Rajagopalan@Sun.COM 	/*
247*13101SVenki.Rajagopalan@Sun.COM 	 * If the SM hasn't preserved our presence in MCGs, we need to
248*13101SVenki.Rajagopalan@Sun.COM 	 * rejoin all of them.
249*13101SVenki.Rajagopalan@Sun.COM 	 */
250*13101SVenki.Rajagopalan@Sun.COM 	if (EIB_PORT_PRES_NOT_PRESERVED(itr)) {
251*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_link_mod: "
252*13101SVenki.Rajagopalan@Sun.COM 		    "hca_guid=0x%llx, port=0x%x presence not preserved in SM, "
253*13101SVenki.Rajagopalan@Sun.COM 		    "rejoining all mcgs", ss->ei_props->ep_hca_guid,
254*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_props->ep_port_num);
255*13101SVenki.Rajagopalan@Sun.COM 
256*13101SVenki.Rajagopalan@Sun.COM 		all_need_rejoin = B_TRUE;
257*13101SVenki.Rajagopalan@Sun.COM 	}
258*13101SVenki.Rajagopalan@Sun.COM 
259*13101SVenki.Rajagopalan@Sun.COM 	/*
260*13101SVenki.Rajagopalan@Sun.COM 	 * Before we do the actual work of restarting/rejoining, we need to
261*13101SVenki.Rajagopalan@Sun.COM 	 * see if the GW is reachable at this point of time.  If not, we
262*13101SVenki.Rajagopalan@Sun.COM 	 * still continue to keep our link "down."  Whenever the GW becomes
263*13101SVenki.Rajagopalan@Sun.COM 	 * reachable again, we'll restart/rejoin all the vnics that we've
264*13101SVenki.Rajagopalan@Sun.COM 	 * just marked.
265*13101SVenki.Rajagopalan@Sun.COM 	 */
266*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&ss->ei_vnic_lock);
267*13101SVenki.Rajagopalan@Sun.COM 	if (all_zombies) {
268*13101SVenki.Rajagopalan@Sun.COM 		ss->ei_zombie_vnics = ss->ei_active_vnics;
269*13101SVenki.Rajagopalan@Sun.COM 	}
270*13101SVenki.Rajagopalan@Sun.COM 	if (all_need_rejoin) {
271*13101SVenki.Rajagopalan@Sun.COM 		ss->ei_rejoin_vnics = ss->ei_active_vnics;
272*13101SVenki.Rajagopalan@Sun.COM 	}
273*13101SVenki.Rajagopalan@Sun.COM 	if (ss->ei_gw_unreachable) {
274*13101SVenki.Rajagopalan@Sun.COM 		mutex_exit(&ss->ei_vnic_lock);
275*13101SVenki.Rajagopalan@Sun.COM 
276*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_link_mod: "
277*13101SVenki.Rajagopalan@Sun.COM 		    "gateway (gw_port=0x%x) unreachable for "
278*13101SVenki.Rajagopalan@Sun.COM 		    "hca_guid=0x%llx, port=0x%x, link state down",
279*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_gw_props->pp_gw_portid, ss->ei_props->ep_hca_guid,
280*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_props->ep_port_num);
281*13101SVenki.Rajagopalan@Sun.COM 
282*13101SVenki.Rajagopalan@Sun.COM 		eib_mac_link_down(ss, B_FALSE);
283*13101SVenki.Rajagopalan@Sun.COM 		return;
284*13101SVenki.Rajagopalan@Sun.COM 	}
285*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&ss->ei_vnic_lock);
286*13101SVenki.Rajagopalan@Sun.COM 
287*13101SVenki.Rajagopalan@Sun.COM 	/*
288*13101SVenki.Rajagopalan@Sun.COM 	 * Try to awaken the dead if possible
289*13101SVenki.Rajagopalan@Sun.COM 	 */
290*13101SVenki.Rajagopalan@Sun.COM 	bcopy(eib_zero_mac, vn0_mac, ETHERADDRL);
291*13101SVenki.Rajagopalan@Sun.COM 	if (all_zombies) {
292*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_link_mod: "
293*13101SVenki.Rajagopalan@Sun.COM 		    "hca_guid=0x%llx, hca_port=0x%x, gw_port=0x%x, "
294*13101SVenki.Rajagopalan@Sun.COM 		    "attempting to resurrect zombies",
295*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_props->ep_hca_guid, ss->ei_props->ep_port_num,
296*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_gw_props->pp_gw_portid);
297*13101SVenki.Rajagopalan@Sun.COM 
298*13101SVenki.Rajagopalan@Sun.COM 		eib_vnic_resurrect_zombies(ss, vn0_mac);
299*13101SVenki.Rajagopalan@Sun.COM 	}
300*13101SVenki.Rajagopalan@Sun.COM 
301*13101SVenki.Rajagopalan@Sun.COM 	/*
302*13101SVenki.Rajagopalan@Sun.COM 	 * Re-join the mcgs if we need to
303*13101SVenki.Rajagopalan@Sun.COM 	 */
304*13101SVenki.Rajagopalan@Sun.COM 	if (all_need_rejoin) {
305*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_link_mod: "
306*13101SVenki.Rajagopalan@Sun.COM 		    "hca_guid=0x%llx, hca_port=0x%x, gw_port=0x%x, "
307*13101SVenki.Rajagopalan@Sun.COM 		    "attempting to rejoin mcgs",
308*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_props->ep_hca_guid, ss->ei_props->ep_port_num,
309*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_gw_props->pp_gw_portid);
310*13101SVenki.Rajagopalan@Sun.COM 
311*13101SVenki.Rajagopalan@Sun.COM 		eib_vnic_rejoin_mcgs(ss);
312*13101SVenki.Rajagopalan@Sun.COM 	}
313*13101SVenki.Rajagopalan@Sun.COM 
314*13101SVenki.Rajagopalan@Sun.COM 	/*
315*13101SVenki.Rajagopalan@Sun.COM 	 * If we've restarted the zombies because the gateway went down and
316*13101SVenki.Rajagopalan@Sun.COM 	 * came back, it is possible our unicast mac address changed from
317*13101SVenki.Rajagopalan@Sun.COM 	 * what it was earlier. If so, we need to update our unicast address
318*13101SVenki.Rajagopalan@Sun.COM 	 * with the mac layer before marking the link up.
319*13101SVenki.Rajagopalan@Sun.COM 	 */
320*13101SVenki.Rajagopalan@Sun.COM 	if (bcmp(vn0_mac, eib_zero_mac, ETHERADDRL) != 0)
321*13101SVenki.Rajagopalan@Sun.COM 		mac_unicst_update(ss->ei_mac_hdl, vn0_mac);
322*13101SVenki.Rajagopalan@Sun.COM 
323*13101SVenki.Rajagopalan@Sun.COM 	/*
324*13101SVenki.Rajagopalan@Sun.COM 	 * Notify the link state up if required
325*13101SVenki.Rajagopalan@Sun.COM 	 */
326*13101SVenki.Rajagopalan@Sun.COM 	eib_mac_link_up(ss, B_FALSE);
327*13101SVenki.Rajagopalan@Sun.COM }
328*13101SVenki.Rajagopalan@Sun.COM 
329*13101SVenki.Rajagopalan@Sun.COM int
eib_ibt_modify_chan_pkey(eib_t * ss,eib_chan_t * chan,ib_pkey_t pkey)330*13101SVenki.Rajagopalan@Sun.COM eib_ibt_modify_chan_pkey(eib_t *ss, eib_chan_t *chan, ib_pkey_t pkey)
331*13101SVenki.Rajagopalan@Sun.COM {
332*13101SVenki.Rajagopalan@Sun.COM 	/*
333*13101SVenki.Rajagopalan@Sun.COM 	 * Make sure the channel pkey and index are set to what we need
334*13101SVenki.Rajagopalan@Sun.COM 	 */
335*13101SVenki.Rajagopalan@Sun.COM 	return (eib_ibt_chan_pkey(ss, chan, pkey, B_TRUE, NULL));
336*13101SVenki.Rajagopalan@Sun.COM }
337*13101SVenki.Rajagopalan@Sun.COM 
338*13101SVenki.Rajagopalan@Sun.COM eib_avect_t *
eib_ibt_hold_avect(eib_t * ss,ib_lid_t dlid,uint8_t sl)339*13101SVenki.Rajagopalan@Sun.COM eib_ibt_hold_avect(eib_t *ss, ib_lid_t dlid, uint8_t sl)
340*13101SVenki.Rajagopalan@Sun.COM {
341*13101SVenki.Rajagopalan@Sun.COM 	uint_t ndx = dlid % EIB_AV_NBUCKETS;	/* simple hashing */
342*13101SVenki.Rajagopalan@Sun.COM 	eib_avect_t *av;
343*13101SVenki.Rajagopalan@Sun.COM 	eib_avect_t *prev;
344*13101SVenki.Rajagopalan@Sun.COM 	int ret;
345*13101SVenki.Rajagopalan@Sun.COM 
346*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&ss->ei_av_lock);
347*13101SVenki.Rajagopalan@Sun.COM 
348*13101SVenki.Rajagopalan@Sun.COM 	/*
349*13101SVenki.Rajagopalan@Sun.COM 	 * See if we have the address vector
350*13101SVenki.Rajagopalan@Sun.COM 	 */
351*13101SVenki.Rajagopalan@Sun.COM 	prev = NULL;
352*13101SVenki.Rajagopalan@Sun.COM 	for (av = ss->ei_av[ndx]; av; av = av->av_next) {
353*13101SVenki.Rajagopalan@Sun.COM 		prev = av;
354*13101SVenki.Rajagopalan@Sun.COM 		if ((av->av_vect).av_dlid == dlid)
355*13101SVenki.Rajagopalan@Sun.COM 			break;
356*13101SVenki.Rajagopalan@Sun.COM 	}
357*13101SVenki.Rajagopalan@Sun.COM 
358*13101SVenki.Rajagopalan@Sun.COM 	/*
359*13101SVenki.Rajagopalan@Sun.COM 	 * If we don't have it, create a new one and chain it to
360*13101SVenki.Rajagopalan@Sun.COM 	 * the same bucket
361*13101SVenki.Rajagopalan@Sun.COM 	 */
362*13101SVenki.Rajagopalan@Sun.COM 	if (av == NULL) {
363*13101SVenki.Rajagopalan@Sun.COM 		av = kmem_zalloc(sizeof (eib_avect_t), KM_NOSLEEP);
364*13101SVenki.Rajagopalan@Sun.COM 		if (av == NULL) {
365*13101SVenki.Rajagopalan@Sun.COM 			mutex_exit(&ss->ei_av_lock);
366*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_hold_avect: "
367*13101SVenki.Rajagopalan@Sun.COM 			    "no memory, could not allocate address vector");
368*13101SVenki.Rajagopalan@Sun.COM 			return (NULL);
369*13101SVenki.Rajagopalan@Sun.COM 		}
370*13101SVenki.Rajagopalan@Sun.COM 
371*13101SVenki.Rajagopalan@Sun.COM 		ret = EIB_E_FAILURE;
372*13101SVenki.Rajagopalan@Sun.COM 		if (!eib_wa_no_av_discover)
373*13101SVenki.Rajagopalan@Sun.COM 			ret = eib_ibt_fill_avect(ss, av, dlid);
374*13101SVenki.Rajagopalan@Sun.COM 
375*13101SVenki.Rajagopalan@Sun.COM 		if (ret != EIB_E_SUCCESS) {
376*13101SVenki.Rajagopalan@Sun.COM 			(av->av_vect).av_srate = IBT_SRATE_10;
377*13101SVenki.Rajagopalan@Sun.COM 			(av->av_vect).av_srvl = sl;
378*13101SVenki.Rajagopalan@Sun.COM 			(av->av_vect).av_port_num = ss->ei_props->ep_port_num;
379*13101SVenki.Rajagopalan@Sun.COM 			(av->av_vect).av_send_grh = B_FALSE;
380*13101SVenki.Rajagopalan@Sun.COM 			(av->av_vect).av_dlid = dlid;
381*13101SVenki.Rajagopalan@Sun.COM 			(av->av_vect).av_src_path = 0;	/* we use base lid */
382*13101SVenki.Rajagopalan@Sun.COM 		}
383*13101SVenki.Rajagopalan@Sun.COM 
384*13101SVenki.Rajagopalan@Sun.COM 		if (prev)
385*13101SVenki.Rajagopalan@Sun.COM 			prev->av_next = av;
386*13101SVenki.Rajagopalan@Sun.COM 		else
387*13101SVenki.Rajagopalan@Sun.COM 			ss->ei_av[ndx] = av;
388*13101SVenki.Rajagopalan@Sun.COM 	}
389*13101SVenki.Rajagopalan@Sun.COM 
390*13101SVenki.Rajagopalan@Sun.COM 	/*
391*13101SVenki.Rajagopalan@Sun.COM 	 * Increment the address vector reference count before returning
392*13101SVenki.Rajagopalan@Sun.COM 	 */
393*13101SVenki.Rajagopalan@Sun.COM 	(av->av_ref)++;
394*13101SVenki.Rajagopalan@Sun.COM 
395*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&ss->ei_av_lock);
396*13101SVenki.Rajagopalan@Sun.COM 
397*13101SVenki.Rajagopalan@Sun.COM 	return (av);
398*13101SVenki.Rajagopalan@Sun.COM }
399*13101SVenki.Rajagopalan@Sun.COM 
400*13101SVenki.Rajagopalan@Sun.COM static int
eib_ibt_fill_avect(eib_t * ss,eib_avect_t * av,ib_lid_t dlid)401*13101SVenki.Rajagopalan@Sun.COM eib_ibt_fill_avect(eib_t *ss, eib_avect_t *av, ib_lid_t dlid)
402*13101SVenki.Rajagopalan@Sun.COM {
403*13101SVenki.Rajagopalan@Sun.COM 	ibt_node_info_t ni;
404*13101SVenki.Rajagopalan@Sun.COM 	ibt_path_attr_t attr;
405*13101SVenki.Rajagopalan@Sun.COM 	ibt_path_info_t path;
406*13101SVenki.Rajagopalan@Sun.COM 	ibt_status_t ret;
407*13101SVenki.Rajagopalan@Sun.COM 	ib_gid_t dgid;
408*13101SVenki.Rajagopalan@Sun.COM 
409*13101SVenki.Rajagopalan@Sun.COM 	if ((ret = ibt_lid_to_node_info(dlid, &ni)) != IBT_SUCCESS) {
410*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_fill_avect: "
411*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_lid_to_node_info(dlid=0x%x) failed, ret=%d",
412*13101SVenki.Rajagopalan@Sun.COM 		    dlid, ret);
413*13101SVenki.Rajagopalan@Sun.COM 		return (EIB_E_FAILURE);
414*13101SVenki.Rajagopalan@Sun.COM 	}
415*13101SVenki.Rajagopalan@Sun.COM 	dgid.gid_prefix = ss->ei_gw_props->pp_gw_sn_prefix;
416*13101SVenki.Rajagopalan@Sun.COM 	dgid.gid_guid = ni.n_port_guid;
417*13101SVenki.Rajagopalan@Sun.COM 
418*13101SVenki.Rajagopalan@Sun.COM 	/*
419*13101SVenki.Rajagopalan@Sun.COM 	 * Get the reversible path information for this destination
420*13101SVenki.Rajagopalan@Sun.COM 	 */
421*13101SVenki.Rajagopalan@Sun.COM 	bzero(&attr, sizeof (ibt_path_info_t));
422*13101SVenki.Rajagopalan@Sun.COM 	attr.pa_sgid = ss->ei_props->ep_sgid;
423*13101SVenki.Rajagopalan@Sun.COM 	attr.pa_dgids = &dgid;
424*13101SVenki.Rajagopalan@Sun.COM 	attr.pa_num_dgids = 1;
425*13101SVenki.Rajagopalan@Sun.COM 
426*13101SVenki.Rajagopalan@Sun.COM 	bzero(&path, sizeof (ibt_path_info_t));
427*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_get_paths(ss->ei_ibt_hdl, IBT_PATH_NO_FLAGS,
428*13101SVenki.Rajagopalan@Sun.COM 	    &attr, 1, &path, NULL);
429*13101SVenki.Rajagopalan@Sun.COM 	if ((ret != IBT_SUCCESS) || (path.pi_hca_guid == 0)) {
430*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_fill_avect: "
431*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_get_paths(dgid=%llx.%llx) failed, ret=%d",
432*13101SVenki.Rajagopalan@Sun.COM 		    dgid.gid_prefix, dgid.gid_guid);
433*13101SVenki.Rajagopalan@Sun.COM 		return (EIB_E_FAILURE);
434*13101SVenki.Rajagopalan@Sun.COM 	}
435*13101SVenki.Rajagopalan@Sun.COM 
436*13101SVenki.Rajagopalan@Sun.COM 	/*
437*13101SVenki.Rajagopalan@Sun.COM 	 * Fill in the address vector
438*13101SVenki.Rajagopalan@Sun.COM 	 */
439*13101SVenki.Rajagopalan@Sun.COM 	bcopy(&path.pi_prim_cep_path.cep_adds_vect, &av->av_vect,
440*13101SVenki.Rajagopalan@Sun.COM 	    sizeof (ibt_adds_vect_t));
441*13101SVenki.Rajagopalan@Sun.COM 
442*13101SVenki.Rajagopalan@Sun.COM 	return (EIB_E_SUCCESS);
443*13101SVenki.Rajagopalan@Sun.COM }
444*13101SVenki.Rajagopalan@Sun.COM 
445*13101SVenki.Rajagopalan@Sun.COM void
eib_ibt_release_avect(eib_t * ss,eib_avect_t * av)446*13101SVenki.Rajagopalan@Sun.COM eib_ibt_release_avect(eib_t *ss, eib_avect_t *av)
447*13101SVenki.Rajagopalan@Sun.COM {
448*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&ss->ei_av_lock);
449*13101SVenki.Rajagopalan@Sun.COM 
450*13101SVenki.Rajagopalan@Sun.COM 	ASSERT(av->av_ref > 0);
451*13101SVenki.Rajagopalan@Sun.COM 	(av->av_ref)--;
452*13101SVenki.Rajagopalan@Sun.COM 
453*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&ss->ei_av_lock);
454*13101SVenki.Rajagopalan@Sun.COM }
455*13101SVenki.Rajagopalan@Sun.COM 
456*13101SVenki.Rajagopalan@Sun.COM void
eib_ibt_free_avects(eib_t * ss)457*13101SVenki.Rajagopalan@Sun.COM eib_ibt_free_avects(eib_t *ss)
458*13101SVenki.Rajagopalan@Sun.COM {
459*13101SVenki.Rajagopalan@Sun.COM 	eib_avect_t *av;
460*13101SVenki.Rajagopalan@Sun.COM 	eib_avect_t *av_next;
461*13101SVenki.Rajagopalan@Sun.COM 	int ndx;
462*13101SVenki.Rajagopalan@Sun.COM 
463*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&ss->ei_av_lock);
464*13101SVenki.Rajagopalan@Sun.COM 	for (ndx = 0; ndx < EIB_AV_NBUCKETS; ndx++) {
465*13101SVenki.Rajagopalan@Sun.COM 		for (av = ss->ei_av[ndx]; av; av = av_next) {
466*13101SVenki.Rajagopalan@Sun.COM 			av_next = av->av_next;
467*13101SVenki.Rajagopalan@Sun.COM 
468*13101SVenki.Rajagopalan@Sun.COM 			ASSERT(av->av_ref == 0);
469*13101SVenki.Rajagopalan@Sun.COM 			kmem_free(av, sizeof (eib_avect_t));
470*13101SVenki.Rajagopalan@Sun.COM 		}
471*13101SVenki.Rajagopalan@Sun.COM 		ss->ei_av[ndx] = NULL;
472*13101SVenki.Rajagopalan@Sun.COM 	}
473*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&ss->ei_av_lock);
474*13101SVenki.Rajagopalan@Sun.COM }
475*13101SVenki.Rajagopalan@Sun.COM 
476*13101SVenki.Rajagopalan@Sun.COM /*ARGSUSED*/
477*13101SVenki.Rajagopalan@Sun.COM void
eib_ibt_async_handler(void * clnt_private,ibt_hca_hdl_t hca_hdl,ibt_async_code_t code,ibt_async_event_t * event)478*13101SVenki.Rajagopalan@Sun.COM eib_ibt_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
479*13101SVenki.Rajagopalan@Sun.COM     ibt_async_code_t code, ibt_async_event_t *event)
480*13101SVenki.Rajagopalan@Sun.COM {
481*13101SVenki.Rajagopalan@Sun.COM 	eib_t *ss = (eib_t *)clnt_private;
482*13101SVenki.Rajagopalan@Sun.COM 	eib_event_t *evi;
483*13101SVenki.Rajagopalan@Sun.COM 	uint_t ev_code;
484*13101SVenki.Rajagopalan@Sun.COM 
485*13101SVenki.Rajagopalan@Sun.COM 	ev_code = EIB_EV_NONE;
486*13101SVenki.Rajagopalan@Sun.COM 
487*13101SVenki.Rajagopalan@Sun.COM 	switch (code) {
488*13101SVenki.Rajagopalan@Sun.COM 	case IBT_EVENT_SQD:
489*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_VERBOSE(ss->ei_instance,
490*13101SVenki.Rajagopalan@Sun.COM 		    "eib_ibt_async_handler: got IBT_EVENT_SQD");
491*13101SVenki.Rajagopalan@Sun.COM 		eib_ibt_wakeup_sqd_waiters(ss, event->ev_chan_hdl);
492*13101SVenki.Rajagopalan@Sun.COM 		break;
493*13101SVenki.Rajagopalan@Sun.COM 
494*13101SVenki.Rajagopalan@Sun.COM 	case IBT_EVENT_PORT_UP:
495*13101SVenki.Rajagopalan@Sun.COM 		if (event->ev_port == ss->ei_props->ep_port_num) {
496*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
497*13101SVenki.Rajagopalan@Sun.COM 			    "eib_ibt_async_handler: got IBT_EVENT_PORT_UP");
498*13101SVenki.Rajagopalan@Sun.COM 			ev_code = EIB_EV_PORT_UP;
499*13101SVenki.Rajagopalan@Sun.COM 		}
500*13101SVenki.Rajagopalan@Sun.COM 		break;
501*13101SVenki.Rajagopalan@Sun.COM 
502*13101SVenki.Rajagopalan@Sun.COM 	case IBT_ERROR_PORT_DOWN:
503*13101SVenki.Rajagopalan@Sun.COM 		if (event->ev_port == ss->ei_props->ep_port_num) {
504*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
505*13101SVenki.Rajagopalan@Sun.COM 			    "eib_ibt_async_handler: got IBT_ERROR_PORT_DOWN");
506*13101SVenki.Rajagopalan@Sun.COM 			ev_code = EIB_EV_PORT_DOWN;
507*13101SVenki.Rajagopalan@Sun.COM 		}
508*13101SVenki.Rajagopalan@Sun.COM 		break;
509*13101SVenki.Rajagopalan@Sun.COM 
510*13101SVenki.Rajagopalan@Sun.COM 	case IBT_CLNT_REREG_EVENT:
511*13101SVenki.Rajagopalan@Sun.COM 		if (event->ev_port == ss->ei_props->ep_port_num) {
512*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
513*13101SVenki.Rajagopalan@Sun.COM 			    "eib_ibt_async_handler: got IBT_CLNT_REREG_EVENT");
514*13101SVenki.Rajagopalan@Sun.COM 			ev_code = EIB_EV_CLNT_REREG;
515*13101SVenki.Rajagopalan@Sun.COM 		}
516*13101SVenki.Rajagopalan@Sun.COM 		break;
517*13101SVenki.Rajagopalan@Sun.COM 
518*13101SVenki.Rajagopalan@Sun.COM 	case IBT_PORT_CHANGE_EVENT:
519*13101SVenki.Rajagopalan@Sun.COM 		if ((event->ev_port == ss->ei_props->ep_port_num) &&
520*13101SVenki.Rajagopalan@Sun.COM 		    (event->ev_port_flags & IBT_PORT_CHANGE_PKEY)) {
521*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
522*13101SVenki.Rajagopalan@Sun.COM 			    "eib_ibt_async_handler: "
523*13101SVenki.Rajagopalan@Sun.COM 			    "got IBT_PORT_CHANGE_EVENT(PKEY_CHANGE)");
524*13101SVenki.Rajagopalan@Sun.COM 			ev_code = EIB_EV_PKEY_CHANGE;
525*13101SVenki.Rajagopalan@Sun.COM 		} else if ((event->ev_port == ss->ei_props->ep_port_num) &&
526*13101SVenki.Rajagopalan@Sun.COM 		    (event->ev_port_flags & IBT_PORT_CHANGE_SGID)) {
527*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
528*13101SVenki.Rajagopalan@Sun.COM 			    "eib_ibt_async_handler: "
529*13101SVenki.Rajagopalan@Sun.COM 			    "got IBT_PORT_CHANGE_EVENT(SGID_CHANGE)");
530*13101SVenki.Rajagopalan@Sun.COM 			ev_code = EIB_EV_SGID_CHANGE;
531*13101SVenki.Rajagopalan@Sun.COM 		}
532*13101SVenki.Rajagopalan@Sun.COM 		break;
533*13101SVenki.Rajagopalan@Sun.COM 
534*13101SVenki.Rajagopalan@Sun.COM 	case IBT_HCA_ATTACH_EVENT:
535*13101SVenki.Rajagopalan@Sun.COM 		/*
536*13101SVenki.Rajagopalan@Sun.COM 		 * For HCA attach, after a new HCA is plugged in and
537*13101SVenki.Rajagopalan@Sun.COM 		 * configured using cfgadm, an explicit plumb will need
538*13101SVenki.Rajagopalan@Sun.COM 		 * to be run, so we don't need to do anything here.
539*13101SVenki.Rajagopalan@Sun.COM 		 */
540*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_async_handler: "
541*13101SVenki.Rajagopalan@Sun.COM 		    "got IBT_HCA_ATTACH_EVENT");
542*13101SVenki.Rajagopalan@Sun.COM 		break;
543*13101SVenki.Rajagopalan@Sun.COM 
544*13101SVenki.Rajagopalan@Sun.COM 	case IBT_HCA_DETACH_EVENT:
545*13101SVenki.Rajagopalan@Sun.COM 		/*
546*13101SVenki.Rajagopalan@Sun.COM 		 * Before an HCA unplug, cfgadm is expected to trigger
547*13101SVenki.Rajagopalan@Sun.COM 		 * any rcm scripts to unplumb the EoIB instances on the
548*13101SVenki.Rajagopalan@Sun.COM 		 * card. If so, we should not be holding any hca resource,
549*13101SVenki.Rajagopalan@Sun.COM 		 * since we don't do ibt_open_hca() until plumb time. However,
550*13101SVenki.Rajagopalan@Sun.COM 		 * if an earlier unplumb hadn't cleaned up the hca resources
551*13101SVenki.Rajagopalan@Sun.COM 		 * properly because the network layer hadn't returned the
552*13101SVenki.Rajagopalan@Sun.COM 		 * buffers at that time, we could be holding hca resources.
553*13101SVenki.Rajagopalan@Sun.COM 		 * We'll try to release them here, and protect the code from
554*13101SVenki.Rajagopalan@Sun.COM 		 * racing with some other plumb/unplumb operation.
555*13101SVenki.Rajagopalan@Sun.COM 		 */
556*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_async_handler: "
557*13101SVenki.Rajagopalan@Sun.COM 		    "got IBT_HCA_DETACH_EVENT");
558*13101SVenki.Rajagopalan@Sun.COM 
559*13101SVenki.Rajagopalan@Sun.COM 		eib_mac_set_nic_state(ss, EIB_NIC_STOPPING);
560*13101SVenki.Rajagopalan@Sun.COM 		eib_rb_rsrc_setup_bufs(ss, B_FALSE);
561*13101SVenki.Rajagopalan@Sun.COM 		if (ss->ei_tx || ss->ei_rx || ss->ei_lso) {
562*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_WARN(ss->ei_instance,
563*13101SVenki.Rajagopalan@Sun.COM 			    "eib_events_handler: nw layer still holding "
564*13101SVenki.Rajagopalan@Sun.COM 			    "hca resources, could not detach HCA");
565*13101SVenki.Rajagopalan@Sun.COM 		} else if (ss->ei_hca_hdl) {
566*13101SVenki.Rajagopalan@Sun.COM 			eib_rb_ibt_hca_init(ss, ~0);
567*13101SVenki.Rajagopalan@Sun.COM 		}
568*13101SVenki.Rajagopalan@Sun.COM 		eib_mac_clr_nic_state(ss, EIB_NIC_STOPPING);
569*13101SVenki.Rajagopalan@Sun.COM 
570*13101SVenki.Rajagopalan@Sun.COM 		break;
571*13101SVenki.Rajagopalan@Sun.COM 	}
572*13101SVenki.Rajagopalan@Sun.COM 
573*13101SVenki.Rajagopalan@Sun.COM 	if (ev_code != EIB_EV_NONE) {
574*13101SVenki.Rajagopalan@Sun.COM 		evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
575*13101SVenki.Rajagopalan@Sun.COM 		if (evi == NULL) {
576*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_WARN(ss->ei_instance,
577*13101SVenki.Rajagopalan@Sun.COM 			    "eib_ibt_async_handler: "
578*13101SVenki.Rajagopalan@Sun.COM 			    "no memory, could not handle event 0x%lx", ev_code);
579*13101SVenki.Rajagopalan@Sun.COM 		} else {
580*13101SVenki.Rajagopalan@Sun.COM 			evi->ev_code = ev_code;
581*13101SVenki.Rajagopalan@Sun.COM 			evi->ev_arg = NULL;
582*13101SVenki.Rajagopalan@Sun.COM 			eib_svc_enqueue_event(ss, evi);
583*13101SVenki.Rajagopalan@Sun.COM 		}
584*13101SVenki.Rajagopalan@Sun.COM 	}
585*13101SVenki.Rajagopalan@Sun.COM }
586*13101SVenki.Rajagopalan@Sun.COM 
587*13101SVenki.Rajagopalan@Sun.COM /*ARGSUSED*/
588*13101SVenki.Rajagopalan@Sun.COM void
eib_ibt_record_capab(eib_t * ss,ibt_hca_attr_t * hca_attrs,eib_caps_t * caps)589*13101SVenki.Rajagopalan@Sun.COM eib_ibt_record_capab(eib_t *ss, ibt_hca_attr_t *hca_attrs, eib_caps_t *caps)
590*13101SVenki.Rajagopalan@Sun.COM {
591*13101SVenki.Rajagopalan@Sun.COM 	uint_t max_swqe = EIB_DATA_MAX_SWQE;
592*13101SVenki.Rajagopalan@Sun.COM 	uint_t max_rwqe = EIB_DATA_MAX_RWQE;
593*13101SVenki.Rajagopalan@Sun.COM 
594*13101SVenki.Rajagopalan@Sun.COM 	/*
595*13101SVenki.Rajagopalan@Sun.COM 	 * Checksum
596*13101SVenki.Rajagopalan@Sun.COM 	 */
597*13101SVenki.Rajagopalan@Sun.COM 	caps->cp_cksum_flags = 0;
598*13101SVenki.Rajagopalan@Sun.COM 	if ((!eib_wa_no_cksum_offload) &&
599*13101SVenki.Rajagopalan@Sun.COM 	    (hca_attrs->hca_flags & IBT_HCA_CKSUM_FULL)) {
600*13101SVenki.Rajagopalan@Sun.COM 		caps->cp_cksum_flags =
601*13101SVenki.Rajagopalan@Sun.COM 		    HCK_FULLCKSUM | HCKSUM_INET_FULL_V4;
602*13101SVenki.Rajagopalan@Sun.COM 		    /* HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM; */
603*13101SVenki.Rajagopalan@Sun.COM 	}
604*13101SVenki.Rajagopalan@Sun.COM 
605*13101SVenki.Rajagopalan@Sun.COM 	/*
606*13101SVenki.Rajagopalan@Sun.COM 	 * Reserved L-Key
607*13101SVenki.Rajagopalan@Sun.COM 	 */
608*13101SVenki.Rajagopalan@Sun.COM 	if (hca_attrs->hca_flags2 & IBT_HCA2_RES_LKEY) {
609*13101SVenki.Rajagopalan@Sun.COM 		caps->cp_resv_lkey_capab = 1;
610*13101SVenki.Rajagopalan@Sun.COM 		caps->cp_resv_lkey = hca_attrs->hca_reserved_lkey;
611*13101SVenki.Rajagopalan@Sun.COM 	}
612*13101SVenki.Rajagopalan@Sun.COM 
613*13101SVenki.Rajagopalan@Sun.COM 	/*
614*13101SVenki.Rajagopalan@Sun.COM 	 * LSO
615*13101SVenki.Rajagopalan@Sun.COM 	 */
616*13101SVenki.Rajagopalan@Sun.COM 	caps->cp_lso_maxlen = 0;
617*13101SVenki.Rajagopalan@Sun.COM 	if (!eib_wa_no_lso) {
618*13101SVenki.Rajagopalan@Sun.COM 		if (hca_attrs->hca_max_lso_size > EIB_LSO_MAXLEN) {
619*13101SVenki.Rajagopalan@Sun.COM 			caps->cp_lso_maxlen = EIB_LSO_MAXLEN;
620*13101SVenki.Rajagopalan@Sun.COM 		} else {
621*13101SVenki.Rajagopalan@Sun.COM 			caps->cp_lso_maxlen = hca_attrs->hca_max_lso_size;
622*13101SVenki.Rajagopalan@Sun.COM 		}
623*13101SVenki.Rajagopalan@Sun.COM 	}
624*13101SVenki.Rajagopalan@Sun.COM 
625*13101SVenki.Rajagopalan@Sun.COM 	/*
626*13101SVenki.Rajagopalan@Sun.COM 	 * SGL
627*13101SVenki.Rajagopalan@Sun.COM 	 *
628*13101SVenki.Rajagopalan@Sun.COM 	 * Translating virtual address regions into physical regions
629*13101SVenki.Rajagopalan@Sun.COM 	 * for using the Reserved LKey feature results in a wr sgl that
630*13101SVenki.Rajagopalan@Sun.COM 	 * is a little longer. Since failing ibt_map_mem_iov() is costly,
631*13101SVenki.Rajagopalan@Sun.COM 	 * we'll record a high-water mark (65%) when we should stop
632*13101SVenki.Rajagopalan@Sun.COM 	 * trying to use Reserved LKey
633*13101SVenki.Rajagopalan@Sun.COM 	 */
634*13101SVenki.Rajagopalan@Sun.COM 	if (hca_attrs->hca_flags & IBT_HCA_WQE_SIZE_INFO) {
635*13101SVenki.Rajagopalan@Sun.COM 		caps->cp_max_sgl = hca_attrs->hca_ud_send_sgl_sz;
636*13101SVenki.Rajagopalan@Sun.COM 	} else {
637*13101SVenki.Rajagopalan@Sun.COM 		caps->cp_max_sgl = hca_attrs->hca_max_sgl;
638*13101SVenki.Rajagopalan@Sun.COM 	}
639*13101SVenki.Rajagopalan@Sun.COM 	if (caps->cp_max_sgl > EIB_MAX_SGL) {
640*13101SVenki.Rajagopalan@Sun.COM 		caps->cp_max_sgl = EIB_MAX_SGL;
641*13101SVenki.Rajagopalan@Sun.COM 	}
642*13101SVenki.Rajagopalan@Sun.COM 	caps->cp_hiwm_sgl = (caps->cp_max_sgl * 65) / 100;
643*13101SVenki.Rajagopalan@Sun.COM 
644*13101SVenki.Rajagopalan@Sun.COM 	/*
645*13101SVenki.Rajagopalan@Sun.COM 	 * SWQE/RWQE: meet max chan size and max cq size limits (leave room
646*13101SVenki.Rajagopalan@Sun.COM 	 * to avoid cq overflow event)
647*13101SVenki.Rajagopalan@Sun.COM 	 */
648*13101SVenki.Rajagopalan@Sun.COM 	if (max_swqe > hca_attrs->hca_max_chan_sz)
649*13101SVenki.Rajagopalan@Sun.COM 		max_swqe = hca_attrs->hca_max_chan_sz;
650*13101SVenki.Rajagopalan@Sun.COM 	if (max_swqe > (hca_attrs->hca_max_cq_sz - 1))
651*13101SVenki.Rajagopalan@Sun.COM 		max_swqe = hca_attrs->hca_max_cq_sz - 1;
652*13101SVenki.Rajagopalan@Sun.COM 	caps->cp_max_swqe = max_swqe;
653*13101SVenki.Rajagopalan@Sun.COM 
654*13101SVenki.Rajagopalan@Sun.COM 	if (max_rwqe > hca_attrs->hca_max_chan_sz)
655*13101SVenki.Rajagopalan@Sun.COM 		max_rwqe = hca_attrs->hca_max_chan_sz;
656*13101SVenki.Rajagopalan@Sun.COM 	if (max_rwqe > (hca_attrs->hca_max_cq_sz - 1))
657*13101SVenki.Rajagopalan@Sun.COM 		max_rwqe = hca_attrs->hca_max_cq_sz - 1;
658*13101SVenki.Rajagopalan@Sun.COM 	caps->cp_max_rwqe = max_rwqe;
659*13101SVenki.Rajagopalan@Sun.COM }
660*13101SVenki.Rajagopalan@Sun.COM 
661*13101SVenki.Rajagopalan@Sun.COM void
eib_rb_ibt_hca_init(eib_t * ss,uint_t progress)662*13101SVenki.Rajagopalan@Sun.COM eib_rb_ibt_hca_init(eib_t *ss, uint_t progress)
663*13101SVenki.Rajagopalan@Sun.COM {
664*13101SVenki.Rajagopalan@Sun.COM 	ibt_status_t ret;
665*13101SVenki.Rajagopalan@Sun.COM 
666*13101SVenki.Rajagopalan@Sun.COM 	if (progress & EIB_HCAINIT_CAPAB_RECORDED) {
667*13101SVenki.Rajagopalan@Sun.COM 		if (ss->ei_caps) {
668*13101SVenki.Rajagopalan@Sun.COM 			kmem_free(ss->ei_caps, sizeof (eib_caps_t));
669*13101SVenki.Rajagopalan@Sun.COM 			ss->ei_caps = NULL;
670*13101SVenki.Rajagopalan@Sun.COM 		}
671*13101SVenki.Rajagopalan@Sun.COM 	}
672*13101SVenki.Rajagopalan@Sun.COM 
673*13101SVenki.Rajagopalan@Sun.COM 	if (progress & EIB_HCAINIT_PD_ALLOCD) {
674*13101SVenki.Rajagopalan@Sun.COM 		if (ss->ei_pd_hdl) {
675*13101SVenki.Rajagopalan@Sun.COM 			ret = ibt_free_pd(ss->ei_hca_hdl, ss->ei_pd_hdl);
676*13101SVenki.Rajagopalan@Sun.COM 			if (ret != IBT_SUCCESS) {
677*13101SVenki.Rajagopalan@Sun.COM 				EIB_DPRINTF_WARN(ss->ei_instance,
678*13101SVenki.Rajagopalan@Sun.COM 				    "eib_rb_ibt_hca_init: "
679*13101SVenki.Rajagopalan@Sun.COM 				    "ibt_free_pd(hca_hdl=0x%lx, pd_hdl=0x%lx) "
680*13101SVenki.Rajagopalan@Sun.COM 				    "failed, ret=%d", ss->ei_hca_hdl,
681*13101SVenki.Rajagopalan@Sun.COM 				    ss->ei_pd_hdl, ret);
682*13101SVenki.Rajagopalan@Sun.COM 			}
683*13101SVenki.Rajagopalan@Sun.COM 			ss->ei_pd_hdl = NULL;
684*13101SVenki.Rajagopalan@Sun.COM 		}
685*13101SVenki.Rajagopalan@Sun.COM 	}
686*13101SVenki.Rajagopalan@Sun.COM 
687*13101SVenki.Rajagopalan@Sun.COM 	if (progress & EIB_HCAINIT_HCA_PORTS_QUERIED) {
688*13101SVenki.Rajagopalan@Sun.COM 		ss->ei_props->ep_mtu = 0;
689*13101SVenki.Rajagopalan@Sun.COM 		bzero(&ss->ei_props->ep_sgid, sizeof (ib_gid_t));
690*13101SVenki.Rajagopalan@Sun.COM 	}
691*13101SVenki.Rajagopalan@Sun.COM 
692*13101SVenki.Rajagopalan@Sun.COM 	if (progress & EIB_HCAINIT_ATTRS_ALLOCD) {
693*13101SVenki.Rajagopalan@Sun.COM 		kmem_free(ss->ei_hca_attrs, sizeof (ibt_hca_attr_t));
694*13101SVenki.Rajagopalan@Sun.COM 		ss->ei_hca_attrs = NULL;
695*13101SVenki.Rajagopalan@Sun.COM 	}
696*13101SVenki.Rajagopalan@Sun.COM 
697*13101SVenki.Rajagopalan@Sun.COM 	if (progress & EIB_HCAINIT_HCA_OPENED) {
698*13101SVenki.Rajagopalan@Sun.COM 		ret = ibt_close_hca(ss->ei_hca_hdl);
699*13101SVenki.Rajagopalan@Sun.COM 		if (ret != IBT_SUCCESS) {
700*13101SVenki.Rajagopalan@Sun.COM 			EIB_DPRINTF_WARN(ss->ei_instance,
701*13101SVenki.Rajagopalan@Sun.COM 			    "ibt_close_hca(hca_hdl=0x%lx) failed, "
702*13101SVenki.Rajagopalan@Sun.COM 			    "ret=%d", ss->ei_hca_hdl, ret);
703*13101SVenki.Rajagopalan@Sun.COM 		}
704*13101SVenki.Rajagopalan@Sun.COM 		ss->ei_hca_hdl = NULL;
705*13101SVenki.Rajagopalan@Sun.COM 	}
706*13101SVenki.Rajagopalan@Sun.COM }
707*13101SVenki.Rajagopalan@Sun.COM 
708*13101SVenki.Rajagopalan@Sun.COM static void
eib_ibt_reset_partitions(eib_t * ss)709*13101SVenki.Rajagopalan@Sun.COM eib_ibt_reset_partitions(eib_t *ss)
710*13101SVenki.Rajagopalan@Sun.COM {
711*13101SVenki.Rajagopalan@Sun.COM 	eib_vnic_t *vnic;
712*13101SVenki.Rajagopalan@Sun.COM 	eib_chan_t *chan = NULL;
713*13101SVenki.Rajagopalan@Sun.COM 	uint64_t av;
714*13101SVenki.Rajagopalan@Sun.COM 	int inst = 0;
715*13101SVenki.Rajagopalan@Sun.COM 
716*13101SVenki.Rajagopalan@Sun.COM 	/*
717*13101SVenki.Rajagopalan@Sun.COM 	 * We already have the vhub pkey recorded in our eib_chan_t.
718*13101SVenki.Rajagopalan@Sun.COM 	 * We only need to make sure our pkey index still matches it.
719*13101SVenki.Rajagopalan@Sun.COM 	 * If not, modify the channel appropriately and update our
720*13101SVenki.Rajagopalan@Sun.COM 	 * records.
721*13101SVenki.Rajagopalan@Sun.COM 	 */
722*13101SVenki.Rajagopalan@Sun.COM 	if ((chan = ss->ei_admin_chan) != NULL)
723*13101SVenki.Rajagopalan@Sun.COM 		(void) eib_ibt_modify_chan_pkey(ss, chan, chan->ch_pkey);
724*13101SVenki.Rajagopalan@Sun.COM 
725*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&ss->ei_vnic_lock);
726*13101SVenki.Rajagopalan@Sun.COM 	av = ss->ei_active_vnics;
727*13101SVenki.Rajagopalan@Sun.COM 	while ((inst = EIB_FIND_LSB_SET(av)) != -1) {
728*13101SVenki.Rajagopalan@Sun.COM 		if ((vnic = ss->ei_vnic[inst]) != NULL) {
729*13101SVenki.Rajagopalan@Sun.COM 			if ((chan = vnic->vn_ctl_chan) != NULL) {
730*13101SVenki.Rajagopalan@Sun.COM 				(void) eib_ibt_modify_chan_pkey(ss, chan,
731*13101SVenki.Rajagopalan@Sun.COM 				    chan->ch_pkey);
732*13101SVenki.Rajagopalan@Sun.COM 			}
733*13101SVenki.Rajagopalan@Sun.COM 			if ((chan = vnic->vn_data_chan) != NULL) {
734*13101SVenki.Rajagopalan@Sun.COM 				(void) eib_ibt_modify_chan_pkey(ss, chan,
735*13101SVenki.Rajagopalan@Sun.COM 				    chan->ch_pkey);
736*13101SVenki.Rajagopalan@Sun.COM 			}
737*13101SVenki.Rajagopalan@Sun.COM 		}
738*13101SVenki.Rajagopalan@Sun.COM 		av &= (~((uint64_t)1 << inst));
739*13101SVenki.Rajagopalan@Sun.COM 	}
740*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&ss->ei_vnic_lock);
741*13101SVenki.Rajagopalan@Sun.COM }
742*13101SVenki.Rajagopalan@Sun.COM 
743*13101SVenki.Rajagopalan@Sun.COM static void
eib_ibt_wakeup_sqd_waiters(eib_t * ss,ibt_channel_hdl_t ev_chan_hdl)744*13101SVenki.Rajagopalan@Sun.COM eib_ibt_wakeup_sqd_waiters(eib_t *ss, ibt_channel_hdl_t ev_chan_hdl)
745*13101SVenki.Rajagopalan@Sun.COM {
746*13101SVenki.Rajagopalan@Sun.COM 	eib_vnic_t *vnic;
747*13101SVenki.Rajagopalan@Sun.COM 	eib_chan_t *chan = NULL;
748*13101SVenki.Rajagopalan@Sun.COM 	uint64_t av;
749*13101SVenki.Rajagopalan@Sun.COM 	int inst = 0;
750*13101SVenki.Rajagopalan@Sun.COM 
751*13101SVenki.Rajagopalan@Sun.COM 	/*
752*13101SVenki.Rajagopalan@Sun.COM 	 * See if this channel has been waiting for its queue to drain.
753*13101SVenki.Rajagopalan@Sun.COM 	 *
754*13101SVenki.Rajagopalan@Sun.COM 	 * Note that since this is especially likely to be called during
755*13101SVenki.Rajagopalan@Sun.COM 	 * logging in to the gateway, we also need to check the vnic
756*13101SVenki.Rajagopalan@Sun.COM 	 * currently being created.
757*13101SVenki.Rajagopalan@Sun.COM 	 */
758*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&ss->ei_vnic_lock);
759*13101SVenki.Rajagopalan@Sun.COM 
760*13101SVenki.Rajagopalan@Sun.COM 	if ((vnic = ss->ei_vnic_pending) != NULL) {
761*13101SVenki.Rajagopalan@Sun.COM 		chan = vnic->vn_ctl_chan;
762*13101SVenki.Rajagopalan@Sun.COM 		if ((chan) && (chan->ch_chan == ev_chan_hdl))
763*13101SVenki.Rajagopalan@Sun.COM 			goto wakeup_sqd_waiters;
764*13101SVenki.Rajagopalan@Sun.COM 
765*13101SVenki.Rajagopalan@Sun.COM 		chan = vnic->vn_data_chan;
766*13101SVenki.Rajagopalan@Sun.COM 		if ((chan) && (chan->ch_chan == ev_chan_hdl))
767*13101SVenki.Rajagopalan@Sun.COM 			goto wakeup_sqd_waiters;
768*13101SVenki.Rajagopalan@Sun.COM 	}
769*13101SVenki.Rajagopalan@Sun.COM 
770*13101SVenki.Rajagopalan@Sun.COM 	av = ss->ei_active_vnics;
771*13101SVenki.Rajagopalan@Sun.COM 	while ((inst = EIB_FIND_LSB_SET(av)) != -1) {
772*13101SVenki.Rajagopalan@Sun.COM 		if ((vnic = ss->ei_vnic[inst]) != NULL) {
773*13101SVenki.Rajagopalan@Sun.COM 			chan = vnic->vn_ctl_chan;
774*13101SVenki.Rajagopalan@Sun.COM 			if (chan->ch_chan == ev_chan_hdl)
775*13101SVenki.Rajagopalan@Sun.COM 				break;
776*13101SVenki.Rajagopalan@Sun.COM 
777*13101SVenki.Rajagopalan@Sun.COM 			chan = vnic->vn_data_chan;
778*13101SVenki.Rajagopalan@Sun.COM 			if (chan->ch_chan == ev_chan_hdl)
779*13101SVenki.Rajagopalan@Sun.COM 				break;
780*13101SVenki.Rajagopalan@Sun.COM 		}
781*13101SVenki.Rajagopalan@Sun.COM 		av &= (~((uint64_t)1 << inst));
782*13101SVenki.Rajagopalan@Sun.COM 	}
783*13101SVenki.Rajagopalan@Sun.COM 
784*13101SVenki.Rajagopalan@Sun.COM wakeup_sqd_waiters:
785*13101SVenki.Rajagopalan@Sun.COM 	if (chan) {
786*13101SVenki.Rajagopalan@Sun.COM 		mutex_enter(&chan->ch_cep_lock);
787*13101SVenki.Rajagopalan@Sun.COM 		chan->ch_cep_state = IBT_STATE_SQD;
788*13101SVenki.Rajagopalan@Sun.COM 		cv_broadcast(&chan->ch_cep_cv);
789*13101SVenki.Rajagopalan@Sun.COM 		mutex_exit(&chan->ch_cep_lock);
790*13101SVenki.Rajagopalan@Sun.COM 	}
791*13101SVenki.Rajagopalan@Sun.COM 
792*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&ss->ei_vnic_lock);
793*13101SVenki.Rajagopalan@Sun.COM }
794*13101SVenki.Rajagopalan@Sun.COM 
795*13101SVenki.Rajagopalan@Sun.COM static int
eib_ibt_chan_pkey(eib_t * ss,eib_chan_t * chan,ib_pkey_t new_pkey,boolean_t set,boolean_t * pkey_changed)796*13101SVenki.Rajagopalan@Sun.COM eib_ibt_chan_pkey(eib_t *ss, eib_chan_t *chan, ib_pkey_t new_pkey,
797*13101SVenki.Rajagopalan@Sun.COM     boolean_t set, boolean_t *pkey_changed)
798*13101SVenki.Rajagopalan@Sun.COM {
799*13101SVenki.Rajagopalan@Sun.COM 	ibt_qp_info_t qp_attr;
800*13101SVenki.Rajagopalan@Sun.COM 	ibt_status_t ret;
801*13101SVenki.Rajagopalan@Sun.COM 	uint16_t new_pkey_ix;
802*13101SVenki.Rajagopalan@Sun.COM 
803*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_pkey2index(ss->ei_hca_hdl, ss->ei_props->ep_port_num,
804*13101SVenki.Rajagopalan@Sun.COM 	    new_pkey, &new_pkey_ix);
805*13101SVenki.Rajagopalan@Sun.COM 	if (ret != IBT_SUCCESS) {
806*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_chan_pkey: "
807*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_pkey2index(hca_hdl=0x%llx, port_num=0x%x, "
808*13101SVenki.Rajagopalan@Sun.COM 		    "pkey=0x%x) failed, ret=%d",
809*13101SVenki.Rajagopalan@Sun.COM 		    ss->ei_hca_hdl, ss->ei_props->ep_port_num, new_pkey, ret);
810*13101SVenki.Rajagopalan@Sun.COM 		return (EIB_E_FAILURE);
811*13101SVenki.Rajagopalan@Sun.COM 	}
812*13101SVenki.Rajagopalan@Sun.COM 
813*13101SVenki.Rajagopalan@Sun.COM 	/*
814*13101SVenki.Rajagopalan@Sun.COM 	 * If the pkey and the pkey index we have already matches the
815*13101SVenki.Rajagopalan@Sun.COM 	 * new one, nothing to do.
816*13101SVenki.Rajagopalan@Sun.COM 	 */
817*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&chan->ch_pkey_lock);
818*13101SVenki.Rajagopalan@Sun.COM 	if ((chan->ch_pkey == new_pkey) && (chan->ch_pkey_ix == new_pkey_ix)) {
819*13101SVenki.Rajagopalan@Sun.COM 		if (pkey_changed) {
820*13101SVenki.Rajagopalan@Sun.COM 			*pkey_changed = B_FALSE;
821*13101SVenki.Rajagopalan@Sun.COM 		}
822*13101SVenki.Rajagopalan@Sun.COM 		mutex_exit(&chan->ch_pkey_lock);
823*13101SVenki.Rajagopalan@Sun.COM 		return (EIB_E_SUCCESS);
824*13101SVenki.Rajagopalan@Sun.COM 	}
825*13101SVenki.Rajagopalan@Sun.COM 	if (pkey_changed) {
826*13101SVenki.Rajagopalan@Sun.COM 		*pkey_changed = B_TRUE;
827*13101SVenki.Rajagopalan@Sun.COM 	}
828*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&chan->ch_pkey_lock);
829*13101SVenki.Rajagopalan@Sun.COM 
830*13101SVenki.Rajagopalan@Sun.COM 	/*
831*13101SVenki.Rajagopalan@Sun.COM 	 * Otherwise, if we're asked only to test if the pkey index
832*13101SVenki.Rajagopalan@Sun.COM 	 * supplied matches the one recorded in the channel, return
833*13101SVenki.Rajagopalan@Sun.COM 	 * success, but don't set the pkey.
834*13101SVenki.Rajagopalan@Sun.COM 	 */
835*13101SVenki.Rajagopalan@Sun.COM 	if (!set) {
836*13101SVenki.Rajagopalan@Sun.COM 		return (EIB_E_SUCCESS);
837*13101SVenki.Rajagopalan@Sun.COM 	}
838*13101SVenki.Rajagopalan@Sun.COM 
839*13101SVenki.Rajagopalan@Sun.COM 	/*
840*13101SVenki.Rajagopalan@Sun.COM 	 * Otherwise, we need to change channel pkey.  Pause the
841*13101SVenki.Rajagopalan@Sun.COM 	 * channel sendq first.
842*13101SVenki.Rajagopalan@Sun.COM 	 */
843*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_pause_sendq(chan->ch_chan, IBT_CEP_SET_SQD_EVENT);
844*13101SVenki.Rajagopalan@Sun.COM 	if (ret != IBT_SUCCESS) {
845*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_chan_pkey: "
846*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_pause_sendq(chan_hdl=0x%llx) failed, ret=%d",
847*13101SVenki.Rajagopalan@Sun.COM 		    chan->ch_chan, ret);
848*13101SVenki.Rajagopalan@Sun.COM 		return (EIB_E_FAILURE);
849*13101SVenki.Rajagopalan@Sun.COM 	}
850*13101SVenki.Rajagopalan@Sun.COM 
851*13101SVenki.Rajagopalan@Sun.COM 	/*
852*13101SVenki.Rajagopalan@Sun.COM 	 * Wait for the channel to enter the IBT_STATE_SQD state
853*13101SVenki.Rajagopalan@Sun.COM 	 */
854*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&chan->ch_cep_lock);
855*13101SVenki.Rajagopalan@Sun.COM 	while (chan->ch_cep_state != IBT_STATE_SQD)
856*13101SVenki.Rajagopalan@Sun.COM 		cv_wait(&chan->ch_cep_cv, &chan->ch_cep_lock);
857*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&chan->ch_cep_lock);
858*13101SVenki.Rajagopalan@Sun.COM 
859*13101SVenki.Rajagopalan@Sun.COM 	/*
860*13101SVenki.Rajagopalan@Sun.COM 	 * Modify the qp with the supplied pkey index and unpause the channel
861*13101SVenki.Rajagopalan@Sun.COM 	 * If either of these operations fail, we'll leave the channel in
862*13101SVenki.Rajagopalan@Sun.COM 	 * the paused state and fail.
863*13101SVenki.Rajagopalan@Sun.COM 	 */
864*13101SVenki.Rajagopalan@Sun.COM 	bzero(&qp_attr, sizeof (ibt_qp_info_t));
865*13101SVenki.Rajagopalan@Sun.COM 
866*13101SVenki.Rajagopalan@Sun.COM 	qp_attr.qp_trans = IBT_UD_SRV;
867*13101SVenki.Rajagopalan@Sun.COM 	qp_attr.qp_current_state = IBT_STATE_SQD;
868*13101SVenki.Rajagopalan@Sun.COM 	qp_attr.qp_state = IBT_STATE_SQD;
869*13101SVenki.Rajagopalan@Sun.COM 	qp_attr.qp_transport.ud.ud_pkey_ix = new_pkey_ix;
870*13101SVenki.Rajagopalan@Sun.COM 
871*13101SVenki.Rajagopalan@Sun.COM 	/*
872*13101SVenki.Rajagopalan@Sun.COM 	 * Modify the qp to set the new pkey index, then unpause the
873*13101SVenki.Rajagopalan@Sun.COM 	 * channel and put it in RTS state and update the new values
874*13101SVenki.Rajagopalan@Sun.COM 	 * in our records
875*13101SVenki.Rajagopalan@Sun.COM 	 */
876*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&chan->ch_pkey_lock);
877*13101SVenki.Rajagopalan@Sun.COM 
878*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_modify_qp(chan->ch_chan,
879*13101SVenki.Rajagopalan@Sun.COM 	    IBT_CEP_SET_STATE | IBT_CEP_SET_PKEY_IX, &qp_attr, NULL);
880*13101SVenki.Rajagopalan@Sun.COM 	if (ret != IBT_SUCCESS) {
881*13101SVenki.Rajagopalan@Sun.COM 		mutex_exit(&chan->ch_pkey_lock);
882*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_chan_pkey: "
883*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_modify_qp(chan_hdl=0x%llx, IBT_CEP_SET_PKEY_IX) "
884*13101SVenki.Rajagopalan@Sun.COM 		    "failed for new_pkey_ix=0x%x, ret=%d",
885*13101SVenki.Rajagopalan@Sun.COM 		    chan->ch_chan, new_pkey_ix, ret);
886*13101SVenki.Rajagopalan@Sun.COM 		return (EIB_E_FAILURE);
887*13101SVenki.Rajagopalan@Sun.COM 	}
888*13101SVenki.Rajagopalan@Sun.COM 
889*13101SVenki.Rajagopalan@Sun.COM 	if ((ret = ibt_unpause_sendq(chan->ch_chan)) != IBT_SUCCESS) {
890*13101SVenki.Rajagopalan@Sun.COM 		mutex_exit(&chan->ch_pkey_lock);
891*13101SVenki.Rajagopalan@Sun.COM 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_chan_pkey: "
892*13101SVenki.Rajagopalan@Sun.COM 		    "ibt_unpause_sendq(chan_hdl=0x%llx) failed, ret=%d",
893*13101SVenki.Rajagopalan@Sun.COM 		    chan->ch_chan, ret);
894*13101SVenki.Rajagopalan@Sun.COM 		return (EIB_E_FAILURE);
895*13101SVenki.Rajagopalan@Sun.COM 	}
896*13101SVenki.Rajagopalan@Sun.COM 
897*13101SVenki.Rajagopalan@Sun.COM 	chan->ch_pkey = new_pkey;
898*13101SVenki.Rajagopalan@Sun.COM 	chan->ch_pkey_ix = new_pkey_ix;
899*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&chan->ch_pkey_lock);
900*13101SVenki.Rajagopalan@Sun.COM 
901*13101SVenki.Rajagopalan@Sun.COM 	return (EIB_E_SUCCESS);
902*13101SVenki.Rajagopalan@Sun.COM }
903*13101SVenki.Rajagopalan@Sun.COM 
904*13101SVenki.Rajagopalan@Sun.COM static boolean_t
eib_ibt_has_chan_pkey_changed(eib_t * ss,eib_chan_t * chan)905*13101SVenki.Rajagopalan@Sun.COM eib_ibt_has_chan_pkey_changed(eib_t *ss, eib_chan_t *chan)
906*13101SVenki.Rajagopalan@Sun.COM {
907*13101SVenki.Rajagopalan@Sun.COM 	boolean_t changed;
908*13101SVenki.Rajagopalan@Sun.COM 	int ret;
909*13101SVenki.Rajagopalan@Sun.COM 
910*13101SVenki.Rajagopalan@Sun.COM 	/*
911*13101SVenki.Rajagopalan@Sun.COM 	 * Don't modify the pkey, just ask if the pkey index for the channel's
912*13101SVenki.Rajagopalan@Sun.COM 	 * pkey has changed for any reason.  If we fail, assume that the pkey
913*13101SVenki.Rajagopalan@Sun.COM 	 * has changed.
914*13101SVenki.Rajagopalan@Sun.COM 	 */
915*13101SVenki.Rajagopalan@Sun.COM 	ret = eib_ibt_chan_pkey(ss, chan, chan->ch_pkey, B_FALSE, &changed);
916*13101SVenki.Rajagopalan@Sun.COM 	if (ret != EIB_E_SUCCESS)
917*13101SVenki.Rajagopalan@Sun.COM 		changed = B_TRUE;
918*13101SVenki.Rajagopalan@Sun.COM 
919*13101SVenki.Rajagopalan@Sun.COM 	return (changed);
920*13101SVenki.Rajagopalan@Sun.COM }
921*13101SVenki.Rajagopalan@Sun.COM 
922*13101SVenki.Rajagopalan@Sun.COM static boolean_t
eib_ibt_has_any_pkey_changed(eib_t * ss)923*13101SVenki.Rajagopalan@Sun.COM eib_ibt_has_any_pkey_changed(eib_t *ss)
924*13101SVenki.Rajagopalan@Sun.COM {
925*13101SVenki.Rajagopalan@Sun.COM 	eib_vnic_t *vnic;
926*13101SVenki.Rajagopalan@Sun.COM 	eib_chan_t *chan = NULL;
927*13101SVenki.Rajagopalan@Sun.COM 	uint64_t av;
928*13101SVenki.Rajagopalan@Sun.COM 	int inst = 0;
929*13101SVenki.Rajagopalan@Sun.COM 
930*13101SVenki.Rajagopalan@Sun.COM 	/*
931*13101SVenki.Rajagopalan@Sun.COM 	 * Return true if the pkey index of any our pkeys (of the channels
932*13101SVenki.Rajagopalan@Sun.COM 	 * of all active vnics) has changed.
933*13101SVenki.Rajagopalan@Sun.COM 	 */
934*13101SVenki.Rajagopalan@Sun.COM 
935*13101SVenki.Rajagopalan@Sun.COM 	chan = ss->ei_admin_chan;
936*13101SVenki.Rajagopalan@Sun.COM 	if ((chan) && (eib_ibt_has_chan_pkey_changed(ss, chan)))
937*13101SVenki.Rajagopalan@Sun.COM 		return (B_TRUE);
938*13101SVenki.Rajagopalan@Sun.COM 
939*13101SVenki.Rajagopalan@Sun.COM 	mutex_enter(&ss->ei_vnic_lock);
940*13101SVenki.Rajagopalan@Sun.COM 	av = ss->ei_active_vnics;
941*13101SVenki.Rajagopalan@Sun.COM 	while ((inst = EIB_FIND_LSB_SET(av)) != -1) {
942*13101SVenki.Rajagopalan@Sun.COM 		if ((vnic = ss->ei_vnic[inst]) != NULL) {
943*13101SVenki.Rajagopalan@Sun.COM 			chan = vnic->vn_ctl_chan;
944*13101SVenki.Rajagopalan@Sun.COM 			if ((chan) && (eib_ibt_has_chan_pkey_changed(ss, chan)))
945*13101SVenki.Rajagopalan@Sun.COM 				return (B_TRUE);
946*13101SVenki.Rajagopalan@Sun.COM 
947*13101SVenki.Rajagopalan@Sun.COM 			chan = vnic->vn_data_chan;
948*13101SVenki.Rajagopalan@Sun.COM 			if ((chan) && (eib_ibt_has_chan_pkey_changed(ss, chan)))
949*13101SVenki.Rajagopalan@Sun.COM 				return (B_TRUE);
950*13101SVenki.Rajagopalan@Sun.COM 		}
951*13101SVenki.Rajagopalan@Sun.COM 		av &= (~((uint64_t)1 << inst));
952*13101SVenki.Rajagopalan@Sun.COM 	}
953*13101SVenki.Rajagopalan@Sun.COM 	mutex_exit(&ss->ei_vnic_lock);
954*13101SVenki.Rajagopalan@Sun.COM 
955*13101SVenki.Rajagopalan@Sun.COM 	return (B_FALSE);
956*13101SVenki.Rajagopalan@Sun.COM }
957*13101SVenki.Rajagopalan@Sun.COM 
958*13101SVenki.Rajagopalan@Sun.COM /*
959*13101SVenki.Rajagopalan@Sun.COM  * This routine is currently used simply to derive and record the port
960*13101SVenki.Rajagopalan@Sun.COM  * speed from the loopback path information (for debug purposes).  For
961*13101SVenki.Rajagopalan@Sun.COM  * EoIB, currently the srate used in address vectors to IB neighbors
962*13101SVenki.Rajagopalan@Sun.COM  * and the gateway is fixed at IBT_SRATE_10. Eventually though, this
963*13101SVenki.Rajagopalan@Sun.COM  * information (and sl) has to come from the gateway for all destinations
964*13101SVenki.Rajagopalan@Sun.COM  * in the vhub table.
965*13101SVenki.Rajagopalan@Sun.COM  */
966*13101SVenki.Rajagopalan@Sun.COM static void
eib_ibt_record_srate(eib_t * ss)967*13101SVenki.Rajagopalan@Sun.COM eib_ibt_record_srate(eib_t *ss)
968*13101SVenki.Rajagopalan@Sun.COM {
969*13101SVenki.Rajagopalan@Sun.COM 	ib_gid_t sgid = ss->ei_props->ep_sgid;
970*13101SVenki.Rajagopalan@Sun.COM 	ibt_srate_t srate = IBT_SRATE_10;
971*13101SVenki.Rajagopalan@Sun.COM 	ibt_path_info_t path;
972*13101SVenki.Rajagopalan@Sun.COM 	ibt_path_attr_t path_attr;
973*13101SVenki.Rajagopalan@Sun.COM 	ibt_status_t ret;
974*13101SVenki.Rajagopalan@Sun.COM 	uint8_t num_paths;
975*13101SVenki.Rajagopalan@Sun.COM 
976*13101SVenki.Rajagopalan@Sun.COM 	bzero(&path_attr, sizeof (path_attr));
977*13101SVenki.Rajagopalan@Sun.COM 	path_attr.pa_dgids = &sgid;
978*13101SVenki.Rajagopalan@Sun.COM 	path_attr.pa_num_dgids = 1;
979*13101SVenki.Rajagopalan@Sun.COM 	path_attr.pa_sgid = sgid;
980*13101SVenki.Rajagopalan@Sun.COM 
981*13101SVenki.Rajagopalan@Sun.COM 	ret = ibt_get_paths(ss->ei_ibt_hdl, IBT_PATH_NO_FLAGS,
982*13101SVenki.Rajagopalan@Sun.COM 	    &path_attr, 1, &path, &num_paths);
983*13101SVenki.Rajagopalan@Sun.COM 	if (ret == IBT_SUCCESS && num_paths >= 1) {
984*13101SVenki.Rajagopalan@Sun.COM 		switch (srate = path.pi_prim_cep_path.cep_adds_vect.av_srate) {
985*13101SVenki.Rajagopalan@Sun.COM 		case IBT_SRATE_2:
986*13101SVenki.Rajagopalan@Sun.COM 		case IBT_SRATE_10:
987*13101SVenki.Rajagopalan@Sun.COM 		case IBT_SRATE_30:
988*13101SVenki.Rajagopalan@Sun.COM 		case IBT_SRATE_5:
989*13101SVenki.Rajagopalan@Sun.COM 		case IBT_SRATE_20:
990*13101SVenki.Rajagopalan@Sun.COM 		case IBT_SRATE_40:
991*13101SVenki.Rajagopalan@Sun.COM 		case IBT_SRATE_60:
992*13101SVenki.Rajagopalan@Sun.COM 		case IBT_SRATE_80:
993*13101SVenki.Rajagopalan@Sun.COM 		case IBT_SRATE_120:
994*13101SVenki.Rajagopalan@Sun.COM 			break;
995*13101SVenki.Rajagopalan@Sun.COM 		default:
996*13101SVenki.Rajagopalan@Sun.COM 			srate = IBT_SRATE_10;
997*13101SVenki.Rajagopalan@Sun.COM 		}
998*13101SVenki.Rajagopalan@Sun.COM 	}
999*13101SVenki.Rajagopalan@Sun.COM 
1000*13101SVenki.Rajagopalan@Sun.COM 	ss->ei_props->ep_srate = srate;
1001*13101SVenki.Rajagopalan@Sun.COM 
1002*13101SVenki.Rajagopalan@Sun.COM 	EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_ibt_record_srate: "
1003*13101SVenki.Rajagopalan@Sun.COM 	    "srate = %d", srate);
1004*13101SVenki.Rajagopalan@Sun.COM }
1005