xref: /onnv-gate/usr/src/uts/common/io/ib/clients/iser/iser_ib.c (revision 9586:bd5e99a50121)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/ddi.h>
29 #include <sys/socket.h>
30 #include <netinet/in.h>
31 #include <sys/sunddi.h>
32 #include <sys/sysmacros.h>
33 #include <sys/iscsi_protocol.h>
34 
35 #include <sys/ib/clients/iser/iser.h>
36 #include <sys/ib/clients/iser/iser_idm.h>
37 
38 /*
39  * iser_ib.c
40  * Routines for InfiniBand transport for iSER
41  *
42  * This file contains the routines to interface with the IBT API to attach and
43  * allocate IB resources, handle async events, and post recv work requests.
44  *
45  */
46 
47 static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
48 static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);
49 
50 static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
51 static int iser_ib_free_hca(iser_hca_t *hca);
52 static int iser_ib_update_hcaports(iser_hca_t *hca);
53 static int iser_ib_init_hcas(void);
54 static int iser_ib_fini_hcas(void);
55 
56 static iser_sbind_t *iser_ib_get_bind(
57     iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
58 static int iser_ib_activate_port(
59     idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
60 static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);
61 
62 static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
63 static void iser_ib_fini_qp(iser_qp_t *qp);
64 
65 static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
66     ibt_cq_hdl_t *cq_hdl);
67 
68 static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
69     ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
70     ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);
71 
72 static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
73     ibt_async_event_t *event);
74 static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
75     ibt_async_event_t *event);
76 static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
77     ibt_async_event_t *event);
78 
79 static void iser_ib_post_recv_task(void *arg);
80 
81 static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
82 	IBTI_V_CURR,
83 	IBT_STORAGE_DEV,
84 	iser_ib_async_handler,
85 	NULL,
86 	"iSER"
87 };
88 
89 /*
90  * iser_ib_init
91  *
92  * This function registers the HCA drivers with IBTF and registers and binds
93  * iSER as a service with IBTF.
94  */
95 int
96 iser_ib_init(void)
97 {
98 	int		status;
99 
100 	/* Register with IBTF */
101 	status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state,
102 	    &iser_state->is_ibhdl);
103 	if (status != DDI_SUCCESS) {
104 		ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)",
105 		    status);
106 		return (DDI_FAILURE);
107 	}
108 
109 	/* Create the global work request kmem_cache */
110 	iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache",
111 	    sizeof (iser_wr_t), 0, NULL, NULL, NULL,
112 	    iser_state, NULL, KM_SLEEP);
113 
114 	/* Populate our list of HCAs */
115 	status = iser_ib_init_hcas();
116 	if (status != DDI_SUCCESS) {
117 		/* HCAs failed to initialize, tear it down */
118 		kmem_cache_destroy(iser_state->iser_wr_cache);
119 		(void) ibt_detach(iser_state->is_ibhdl);
120 		iser_state->is_ibhdl = NULL;
121 		ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs");
122 		return (DDI_FAILURE);
123 	}
124 
125 	/* Target will register iSER as a service with IBTF when required */
126 
127 	/* Target will bind this service when it comes online */
128 
129 	return (DDI_SUCCESS);
130 }
131 
132 /*
133  * iser_ib_fini
134  *
135  * This function unbinds and degisters the iSER service from IBTF
136  */
137 int
138 iser_ib_fini(void)
139 {
140 	/* IDM would have already disabled all the services */
141 
142 	/* Teardown the HCA list and associated resources */
143 	if (iser_ib_fini_hcas() != DDI_SUCCESS)
144 		return (DDI_FAILURE);
145 
146 	/* Teardown the global work request kmem_cache */
147 	kmem_cache_destroy(iser_state->iser_wr_cache);
148 
149 	/* Deregister with IBTF */
150 	if (iser_state->is_ibhdl != NULL) {
151 		(void) ibt_detach(iser_state->is_ibhdl);
152 		iser_state->is_ibhdl = NULL;
153 	}
154 
155 	return (DDI_SUCCESS);
156 }
157 
158 /*
159  * iser_ib_register_service
160  *
161  * This function registers the iSER service using the RDMA-Aware Service ID.
162  */
163 int
164 iser_ib_register_service(idm_svc_t *idm_svc)
165 {
166 	ibt_srv_desc_t	srvdesc;
167 	iser_svc_t	*iser_svc;
168 	int		status;
169 
170 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
171 
172 	/* Set up IBTI client callback handler from the CM */
173 	srvdesc.sd_handler = iser_ib_cm_handler;
174 
175 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
176 
177 	iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;
178 
179 	/* Register the iSER service with IBTF */
180 	status = ibt_register_service(
181 	    iser_state->is_ibhdl, &srvdesc,
182 	    iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL);
183 
184 	return (status);
185 }
186 
187 /*
188  * iser_ib_bind_service
189  *
190  * This function binds a given iSER service on all available HCA ports
191  */
192 int
193 iser_ib_bind_service(idm_svc_t *idm_svc)
194 {
195 	iser_hca_t	*hca;
196 	ib_gid_t	gid;
197 	int		num_ports = 0;
198 	int		num_binds = 0;
199 	int		status;
200 	int		i;
201 
202 	ASSERT(idm_svc != NULL);
203 	ASSERT(idm_svc->is_iser_svc != NULL);
204 
205 	/* Bind the iSER service on all available ports */
206 	mutex_enter(&iser_state->is_hcalist_lock);
207 
208 	for (hca = list_head(&iser_state->is_hcalist);
209 	    hca != NULL;
210 	    hca = list_next(&iser_state->is_hcalist, hca)) {
211 
212 		for (i = 0; i < hca->hca_num_ports; i++) {
213 			num_ports++;
214 			if (hca->hca_port_info[i].p_linkstate !=
215 			    IBT_PORT_ACTIVE) {
216 				/*
217 				 * Move on. We will attempt to bind service
218 				 * in our async handler if the port comes up
219 				 * at a later time.
220 				 */
221 				continue;
222 			}
223 
224 			gid = hca->hca_port_info[i].p_sgid_tbl[0];
225 
226 			/* If the port is already bound, skip */
227 			if (iser_ib_get_bind(
228 			    idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) {
229 
230 				status = iser_ib_activate_port(
231 				    idm_svc, hca->hca_guid, gid);
232 				if (status != IBT_SUCCESS) {
233 					ISER_LOG(CE_NOTE,
234 					    "iser_ib_bind_service: "
235 					    "iser_ib_activate_port failure "
236 					    "(0x%x)", status);
237 					continue;
238 				}
239 			}
240 			num_binds++;
241 		}
242 	}
243 	mutex_exit(&iser_state->is_hcalist_lock);
244 
245 	if (num_binds) {
246 		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
247 		    "(%d) of (%d) ports", num_binds, num_ports);
248 		return (ISER_STATUS_SUCCESS);
249 	} else {
250 		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
251 		return (ISER_STATUS_FAIL);
252 	}
253 }
254 
255 /*
256  * iser_ib_unbind_service
257  *
258  * This function unbinds a given iSER service on all HCA ports.
259  */
260 void
261 iser_ib_unbind_service(idm_svc_t *idm_svc)
262 {
263 	iser_svc_t	*iser_svc;
264 	iser_sbind_t	*is_sbind, *next_sb;
265 
266 	if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {
267 
268 		iser_svc = idm_svc->is_iser_svc;
269 
270 		for (is_sbind = list_head(&iser_svc->is_sbindlist);
271 		    is_sbind != NULL;
272 		    is_sbind = next_sb) {
273 			next_sb = list_next(&iser_svc->is_sbindlist, is_sbind);
274 			ibt_unbind_service(iser_svc->is_srvhdl,
275 			    is_sbind->is_sbindhdl);
276 			list_remove(&iser_svc->is_sbindlist, is_sbind);
277 			kmem_free(is_sbind, sizeof (iser_sbind_t));
278 		}
279 	}
280 }
281 
282 /* ARGSUSED */
283 void
284 iser_ib_deregister_service(idm_svc_t *idm_svc)
285 {
286 	iser_svc_t	*iser_svc;
287 
288 	if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {
289 
290 		iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;
291 		ibt_deregister_service(iser_state->is_ibhdl,
292 		    iser_svc->is_srvhdl);
293 		ibt_release_ip_sid(iser_svc->is_svcid);
294 	}
295 }
296 
297 /*
298  * iser_ib_get_paths
299  * This function finds the IB path between the local and the remote address.
300  *
301  */
302 int
303 iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip,
304     ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip)
305 {
306 	ibt_ip_path_attr_t	ipattr;
307 	int			status;
308 
309 	(void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
310 	ipattr.ipa_dst_ip	= remote_ip;
311 	ipattr.ipa_src_ip	= *local_ip;
312 	ipattr.ipa_max_paths	= 1;
313 	ipattr.ipa_ndst		= 1;
314 
315 	(void) bzero(path, sizeof (ibt_path_info_t));
316 	status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS,
317 	    &ipattr, path, NULL, path_src_ip);
318 	if (status != IBT_SUCCESS) {
319 		ISER_LOG(CE_NOTE, "iser_ib_get_paths: ibt_get_ip_paths "
320 		    "failure: status (%d)", status);
321 		return (status);
322 	}
323 
324 	if (local_ip != NULL) {
325 		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]",
326 		    local_ip->un.ip4addr, remote_ip->un.ip4addr);
327 	} else {
328 		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: "
329 		    "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr);
330 	}
331 
332 	return (ISER_STATUS_SUCCESS);
333 }
334 
335 /*
336  * iser_ib_alloc_rc_channel
337  *
338  * This function allocates a reliable communication channel using the specified
339  * channel attributes.
340  */
341 iser_chan_t *
342 iser_ib_alloc_rc_channel(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip)
343 {
344 
345 	iser_chan_t			*chan;
346 	ib_gid_t			lgid;
347 	uint8_t				hca_port; /* from path */
348 	iser_hca_t			*hca;
349 	ibt_path_ip_src_t		path_src_ip;
350 	ibt_rc_chan_alloc_args_t	chanargs;
351 	uint_t				sq_size, rq_size;
352 	int				status;
353 
354 	chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP);
355 
356 	mutex_init(&chan->ic_lock, NULL, MUTEX_DRIVER, NULL);
357 	mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL);
358 
359 	/* Lookup a path to the given destination */
360 	status = iser_ib_get_paths(local_ip, remote_ip, &chan->ic_ibt_path,
361 	    &path_src_ip);
362 
363 	if (status != ISER_STATUS_SUCCESS) {
364 		ISER_LOG(CE_NOTE, "iser_ib_get_paths failed: status (%d)",
365 		    status);
366 		mutex_destroy(&chan->ic_lock);
367 		mutex_destroy(&chan->ic_sq_post_lock);
368 		kmem_free(chan, sizeof (iser_chan_t));
369 		return (NULL);
370 	}
371 
372 	/* get the local gid from the path info */
373 	lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;
374 
375 	/* get the hca port from the path info */
376 	hca_port = chan->ic_ibt_path.pi_prim_cep_path.cep_hca_port_num;
377 
378 	/* Lookup the hca using the gid in the path info */
379 	hca = iser_ib_gid2hca(lgid);
380 	if (hca == NULL) {
381 		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
382 		    "to lookup HCA handle");
383 		mutex_destroy(&chan->ic_lock);
384 		mutex_destroy(&chan->ic_sq_post_lock);
385 		kmem_free(chan, sizeof (iser_chan_t));
386 		return (NULL);
387 	}
388 
389 	/* Set up the iSER channel handle with HCA and IP data */
390 	chan->ic_hca		= hca;
391 	chan->ic_localip	= path_src_ip.ip_primary;
392 	chan->ic_remoteip	= *remote_ip;
393 
394 	/*
395 	 * Determine the queue sizes, based upon the HCA query data.
396 	 * For our Work Queues, we will use either our default value,
397 	 * or the HCA's maximum value, whichever is smaller.
398 	 */
399 	sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE);
400 	rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE);
401 
402 	/*
403 	 * For our Completion Queues, we again check the device maximum.
404 	 * We want to end up with CQs that are the next size up from the
405 	 * WQs they are servicing so that they have some overhead.
406 	 */
407 	if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) {
408 		chan->ic_sendcq_sz = sq_size + 1;
409 	} else {
410 		chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz;
411 		sq_size = chan->ic_sendcq_sz - 1;
412 	}
413 
414 	if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) {
415 		chan->ic_recvcq_sz = rq_size + 1;
416 	} else {
417 		chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz;
418 		rq_size = chan->ic_recvcq_sz - 1;
419 	}
420 
421 	/* Initialize the iSER channel's QP handle */
422 	iser_ib_init_qp(chan, sq_size, rq_size);
423 
424 	/* Set up the Send Completion Queue */
425 	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz,
426 	    &chan->ic_sendcq);
427 	if (status != ISER_STATUS_SUCCESS) {
428 		iser_ib_fini_qp(&chan->ic_qp);
429 		mutex_destroy(&chan->ic_lock);
430 		mutex_destroy(&chan->ic_sq_post_lock);
431 		kmem_free(chan, sizeof (iser_chan_t));
432 		return (NULL);
433 	}
434 	ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan);
435 	ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION);
436 
437 	/* Set up the Receive Completion Queue */
438 	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz,
439 	    &chan->ic_recvcq);
440 	if (status != ISER_STATUS_SUCCESS) {
441 		(void) ibt_free_cq(chan->ic_sendcq);
442 		iser_ib_fini_qp(&chan->ic_qp);
443 		mutex_destroy(&chan->ic_lock);
444 		mutex_destroy(&chan->ic_sq_post_lock);
445 		kmem_free(chan, sizeof (iser_chan_t));
446 		return (NULL);
447 	}
448 	ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan);
449 	ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION);
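	/*
	 * Both CQs are armed for the next completion; the CQ handlers are
	 * expected to re-arm notification after draining each batch of
	 * completions.
	 */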
450 
451 	/* Setup the channel arguments */
452 	iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq,
453 	    sq_size, rq_size, hca->hca_pdhdl, &chanargs);
454 
455 	status = ibt_alloc_rc_channel(hca->hca_hdl,
456 	    IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL);
457 	if (status != IBT_SUCCESS) {
458 		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
459 		    "ibt_alloc_rc_channel: status (%d)", status);
460 		(void) ibt_free_cq(chan->ic_sendcq);
461 		(void) ibt_free_cq(chan->ic_recvcq);
462 		iser_ib_fini_qp(&chan->ic_qp);
463 		mutex_destroy(&chan->ic_lock);
464 		mutex_destroy(&chan->ic_sq_post_lock);
465 		kmem_free(chan, sizeof (iser_chan_t));
466 		return (NULL);
467 	}
468 
469 	/* Set the 'channel' as the client private data */
470 	(void) ibt_set_chan_private(chan->ic_chanhdl, chan);
471 
472 	ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel success: "
473 	    "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d",
474 	    (void *)chan->ic_chanhdl,
475 	    (longlong_t)local_ip->un.ip4addr,
476 	    (longlong_t)remote_ip->un.ip4addr,
477 	    (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid,
478 	    (longlong_t)hca->hca_guid, hca_port);
479 
480 	return (chan);
481 }
482 
483 /*
484  * iser_ib_open_rc_channel
485  * This function opens an RC connection on the given allocated RC channel.
486  */
487 int
488 iser_ib_open_rc_channel(iser_chan_t *chan)
489 {
490 	ibt_ip_cm_info_t	ipcm_info;
491 	iser_private_data_t	iser_priv_data;
492 	ibt_chan_open_args_t	ocargs;
493 	ibt_rc_returns_t	ocreturns;
494 	int			status;
495 
496 	mutex_enter(&chan->ic_lock);
497 
498 	/*
499 	 * For connection establishment, the initiator sends a CM REQ using the
500 	 * iSER RDMA-Aware Service ID. Included are the source and destination
501 	 * IP addresses, and the src port.
502 	 */
503 	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
504 	ipcm_info.src_addr = chan->ic_localip;
505 	ipcm_info.dst_addr = chan->ic_remoteip;
506 	ipcm_info.src_port = chan->ic_lport;
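	/*
	 * The IP addressing information above is encoded into the CM REQ
	 * private data by ibt_format_ip_private_data() below, alongside the
	 * iSER-specific connection parameters.
	 */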
507 
508 	/*
509 	 * The CM Private Data field defines the iSER connection parameters
510 	 * such as zero based virtual address exception (ZBVAE) and Send with
511 	 * invalidate Exception (SIE).
512 	 *
513 	 * Solaris IBT does not currently support ZBVAE or SIE.
514 	 */
515 	iser_priv_data.rsvd1	= 0;
516 	iser_priv_data.sie	= 1;
517 	iser_priv_data.zbvae	= 1;
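	/*
	 * Setting the exception bits to 1 advertises that this side does not
	 * use zero-based virtual addressing or Send with Invalidate on this
	 * connection.
	 */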
518 
519 	status = ibt_format_ip_private_data(&ipcm_info,
520 	    sizeof (iser_private_data_t), &iser_priv_data);
521 	if (status != IBT_SUCCESS) {
522 		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
523 		mutex_exit(&chan->ic_lock);
524 		return (status);
525 	}
526 
527 	/*
528 	 * Set the SID we are attempting to connect to, based upon the
529 	 * remote port number.
530 	 */
531 	chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport);
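	/*
	 * ibt_get_ip_sid() maps the TCP port into the RDMA IP CM Service ID
	 * space; the listening target is expected to have registered its
	 * service under the same mapping, so this SID selects the iSER
	 * listener on the peer.
	 */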
532 
533 	/* Set up the args for the channel open */
534 	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
535 	ocargs.oc_path			= &chan->ic_ibt_path;
536 	ocargs.oc_cm_handler		= iser_ib_cm_handler;
537 	ocargs.oc_cm_clnt_private	= iser_state;
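	/*
	 * Allow up to four outstanding RDMA Reads in each direction on this
	 * connection, and retry path and RNR NAK failures twice before
	 * giving up.
	 */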
538 	ocargs.oc_rdma_ra_out		= 4;
539 	ocargs.oc_rdma_ra_in		= 4;
540 	ocargs.oc_path_retry_cnt	= 2;
541 	ocargs.oc_path_rnr_retry_cnt	= 2;
542 	ocargs.oc_priv_data_len		= sizeof (iser_private_data_t);
543 	ocargs.oc_priv_data		= &iser_priv_data;
544 
545 	bzero(&ocreturns, sizeof (ibt_rc_returns_t));
546 
547 	status = ibt_open_rc_channel(chan->ic_chanhdl,
548 	    IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns);
549 
550 	if (status != IBT_SUCCESS) {
551 		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
552 		mutex_exit(&chan->ic_lock);
553 		return (status);
554 	}
555 
556 	mutex_exit(&chan->ic_lock);
557 	return (IDM_STATUS_SUCCESS);
558 }
559 
560 /*
561  * iser_ib_close_rc_channel
562  * This function closes the RC channel related to this iser_chan handle.
563  * We invoke this in a blocking context, with no completion callbacks.
564  */
565 void
566 iser_ib_close_rc_channel(iser_chan_t *chan)
567 {
568 	int			status;
569 
570 	mutex_enter(&chan->ic_lock);
571 	status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL,
572 	    0, NULL, NULL, 0);
573 	if (status != IBT_SUCCESS) {
574 		ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
575 		    "ibt_close_rc_channel failed: status (%d)", status);
576 	}
577 	mutex_exit(&chan->ic_lock);
578 }
579 
580 /*
581  * iser_ib_free_rc_channel
582  *
583  * This function tears down an RC channel's QP initialization and frees it.
584  * Note that we do not need synchronization here; the channel has been
585  * closed already, so we should only have completion polling occurring.  Once
586  * complete, we are free to free the IBTF channel, WQ and CQ resources, and
587  * our own related resources.
588  */
589 void
590 iser_ib_free_rc_channel(iser_chan_t *chan)
591 {
592 	iser_qp_t	*iser_qp;
593 
594 	iser_qp = &chan->ic_qp;
595 
596 	/* Ensure the SQ is empty */
597 	while (chan->ic_sq_post_count != 0) {
598 		mutex_exit(&chan->ic_conn->ic_lock);
599 		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
600 		mutex_enter(&chan->ic_conn->ic_lock);
601 	}
602 	mutex_destroy(&chan->ic_sq_post_lock);
603 
604 	/* Ensure the RQ is empty */
605 	(void) ibt_flush_channel(chan->ic_chanhdl);
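	/*
	 * Flushing the channel moves the QP to the error state, which causes
	 * any receive WRs still posted to complete (with a flush error) so
	 * that rq_level can drain to zero below.
	 */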
606 	mutex_enter(&iser_qp->qp_lock);
607 	while (iser_qp->rq_level != 0) {
608 		mutex_exit(&iser_qp->qp_lock);
609 		mutex_exit(&chan->ic_conn->ic_lock);
610 		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
611 		mutex_enter(&chan->ic_conn->ic_lock);
612 		mutex_enter(&iser_qp->qp_lock);
613 	}
614 
615 	/* Free our QP handle */
616 	mutex_exit(&iser_qp->qp_lock);
617 	(void) iser_ib_fini_qp(iser_qp);
618 
619 	/* Free the IBT channel resources */
620 	(void) ibt_free_channel(chan->ic_chanhdl);
621 	chan->ic_chanhdl = NULL;
622 
623 	/* Free the CQs */
624 	ibt_free_cq(chan->ic_sendcq);
625 	ibt_free_cq(chan->ic_recvcq);
626 
627 	/* Free the chan handle */
628 	mutex_destroy(&chan->ic_lock);
629 	kmem_free(chan, sizeof (iser_chan_t));
630 }
631 
632 /*
633  * iser_ib_post_recv_async, iser_ib_post_recv_task, iser_ib_post_recv
634  *
635  * These routines keep the RQ full on a given channel. iser_ib_post_recv_async
636  * dispatches iser_ib_post_recv_task on a taskq; the task in turn calls
637  * iser_ib_post_recv, which checks the current fill level of the RQ and
638  * posts as many WRs as necessary to fill it again.
639  */
640 
641 int
642 iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl)
643 {
644 	iser_chan_t	*chan;
645 	int		status;
646 
647 	/* Pull our iSER channel handle from the private data */
648 	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
649 
650 	/*
651 	 * Caller must check that chan->ic_conn->ic_stage indicates
652 	 * the connection is active (not closing, not closed) and
653 	 * it must hold the mutex across the check and the call to this function.
654 	 */
655 	ASSERT(mutex_owned(&chan->ic_conn->ic_lock));
656 	ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_IC_CONNECTED) &&
657 	    (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN));
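	/*
	 * Hold the IDM connection so it cannot be torn down while the taskq
	 * dispatch is outstanding; the task (or the dispatch-failure path
	 * below) releases the hold.
	 */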
658 	idm_conn_hold(chan->ic_conn->ic_idmc);
659 	status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task,
660 	    (void *)chanhdl, DDI_NOSLEEP);
661 	if (status != DDI_SUCCESS) {
662 		idm_conn_rele(chan->ic_conn->ic_idmc);
663 	}
664 
665 	return (status);
666 }
667 
668 static void
669 iser_ib_post_recv_task(void *arg)
670 {
671 	ibt_channel_hdl_t	chanhdl = arg;
672 	iser_chan_t		*chan;
673 
674 	/* Pull our iSER channel handle from the private data */
675 	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
676 
677 	iser_ib_post_recv(chanhdl);
678 	idm_conn_rele(chan->ic_conn->ic_idmc);
679 }
680 
681 void
682 iser_ib_post_recv(ibt_channel_hdl_t chanhdl)
683 {
684 	iser_chan_t	*chan;
685 	iser_hca_t	*hca;
686 	iser_msg_t	*msg;
687 	ibt_recv_wr_t	*wrlist, wr[ISER_IB_RQ_POST_MAX];
688 	int		rq_space, msg_ret;
689 	int		total_num, npost;
690 	uint_t		nposted;
691 	int		status, i;
692 	iser_qp_t	*iser_qp;
693 	ib_gid_t	lgid;
694 
695 	/* Pull our iSER channel handle from the private data */
696 	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
697 
698 	ASSERT(chan != NULL);
699 
700 	mutex_enter(&chan->ic_conn->ic_lock);
701 
702 	/* Bail out if the connection is closed; no need for more recv WRs */
703 	if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
704 	    (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
705 		mutex_exit(&chan->ic_conn->ic_lock);
706 		return;
707 	}
708 
709 	/* get the QP handle from the iser_chan */
710 	iser_qp = &chan->ic_qp;
711 
712 	/* get the local gid from the path info */
713 	lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;
714 
715 	/* look up the HCA using the local gid from the path info */
716 	hca = iser_ib_gid2hca(lgid);
717 	if (hca == NULL) {
718 		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
719 		    "HCA handle");
720 		mutex_exit(&chan->ic_conn->ic_lock);
721 		return;
722 	}
723 
724 	/* check for space to post on the RQ */
725 	mutex_enter(&iser_qp->qp_lock);
726 	rq_space = iser_qp->rq_depth - iser_qp->rq_level;
727 	if (rq_space == 0) {
728 		/* The RQ is full, clear the pending flag and return */
729 		iser_qp->rq_taskqpending = B_FALSE;
730 		mutex_exit(&iser_qp->qp_lock);
731 		mutex_exit(&chan->ic_conn->ic_lock);
732 		return;
733 	}
734 
735 	/* Keep track of the lowest value for rq_min_post_level */
736 	if (iser_qp->rq_level < iser_qp->rq_min_post_level)
737 		iser_qp->rq_min_post_level = iser_qp->rq_level;
738 
739 	mutex_exit(&iser_qp->qp_lock);
740 
741 	/* we've room to post, so pull from the msg cache */
742 	msg = iser_msg_get(hca, rq_space, &msg_ret);
743 	if (msg == NULL) {
744 		ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
745 		    "available in msg cache currently");
746 		/*
747 		 * There are no messages in the cache. Wait a half-
748 		 * second, then try again.
749 		 */
750 		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
751 		status = iser_ib_post_recv_async(chanhdl);
752 		if (status != DDI_SUCCESS) {
753 			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
754 			    "redispatch routine");
755 			/* Failed to dispatch, clear pending flag */
756 			mutex_enter(&iser_qp->qp_lock);
757 			iser_qp->rq_taskqpending = B_FALSE;
758 			mutex_exit(&iser_qp->qp_lock);
759 		}
760 		mutex_exit(&chan->ic_conn->ic_lock);
761 		return;
762 	}
763 
764 	if (msg_ret != rq_space) {
765 		ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
766 		    "messages not allocated: requested (%d) allocated (%d)",
767 		    rq_space, msg_ret);
768 		/* We got some, but not all, of our requested depth */
769 		rq_space = msg_ret;
770 	}
771 
772 	/*
773 	 * Now, walk through the allocated WRs and post them,
774 	 * ISER_IB_RQ_POST_MAX (or less) at a time.
775 	 */
776 	wrlist = &wr[0];
777 	total_num = rq_space;
778 
779 	while (total_num) {
780 		/* determine the number to post on this iteration */
781 		npost = (total_num > ISER_IB_RQ_POST_MAX) ?
782 		    ISER_IB_RQ_POST_MAX : total_num;
783 
784 		/* build a list of WRs from the msg list */
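		/*
		 * Each message pointer is stashed in the wr_id so that the
		 * receive completion handler can map a completion back to
		 * its iser_msg_t.
		 */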
785 		for (i = 0; i < npost; i++) {
786 			wrlist[i].wr_id		= (ibt_wrid_t)(uintptr_t)msg;
787 			wrlist[i].wr_nds	= ISER_IB_SGLIST_SIZE;
788 			wrlist[i].wr_sgl	= &msg->msg_ds;
789 			msg = msg->nextp;
790 		}
791 
792 		/* post the list to the RQ */
793 		nposted = 0;
794 		status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
795 		if ((status != IBT_SUCCESS) || (nposted != npost)) {
796 			ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
797 			    "failed: requested (%d) posted (%d) status (%d)",
798 			    npost, nposted, status);
799 			total_num -= nposted;
800 			break;
801 		}
802 
803 		/* decrement total number to post by the number posted */
804 		total_num -= nposted;
805 	}
806 
807 	mutex_enter(&iser_qp->qp_lock);
808 	if (total_num != 0) {
809 		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
810 		    "failed to post (%d) WRs", total_num);
811 		iser_qp->rq_level += rq_space - total_num;
812 	} else {
813 		iser_qp->rq_level += rq_space;
814 	}
815 
816 	/*
817 	 * Now that we've filled the RQ, check that all of the recv WRs
818 	 * haven't just been immediately consumed. If so, taskqpending is
819 	 * still B_TRUE, so we need to fire off a taskq thread to post
820 	 * more WRs.
821 	 */
822 	if (iser_qp->rq_level == 0) {
823 		mutex_exit(&iser_qp->qp_lock);
824 		status = iser_ib_post_recv_async(chanhdl);
825 		if (status != DDI_SUCCESS) {
826 			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
827 			    "dispatch followup routine");
828 			/* Failed to dispatch, clear pending flag */
829 			mutex_enter(&iser_qp->qp_lock);
830 			iser_qp->rq_taskqpending = B_FALSE;
831 			mutex_exit(&iser_qp->qp_lock);
832 		}
833 	} else {
834 		/*
835 		 * We're done, we've filled the RQ. Clear the taskq
836 		 * flag so that we can run again.
837 		 */
838 		iser_qp->rq_taskqpending = B_FALSE;
839 		mutex_exit(&iser_qp->qp_lock);
840 	}
841 
842 	mutex_exit(&chan->ic_conn->ic_lock);
843 }
844 
845 /*
846  * iser_ib_handle_portup_event()
847  * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event.
848  *
849  * To bring the port into service and configure the CM for inbound iSER
850  * requests on the newly active port, the existing IDM services are checked
851  * for iSER support.
852  * If an iSER service has already been created, it is simply bound to the
853  * gid of the newly active port. If, on the other hand, the CM service does
854  * not yet exist (i.e. the service was socket-only), a new CM service is
855  * first registered with the saved service parameters and then bound to the
856  * newly active port.
857  *
858  */
859 /* ARGSUSED */
860 static void
861 iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
862 {
863 	iser_hca_t		*hca;
864 	ib_gid_t		gid;
865 	idm_svc_t		*idm_svc;
866 	int			status;
867 
868 	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)",
869 	    (longlong_t)event->ev_hca_guid, event->ev_port);
870 
871 	/*
872 	 * Query all ports on the HCA and update the port information
873 	 * maintained in the iser_hca_t structure
874 	 */
875 	hca = iser_ib_guid2hca(event->ev_hca_guid);
876 	if (hca == NULL) {
877 
878 		/* A new HCA is available; this is its first active port */
879 		hca = iser_ib_alloc_hca(event->ev_hca_guid);
		if (hca == NULL) {
			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: "
			    "iser_ib_alloc_hca failed: HCA(0x%llx)",
			    (longlong_t)event->ev_hca_guid);
			return;
		}
880 
881 		mutex_enter(&iser_state->is_hcalist_lock);
882 		list_insert_tail(&iser_state->is_hcalist, hca);
883 		iser_state->is_num_hcas++;
884 		mutex_exit(&iser_state->is_hcalist_lock);
885 
886 	} else {
887 
888 		status = iser_ib_update_hcaports(hca);
889 
890 		if (status != IBT_SUCCESS) {
891 			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
892 			    "status(0x%x): iser_ib_update_hcaports failed: "
893 			    "HCA(0x%llx) port(%d)", status,
894 			    (longlong_t)event->ev_hca_guid, event->ev_port);
895 			return;
896 		}
897 	}
898 
899 	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
900 
901 	/*
902 	 * Iterate through the global list of IDM target services
903 	 * and check for existing iSER CM service.
904 	 */
905 	mutex_enter(&idm.idm_global_mutex);
906 	for (idm_svc = list_head(&idm.idm_tgt_svc_list);
907 	    idm_svc != NULL;
908 	    idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) {
909 
910 
911 		if (idm_svc->is_iser_svc == NULL) {
912 
913 			/* Establish a new CM service for iSER requests */
914 			status = iser_tgt_svc_create(
915 			    &idm_svc->is_svc_req, idm_svc);
916 
917 			if (status != IBT_SUCCESS) {
918 				ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
919 				    "status(0x%x): iser_tgt_svc_create failed: "
920 				    "HCA(0x%llx) port(%d)", status,
921 				    (longlong_t)event->ev_hca_guid,
922 				    event->ev_port);
923 
924 				continue;
925 			}
926 		}
927 
928 		status = iser_ib_activate_port(
929 		    idm_svc, event->ev_hca_guid, gid);
930 		if (status != IBT_SUCCESS) {
931 
932 			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
933 			    "status(0x%x): Bind service on port "
934 			    "(%llx:%llx) failed",
935 			    status, (longlong_t)gid.gid_prefix,
936 			    (longlong_t)gid.gid_guid);
937 
938 			continue;
939 		}
940 		ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound "
941 		    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
942 		    event->ev_port);
943 	}
944 	mutex_exit(&idm.idm_global_mutex);
945 
946 	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
947 	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
948 	    event->ev_port);
949 }
950 
951 /*
952  * iser_ib_handle_portdown_event()
953  * This handles the IBT_ERROR_PORT_DOWN unaffiliated asynchronous error.
954  *
955  * Unconfigure the CM service on the deactivated port and teardown the
956  * connections that are using the CM service.
957  */
958 /* ARGSUSED */
959 static void
960 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
961 {
962 	iser_hca_t		*hca;
963 	ib_gid_t		gid;
964 	int			status;
965 
966 	/*
967 	 * Query all ports on the HCA and update the port information
968 	 * maintained in the iser_hca_t structure
969 	 */
970 	hca = iser_ib_guid2hca(event->ev_hca_guid);
971 	ASSERT(hca != NULL);
972 
973 	status = iser_ib_update_hcaports(hca);
974 	if (status != IBT_SUCCESS) {
975 		ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): "
976 		    "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)",
977 		    status, (longlong_t)event->ev_hca_guid, event->ev_port);
978 		return;
979 	}
980 
981 	/* get the gid of the port that went down */
982 	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
983 	iser_ib_deactivate_port(event->ev_hca_guid, gid);
984 
985 	ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: "
986 	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
987 	    event->ev_port);
988 }
989 
990 /*
991  * iser_ib_handle_hca_detach_event()
992  * Quiesce all activity bound for the port, teardown the connection, unbind
993  * iSER services on all ports and release the HCA handle.
994  */
995 /* ARGSUSED */
996 static void
997 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
998 {
999 	iser_hca_t	*nexthca, *hca;
1000 	int		i, status;
1001 
1002 	ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)",
1003 	    (longlong_t)event->ev_hca_guid);
1004 
1005 	hca = iser_ib_guid2hca(event->ev_hca_guid);
	if (hca == NULL) {
		/* this HCA is not in our list; nothing to tear down */
		return;
	}
1006 	for (i = 0; i < hca->hca_num_ports; i++) {
1007 		iser_ib_deactivate_port(hca->hca_guid,
1008 		    hca->hca_port_info[i].p_sgid_tbl[0]);
1009 	}
1010 
1011 	/*
1012 	 * Update the HCA list maintained in the iser_state. Free the
1013 	 * resources allocated to the HCA, i.e. caches, protection domain
1014 	 */
1015 	mutex_enter(&iser_state->is_hcalist_lock);
1016 
1017 	for (hca = list_head(&iser_state->is_hcalist);
1018 	    hca != NULL;
1019 	    hca = nexthca) {
1020 
1021 		nexthca = list_next(&iser_state->is_hcalist, hca);
1022 
1023 		if (hca->hca_guid == event->ev_hca_guid) {
1024 
1025 			list_remove(&iser_state->is_hcalist, hca);
1026 			iser_state->is_num_hcas--;
1027 
1028 			status = iser_ib_free_hca(hca);
1029 			if (status != DDI_SUCCESS) {
1030 				ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: "
1031 				    "Failed to free hca(%p)", (void *)hca);
1032 				list_insert_tail(&iser_state->is_hcalist, hca);
1033 				iser_state->is_num_hcas++;
1034 			}
1035 			/* No way to return status to IBT if this fails */
1036 		}
1037 	}
1038 	mutex_exit(&iser_state->is_hcalist_lock);
1039 
1040 }
1041 
1042 /*
1043  * iser_ib_async_handler
1044  * An IBT asynchronous event handler is registered with the framework via
1045  * the ibt_attach() routine. This function handles the following
1046  * asynchronous events.
1047  * IBT_EVENT_PORT_UP
1048  * IBT_ERROR_PORT_DOWN
1049  * IBT_HCA_ATTACH_EVENT
1050  * IBT_HCA_DETACH_EVENT
1051  */
1052 /* ARGSUSED */
1053 void
1054 iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
1055     ibt_async_event_t *event)
1056 {
1057 	switch (code) {
1058 	case IBT_EVENT_PORT_UP:
1059 		iser_ib_handle_portup_event(hdl, event);
1060 		break;
1061 
1062 	case IBT_ERROR_PORT_DOWN:
1063 		iser_ib_handle_portdown_event(hdl, event);
1064 		break;
1065 
1066 	case IBT_HCA_ATTACH_EVENT:
1067 		/*
1068 		 * A new HCA device is available for use, ignore this
1069 		 * event because the corresponding IBT_EVENT_PORT_UP
1070 		 * events will get triggered and handled accordingly.
1071 		 */
1072 		break;
1073 
1074 	case IBT_HCA_DETACH_EVENT:
1075 		iser_ib_handle_hca_detach_event(hdl, event);
1076 		break;
1077 
1078 	default:
1079 		break;
1080 	}
1081 }
1082 
1083 /*
1084  * iser_ib_init_hcas
1085  *
1086  * This function opens all the HCA devices, gathers the HCA state information
1087  * and adds the HCA handle for each HCA found in the iser_soft_state.
1088  */
1089 static int
1090 iser_ib_init_hcas(void)
1091 {
1092 	ib_guid_t	*guid;
1093 	int		num_hcas;
1094 	int		i;
1095 	iser_hca_t	*hca;
1096 
1097 	/* Retrieve the HCA list */
1098 	num_hcas = ibt_get_hca_list(&guid);
1099 	if (num_hcas == 0) {
1100 		/*
1101 		 * This shouldn't happen, but might if we have all HCAs
1102 		 * detach prior to initialization.
1103 		 */
1104 		return (DDI_FAILURE);
1105 	}
1106 
1107 	/* Initialize the hcalist lock */
1108 	mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL);
1109 
1110 	/* Create the HCA list */
1111 	list_create(&iser_state->is_hcalist, sizeof (iser_hca_t),
1112 	    offsetof(iser_hca_t, hca_node));
1113 
1114 	for (i = 0; i < num_hcas; i++) {
1115 
1116 		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA "
1117 		    "(0x%llx)", (longlong_t)guid[i]);
1118 
1119 		hca = iser_ib_alloc_hca(guid[i]);
1120 		if (hca == NULL) {
1121 			/* This shouldn't happen, teardown and fail */
1122 			(void) iser_ib_fini_hcas();
1123 			(void) ibt_free_hca_list(guid, num_hcas);
1124 			return (DDI_FAILURE);
1125 		}
1126 
1127 		mutex_enter(&iser_state->is_hcalist_lock);
1128 		list_insert_tail(&iser_state->is_hcalist, hca);
1129 		iser_state->is_num_hcas++;
1130 		mutex_exit(&iser_state->is_hcalist_lock);
1131 
1132 	}
1133 
1134 	/* Free the IBT HCA list */
1135 	(void) ibt_free_hca_list(guid, num_hcas);
1136 
1137 	/* Check that we've initialized at least one HCA */
1138 	mutex_enter(&iser_state->is_hcalist_lock);
1139 	if (list_is_empty(&iser_state->is_hcalist)) {
1140 		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize "
1141 		    "any HCAs");
1142 
1143 		mutex_exit(&iser_state->is_hcalist_lock);
1144 		(void) iser_ib_fini_hcas();
1145 		return (DDI_FAILURE);
1146 	}
1147 	mutex_exit(&iser_state->is_hcalist_lock);
1148 
1149 	return (DDI_SUCCESS);
1150 }
1151 
1152 /*
1153  * iser_ib_fini_hcas
1154  *
1155  * Teardown the iSER HCA list initialized above.
1156  */
1157 static int
1158 iser_ib_fini_hcas(void)
1159 {
1160 	iser_hca_t	*nexthca, *hca;
1161 	int		status;
1162 
1163 	mutex_enter(&iser_state->is_hcalist_lock);
1164 	for (hca = list_head(&iser_state->is_hcalist);
1165 	    hca != NULL;
1166 	    hca = nexthca) {
1167 
1168 		nexthca = list_next(&iser_state->is_hcalist, hca);
1169 
1170 		list_remove(&iser_state->is_hcalist, hca);
1171 
1172 		status = iser_ib_free_hca(hca);
1173 		if (status != DDI_SUCCESS) {
1174 			ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free "
1175 			    "HCA during fini");
1176 			list_insert_tail(&iser_state->is_hcalist, hca);
1177 			return (DDI_FAILURE);
1178 		}
1179 
1180 		iser_state->is_num_hcas--;
1181 
1182 	}
1183 	mutex_exit(&iser_state->is_hcalist_lock);
1184 	list_destroy(&iser_state->is_hcalist);
1185 	mutex_destroy(&iser_state->is_hcalist_lock);
1186 
1187 	return (DDI_SUCCESS);
1188 }
1189 
1190 /*
1191  * iser_ib_alloc_hca
1192  *
1193  * This function opens the given HCA device, gathers the HCA state information
1194  * and adds the HCA handle
1195  */
1196 static iser_hca_t *
1197 iser_ib_alloc_hca(ib_guid_t guid)
1198 {
1199 	iser_hca_t	*hca;
1200 	int		status;
1201 
1202 	/* Allocate an iser_hca_t HCA handle */
1203 	hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP);
1204 
1205 	/* Open this HCA */
1206 	status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl);
1207 	if (status != IBT_SUCCESS) {
1208 		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:"
1209 		    " guid (0x%llx) status (0x%x)", (longlong_t)guid, status);
1210 		kmem_free(hca, sizeof (iser_hca_t));
1211 		return (NULL);
1212 	}
1213 
1214 	hca->hca_guid		= guid;
1215 	hca->hca_clnt_hdl	= iser_state->is_ibhdl;
1216 
1217 	/* Query the HCA */
1218 	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
1219 	if (status != IBT_SUCCESS) {
1220 		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca "
1221 		    "failure: guid (0x%llx) status (0x%x)",
1222 		    (longlong_t)guid, status);
1223 		(void) ibt_close_hca(hca->hca_hdl);
1224 		kmem_free(hca, sizeof (iser_hca_t));
1225 		return (NULL);
1226 	}
1227 
1228 	/* Query all ports on the HCA */
1229 	status = ibt_query_hca_ports(hca->hca_hdl, 0,
1230 	    &hca->hca_port_info, &hca->hca_num_ports,
1231 	    &hca->hca_port_info_sz);
1232 	if (status != IBT_SUCCESS) {
1233 		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: "
1234 		    "ibt_query_hca_ports failure: guid (0x%llx) "
1235 		    "status (0x%x)", (longlong_t)guid, status);
1236 		(void) ibt_close_hca(hca->hca_hdl);
1237 		kmem_free(hca, sizeof (iser_hca_t));
1238 		return (NULL);
1239 	}
1240 
1241 	/* Allocate a single PD on this HCA */
1242 	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS,
1243 	    &hca->hca_pdhdl);
1244 	if (status != IBT_SUCCESS) {
1245 		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd "
1246 		    "failure: guid (0x%llx) status (0x%x)",
1247 		    (longlong_t)guid, status);
1248 		(void) ibt_close_hca(hca->hca_hdl);
1249 		ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz);
1250 		kmem_free(hca, sizeof (iser_hca_t));
1251 		return (NULL);
1252 	}
1253 
1254 	/* Initialize the message and data MR caches for this HCA */
1255 	iser_init_hca_caches(hca);
1256 
1257 	return (hca);
1258 }
1259 
1260 static int
1261 iser_ib_free_hca(iser_hca_t *hca)
1262 {
1263 	int			status;
1264 	ibt_hca_portinfo_t	*hca_port_info;
1265 	uint_t			hca_port_info_sz;
1266 
1267 	ASSERT(hca != NULL);
1268 	if (hca->hca_failed)
1269 		return (DDI_FAILURE);
1270 
1271 	hca_port_info = hca->hca_port_info;
1272 	hca_port_info_sz = hca->hca_port_info_sz;
1273 
1274 	/*
1275 	 * Free the memory regions before freeing
1276 	 * the associated protection domain
1277 	 */
1278 	iser_fini_hca_caches(hca);
1279 
1280 	status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl);
1281 	if (status != IBT_SUCCESS) {
1282 		ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD "
1283 		    "status=0x%x", status);
1284 		goto out_caches;
1285 	}
1286 
1287 	status = ibt_close_hca(hca->hca_hdl);
1288 	if (status != IBT_SUCCESS) {
1289 		ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to close HCA "
1290 		    "status=0x%x", status);
1291 		goto out_pd;
1292 	}
1293 
1294 	ibt_free_portinfo(hca_port_info, hca_port_info_sz);
1295 
1296 	kmem_free(hca, sizeof (iser_hca_t));
1297 	return (DDI_SUCCESS);
1298 
1299 	/*
1300 	 * We only managed to partially tear down the HCA, try to put it back
1301 	 * like it was before returning.
1302 	 */
1303 out_pd:
1304 	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl);
1305 	if (status != IBT_SUCCESS) {
1306 		hca->hca_failed = B_TRUE;
1307 		/* Report error and exit */
1308 		ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD "
1309 		    "status=0x%x", status);
1310 		return (DDI_FAILURE);
1311 	}
1312 
1313 out_caches:
1314 	iser_init_hca_caches(hca);
1315 
1316 	return (DDI_FAILURE);
1317 }
1318 
1319 static int
1320 iser_ib_update_hcaports(iser_hca_t *hca)
1321 {
1322 	ibt_hca_portinfo_t	*pinfop, *oldpinfop;
1323 	uint_t			size, oldsize, nport;
1324 	int			status;
1325 
1326 	ASSERT(hca != NULL);
1327 
1328 	status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size);
1329 	if (status != IBT_SUCCESS) {
1330 		ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status);
1331 		return (status);
1332 	}
1333 
1334 	oldpinfop = hca->hca_port_info;
1335 	oldsize	= hca->hca_port_info_sz;
1336 	hca->hca_port_info = pinfop;
1337 	hca->hca_port_info_sz = size;
1338 
1339 	(void) ibt_free_portinfo(oldpinfop, oldsize);
1340 
1341 	return (IBT_SUCCESS);
1342 }
1343 
1344 /*
1345  * iser_ib_gid2hca
1346  * Given a gid, find the corresponding hca
1347  */
1348 iser_hca_t *
1349 iser_ib_gid2hca(ib_gid_t gid)
1350 {
1351 
1352 	iser_hca_t	*hca;
1353 	int		i;
1354 
1355 	mutex_enter(&iser_state->is_hcalist_lock);
1356 	for (hca = list_head(&iser_state->is_hcalist);
1357 	    hca != NULL;
1358 	    hca = list_next(&iser_state->is_hcalist, hca)) {
1359 
1360 		for (i = 0; i < hca->hca_num_ports; i++) {
1361 			if ((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix ==
1362 			    gid.gid_prefix) &&
1363 			    (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid ==
1364 			    gid.gid_guid)) {
1365 
1366 				mutex_exit(&iser_state->is_hcalist_lock);
1367 
1368 				return (hca);
1369 			}
1370 		}
1371 	}
1372 	mutex_exit(&iser_state->is_hcalist_lock);
1373 	return (NULL);
1374 }
1375 
1376 /*
1377  * iser_ib_guid2hca
1378  * Given a HCA guid, find the corresponding HCA
1379  */
1380 iser_hca_t *
1381 iser_ib_guid2hca(ib_guid_t guid)
1382 {
1383 
1384 	iser_hca_t	*hca;
1385 
1386 	mutex_enter(&iser_state->is_hcalist_lock);
1387 	for (hca = list_head(&iser_state->is_hcalist);
1388 	    hca != NULL;
1389 	    hca = list_next(&iser_state->is_hcalist, hca)) {
1390 
1391 		if (hca->hca_guid == guid) {
1392 			mutex_exit(&iser_state->is_hcalist_lock);
1393 			return (hca);
1394 		}
1395 	}
1396 	mutex_exit(&iser_state->is_hcalist_lock);
1397 	return (NULL);
1398 }
1399 
1400 /*
1401  * iser_ib_conv_sockaddr2ibtaddr
1402  * This function converts a socket address into the IBT format
1403  */
1404 void iser_ib_conv_sockaddr2ibtaddr(
1405     idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr)
1406 {
1407 	if (saddr == NULL) {
1408 		ibt_addr->family = AF_UNSPEC;
1409 		ibt_addr->un.ip4addr = 0;
1410 	} else {
1411 		switch (saddr->sin.sa_family) {
1412 		case AF_INET:
1413 
1414 			ibt_addr->family	= saddr->sin4.sin_family;
1415 			ibt_addr->un.ip4addr	= saddr->sin4.sin_addr.s_addr;
1416 			break;
1417 
1418 		case AF_INET6:
1419 
1420 			ibt_addr->family	= saddr->sin6.sin6_family;
1421 			ibt_addr->un.ip6addr	= saddr->sin6.sin6_addr;
1422 			break;
1423 
1424 		default:
1425 			ibt_addr->family = AF_UNSPEC;
1426 		}
1427 
1428 	}
1429 }
1430 
1431 /*
1432  * iser_ib_conv_ibtaddr2sockaddr
1433  * This function converts an IBT ip address handle to a sockaddr
1434  */
1435 void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss,
1436     ibt_ip_addr_t *ibt_addr, in_port_t port)
1437 {
1438 	struct sockaddr_in *sin;
1439 	struct sockaddr_in6 *sin6;
1440 
1441 	switch (ibt_addr->family) {
1442 	case AF_INET:
1443 	case AF_UNSPEC:
1444 
1445 		sin = (struct sockaddr_in *)ibt_addr;
1446 		sin->sin_port = ntohs(port);
1447 		bcopy(sin, ss, sizeof (struct sockaddr_in));
1448 		break;
1449 
1450 	case AF_INET6:
1451 
1452 		sin6 = (struct sockaddr_in6 *)ibt_addr;
1453 		sin6->sin6_port = ntohs(port);
1454 		bcopy(sin6, ss, sizeof (struct sockaddr_in6));
1455 		break;
1456 
1457 	default:
1458 		ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: "
1459 		    "unknown family type: 0x%x", ibt_addr->family);
1460 	}
1461 }
1462 
1463 /*
1464  * iser_ib_setup_cq
1465  * This function sets up the Completion Queue attributes and allocates a
1466  * Completion Queue of the specified size.
1467  */
1468 static int
1469 iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl)
1470 {
1471 
1472 	ibt_cq_attr_t		cq_attr;
1473 	int			status;
1474 
1475 	cq_attr.cq_size		= cq_size;
1476 	cq_attr.cq_sched	= 0;
1477 	cq_attr.cq_flags	= IBT_CQ_NO_FLAGS;
1478 
1479 	/* Allocate a Completion Queue */
1480 	status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL);
1481 	if (status != IBT_SUCCESS) {
1482 		ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)",
1483 		    status);
1484 		return (status);
1485 	}
1486 
1487 	return (ISER_STATUS_SUCCESS);
1488 }
1489 
1490 /*
1491  * iser_ib_setup_chanargs
1492  *
1493  */
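 * Initialize the RC channel allocation arguments for ibt_alloc_rc_channel()
 * from the given HCA port, CQ handles, queue sizes and protection domain.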
1494 static void
1495 iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
1496     ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
1497     ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs)
1498 {
1499 
1500 	bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t));
1501 
1502 	/*
1503 	 * Set up the size of the channels send queue, receive queue and the
1504 	 * maximum number of elements in a scatter gather list of work requests
1505 	 * posted to the send and receive queues.
1506 	 */
1507 	cargs->rc_sizes.cs_sq		= sq_size;
1508 	cargs->rc_sizes.cs_rq		= rq_size;
1509 	cargs->rc_sizes.cs_sq_sgl	= ISER_IB_SGLIST_SIZE;
1510 	cargs->rc_sizes.cs_rq_sgl	= ISER_IB_SGLIST_SIZE;
1511 
1512 	/*
1513 	 * Request a completion for every work request posted to the send
1514 	 * queue (IBT_ALL_SIGNALED), rather than only for signaled WRs.
1515 	 */
1516 	cargs->rc_flags			= IBT_ALL_SIGNALED;
1517 
1518 	/* Enable RDMA read and RDMA write on the channel end points */
1519 	cargs->rc_control		= IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
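	/*
	 * iSER moves SCSI data with RDMA Write (data-in, target to initiator)
	 * and RDMA Read (data-out, initiator to target), so both must be
	 * enabled on the channel end point.
	 */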
1520 
1521 	/* Set the local hca port on which the channel is allocated */
1522 	cargs->rc_hca_port_num		= hca_port;
1523 
1524 	/* Set the Send and Receive Completion Queue handles */
1525 	cargs->rc_scq			= scq_hdl;
1526 	cargs->rc_rcq			= rcq_hdl;
1527 
1528 	/* Set the protection domain associated with the channel */
1529 	cargs->rc_pd			= hca_pdhdl;
1530 
1531 	/* No SRQ usage */
1532 	cargs->rc_srq			= NULL;
1533 }
1534 
1535 /*
1536  * iser_ib_init_qp
1537  * Initialize the QP handle
1538  */
1539 void
1540 iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size)
1541 {
1542 	/* Initialize the handle lock */
1543 	mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL);
1544 
1545 	/* Record queue sizes */
1546 	chan->ic_qp.sq_size = sq_size;
1547 	chan->ic_qp.rq_size = rq_size;
1548 
1549 	/* Initialize the RQ monitoring data */
1550 	chan->ic_qp.rq_depth  = rq_size;
1551 	chan->ic_qp.rq_level  = 0;
1552 	chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100;
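	/*
	 * rq_lwm is the RQ low water mark; when the posted receive count
	 * drops below it, the receive completion path is expected to
	 * dispatch iser_ib_post_recv_async() to refill the queue.
	 */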
1553 
1554 	/* Initialize the taskq flag */
1555 	chan->ic_qp.rq_taskqpending = B_FALSE;
1556 }
1557 
1558 /*
1559  * iser_ib_fini_qp
1560  * Teardown the QP handle
1561  */
1562 void
1563 iser_ib_fini_qp(iser_qp_t *qp)
1564 {
1565 	/* Destroy the handle lock */
1566 	mutex_destroy(&qp->qp_lock);
1567 }
1568 
1569 static int
1570 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid)
1571 {
1572 	iser_svc_t	*iser_svc;
1573 	iser_sbind_t	*is_sbind;
1574 	int		status;
1575 
1576 	iser_svc = idm_svc->is_iser_svc;
1577 
1578 	/*
1579 	 * Save the address of the service bind handle in the
1580 	 * iser_svc_t to undo the service binding at a later time
1581 	 */
1582 	is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP);
1583 	is_sbind->is_gid	= gid;
1584 	is_sbind->is_guid	= guid;
1585 
1586 	status  = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL,
1587 	    idm_svc, &is_sbind->is_sbindhdl);
1588 
1589 	if (status != IBT_SUCCESS) {
1590 		ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): "
1591 		    "Bind service(%llx) on port(%llx:%llx) failed",
1592 		    status, (longlong_t)iser_svc->is_svcid,
1593 		    (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
1594 
1595 		kmem_free(is_sbind, sizeof (iser_sbind_t));
1596 
1597 		return (status);
1598 	}
1599 
1600 	list_insert_tail(&iser_svc->is_sbindlist, is_sbind);
1601 
1602 	return (IBT_SUCCESS);
1603 }
1604 
1605 static void
1606 iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid)
1607 {
1608 	iser_svc_t	*iser_svc;
1609 	iser_conn_t	*iser_conn;
1610 	iser_sbind_t	*is_sbind;
1611 	idm_conn_t	*idm_conn;
1612 
1613 	/*
1614 	 * Iterate through the global list of IDM target connections.
1615 	 * Issue a TRANSPORT_FAIL for any connections on this port, and
1616 	 * if there is a bound service running on the port, tear it down.
1617 	 */
1618 	mutex_enter(&idm.idm_global_mutex);
1619 	for (idm_conn = list_head(&idm.idm_tgt_conn_list);
1620 	    idm_conn != NULL;
1621 	    idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) {
1622 
1623 		if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) {
1624 			/* this is not an iSER connection, skip it */
1625 			continue;
1626 		}
1627 
1628 		iser_conn = idm_conn->ic_transport_private;
1629 		if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) {
1630 			/* this iSER connection is on a different port */
1631 			continue;
1632 		}
1633 
1634 		/* Fail the transport for this connection */
1635 		idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);
1636 
1637 		if (idm_conn->ic_conn_type == CONN_TYPE_INI) {
1638 			/* initiator connection, nothing else to do */
1639 			continue;
1640 		}
1641 
1642 		/* Check for a service binding */
1643 		iser_svc = idm_conn->ic_svc_binding->is_iser_svc;
1644 		is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid);
1645 		if (is_sbind != NULL) {
1646 			/* This service is still bound, tear it down */
1647 			ibt_unbind_service(iser_svc->is_srvhdl,
1648 			    is_sbind->is_sbindhdl);
1649 			list_remove(&iser_svc->is_sbindlist, is_sbind);
1650 			kmem_free(is_sbind, sizeof (iser_sbind_t));
1651 		}
1652 	}
1653 	mutex_exit(&idm.idm_global_mutex);
1654 }
1655 
1656 static iser_sbind_t *
1657 iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid)
1658 {
1659 	iser_sbind_t	*is_sbind;
1660 
1661 	for (is_sbind = list_head(&iser_svc->is_sbindlist);
1662 	    is_sbind != NULL;
1663 	    is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) {
1664 
1665 		if ((is_sbind->is_guid == hca_guid) &&
1666 		    (is_sbind->is_gid.gid_prefix == gid.gid_prefix) &&
1667 		    (is_sbind->is_gid.gid_guid == gid.gid_guid)) {
1668 			return (is_sbind);
1669 		}
1670 	}
1671 	return (NULL);
1672 }
1673