1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <netinet/in.h> 31 #include <sys/sunddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/iscsi_protocol.h> 34 35 #include <sys/ib/clients/iser/iser.h> 36 #include <sys/ib/clients/iser/iser_idm.h> 37 38 /* 39 * iser_ib.c 40 * Routines for InfiniBand transport for iSER 41 * 42 * This file contains the routines to interface with the IBT API to attach and 43 * allocate IB resources, handle async events, and post recv work requests. 
 *
 */

/* HCA lookup helpers: resolve an iser_hca_t from a port GID or HCA GUID */
static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);

/* HCA list management: open/close HCAs and refresh cached port state */
static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
static int iser_ib_free_hca(iser_hca_t *hca);
static int iser_ib_update_hcaports(iser_hca_t *hca);
static int iser_ib_init_hcas(void);
static int iser_ib_fini_hcas(void);

/* Service bind bookkeeping: one iser_sbind_t per (service, HCA port) */
static iser_sbind_t *iser_ib_get_bind(
    iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
static int iser_ib_activate_port(
    idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);

/* Per-channel queue pair setup/teardown */
static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
static void iser_ib_fini_qp(iser_qp_t *qp);

static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
    ibt_cq_hdl_t *cq_hdl);

static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);

/* Unaffiliated async event handlers, dispatched from iser_ib_async_handler */
static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);

/* Taskq entry point used to replenish a channel's receive queue */
static void iser_ib_post_recv_task(void *arg);

/*
 * IBTF client registration info: identifies iSER as an IBT_STORAGE_DEV
 * client and names iser_ib_async_handler as the receiver of unaffiliated
 * async events (port up/down, HCA attach/detach).  Passed to ibt_attach()
 * in iser_ib_init().
 */
static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	iser_ib_async_handler,
	NULL,
	"iSER"
};

/*
 * iser_ib_init
 *
 * This function registers the HCA drivers with IBTF and registers and binds
 * iSER as a service with IBTF.
94 */ 95 int 96 iser_ib_init(void) 97 { 98 int status; 99 100 /* Register with IBTF */ 101 status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state, 102 &iser_state->is_ibhdl); 103 if (status != DDI_SUCCESS) { 104 ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)", 105 status); 106 return (DDI_FAILURE); 107 } 108 109 /* Create the global work request kmem_cache */ 110 iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache", 111 sizeof (iser_wr_t), 0, NULL, NULL, NULL, 112 iser_state, NULL, KM_SLEEP); 113 114 /* Populate our list of HCAs */ 115 status = iser_ib_init_hcas(); 116 if (status != DDI_SUCCESS) { 117 /* HCAs failed to initialize, tear it down */ 118 kmem_cache_destroy(iser_state->iser_wr_cache); 119 (void) ibt_detach(iser_state->is_ibhdl); 120 iser_state->is_ibhdl = NULL; 121 ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs"); 122 return (DDI_FAILURE); 123 } 124 125 /* Target will register iSER as a service with IBTF when required */ 126 127 /* Target will bind this service when it comes online */ 128 129 return (DDI_SUCCESS); 130 } 131 132 /* 133 * iser_ib_fini 134 * 135 * This function unbinds and degisters the iSER service from IBTF 136 */ 137 int 138 iser_ib_fini(void) 139 { 140 /* IDM would have already disabled all the services */ 141 142 /* Teardown the HCA list and associated resources */ 143 if (iser_ib_fini_hcas() != DDI_SUCCESS) 144 return (DDI_FAILURE); 145 146 /* Teardown the global work request kmem_cache */ 147 kmem_cache_destroy(iser_state->iser_wr_cache); 148 149 /* Deregister with IBTF */ 150 if (iser_state->is_ibhdl != NULL) { 151 (void) ibt_detach(iser_state->is_ibhdl); 152 iser_state->is_ibhdl = NULL; 153 } 154 155 return (DDI_SUCCESS); 156 } 157 158 /* 159 * iser_ib_register_service 160 * 161 * This function registers the iSER service using the RDMA-Aware Service ID. 
162 */ 163 int 164 iser_ib_register_service(idm_svc_t *idm_svc) 165 { 166 ibt_srv_desc_t srvdesc; 167 iser_svc_t *iser_svc; 168 int status; 169 170 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 171 172 /* Set up IBTI client callback handler from the CM */ 173 srvdesc.sd_handler = iser_ib_cm_handler; 174 175 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 176 177 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc; 178 179 /* Register the service on the specified port */ 180 status = ibt_register_service( 181 iser_state->is_ibhdl, &srvdesc, 182 iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL); 183 184 return (status); 185 } 186 187 /* 188 * iser_ib_bind_service 189 * 190 * This function binds a given iSER service on all available HCA ports 191 */ 192 int 193 iser_ib_bind_service(idm_svc_t *idm_svc) 194 { 195 iser_hca_t *hca; 196 ib_gid_t gid; 197 int num_ports = 0; 198 int num_binds = 0; 199 int status; 200 int i; 201 202 ASSERT(idm_svc != NULL); 203 ASSERT(idm_svc->is_iser_svc != NULL); 204 205 /* Register the iSER service on all available ports */ 206 mutex_enter(&iser_state->is_hcalist_lock); 207 208 for (hca = list_head(&iser_state->is_hcalist); 209 hca != NULL; 210 hca = list_next(&iser_state->is_hcalist, hca)) { 211 212 for (i = 0; i < hca->hca_num_ports; i++) { 213 num_ports++; 214 if (hca->hca_port_info[i].p_linkstate != 215 IBT_PORT_ACTIVE) { 216 /* 217 * Move on. We will attempt to bind service 218 * in our async handler if the port comes up 219 * at a later time. 
220 */ 221 continue; 222 } 223 224 gid = hca->hca_port_info[i].p_sgid_tbl[0]; 225 226 /* If the port is already bound, skip */ 227 if (iser_ib_get_bind( 228 idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) { 229 230 status = iser_ib_activate_port( 231 idm_svc, hca->hca_guid, gid); 232 if (status != IBT_SUCCESS) { 233 ISER_LOG(CE_NOTE, 234 "iser_ib_bind_service: " 235 "iser_ib_activate_port failure " 236 "(0x%x)", status); 237 continue; 238 } 239 } 240 num_binds++; 241 } 242 } 243 mutex_exit(&iser_state->is_hcalist_lock); 244 245 if (num_binds) { 246 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on " 247 "(%d) of (%d) ports", num_binds, num_ports); 248 return (ISER_STATUS_SUCCESS); 249 } else { 250 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service"); 251 return (ISER_STATUS_FAIL); 252 } 253 } 254 255 /* 256 * iser_ib_unbind_service 257 * 258 * This function unbinds a given service on a all HCA ports 259 */ 260 void 261 iser_ib_unbind_service(idm_svc_t *idm_svc) 262 { 263 iser_svc_t *iser_svc; 264 iser_sbind_t *is_sbind, *next_sb; 265 266 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) { 267 268 iser_svc = idm_svc->is_iser_svc; 269 270 for (is_sbind = list_head(&iser_svc->is_sbindlist); 271 is_sbind != NULL; 272 is_sbind = next_sb) { 273 next_sb = list_next(&iser_svc->is_sbindlist, is_sbind); 274 ibt_unbind_service(iser_svc->is_srvhdl, 275 is_sbind->is_sbindhdl); 276 list_remove(&iser_svc->is_sbindlist, is_sbind); 277 kmem_free(is_sbind, sizeof (iser_sbind_t)); 278 } 279 } 280 } 281 282 /* ARGSUSED */ 283 void 284 iser_ib_deregister_service(idm_svc_t *idm_svc) 285 { 286 iser_svc_t *iser_svc; 287 288 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) { 289 290 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc; 291 ibt_deregister_service(iser_state->is_ibhdl, 292 iser_svc->is_srvhdl); 293 ibt_release_ip_sid(iser_svc->is_svcid); 294 } 295 } 296 297 /* 298 * iser_ib_get_paths 299 * This function finds the IB path between the local and the 
remote address. 300 * 301 */ 302 int 303 iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip, 304 ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip) 305 { 306 ibt_ip_path_attr_t ipattr; 307 int status; 308 309 (void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 310 ipattr.ipa_dst_ip = remote_ip; 311 ipattr.ipa_src_ip = *local_ip; 312 ipattr.ipa_max_paths = 1; 313 ipattr.ipa_ndst = 1; 314 315 (void) bzero(path, sizeof (ibt_path_info_t)); 316 status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS, 317 &ipattr, path, NULL, path_src_ip); 318 if (status != IBT_SUCCESS) { 319 ISER_LOG(CE_NOTE, "ibt_get_ip_paths: ibt_get_ip_paths " 320 "failure: status (%d)", status); 321 return (status); 322 } 323 324 if (local_ip != NULL) { 325 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]", 326 local_ip->un.ip4addr, remote_ip->un.ip4addr); 327 } else { 328 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: " 329 "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr); 330 } 331 332 return (ISER_STATUS_SUCCESS); 333 } 334 335 /* 336 * iser_ib_alloc_rc_channel 337 * 338 * This function allocates a reliable communication channel using the specified 339 * channel attributes. 
340 */ 341 iser_chan_t * 342 iser_ib_alloc_rc_channel(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip) 343 { 344 345 iser_chan_t *chan; 346 ib_gid_t lgid; 347 uint8_t hca_port; /* from path */ 348 iser_hca_t *hca; 349 ibt_path_ip_src_t path_src_ip; 350 ibt_rc_chan_alloc_args_t chanargs; 351 uint_t sq_size, rq_size; 352 int status; 353 354 chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP); 355 356 mutex_init(&chan->ic_lock, NULL, MUTEX_DRIVER, NULL); 357 mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL); 358 359 /* Lookup a path to the given destination */ 360 status = iser_ib_get_paths(local_ip, remote_ip, &chan->ic_ibt_path, 361 &path_src_ip); 362 363 if (status != ISER_STATUS_SUCCESS) { 364 ISER_LOG(CE_NOTE, "iser_ib_get_paths failed: status (%d)", 365 status); 366 mutex_destroy(&chan->ic_lock); 367 mutex_destroy(&chan->ic_sq_post_lock); 368 kmem_free(chan, sizeof (iser_chan_t)); 369 return (NULL); 370 } 371 372 /* get the local gid from the path info */ 373 lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid; 374 375 /* get the hca port from the path info */ 376 hca_port = chan->ic_ibt_path.pi_prim_cep_path.cep_hca_port_num; 377 378 /* Lookup the hca using the gid in the path info */ 379 hca = iser_ib_gid2hca(lgid); 380 if (hca == NULL) { 381 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed " 382 "to lookup HCA handle"); 383 mutex_destroy(&chan->ic_lock); 384 mutex_destroy(&chan->ic_sq_post_lock); 385 kmem_free(chan, sizeof (iser_chan_t)); 386 return (NULL); 387 } 388 389 /* Set up the iSER channel handle with HCA and IP data */ 390 chan->ic_hca = hca; 391 chan->ic_localip = path_src_ip.ip_primary; 392 chan->ic_remoteip = *remote_ip; 393 394 /* 395 * Determine the queue sizes, based upon the HCA query data. 396 * For our Work Queues, we will use either our default value, 397 * or the HCA's maximum value, whichever is smaller. 
398 */ 399 sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE); 400 rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE); 401 402 /* 403 * For our Completion Queues, we again check the device maximum. 404 * We want to end up with CQs that are the next size up from the 405 * WQs they are servicing so that they have some overhead. 406 */ 407 if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) { 408 chan->ic_sendcq_sz = sq_size + 1; 409 } else { 410 chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz; 411 sq_size = chan->ic_sendcq_sz - 1; 412 } 413 414 if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) { 415 chan->ic_recvcq_sz = rq_size + 1; 416 } else { 417 chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz; 418 rq_size = chan->ic_recvcq_sz - 1; 419 } 420 421 /* Initialize the iSER channel's QP handle */ 422 iser_ib_init_qp(chan, sq_size, rq_size); 423 424 /* Set up the Send Completion Queue */ 425 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz, 426 &chan->ic_sendcq); 427 if (status != ISER_STATUS_SUCCESS) { 428 iser_ib_fini_qp(&chan->ic_qp); 429 mutex_destroy(&chan->ic_lock); 430 mutex_destroy(&chan->ic_sq_post_lock); 431 kmem_free(chan, sizeof (iser_chan_t)); 432 return (NULL); 433 } 434 ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan); 435 ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION); 436 437 /* Set up the Receive Completion Queue */ 438 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz, 439 &chan->ic_recvcq); 440 if (status != ISER_STATUS_SUCCESS) { 441 (void) ibt_free_cq(chan->ic_sendcq); 442 iser_ib_fini_qp(&chan->ic_qp); 443 mutex_destroy(&chan->ic_lock); 444 mutex_destroy(&chan->ic_sq_post_lock); 445 kmem_free(chan, sizeof (iser_chan_t)); 446 return (NULL); 447 } 448 ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan); 449 ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION); 450 451 /* Setup the channel arguments */ 452 iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, 
chan->ic_recvcq, 453 sq_size, rq_size, hca->hca_pdhdl, &chanargs); 454 455 status = ibt_alloc_rc_channel(hca->hca_hdl, 456 IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL); 457 if (status != IBT_SUCCESS) { 458 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed " 459 "ibt_alloc_rc_channel: status (%d)", status); 460 (void) ibt_free_cq(chan->ic_sendcq); 461 (void) ibt_free_cq(chan->ic_recvcq); 462 iser_ib_fini_qp(&chan->ic_qp); 463 mutex_destroy(&chan->ic_lock); 464 mutex_destroy(&chan->ic_sq_post_lock); 465 kmem_free(chan, sizeof (iser_chan_t)); 466 return (NULL); 467 } 468 469 /* Set the 'channel' as the client private data */ 470 (void) ibt_set_chan_private(chan->ic_chanhdl, chan); 471 472 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel success: " 473 "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d", 474 (void *)chan->ic_chanhdl, 475 (longlong_t)local_ip->un.ip4addr, 476 (longlong_t)remote_ip->un.ip4addr, 477 (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid, 478 (longlong_t)hca->hca_guid, hca_port); 479 480 return (chan); 481 } 482 483 /* 484 * iser_ib_open_rc_channel 485 * This function opens a RC connection on the given allocated RC channel 486 */ 487 int 488 iser_ib_open_rc_channel(iser_chan_t *chan) 489 { 490 ibt_ip_cm_info_t ipcm_info; 491 iser_private_data_t iser_priv_data; 492 ibt_chan_open_args_t ocargs; 493 ibt_rc_returns_t ocreturns; 494 int status; 495 496 mutex_enter(&chan->ic_lock); 497 498 /* 499 * For connection establishment, the initiator sends a CM REQ using the 500 * iSER RDMA-Aware Service ID. Included are the source and destination 501 * IP addresses, and the src port. 
502 */ 503 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 504 ipcm_info.src_addr = chan->ic_localip; 505 ipcm_info.dst_addr = chan->ic_remoteip; 506 ipcm_info.src_port = chan->ic_lport; 507 508 /* 509 * The CM Private Data field defines the iSER connection parameters 510 * such as zero based virtual address exception (ZBVAE) and Send with 511 * invalidate Exception (SIE). 512 * 513 * Solaris IBT does not currently support ZBVAE or SIE. 514 */ 515 iser_priv_data.rsvd1 = 0; 516 iser_priv_data.sie = 1; 517 iser_priv_data.zbvae = 1; 518 519 status = ibt_format_ip_private_data(&ipcm_info, 520 sizeof (iser_private_data_t), &iser_priv_data); 521 if (status != IBT_SUCCESS) { 522 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 523 mutex_exit(&chan->ic_lock); 524 return (status); 525 } 526 527 /* 528 * Set the SID we are attempting to connect to, based upon the 529 * remote port number. 530 */ 531 chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport); 532 533 /* Set up the args for the channel open */ 534 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 535 ocargs.oc_path = &chan->ic_ibt_path; 536 ocargs.oc_cm_handler = iser_ib_cm_handler; 537 ocargs.oc_cm_clnt_private = iser_state; 538 ocargs.oc_rdma_ra_out = 4; 539 ocargs.oc_rdma_ra_in = 4; 540 ocargs.oc_path_retry_cnt = 2; 541 ocargs.oc_path_rnr_retry_cnt = 2; 542 ocargs.oc_priv_data_len = sizeof (iser_private_data_t); 543 ocargs.oc_priv_data = &iser_priv_data; 544 545 bzero(&ocreturns, sizeof (ibt_rc_returns_t)); 546 547 status = ibt_open_rc_channel(chan->ic_chanhdl, 548 IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns); 549 550 if (status != IBT_SUCCESS) { 551 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 552 mutex_exit(&chan->ic_lock); 553 return (status); 554 } 555 556 mutex_exit(&chan->ic_lock); 557 return (IDM_STATUS_SUCCESS); 558 } 559 560 /* 561 * iser_ib_close_rc_channel 562 * This function closes the RC channel related to this iser_chan handle. 
563 * We invoke this in a non-blocking, no callbacks context. 564 */ 565 void 566 iser_ib_close_rc_channel(iser_chan_t *chan) 567 { 568 int status; 569 570 mutex_enter(&chan->ic_lock); 571 status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL, 572 0, NULL, NULL, 0); 573 if (status != IBT_SUCCESS) { 574 ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: " 575 "ibt_close_rc_channel failed: status (%d)", status); 576 } 577 mutex_exit(&chan->ic_lock); 578 } 579 580 /* 581 * iser_ib_free_rc_channel 582 * 583 * This function tears down an RC channel's QP initialization and frees it. 584 * Note that we do not need synchronization here; the channel has been 585 * closed already, so we should only have completion polling occuring. Once 586 * complete, we are free to free the IBTF channel, WQ and CQ resources, and 587 * our own related resources. 588 */ 589 void 590 iser_ib_free_rc_channel(iser_chan_t *chan) 591 { 592 iser_qp_t *iser_qp; 593 594 iser_qp = &chan->ic_qp; 595 596 /* Ensure the SQ is empty */ 597 while (chan->ic_sq_post_count != 0) { 598 mutex_exit(&chan->ic_conn->ic_lock); 599 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 600 mutex_enter(&chan->ic_conn->ic_lock); 601 } 602 mutex_destroy(&chan->ic_sq_post_lock); 603 604 /* Ensure the RQ is empty */ 605 (void) ibt_flush_channel(chan->ic_chanhdl); 606 mutex_enter(&iser_qp->qp_lock); 607 while (iser_qp->rq_level != 0) { 608 mutex_exit(&iser_qp->qp_lock); 609 mutex_exit(&chan->ic_conn->ic_lock); 610 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 611 mutex_enter(&chan->ic_conn->ic_lock); 612 mutex_enter(&iser_qp->qp_lock); 613 } 614 615 /* Free our QP handle */ 616 mutex_exit(&iser_qp->qp_lock); 617 (void) iser_ib_fini_qp(iser_qp); 618 619 /* Free the IBT channel resources */ 620 (void) ibt_free_channel(chan->ic_chanhdl); 621 chan->ic_chanhdl = NULL; 622 623 /* Free the CQs */ 624 ibt_free_cq(chan->ic_sendcq); 625 ibt_free_cq(chan->ic_recvcq); 626 627 /* Free the chan handle */ 628 mutex_destroy(&chan->ic_lock); 
629 kmem_free(chan, sizeof (iser_chan_t)); 630 } 631 632 /* 633 * iser_ib_post_recv 634 * 635 * This function handles keeping the RQ full on a given channel. 636 * This routine will mostly be run on a taskq, and will check the 637 * current fill level of the RQ, and post as many WRs as necessary 638 * to fill it again. 639 */ 640 641 int 642 iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl) 643 { 644 iser_chan_t *chan; 645 int status; 646 647 /* Pull our iSER channel handle from the private data */ 648 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 649 650 /* 651 * Caller must check that chan->ic_conn->ic_stage indicates 652 * the connection is active (not closing, not closed) and 653 * it must hold the mutex cross the check and the call to this function 654 */ 655 ASSERT(mutex_owned(&chan->ic_conn->ic_lock)); 656 ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_IC_CONNECTED) && 657 (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN)); 658 idm_conn_hold(chan->ic_conn->ic_idmc); 659 status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task, 660 (void *)chanhdl, DDI_NOSLEEP); 661 if (status != DDI_SUCCESS) { 662 idm_conn_rele(chan->ic_conn->ic_idmc); 663 } 664 665 return (status); 666 } 667 668 static void 669 iser_ib_post_recv_task(void *arg) 670 { 671 ibt_channel_hdl_t chanhdl = arg; 672 iser_chan_t *chan; 673 674 /* Pull our iSER channel handle from the private data */ 675 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 676 677 iser_ib_post_recv(chanhdl); 678 idm_conn_rele(chan->ic_conn->ic_idmc); 679 } 680 681 void 682 iser_ib_post_recv(ibt_channel_hdl_t chanhdl) 683 { 684 iser_chan_t *chan; 685 iser_hca_t *hca; 686 iser_msg_t *msg; 687 ibt_recv_wr_t *wrlist, wr[ISER_IB_RQ_POST_MAX]; 688 int rq_space, msg_ret; 689 int total_num, npost; 690 uint_t nposted; 691 int status, i; 692 iser_qp_t *iser_qp; 693 ib_gid_t lgid; 694 695 /* Pull our iSER channel handle from the private data */ 696 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 697 
698 ASSERT(chan != NULL); 699 700 mutex_enter(&chan->ic_conn->ic_lock); 701 702 /* Bail out if the connection is closed; no need for more recv WRs */ 703 if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) || 704 (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) { 705 mutex_exit(&chan->ic_conn->ic_lock); 706 return; 707 } 708 709 /* get the QP handle from the iser_chan */ 710 iser_qp = &chan->ic_qp; 711 712 /* get the local gid from the path info */ 713 lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid; 714 715 /* get the hca port from the path info */ 716 hca = iser_ib_gid2hca(lgid); 717 if (hca == NULL) { 718 ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve " 719 "HCA handle"); 720 mutex_exit(&chan->ic_conn->ic_lock); 721 return; 722 } 723 724 /* check for space to post on the RQ */ 725 mutex_enter(&iser_qp->qp_lock); 726 rq_space = iser_qp->rq_depth - iser_qp->rq_level; 727 if (rq_space == 0) { 728 /* The RQ is full, clear the pending flag and return */ 729 iser_qp->rq_taskqpending = B_FALSE; 730 mutex_exit(&iser_qp->qp_lock); 731 mutex_exit(&chan->ic_conn->ic_lock); 732 return; 733 } 734 735 /* Keep track of the lowest value for rq_min_post_level */ 736 if (iser_qp->rq_level < iser_qp->rq_min_post_level) 737 iser_qp->rq_min_post_level = iser_qp->rq_level; 738 739 mutex_exit(&iser_qp->qp_lock); 740 741 /* we've room to post, so pull from the msg cache */ 742 msg = iser_msg_get(hca, rq_space, &msg_ret); 743 if (msg == NULL) { 744 ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles " 745 "available in msg cache currently"); 746 /* 747 * There are no messages on the cache. Wait a half- 748 * second, then try again. 
749 */ 750 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 751 status = iser_ib_post_recv_async(chanhdl); 752 if (status != DDI_SUCCESS) { 753 ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to " 754 "redispatch routine"); 755 /* Failed to dispatch, clear pending flag */ 756 mutex_enter(&iser_qp->qp_lock); 757 iser_qp->rq_taskqpending = B_FALSE; 758 mutex_exit(&iser_qp->qp_lock); 759 } 760 mutex_exit(&chan->ic_conn->ic_lock); 761 return; 762 } 763 764 if (msg_ret != rq_space) { 765 ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of " 766 "messages not allocated: requested (%d) allocated (%d)", 767 rq_space, msg_ret); 768 /* We got some, but not all, of our requested depth */ 769 rq_space = msg_ret; 770 } 771 772 /* 773 * Now, walk through the allocated WRs and post them, 774 * ISER_IB_RQ_POST_MAX (or less) at a time. 775 */ 776 wrlist = &wr[0]; 777 total_num = rq_space; 778 779 while (total_num) { 780 /* determine the number to post on this iteration */ 781 npost = (total_num > ISER_IB_RQ_POST_MAX) ? 
782 ISER_IB_RQ_POST_MAX : total_num; 783 784 /* build a list of WRs from the msg list */ 785 for (i = 0; i < npost; i++) { 786 wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg; 787 wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE; 788 wrlist[i].wr_sgl = &msg->msg_ds; 789 msg = msg->nextp; 790 } 791 792 /* post the list to the RQ */ 793 nposted = 0; 794 status = ibt_post_recv(chanhdl, wrlist, npost, &nposted); 795 if ((status != IBT_SUCCESS) || (nposted != npost)) { 796 ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv " 797 "failed: requested (%d) posted (%d) status (%d)", 798 npost, nposted, status); 799 total_num -= nposted; 800 break; 801 } 802 803 /* decrement total number to post by the number posted */ 804 total_num -= nposted; 805 } 806 807 mutex_enter(&iser_qp->qp_lock); 808 if (total_num != 0) { 809 ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, " 810 "failed to post (%d) WRs", total_num); 811 iser_qp->rq_level += rq_space - total_num; 812 } else { 813 iser_qp->rq_level += rq_space; 814 } 815 816 /* 817 * Now that we've filled the RQ, check that all of the recv WRs 818 * haven't just been immediately consumed. If so, taskqpending is 819 * still B_TRUE, so we need to fire off a taskq thread to post 820 * more WRs. 821 */ 822 if (iser_qp->rq_level == 0) { 823 mutex_exit(&iser_qp->qp_lock); 824 status = iser_ib_post_recv_async(chanhdl); 825 if (status != DDI_SUCCESS) { 826 ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to " 827 "dispatch followup routine"); 828 /* Failed to dispatch, clear pending flag */ 829 mutex_enter(&iser_qp->qp_lock); 830 iser_qp->rq_taskqpending = B_FALSE; 831 mutex_exit(&iser_qp->qp_lock); 832 } 833 } else { 834 /* 835 * We're done, we've filled the RQ. Clear the taskq 836 * flag so that we can run again. 
837 */ 838 iser_qp->rq_taskqpending = B_FALSE; 839 mutex_exit(&iser_qp->qp_lock); 840 } 841 842 mutex_exit(&chan->ic_conn->ic_lock); 843 } 844 845 /* 846 * iser_ib_handle_portup_event() 847 * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event. 848 * 849 * To facilitate a seamless bringover of the port and configure the CM service 850 * for inbound iSER service requests on this newly active port, the existing 851 * IDM services will be checked for iSER support. 852 * If an iSER service was already created, then this service will simply be 853 * bound to the gid of the newly active port. If on the other hand, the CM 854 * service did not exist, i.e. only socket communication, then a new CM 855 * service will be first registered with the saved service parameters and 856 * then bound to the newly active port. 857 * 858 */ 859 /* ARGSUSED */ 860 static void 861 iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 862 { 863 iser_hca_t *hca; 864 ib_gid_t gid; 865 idm_svc_t *idm_svc; 866 int status; 867 868 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)", 869 (longlong_t)event->ev_hca_guid, event->ev_port); 870 871 /* 872 * Query all ports on the HCA and update the port information 873 * maintainted in the iser_hca_t structure 874 */ 875 hca = iser_ib_guid2hca(event->ev_hca_guid); 876 if (hca == NULL) { 877 878 /* HCA is just made available, first port on that HCA */ 879 hca = iser_ib_alloc_hca(event->ev_hca_guid); 880 881 mutex_enter(&iser_state->is_hcalist_lock); 882 list_insert_tail(&iser_state->is_hcalist, hca); 883 iser_state->is_num_hcas++; 884 mutex_exit(&iser_state->is_hcalist_lock); 885 886 } else { 887 888 status = iser_ib_update_hcaports(hca); 889 890 if (status != IBT_SUCCESS) { 891 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 892 "status(0x%x): iser_ib_update_hcaports failed: " 893 "HCA(0x%llx) port(%d)", status, 894 (longlong_t)event->ev_hca_guid, event->ev_port); 895 return; 896 } 897 } 898 899 
gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 900 901 /* 902 * Iterate through the global list of IDM target services 903 * and check for existing iSER CM service. 904 */ 905 mutex_enter(&idm.idm_global_mutex); 906 for (idm_svc = list_head(&idm.idm_tgt_svc_list); 907 idm_svc != NULL; 908 idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) { 909 910 911 if (idm_svc->is_iser_svc == NULL) { 912 913 /* Establish a new CM service for iSER requests */ 914 status = iser_tgt_svc_create( 915 &idm_svc->is_svc_req, idm_svc); 916 917 if (status != IBT_SUCCESS) { 918 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 919 "status(0x%x): iser_tgt_svc_create failed: " 920 "HCA(0x%llx) port(%d)", status, 921 (longlong_t)event->ev_hca_guid, 922 event->ev_port); 923 924 continue; 925 } 926 } 927 928 status = iser_ib_activate_port( 929 idm_svc, event->ev_hca_guid, gid); 930 if (status != IBT_SUCCESS) { 931 932 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 933 "status(0x%x): Bind service on port " 934 "(%llx:%llx) failed", 935 status, (longlong_t)gid.gid_prefix, 936 (longlong_t)gid.gid_guid); 937 938 continue; 939 } 940 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound " 941 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 942 event->ev_port); 943 } 944 mutex_exit(&idm.idm_global_mutex); 945 946 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: " 947 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 948 event->ev_port); 949 } 950 951 /* 952 * iser_ib_handle_portdown_event() 953 * This handles the IBT_EVENT_PORT_DOWN unaffiliated asynchronous error. 954 * 955 * Unconfigure the CM service on the deactivated port and teardown the 956 * connections that are using the CM service. 
957 */ 958 /* ARGSUSED */ 959 static void 960 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 961 { 962 iser_hca_t *hca; 963 ib_gid_t gid; 964 int status; 965 966 /* 967 * Query all ports on the HCA and update the port information 968 * maintainted in the iser_hca_t structure 969 */ 970 hca = iser_ib_guid2hca(event->ev_hca_guid); 971 ASSERT(hca != NULL); 972 973 status = iser_ib_update_hcaports(hca); 974 if (status != IBT_SUCCESS) { 975 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): " 976 "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)", 977 status, (longlong_t)event->ev_hca_guid, event->ev_port); 978 return; 979 } 980 981 /* get the gid of the new port */ 982 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 983 iser_ib_deactivate_port(event->ev_hca_guid, gid); 984 985 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: " 986 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 987 event->ev_port); 988 } 989 990 /* 991 * iser_ib_handle_hca_detach_event() 992 * Quiesce all activity bound for the port, teardown the connection, unbind 993 * iSER services on all ports and release the HCA handle. 994 */ 995 /* ARGSUSED */ 996 static void 997 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 998 { 999 iser_hca_t *nexthca, *hca; 1000 int i, status; 1001 1002 ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)", 1003 (longlong_t)event->ev_hca_guid); 1004 1005 hca = iser_ib_guid2hca(event->ev_hca_guid); 1006 for (i = 0; i < hca->hca_num_ports; i++) { 1007 iser_ib_deactivate_port(hca->hca_guid, 1008 hca->hca_port_info[i].p_sgid_tbl[0]); 1009 } 1010 1011 /* 1012 * Update the HCA list maintained in the iser_state. Free the 1013 * resources allocated to the HCA, i.e. 
caches, protection domain 1014 */ 1015 mutex_enter(&iser_state->is_hcalist_lock); 1016 1017 for (hca = list_head(&iser_state->is_hcalist); 1018 hca != NULL; 1019 hca = nexthca) { 1020 1021 nexthca = list_next(&iser_state->is_hcalist, hca); 1022 1023 if (hca->hca_guid == event->ev_hca_guid) { 1024 1025 list_remove(&iser_state->is_hcalist, hca); 1026 iser_state->is_num_hcas--; 1027 1028 status = iser_ib_free_hca(hca); 1029 if (status != DDI_SUCCESS) { 1030 ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: " 1031 "Failed to free hca(%p)", (void *)hca); 1032 list_insert_tail(&iser_state->is_hcalist, hca); 1033 iser_state->is_num_hcas++; 1034 } 1035 /* No way to return status to IBT if this fails */ 1036 } 1037 } 1038 mutex_exit(&iser_state->is_hcalist_lock); 1039 1040 } 1041 1042 /* 1043 * iser_ib_async_handler 1044 * An IBT Asynchronous Event handler is registered it with the framework and 1045 * passed via the ibt_attach() routine. This function handles the following 1046 * asynchronous events. 1047 * IBT_EVENT_PORT_UP 1048 * IBT_ERROR_PORT_DOWN 1049 * IBT_HCA_ATTACH_EVENT 1050 * IBT_HCA_DETACH_EVENT 1051 */ 1052 /* ARGSUSED */ 1053 void 1054 iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1055 ibt_async_event_t *event) 1056 { 1057 switch (code) { 1058 case IBT_EVENT_PORT_UP: 1059 iser_ib_handle_portup_event(hdl, event); 1060 break; 1061 1062 case IBT_ERROR_PORT_DOWN: 1063 iser_ib_handle_portdown_event(hdl, event); 1064 break; 1065 1066 case IBT_HCA_ATTACH_EVENT: 1067 /* 1068 * A new HCA device is available for use, ignore this 1069 * event because the corresponding IBT_EVENT_PORT_UP 1070 * events will get triggered and handled accordingly. 
1071 */ 1072 break; 1073 1074 case IBT_HCA_DETACH_EVENT: 1075 iser_ib_handle_hca_detach_event(hdl, event); 1076 break; 1077 1078 default: 1079 break; 1080 } 1081 } 1082 1083 /* 1084 * iser_ib_init_hcas 1085 * 1086 * This function opens all the HCA devices, gathers the HCA state information 1087 * and adds the HCA handle for each HCA found in the iser_soft_state. 1088 */ 1089 static int 1090 iser_ib_init_hcas(void) 1091 { 1092 ib_guid_t *guid; 1093 int num_hcas; 1094 int i; 1095 iser_hca_t *hca; 1096 1097 /* Retrieve the HCA list */ 1098 num_hcas = ibt_get_hca_list(&guid); 1099 if (num_hcas == 0) { 1100 /* 1101 * This shouldn't happen, but might if we have all HCAs 1102 * detach prior to initialization. 1103 */ 1104 return (DDI_FAILURE); 1105 } 1106 1107 /* Initialize the hcalist lock */ 1108 mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL); 1109 1110 /* Create the HCA list */ 1111 list_create(&iser_state->is_hcalist, sizeof (iser_hca_t), 1112 offsetof(iser_hca_t, hca_node)); 1113 1114 for (i = 0; i < num_hcas; i++) { 1115 1116 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA " 1117 "(0x%llx)", (longlong_t)guid[i]); 1118 1119 hca = iser_ib_alloc_hca(guid[i]); 1120 if (hca == NULL) { 1121 /* This shouldn't happen, teardown and fail */ 1122 (void) iser_ib_fini_hcas(); 1123 (void) ibt_free_hca_list(guid, num_hcas); 1124 return (DDI_FAILURE); 1125 } 1126 1127 mutex_enter(&iser_state->is_hcalist_lock); 1128 list_insert_tail(&iser_state->is_hcalist, hca); 1129 iser_state->is_num_hcas++; 1130 mutex_exit(&iser_state->is_hcalist_lock); 1131 1132 } 1133 1134 /* Free the IBT HCA list */ 1135 (void) ibt_free_hca_list(guid, num_hcas); 1136 1137 /* Check that we've initialized at least one HCA */ 1138 mutex_enter(&iser_state->is_hcalist_lock); 1139 if (list_is_empty(&iser_state->is_hcalist)) { 1140 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize " 1141 "any HCAs"); 1142 1143 mutex_exit(&iser_state->is_hcalist_lock); 1144 (void) 
iser_ib_fini_hcas(); 1145 return (DDI_FAILURE); 1146 } 1147 mutex_exit(&iser_state->is_hcalist_lock); 1148 1149 return (DDI_SUCCESS); 1150 } 1151 1152 /* 1153 * iser_ib_fini_hcas 1154 * 1155 * Teardown the iSER HCA list initialized above. 1156 */ 1157 static int 1158 iser_ib_fini_hcas(void) 1159 { 1160 iser_hca_t *nexthca, *hca; 1161 int status; 1162 1163 mutex_enter(&iser_state->is_hcalist_lock); 1164 for (hca = list_head(&iser_state->is_hcalist); 1165 hca != NULL; 1166 hca = nexthca) { 1167 1168 nexthca = list_next(&iser_state->is_hcalist, hca); 1169 1170 list_remove(&iser_state->is_hcalist, hca); 1171 1172 status = iser_ib_free_hca(hca); 1173 if (status != IBT_SUCCESS) { 1174 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free " 1175 "HCA during fini"); 1176 list_insert_tail(&iser_state->is_hcalist, hca); 1177 return (DDI_FAILURE); 1178 } 1179 1180 iser_state->is_num_hcas--; 1181 1182 } 1183 mutex_exit(&iser_state->is_hcalist_lock); 1184 list_destroy(&iser_state->is_hcalist); 1185 mutex_destroy(&iser_state->is_hcalist_lock); 1186 1187 return (DDI_SUCCESS); 1188 } 1189 1190 /* 1191 * iser_ib_alloc_hca 1192 * 1193 * This function opens the given HCA device, gathers the HCA state information 1194 * and adds the HCA handle 1195 */ 1196 static iser_hca_t * 1197 iser_ib_alloc_hca(ib_guid_t guid) 1198 { 1199 iser_hca_t *hca; 1200 int status; 1201 1202 /* Allocate an iser_hca_t HCA handle */ 1203 hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP); 1204 1205 /* Open this HCA */ 1206 status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl); 1207 if (status != IBT_SUCCESS) { 1208 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:" 1209 " guid (0x%llx) status (0x%x)", (longlong_t)guid, status); 1210 kmem_free(hca, sizeof (iser_hca_t)); 1211 return (NULL); 1212 } 1213 1214 hca->hca_guid = guid; 1215 hca->hca_clnt_hdl = iser_state->is_ibhdl; 1216 1217 /* Query the HCA */ 1218 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr); 1219 if (status != 
IBT_SUCCESS) { 1220 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca " 1221 "failure: guid (0x%llx) status (0x%x)", 1222 (longlong_t)guid, status); 1223 (void) ibt_close_hca(hca->hca_hdl); 1224 kmem_free(hca, sizeof (iser_hca_t)); 1225 return (NULL); 1226 } 1227 1228 /* Query all ports on the HCA */ 1229 status = ibt_query_hca_ports(hca->hca_hdl, 0, 1230 &hca->hca_port_info, &hca->hca_num_ports, 1231 &hca->hca_port_info_sz); 1232 if (status != IBT_SUCCESS) { 1233 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: " 1234 "ibt_query_hca_ports failure: guid (0x%llx) " 1235 "status (0x%x)", (longlong_t)guid, status); 1236 (void) ibt_close_hca(hca->hca_hdl); 1237 kmem_free(hca, sizeof (iser_hca_t)); 1238 return (NULL); 1239 } 1240 1241 /* Allocate a single PD on this HCA */ 1242 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, 1243 &hca->hca_pdhdl); 1244 if (status != IBT_SUCCESS) { 1245 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd " 1246 "failure: guid (0x%llx) status (0x%x)", 1247 (longlong_t)guid, status); 1248 (void) ibt_close_hca(hca->hca_hdl); 1249 ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz); 1250 kmem_free(hca, sizeof (iser_hca_t)); 1251 return (NULL); 1252 } 1253 1254 /* Initialize the message and data MR caches for this HCA */ 1255 iser_init_hca_caches(hca); 1256 1257 return (hca); 1258 } 1259 1260 static int 1261 iser_ib_free_hca(iser_hca_t *hca) 1262 { 1263 int status; 1264 ibt_hca_portinfo_t *hca_port_info; 1265 uint_t hca_port_info_sz; 1266 1267 ASSERT(hca != NULL); 1268 if (hca->hca_failed) 1269 return (DDI_FAILURE); 1270 1271 hca_port_info = hca->hca_port_info; 1272 hca_port_info_sz = hca->hca_port_info_sz; 1273 1274 /* 1275 * Free the memory regions before freeing 1276 * the associated protection domain 1277 */ 1278 iser_fini_hca_caches(hca); 1279 1280 status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl); 1281 if (status != IBT_SUCCESS) { 1282 ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD " 1283 "status=0x%x", status); 1284 goto 
out_caches; 1285 } 1286 1287 status = ibt_close_hca(hca->hca_hdl); 1288 if (status != IBT_SUCCESS) { 1289 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to close HCA " 1290 "status=0x%x", status); 1291 goto out_pd; 1292 } 1293 1294 ibt_free_portinfo(hca_port_info, hca_port_info_sz); 1295 1296 kmem_free(hca, sizeof (iser_hca_t)); 1297 return (DDI_SUCCESS); 1298 1299 /* 1300 * We only managed to partially tear down the HCA, try to put it back 1301 * like it was before returning. 1302 */ 1303 out_pd: 1304 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl); 1305 if (status != IBT_SUCCESS) { 1306 hca->hca_failed = B_TRUE; 1307 /* Report error and exit */ 1308 ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD " 1309 "status=0x%x", status); 1310 return (DDI_FAILURE); 1311 } 1312 1313 out_caches: 1314 iser_init_hca_caches(hca); 1315 1316 return (DDI_FAILURE); 1317 } 1318 1319 static int 1320 iser_ib_update_hcaports(iser_hca_t *hca) 1321 { 1322 ibt_hca_portinfo_t *pinfop, *oldpinfop; 1323 uint_t size, oldsize, nport; 1324 int status; 1325 1326 ASSERT(hca != NULL); 1327 1328 status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size); 1329 if (status != IBT_SUCCESS) { 1330 ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status); 1331 return (status); 1332 } 1333 1334 oldpinfop = hca->hca_port_info; 1335 oldsize = hca->hca_port_info_sz; 1336 hca->hca_port_info = pinfop; 1337 hca->hca_port_info_sz = size; 1338 1339 (void) ibt_free_portinfo(oldpinfop, oldsize); 1340 1341 return (IBT_SUCCESS); 1342 } 1343 1344 /* 1345 * iser_ib_gid2hca 1346 * Given a gid, find the corresponding hca 1347 */ 1348 iser_hca_t * 1349 iser_ib_gid2hca(ib_gid_t gid) 1350 { 1351 1352 iser_hca_t *hca; 1353 int i; 1354 1355 mutex_enter(&iser_state->is_hcalist_lock); 1356 for (hca = list_head(&iser_state->is_hcalist); 1357 hca != NULL; 1358 hca = list_next(&iser_state->is_hcalist, hca)) { 1359 1360 for (i = 0; i < hca->hca_num_ports; i++) { 1361 if 
((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix == 1362 gid.gid_prefix) && 1363 (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid == 1364 gid.gid_guid)) { 1365 1366 mutex_exit(&iser_state->is_hcalist_lock); 1367 1368 return (hca); 1369 } 1370 } 1371 } 1372 mutex_exit(&iser_state->is_hcalist_lock); 1373 return (NULL); 1374 } 1375 1376 /* 1377 * iser_ib_guid2hca 1378 * Given a HCA guid, find the corresponding HCA 1379 */ 1380 iser_hca_t * 1381 iser_ib_guid2hca(ib_guid_t guid) 1382 { 1383 1384 iser_hca_t *hca; 1385 1386 mutex_enter(&iser_state->is_hcalist_lock); 1387 for (hca = list_head(&iser_state->is_hcalist); 1388 hca != NULL; 1389 hca = list_next(&iser_state->is_hcalist, hca)) { 1390 1391 if (hca->hca_guid == guid) { 1392 mutex_exit(&iser_state->is_hcalist_lock); 1393 return (hca); 1394 } 1395 } 1396 mutex_exit(&iser_state->is_hcalist_lock); 1397 return (NULL); 1398 } 1399 1400 /* 1401 * iser_ib_conv_sockaddr2ibtaddr 1402 * This function converts a socket address into the IBT format 1403 */ 1404 void iser_ib_conv_sockaddr2ibtaddr( 1405 idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr) 1406 { 1407 if (saddr == NULL) { 1408 ibt_addr->family = AF_UNSPEC; 1409 ibt_addr->un.ip4addr = 0; 1410 } else { 1411 switch (saddr->sin.sa_family) { 1412 case AF_INET: 1413 1414 ibt_addr->family = saddr->sin4.sin_family; 1415 ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr; 1416 break; 1417 1418 case AF_INET6: 1419 1420 ibt_addr->family = saddr->sin6.sin6_family; 1421 ibt_addr->un.ip6addr = saddr->sin6.sin6_addr; 1422 break; 1423 1424 default: 1425 ibt_addr->family = AF_UNSPEC; 1426 } 1427 1428 } 1429 } 1430 1431 /* 1432 * iser_ib_conv_ibtaddr2sockaddr 1433 * This function converts an IBT ip address handle to a sockaddr 1434 */ 1435 void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss, 1436 ibt_ip_addr_t *ibt_addr, in_port_t port) 1437 { 1438 struct sockaddr_in *sin; 1439 struct sockaddr_in6 *sin6; 1440 1441 switch (ibt_addr->family) { 1442 case AF_INET: 1443 case 
AF_UNSPEC: 1444 1445 sin = (struct sockaddr_in *)ibt_addr; 1446 sin->sin_port = ntohs(port); 1447 bcopy(sin, ss, sizeof (struct sockaddr_in)); 1448 break; 1449 1450 case AF_INET6: 1451 1452 sin6 = (struct sockaddr_in6 *)ibt_addr; 1453 sin6->sin6_port = ntohs(port); 1454 bcopy(sin6, ss, sizeof (struct sockaddr_in6)); 1455 break; 1456 1457 default: 1458 ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: " 1459 "unknown family type: 0x%x", ibt_addr->family); 1460 } 1461 } 1462 1463 /* 1464 * iser_ib_setup_cq 1465 * This function sets up the Completion Queue size and allocates the specified 1466 * Completion Queue 1467 */ 1468 static int 1469 iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl) 1470 { 1471 1472 ibt_cq_attr_t cq_attr; 1473 int status; 1474 1475 cq_attr.cq_size = cq_size; 1476 cq_attr.cq_sched = 0; 1477 cq_attr.cq_flags = IBT_CQ_NO_FLAGS; 1478 1479 /* Allocate a Completion Queue */ 1480 status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL); 1481 if (status != IBT_SUCCESS) { 1482 ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)", 1483 status); 1484 return (status); 1485 } 1486 1487 return (ISER_STATUS_SUCCESS); 1488 } 1489 1490 /* 1491 * iser_ib_setup_chanargs 1492 * 1493 */ 1494 static void 1495 iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl, 1496 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size, 1497 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs) 1498 { 1499 1500 bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t)); 1501 1502 /* 1503 * Set up the size of the channels send queue, receive queue and the 1504 * maximum number of elements in a scatter gather list of work requests 1505 * posted to the send and receive queues. 
1506 */ 1507 cargs->rc_sizes.cs_sq = sq_size; 1508 cargs->rc_sizes.cs_rq = rq_size; 1509 cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE; 1510 cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE; 1511 1512 /* 1513 * All Work requests signaled on a WR basis will receive a send 1514 * request completion. 1515 */ 1516 cargs->rc_flags = IBT_ALL_SIGNALED; 1517 1518 /* Enable RDMA read and RDMA write on the channel end points */ 1519 cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 1520 1521 /* Set the local hca port on which the channel is allocated */ 1522 cargs->rc_hca_port_num = hca_port; 1523 1524 /* Set the Send and Receive Completion Queue handles */ 1525 cargs->rc_scq = scq_hdl; 1526 cargs->rc_rcq = rcq_hdl; 1527 1528 /* Set the protection domain associated with the channel */ 1529 cargs->rc_pd = hca_pdhdl; 1530 1531 /* No SRQ usage */ 1532 cargs->rc_srq = NULL; 1533 } 1534 1535 /* 1536 * iser_ib_init_qp 1537 * Initialize the QP handle 1538 */ 1539 void 1540 iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size) 1541 { 1542 /* Initialize the handle lock */ 1543 mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1544 1545 /* Record queue sizes */ 1546 chan->ic_qp.sq_size = sq_size; 1547 chan->ic_qp.rq_size = rq_size; 1548 1549 /* Initialize the RQ monitoring data */ 1550 chan->ic_qp.rq_depth = rq_size; 1551 chan->ic_qp.rq_level = 0; 1552 chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100; 1553 1554 /* Initialize the taskq flag */ 1555 chan->ic_qp.rq_taskqpending = B_FALSE; 1556 } 1557 1558 /* 1559 * iser_ib_fini_qp 1560 * Teardown the QP handle 1561 */ 1562 void 1563 iser_ib_fini_qp(iser_qp_t *qp) 1564 { 1565 /* Destroy the handle lock */ 1566 mutex_destroy(&qp->qp_lock); 1567 } 1568 1569 static int 1570 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid) 1571 { 1572 iser_svc_t *iser_svc; 1573 iser_sbind_t *is_sbind; 1574 int status; 1575 1576 iser_svc = idm_svc->is_iser_svc; 1577 1578 /* 1579 * Save the 
address of the service bind handle in the 1580 * iser_svc_t to undo the service binding at a later time 1581 */ 1582 is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP); 1583 is_sbind->is_gid = gid; 1584 is_sbind->is_guid = guid; 1585 1586 status = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL, 1587 idm_svc, &is_sbind->is_sbindhdl); 1588 1589 if (status != IBT_SUCCESS) { 1590 ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): " 1591 "Bind service(%llx) on port(%llx:%llx) failed", 1592 status, (longlong_t)iser_svc->is_svcid, 1593 (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid); 1594 1595 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1596 1597 return (status); 1598 } 1599 1600 list_insert_tail(&iser_svc->is_sbindlist, is_sbind); 1601 1602 return (IBT_SUCCESS); 1603 } 1604 1605 static void 1606 iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid) 1607 { 1608 iser_svc_t *iser_svc; 1609 iser_conn_t *iser_conn; 1610 iser_sbind_t *is_sbind; 1611 idm_conn_t *idm_conn; 1612 1613 /* 1614 * Iterate through the global list of IDM target connections. 1615 * Issue a TRANSPORT_FAIL for any connections on this port, and 1616 * if there is a bound service running on the port, tear it down. 
1617 */ 1618 mutex_enter(&idm.idm_global_mutex); 1619 for (idm_conn = list_head(&idm.idm_tgt_conn_list); 1620 idm_conn != NULL; 1621 idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) { 1622 1623 if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) { 1624 /* this is not an iSER connection, skip it */ 1625 continue; 1626 } 1627 1628 iser_conn = idm_conn->ic_transport_private; 1629 if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) { 1630 /* this iSER connection is on a different port */ 1631 continue; 1632 } 1633 1634 /* Fail the transport for this connection */ 1635 idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 1636 1637 if (idm_conn->ic_conn_type == CONN_TYPE_INI) { 1638 /* initiator connection, nothing else to do */ 1639 continue; 1640 } 1641 1642 /* Check for a service binding */ 1643 iser_svc = idm_conn->ic_svc_binding->is_iser_svc; 1644 is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid); 1645 if (is_sbind != NULL) { 1646 /* This service is still bound, tear it down */ 1647 ibt_unbind_service(iser_svc->is_srvhdl, 1648 is_sbind->is_sbindhdl); 1649 list_remove(&iser_svc->is_sbindlist, is_sbind); 1650 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1651 } 1652 } 1653 mutex_exit(&idm.idm_global_mutex); 1654 } 1655 1656 static iser_sbind_t * 1657 iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid) 1658 { 1659 iser_sbind_t *is_sbind; 1660 1661 for (is_sbind = list_head(&iser_svc->is_sbindlist); 1662 is_sbind != NULL; 1663 is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) { 1664 1665 if ((is_sbind->is_guid == hca_guid) && 1666 (is_sbind->is_gid.gid_prefix == gid.gid_prefix) && 1667 (is_sbind->is_gid.gid_guid == gid.gid_guid)) { 1668 return (is_sbind); 1669 } 1670 } 1671 return (NULL); 1672 } 1673