1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/stream.h> 78 #include <sys/ib/clients/rds/rdsib_cm.h> 79 #include <sys/ib/clients/rds/rdsib_ib.h> 80 #include <sys/ib/clients/rds/rdsib_buf.h> 81 #include <sys/ib/clients/rds/rdsib_ep.h> 82 #include <sys/ib/clients/rds/rds_kstat.h> 83 #include <sys/zone.h> 84 85 #define RDS_POLL_CQ_IN_2TICKS 1 86 87 /* 88 * This File contains the endpoint related calls 89 */ 90 91 extern int rds_get_ibaddr(ipaddr_t, ipaddr_t, ib_gid_t *, ib_gid_t *); 92 extern boolean_t rds_islocal(ipaddr_t addr); 93 extern uint_t rds_wc_signal; 94 95 static uint8_t 96 rds_is_port_marked(rds_session_t *sp, in_port_t port) 97 { 98 uint8_t ret; 99 100 if (sp != NULL) { 101 rw_enter(&sp->session_portmap_lock, RW_READER); 102 ret = (sp->session_portmap[port/8] & (1 << (port % 8))); 103 rw_exit(&sp->session_portmap_lock); 104 } else { 105 rw_enter(&rds_local_portmap_lock, RW_READER); 106 ret = (rds_local_portmap[port/8] & (1 << (port % 8))); 107 rw_exit(&rds_local_portmap_lock); 108 } 109 110 return (ret); 111 } 112 113 static uint8_t 114 rds_check_n_mark_port(rds_session_t *sp, in_port_t port) 115 { 116 uint8_t ret; 117 118 if (sp != NULL) { 119 rw_enter(&sp->session_portmap_lock, RW_WRITER); 120 ret = (sp->session_portmap[port/8] & (1 << (port % 8))); 121 if (!ret) { 122 /* port is not marked, mark it */ 123 sp->session_portmap[port/8] = 124 sp->session_portmap[port/8] | (1 << (port % 8)); 125 } 126 rw_exit(&sp->session_portmap_lock); 127 } else { 128 rw_enter(&rds_local_portmap_lock, RW_WRITER); 129 ret = (rds_local_portmap[port/8] & (1 << (port % 8))); 130 if (!ret) { 131 /* port is not marked, mark it */ 132 rds_local_portmap[port/8] = 133 rds_local_portmap[port/8] | (1 << (port % 8)); 134 } 135 rw_exit(&rds_local_portmap_lock); 136 } 137 138 return (ret); 139 } 140 141 static uint8_t 142 rds_check_n_unmark_port(rds_session_t *sp, in_port_t port) 143 { 144 uint8_t ret; 145 146 if (sp != NULL) { 147 rw_enter(&sp->session_portmap_lock, RW_WRITER); 148 ret = (sp->session_portmap[port/8] & (1 << (port % 8))); 149 if (ret) { 150 /* port is marked, unmark it */ 151 sp->session_portmap[port/8] = 152 sp->session_portmap[port/8] & ~(1 << (port % 8)); 153 } 154 rw_exit(&sp->session_portmap_lock); 155 } else { 156 rw_enter(&rds_local_portmap_lock, RW_WRITER); 157 ret = (rds_local_portmap[port/8] & (1 << (port % 8))); 158 if (ret) { 159 /* port is marked, unmark it */ 160 rds_local_portmap[port/8] = 161 rds_local_portmap[port/8] & ~(1 << (port % 8)); 162 } 163 rw_exit(&rds_local_portmap_lock); 164 } 165 166 return (ret); 167 } 168 169 static void 170 rds_mark_all_ports(rds_session_t *sp) 171 { 172 if (sp != NULL) { 173 rw_enter(&sp->session_portmap_lock, RW_WRITER); 174 (void) memset(sp->session_portmap, 0xFF, RDS_PORT_MAP_SIZE); 175 rw_exit(&sp->session_portmap_lock); 176 } else { 177 rw_enter(&rds_local_portmap_lock, RW_WRITER); 178 (void) memset(rds_local_portmap, 0xFF, RDS_PORT_MAP_SIZE); 179 rw_exit(&rds_local_portmap_lock); 180 } 181 } 182 183 static void 184 rds_unmark_all_ports(rds_session_t *sp) 185 { 186 if (sp != NULL) { 187 rw_enter(&sp->session_portmap_lock, RW_WRITER); 188 bzero(sp->session_portmap, RDS_PORT_MAP_SIZE); 189 rw_exit(&sp->session_portmap_lock); 190 } else { 191 rw_enter(&rds_local_portmap_lock, RW_WRITER); 192 bzero(rds_local_portmap, RDS_PORT_MAP_SIZE); 193 rw_exit(&rds_local_portmap_lock); 194 } 195 } 196 197 static void 198 rds_add_session(rds_session_t *sp, boolean_t locked) 199 { 200 RDS_DPRINTF2("rds_add_session", "Enter: SP(%p)", sp); 201 202 if (!locked) { 203 rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER); 204 } 205 206 sp->session_nextp = rdsib_statep->rds_sessionlistp; 207 rdsib_statep->rds_sessionlistp = sp; 208 rdsib_statep->rds_nsessions++; 209 210 if (!locked) { 211 rw_exit(&rdsib_statep->rds_sessionlock); 212 } 213 RDS_INCR_SESS(); 214 215 RDS_DPRINTF2("rds_add_session", "Return: SP(%p)", sp); 216 } 217 218 /* Session lookup based on destination IP or destination node guid */ 219 rds_session_t * 220 rds_session_lkup(rds_state_t *statep, ipaddr_t remoteip, ib_guid_t node_guid) 221 { 222 rds_session_t *sp; 223 224 RDS_DPRINTF4("rds_session_lkup", "Enter: 0x%p 0x%x 0x%llx", statep, 225 remoteip, node_guid); 226 227 /* A read/write lock is expected, will panic if none of them are held */ 228 ASSERT(rw_lock_held(&statep->rds_sessionlock)); 229 sp = statep->rds_sessionlistp; 230 while (sp) { 231 if ((sp->session_rgid.gid_guid == node_guid) || 232 (sp->session_remip == remoteip)) { 233 break; 234 } 235 236 sp = sp->session_nextp; 237 } 238 239 RDS_DPRINTF4("rds_session_lkup", "Return: SP(%p)", sp); 240 241 return (sp); 242 } 243 244 static void 245 rds_ep_fini(rds_ep_t *ep) 246 { 247 RDS_DPRINTF3("rds_ep_fini", "Enter: EP(%p) type: %d", ep, ep->ep_type); 248 249 /* free send pool */ 250 rds_free_send_pool(ep); 251 252 /* free recv pool */ 253 rds_free_recv_pool(ep); 254 255 RDS_DPRINTF3("rds_ep_fini", "Return EP(%p)", ep); 256 } 257 258 /* Assumes SP write lock is held */ 259 int 260 rds_ep_init(rds_ep_t *ep) 261 { 262 uint_t ret; 263 264 RDS_DPRINTF3("rds_ep_init", "Enter: EP(%p) Type: %d", ep, ep->ep_type); 265 266 /* send pool */ 267 ret = rds_init_send_pool(ep); 268 if (ret != 0) { 269 RDS_DPRINTF2(LABEL, "EP(%p): rds_init_send_pool failed: %d", 270 ep, ret); 271 return (-1); 272 } 273 274 /* recv pool */ 275 ret = rds_init_recv_pool(ep); 276 if (ret != 0) { 277 RDS_DPRINTF2(LABEL, "EP(%p): rds_init_recv_pool failed: %d", 278 ep, ret); 279 rds_free_send_pool(ep); 280 return (-1); 281 } 282 283 /* reset the ep state */ 284 mutex_enter(&ep->ep_lock); 285 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 286 ep->ep_lbufid = NULL; 287 ep->ep_rbufid = NULL; 288 ep->ep_segfbp = NULL; 289 ep->ep_seglbp = NULL; 290 291 /* Initialize the WR to send acknowledgements */ 292 ep->ep_ackwr.wr_id = RDS_RDMAW_WRID; 293 ep->ep_ackwr.wr_flags = IBT_WR_SEND_SOLICIT; 294 ep->ep_ackwr.wr_trans = IBT_RC_SRV; 295 ep->ep_ackwr.wr_opcode = IBT_WRC_RDMAW; 296 ep->ep_ackwr.wr_nds = 1; 297 ep->ep_ackwr.wr_sgl = &ep->ep_ackds; 298 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = NULL; 299 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = 0; 300 mutex_exit(&ep->ep_lock); 301 302 RDS_DPRINTF3("rds_ep_init", "Return: EP(%p) type: %d", ep, ep->ep_type); 303 304 return (0); 305 } 306 307 static int 308 rds_ep_reinit(rds_ep_t *ep, ib_guid_t hca_guid) 309 { 310 int ret; 311 312 RDS_DPRINTF3("rds_ep_reinit", "Enter: EP(%p) Type: %d", 313 ep, ep->ep_type); 314 315 /* Re-initialize send pool */ 316 ret = rds_reinit_send_pool(ep, hca_guid); 317 if (ret != 0) { 318 RDS_DPRINTF2("rds_ep_reinit", 319 "EP(%p): rds_reinit_send_pool failed: %d", ep, ret); 320 return (-1); 321 } 322 323 /* free all the receive buffers in the pool */ 324 rds_free_recv_pool(ep); 325 326 RDS_DPRINTF3("rds_ep_reinit", "Return: EP(%p) Type: %d", 327 ep, ep->ep_type); 328 329 return (0); 330 } 331 332 void 333 rds_session_fini(rds_session_t *sp) 334 { 335 RDS_DPRINTF2("rds_session_fini", "Enter: SP(0x%p)", sp); 336 337 rds_ep_fini(&sp->session_dataep); 338 rds_ep_fini(&sp->session_ctrlep); 339 340 RDS_DPRINTF2("rds_session_fini", "Return: SP(0x%p)", sp); 341 } 342 343 /* 344 * Allocate and initialize the resources needed for the control and 345 * data channels 346 */ 347 int 348 rds_session_init(rds_session_t *sp) 349 { 350 int ret; 351 352 RDS_DPRINTF2("rds_session_init", "Enter: SP(0x%p)", sp); 353 354 /* CALLED WITH SESSION WRITE LOCK */ 355 356 /* allocate and initialize the ctrl channel */ 357 ret = rds_ep_init(&sp->session_ctrlep); 358 if (ret != 0) { 359 RDS_DPRINTF2(LABEL, "SP(%p): Ctrl EP(%p) initialization " 360 "failed", sp, &sp->session_ctrlep); 361 return (-1); 362 } 363 364 RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p)", sp, &sp->session_ctrlep); 365 366 /* allocate and initialize the data channel */ 367 ret = rds_ep_init(&sp->session_dataep); 368 if (ret != 0) { 369 RDS_DPRINTF2(LABEL, "SP(%p): Data EP(%p) initialization " 370 "failed", sp, &sp->session_dataep); 371 rds_ep_fini(&sp->session_ctrlep); 372 return (-1); 373 } 374 375 RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p)", sp, &sp->session_dataep); 376 377 RDS_DPRINTF2("rds_session_init", "Return"); 378 379 return (0); 380 } 381 382 /* 383 * This should be called before moving a session from ERROR state to 384 * INIT state. This will update the HCA keys incase the session has moved from 385 * one HCA to another. 386 */ 387 int 388 rds_session_reinit(rds_session_t *sp, ib_gid_t lgid) 389 { 390 rds_hca_t *hcap, *hcap1; 391 int ret; 392 393 RDS_DPRINTF2("rds_session_reinit", "Enter: SP(0x%p)", sp); 394 395 /* CALLED WITH SESSION WRITE LOCK */ 396 397 hcap = rds_gid_to_hcap(rdsib_statep, lgid); 398 if (hcap == NULL) { 399 RDS_DPRINTF1("rds_session_reinit", "SGID is on an " 400 "uninitialized HCA: %llx", lgid.gid_guid); 401 return (-1); 402 } 403 404 hcap1 = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); 405 if (hcap1 == NULL) { 406 RDS_DPRINTF1("rds_session_reinit", "Seems like HCA %llx " 407 "is unplugged", sp->session_lgid.gid_guid); 408 } else if (hcap->hca_guid == hcap1->hca_guid) { 409 /* 410 * No action is needed as the session did not move across 411 * HCAs 412 */ 413 RDS_DPRINTF2("rds_session_reinit", "Failover on the same HCA"); 414 return (0); 415 } 416 417 RDS_DPRINTF2("rds_session_reinit", "Failover across HCAs"); 418 419 /* re-initialize the control channel */ 420 ret = rds_ep_reinit(&sp->session_ctrlep, hcap->hca_guid); 421 if (ret != 0) { 422 RDS_DPRINTF2("rds_session_reinit", 423 "SP(%p): Ctrl EP(%p) re-initialization failed", 424 sp, &sp->session_ctrlep); 425 return (-1); 426 } 427 428 RDS_DPRINTF2("rds_session_reinit", "SP(%p) Control EP(%p)", 429 sp, &sp->session_ctrlep); 430 431 /* re-initialize the data channel */ 432 ret = rds_ep_reinit(&sp->session_dataep, hcap->hca_guid); 433 if (ret != 0) { 434 RDS_DPRINTF2("rds_session_reinit", 435 "SP(%p): Data EP(%p) re-initialization failed", 436 sp, &sp->session_dataep); 437 return (-1); 438 } 439 440 RDS_DPRINTF2("rds_session_reinit", "SP(%p) Data EP(%p)", 441 sp, &sp->session_dataep); 442 443 sp->session_lgid = lgid; 444 445 RDS_DPRINTF2("rds_session_reinit", "Return: SP(0x%p)", sp); 446 447 return (0); 448 } 449 450 static int 451 rds_session_connect(rds_session_t *sp) 452 { 453 ibt_channel_hdl_t ctrlchan, datachan; 454 rds_ep_t *ep; 455 ibt_path_info_t pinfo; 456 ibt_path_attr_t pattr; 457 ib_gid_t lgid, rgid; 458 int ret; 459 460 RDS_DPRINTF2("rds_session_connect", "Enter SP(%p)", sp); 461 462 rw_enter(&sp->session_lock, RW_READER); 463 rgid = sp->session_rgid; 464 lgid = sp->session_lgid; 465 rw_exit(&sp->session_lock); 466 467 /* get paths to the destination */ 468 bzero(&pattr, sizeof (ibt_path_attr_t)); 469 pattr.pa_dgids = &rgid; 470 pattr.pa_sgid = lgid; 471 pattr.pa_sd_flags = IBT_NO_SDATA; 472 pattr.pa_num_dgids = 1; 473 ret = ibt_get_paths(rdsib_statep->rds_ibhdl, IBT_PATH_NO_FLAGS, 474 &pattr, 1, &pinfo, NULL); 475 if (ret != IBT_SUCCESS) { 476 RDS_DPRINTF2(LABEL, "ibt_get_paths failed: %d", ret); 477 return (-1); 478 } 479 pinfo.pi_sid = RDS_SERVICE_ID; 480 481 /* Override the packet life time based on the conf file */ 482 if (IBPktLifeTime != 0) { 483 pinfo.pi_prim_cep_path.cep_cm_opaque1 = IBPktLifeTime; 484 } 485 486 /* Session type may change if we run into peer-to-peer case. */ 487 rw_enter(&sp->session_lock, RW_READER); 488 if (sp->session_type == RDS_SESSION_PASSIVE) { 489 RDS_DPRINTF2("rds_session_connect", "SP(%p) is no longer the " 490 "active end", sp); 491 rw_exit(&sp->session_lock); 492 return (0); /* return success */ 493 } 494 rw_exit(&sp->session_lock); 495 496 /* connect the data ep first */ 497 ep = &sp->session_dataep; 498 mutex_enter(&ep->ep_lock); 499 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 500 ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING; 501 mutex_exit(&ep->ep_lock); 502 ret = rds_open_rc_channel(ep, &pinfo, IBT_BLOCKING, &datachan); 503 if (ret != IBT_SUCCESS) { 504 RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel " 505 "failed: %d", ep, ret); 506 return (-1); 507 } 508 sp->session_dataep.ep_chanhdl = datachan; 509 } else { 510 RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p) is in " 511 "unexpected state: %d", sp, ep, ep->ep_state); 512 mutex_exit(&ep->ep_lock); 513 return (-1); 514 } 515 516 RDS_DPRINTF3(LABEL, "SP(%p) EP(%p): Data channel is connected", 517 sp, ep); 518 519 ep = &sp->session_ctrlep; 520 mutex_enter(&ep->ep_lock); 521 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 522 ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING; 523 mutex_exit(&ep->ep_lock); 524 ret = rds_open_rc_channel(ep, &pinfo, IBT_BLOCKING, &ctrlchan); 525 if (ret != IBT_SUCCESS) { 526 RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel " 527 "failed: %d", ep, ret); 528 return (-1); 529 } 530 sp->session_ctrlep.ep_chanhdl = ctrlchan; 531 } else { 532 RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p) is in " 533 "unexpected state: %d", sp, ep, ep->ep_state); 534 mutex_exit(&ep->ep_lock); 535 return (-1); 536 } 537 538 RDS_DPRINTF2(LABEL, "Session (%p) 0x%x <--> 0x%x is CONNECTED", 539 sp, sp->session_myip, sp->session_remip); 540 541 RDS_DPRINTF2("rds_session_connect", "Return SP(%p)", sp); 542 543 return (0); 544 } 545 546 /* 547 * Can be called with or without session_lock. 548 */ 549 void 550 rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode, uint_t wait) 551 { 552 rds_ep_t *ep; 553 554 RDS_DPRINTF2("rds_session_close", "SP(%p) State: %d", sp, 555 sp->session_state); 556 557 ep = &sp->session_dataep; 558 RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state); 559 560 /* wait until the SQ is empty before closing */ 561 (void) rds_is_sendq_empty(ep, wait); 562 563 mutex_enter(&ep->ep_lock); 564 while (ep->ep_state == RDS_EP_STATE_CLOSING) { 565 mutex_exit(&ep->ep_lock); 566 delay(drv_usectohz(300000)); 567 mutex_enter(&ep->ep_lock); 568 } 569 570 if (ep->ep_state == RDS_EP_STATE_CONNECTED) { 571 ep->ep_state = RDS_EP_STATE_CLOSING; 572 mutex_exit(&ep->ep_lock); 573 (void) rds_close_rc_channel(ep->ep_chanhdl, mode); 574 mutex_enter(&ep->ep_lock); 575 } 576 rds_ep_free_rc_channel(ep); 577 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 578 ep->ep_segfbp = NULL; 579 ep->ep_seglbp = NULL; 580 mutex_exit(&ep->ep_lock); 581 582 ep = &sp->session_ctrlep; 583 RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state); 584 585 /* wait until the SQ is empty before closing */ 586 (void) rds_is_sendq_empty(ep, 1); 587 588 mutex_enter(&ep->ep_lock); 589 while (ep->ep_state == RDS_EP_STATE_CLOSING) { 590 mutex_exit(&ep->ep_lock); 591 delay(drv_usectohz(300000)); 592 mutex_enter(&ep->ep_lock); 593 } 594 595 if (ep->ep_state == RDS_EP_STATE_CONNECTED) { 596 mutex_exit(&ep->ep_lock); 597 ep->ep_state = RDS_EP_STATE_CLOSING; 598 (void) rds_close_rc_channel(ep->ep_chanhdl, mode); 599 mutex_enter(&ep->ep_lock); 600 } 601 rds_ep_free_rc_channel(ep); 602 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 603 ep->ep_segfbp = NULL; 604 ep->ep_seglbp = NULL; 605 mutex_exit(&ep->ep_lock); 606 607 RDS_DPRINTF2("rds_session_close", "Return (%p)", sp); 608 } 609 610 /* Free the session */ 611 static void 612 rds_destroy_session(rds_session_t *sp) 613 { 614 rds_ep_t *ep; 615 rds_bufpool_t *pool; 616 617 ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) || 618 (sp->session_state == RDS_SESSION_STATE_FAILED) || 619 (sp->session_state == RDS_SESSION_STATE_FINI) || 620 (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING)); 621 622 rw_enter(&sp->session_lock, RW_READER); 623 RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d", sp, 624 sp->session_state); 625 while (!((sp->session_state == RDS_SESSION_STATE_CLOSED) || 626 (sp->session_state == RDS_SESSION_STATE_FAILED) || 627 (sp->session_state == RDS_SESSION_STATE_FINI))) { 628 rw_exit(&sp->session_lock); 629 delay(drv_usectohz(1000000)); 630 rw_enter(&sp->session_lock, RW_READER); 631 RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d WAITING " 632 "ON SESSION", sp, sp->session_state); 633 } 634 rw_exit(&sp->session_lock); 635 636 /* data channel */ 637 ep = &sp->session_dataep; 638 639 /* send pool locks */ 640 pool = &ep->ep_sndpool; 641 cv_destroy(&pool->pool_cv); 642 mutex_destroy(&pool->pool_lock); 643 644 /* recv pool locks */ 645 pool = &ep->ep_rcvpool; 646 cv_destroy(&pool->pool_cv); 647 mutex_destroy(&pool->pool_lock); 648 mutex_destroy(&ep->ep_recvqp.qp_lock); 649 650 /* control channel */ 651 ep = &sp->session_ctrlep; 652 653 /* send pool locks */ 654 pool = &ep->ep_sndpool; 655 cv_destroy(&pool->pool_cv); 656 mutex_destroy(&pool->pool_lock); 657 658 /* recv pool locks */ 659 pool = &ep->ep_rcvpool; 660 cv_destroy(&pool->pool_cv); 661 mutex_destroy(&pool->pool_lock); 662 mutex_destroy(&ep->ep_recvqp.qp_lock); 663 664 /* session */ 665 rw_destroy(&sp->session_lock); 666 rw_destroy(&sp->session_portmap_lock); 667 668 /* free the session */ 669 kmem_free(sp, sizeof (rds_session_t)); 670 671 RDS_DPRINTF2("rds_destroy_session", "SP(%p) Return", sp); 672 } 673 674 /* This is called on the taskq thread */ 675 static void 676 rds_failover_session(void *arg) 677 { 678 rds_session_t *sp = (rds_session_t *)arg; 679 ib_gid_t lgid, rgid; 680 ipaddr_t myip, remip; 681 int ret, cnt = 0; 682 683 RDS_DPRINTF2("rds_failover_session", "Enter: (%p)", sp); 684 685 RDS_INCR_FAILOVERS(); 686 687 rw_enter(&sp->session_lock, RW_WRITER); 688 if (sp->session_type != RDS_SESSION_ACTIVE) { 689 /* 690 * The remote side must have seen the error and initiated 691 * a re-connect. 692 */ 693 RDS_DPRINTF2("rds_failover_session", 694 "SP(%p) has become passive", sp); 695 rw_exit(&sp->session_lock); 696 return; 697 } 698 sp->session_failover++; 699 rw_exit(&sp->session_lock); 700 701 /* 702 * The session is in ERROR state but close both channels 703 * for a clean start. 704 */ 705 rds_session_close(sp, IBT_BLOCKING, 1); 706 707 /* wait 1 sec before re-connecting */ 708 delay(drv_usectohz(1000000)); 709 710 do { 711 /* The ipaddr should be in the network order */ 712 myip = sp->session_myip; 713 remip = sp->session_remip; 714 ret = rds_sc_path_lookup(&myip, &remip); 715 if (ret == 0) { 716 RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)", 717 myip, remip); 718 } 719 /* check if we have (new) path from the source to destination */ 720 ret = rds_get_ibaddr(htonl(myip), htonl(remip), &lgid, &rgid); 721 if (ret == 0) { 722 break; 723 } 724 725 RDS_DPRINTF1(LABEL, "rds_get_ibaddr failed: %d", ret); 726 /* wait 1 sec before re-trying */ 727 delay(drv_usectohz(1000000)); 728 cnt++; 729 } while (cnt < 3); 730 731 if (ret != 0) { 732 rw_enter(&sp->session_lock, RW_WRITER); 733 if (sp->session_type == RDS_SESSION_ACTIVE) { 734 rds_session_fini(sp); 735 sp->session_state = RDS_SESSION_STATE_FAILED; 736 RDS_DPRINTF3("rds_failover_session", 737 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 738 } else { 739 RDS_DPRINTF2("rds_failover_session", 740 "SP(%p) has become passive", sp); 741 } 742 rw_exit(&sp->session_lock); 743 return; 744 } 745 746 RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", 747 lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, 748 rgid.gid_guid); 749 750 rw_enter(&sp->session_lock, RW_WRITER); 751 if (sp->session_type != RDS_SESSION_ACTIVE) { 752 /* 753 * The remote side must have seen the error and initiated 754 * a re-connect. 755 */ 756 RDS_DPRINTF2("rds_failover_session", 757 "SP(%p) has become passive", sp); 758 rw_exit(&sp->session_lock); 759 return; 760 } 761 762 /* move the session to init state */ 763 ret = rds_session_reinit(sp, lgid); 764 sp->session_lgid = lgid; 765 sp->session_rgid = rgid; 766 if (ret != 0) { 767 rds_session_fini(sp); 768 sp->session_state = RDS_SESSION_STATE_FAILED; 769 RDS_DPRINTF3("rds_failover_session", 770 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 771 rw_exit(&sp->session_lock); 772 return; 773 } else { 774 sp->session_state = RDS_SESSION_STATE_INIT; 775 RDS_DPRINTF3("rds_failover_session", 776 "SP(%p) State RDS_SESSION_STATE_INIT", sp); 777 } 778 rw_exit(&sp->session_lock); 779 780 rds_session_open(sp); 781 782 RDS_DPRINTF2("rds_failover_session", "Return: (%p)", sp); 783 } 784 785 void 786 rds_handle_send_error(rds_ep_t *ep) 787 { 788 if (rds_is_sendq_empty(ep, 0)) { 789 /* Session should already be in ERROR, try to reconnect */ 790 RDS_DPRINTF2("rds_handle_send_error", 791 "Dispatching taskq to failover SP(%p)", ep->ep_sp); 792 (void) ddi_taskq_dispatch(rds_taskq, rds_failover_session, 793 (void *)ep->ep_sp, DDI_SLEEP); 794 } 795 } 796 797 /* 798 * Called in the CM handler on the passive side 799 * Called on a taskq thread. 800 */ 801 void 802 rds_cleanup_passive_session(void *arg) 803 { 804 rds_session_t *sp = arg; 805 806 RDS_DPRINTF2("rds_cleanup_passive_session", "SP(%p) State: %d", sp, 807 sp->session_state); 808 ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) || 809 (sp->session_state == RDS_SESSION_STATE_ERROR)); 810 811 rds_session_close(sp, IBT_BLOCKING, 1); 812 813 rw_enter(&sp->session_lock, RW_WRITER); 814 if (sp->session_state == RDS_SESSION_STATE_CLOSED) { 815 rds_session_fini(sp); 816 sp->session_state = RDS_SESSION_STATE_FINI; 817 RDS_DPRINTF3("rds_cleanup_passive_session", 818 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 819 } else if (sp->session_state == RDS_SESSION_STATE_ERROR) { 820 rds_session_fini(sp); 821 sp->session_state = RDS_SESSION_STATE_FAILED; 822 RDS_DPRINTF3("rds_cleanup_passive_session", 823 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 824 } 825 rw_exit(&sp->session_lock); 826 827 RDS_DPRINTF2("rds_cleanup_passive_session", "Return: SP (%p)", sp); 828 } 829 830 /* 831 * Called by the CM handler on the passive side 832 * Called with WRITE lock on the session 833 */ 834 void 835 rds_passive_session_fini(rds_session_t *sp) 836 { 837 rds_ep_t *ep; 838 839 RDS_DPRINTF2("rds_passive_session_fini", "SP(%p) State: %d", sp, 840 sp->session_state); 841 ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) || 842 (sp->session_state == RDS_SESSION_STATE_ERROR)); 843 844 /* clean the data channel */ 845 ep = &sp->session_dataep; 846 (void) rds_is_sendq_empty(ep, 1); 847 mutex_enter(&ep->ep_lock); 848 RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep, 849 ep->ep_state); 850 rds_ep_free_rc_channel(ep); 851 mutex_exit(&ep->ep_lock); 852 853 /* clean the control channel */ 854 ep = &sp->session_ctrlep; 855 (void) rds_is_sendq_empty(ep, 1); 856 mutex_enter(&ep->ep_lock); 857 RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep, 858 ep->ep_state); 859 rds_ep_free_rc_channel(ep); 860 mutex_exit(&ep->ep_lock); 861 862 rds_session_fini(sp); 863 864 RDS_DPRINTF2("rds_passive_session_fini", "Return: SP (%p)", sp); 865 } 866 867 /* 868 * Can be called: 869 * 1. on driver detach 870 * 2. on taskq thread 871 * arg is always NULL 872 */ 873 /* ARGSUSED */ 874 void 875 rds_close_sessions(void *arg) 876 { 877 rds_session_t *sp, *spnextp; 878 879 RDS_DPRINTF2("rds_close_sessions", "Enter"); 880 881 /* wait until all the buffers are freed by the sockets */ 882 while (RDS_GET_RXPKTS_PEND() != 0) { 883 /* wait one second and try again */ 884 RDS_DPRINTF2("rds_close_sessions", "waiting on " 885 "pending packets", RDS_GET_RXPKTS_PEND()); 886 delay(drv_usectohz(1000000)); 887 } 888 RDS_DPRINTF2("rds_close_sessions", "No more RX packets pending"); 889 890 /* close all the sessions */ 891 rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER); 892 sp = rdsib_statep->rds_sessionlistp; 893 while (sp) { 894 rw_enter(&sp->session_lock, RW_WRITER); 895 RDS_DPRINTF2("rds_close_sessions", "SP(%p) State: %d", sp, 896 sp->session_state); 897 898 switch (sp->session_state) { 899 case RDS_SESSION_STATE_CONNECTED: 900 sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; 901 rw_exit(&sp->session_lock); 902 903 rds_session_close(sp, IBT_BLOCKING, 2); 904 905 rw_enter(&sp->session_lock, RW_WRITER); 906 sp->session_state = RDS_SESSION_STATE_CLOSED; 907 RDS_DPRINTF3("rds_close_sessions", 908 "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); 909 rds_session_fini(sp); 910 sp->session_state = RDS_SESSION_STATE_FINI; 911 RDS_DPRINTF3("rds_close_sessions", 912 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 913 break; 914 915 case RDS_SESSION_STATE_ERROR: 916 case RDS_SESSION_STATE_PASSIVE_CLOSING: 917 case RDS_SESSION_STATE_INIT: 918 sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; 919 rw_exit(&sp->session_lock); 920 921 rds_session_close(sp, IBT_BLOCKING, 1); 922 923 rw_enter(&sp->session_lock, RW_WRITER); 924 sp->session_state = RDS_SESSION_STATE_CLOSED; 925 RDS_DPRINTF3("rds_close_sessions", 926 "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); 927 /* FALLTHRU */ 928 case RDS_SESSION_STATE_CLOSED: 929 rds_session_fini(sp); 930 sp->session_state = RDS_SESSION_STATE_FINI; 931 RDS_DPRINTF3("rds_close_sessions", 932 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 933 break; 934 } 935 936 rw_exit(&sp->session_lock); 937 sp = sp->session_nextp; 938 } 939 940 sp = rdsib_statep->rds_sessionlistp; 941 rdsib_statep->rds_sessionlistp = NULL; 942 rdsib_statep->rds_nsessions = 0; 943 rw_exit(&rdsib_statep->rds_sessionlock); 944 945 while (sp) { 946 spnextp = sp->session_nextp; 947 rds_destroy_session(sp); 948 RDS_DECR_SESS(); 949 sp = spnextp; 950 } 951 952 /* free the global pool */ 953 rds_free_recv_caches(rdsib_statep); 954 955 RDS_DPRINTF2("rds_close_sessions", "Return"); 956 } 957 958 void 959 rds_session_open(rds_session_t *sp) 960 { 961 int ret; 962 963 RDS_DPRINTF2("rds_session_open", "Enter SP(%p)", sp); 964 965 ret = rds_session_connect(sp); 966 if (ret == -1) { 967 /* 968 * may be the session has become passive due to 969 * hitting peer-to-peer case 970 */ 971 rw_enter(&sp->session_lock, RW_READER); 972 if (sp->session_type == RDS_SESSION_PASSIVE) { 973 RDS_DPRINTF2("rds_session_open", "SP(%p) " 974 "has become passive from active", sp); 975 rw_exit(&sp->session_lock); 976 return; 977 } 978 979 /* get the lock for writing */ 980 rw_exit(&sp->session_lock); 981 rw_enter(&sp->session_lock, RW_WRITER); 982 sp->session_state = RDS_SESSION_STATE_ERROR; 983 RDS_DPRINTF3("rds_session_open", 984 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 985 rw_exit(&sp->session_lock); 986 987 /* Connect request failed */ 988 rds_session_close(sp, IBT_BLOCKING, 1); 989 990 rw_enter(&sp->session_lock, RW_WRITER); 991 rds_session_fini(sp); 992 sp->session_state = RDS_SESSION_STATE_FAILED; 993 RDS_DPRINTF3("rds_session_open", 994 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 995 rw_exit(&sp->session_lock); 996 997 return; 998 } 999 1000 RDS_DPRINTF2("rds_session_open", "Return: SP(%p)", sp); 1001 } 1002 1003 /* 1004 * Creates a session and inserts it into the list of sessions. The session 1005 * state would be CREATED. 1006 * Return Values: 1007 * EWOULDBLOCK 1008 */ 1009 rds_session_t * 1010 rds_session_create(rds_state_t *statep, ipaddr_t localip, ipaddr_t remip, 1011 ibt_cm_req_rcv_t *reqp, uint8_t type) 1012 { 1013 ib_gid_t lgid, rgid; 1014 rds_session_t *newp, *oldp; 1015 rds_ep_t *dataep, *ctrlep; 1016 rds_bufpool_t *pool; 1017 rds_hca_t *hcap; 1018 int ret; 1019 1020 RDS_DPRINTF2("rds_session_create", "Enter: 0x%p 0x%x 0x%x", 1021 statep, localip, remip); 1022 1023 /* Allocate and initialize global buffer pool */ 1024 ret = rds_init_recv_caches(statep); 1025 if (ret != 0) { 1026 RDS_DPRINTF2(LABEL, "Buffer Cache Initialization failed"); 1027 return (NULL); 1028 } 1029 1030 /* enough memory for session (includes 2 endpoints) */ 1031 newp = kmem_zalloc(sizeof (rds_session_t), KM_SLEEP); 1032 1033 newp->session_remip = remip; 1034 newp->session_myip = localip; 1035 newp->session_type = type; 1036 newp->session_state = RDS_SESSION_STATE_CREATED; 1037 RDS_DPRINTF3("rds_session_create", 1038 "SP(%p) State RDS_SESSION_STATE_CREATED", newp); 1039 rw_init(&newp->session_lock, NULL, RW_DRIVER, NULL); 1040 rw_init(&newp->session_portmap_lock, NULL, RW_DRIVER, NULL); 1041 1042 /* Initialize data endpoint */ 1043 dataep = &newp->session_dataep; 1044 dataep->ep_remip = newp->session_remip; 1045 dataep->ep_myip = newp->session_myip; 1046 dataep->ep_state = RDS_EP_STATE_UNCONNECTED; 1047 dataep->ep_sp = newp; 1048 dataep->ep_type = RDS_EP_TYPE_DATA; 1049 mutex_init(&dataep->ep_lock, NULL, MUTEX_DRIVER, NULL); 1050 1051 /* Initialize send pool locks */ 1052 pool = &dataep->ep_sndpool; 1053 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1054 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1055 1056 /* Initialize recv pool locks */ 1057 pool = &dataep->ep_rcvpool; 1058 mutex_init(&dataep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1059 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1060 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1061 1062 /* Initialize control endpoint */ 1063 ctrlep = &newp->session_ctrlep; 1064 ctrlep->ep_remip = newp->session_remip; 1065 ctrlep->ep_myip = newp->session_myip; 1066 ctrlep->ep_state = RDS_EP_STATE_UNCONNECTED; 1067 ctrlep->ep_sp = newp; 1068 ctrlep->ep_type = RDS_EP_TYPE_CTRL; 1069 mutex_init(&ctrlep->ep_lock, NULL, MUTEX_DRIVER, NULL); 1070 1071 /* Initialize send pool locks */ 1072 pool = &ctrlep->ep_sndpool; 1073 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1074 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1075 1076 /* Initialize recv pool locks */ 1077 pool = &ctrlep->ep_rcvpool; 1078 mutex_init(&ctrlep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1079 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1080 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1081 1082 /* lkup if there is already a session */ 1083 rw_enter(&statep->rds_sessionlock, RW_WRITER); 1084 oldp = rds_session_lkup(statep, remip, 0); 1085 if (oldp != NULL) { 1086 /* A session to this destination exists */ 1087 rw_exit(&statep->rds_sessionlock); 1088 rw_destroy(&newp->session_lock); 1089 rw_destroy(&newp->session_portmap_lock); 1090 mutex_destroy(&dataep->ep_lock); 1091 mutex_destroy(&ctrlep->ep_lock); 1092 kmem_free(newp, sizeof (rds_session_t)); 1093 return (NULL); 1094 } 1095 1096 /* Insert this session into the list */ 1097 rds_add_session(newp, B_TRUE); 1098 1099 /* unlock the session list */ 1100 rw_exit(&statep->rds_sessionlock); 1101 1102 if (type == RDS_SESSION_ACTIVE) { 1103 ipaddr_t localip1, remip1; 1104 1105 /* The ipaddr should be in the network order */ 1106 localip1 = localip; 1107 remip1 = remip; 1108 ret = rds_sc_path_lookup(&localip1, &remip1); 1109 if (ret == 0) { 1110 RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)", 1111 localip, remip); 1112 } 1113 1114 /* Get the gids for the source and destination ip addrs */ 1115 ret = rds_get_ibaddr(ntohl(localip1), ntohl(remip1), 1116 &lgid, &rgid); 1117 if (ret != 0) { 1118 RDS_DPRINTF1(LABEL, "rds_get_ibaddr failed: %d", ret); 1119 RDS_SESSION_TRANSITION(newp, RDS_SESSION_STATE_FAILED); 1120 return (NULL); 1121 } 1122 1123 RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", 1124 lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, 1125 rgid.gid_guid); 1126 } 1127 1128 rw_enter(&newp->session_lock, RW_WRITER); 1129 /* check for peer-to-peer case */ 1130 if (type == newp->session_type) { 1131 /* no peer-to-peer case */ 1132 if (type == RDS_SESSION_ACTIVE) { 1133 newp->session_lgid = lgid; 1134 newp->session_rgid = rgid; 1135 } else { 1136 /* rgid is requester gid & lgid is receiver gid */ 1137 newp->session_rgid = reqp->req_prim_addr.av_dgid; 1138 newp->session_lgid = reqp->req_prim_addr.av_sgid; 1139 } 1140 1141 hcap = rds_gid_to_hcap(statep, newp->session_lgid); 1142 if (hcap == NULL) { 1143 RDS_DPRINTF1(LABEL, "SGID is on an uninitialized " 1144 "HCA: %llx", newp->session_lgid.gid_guid); 1145 newp->session_state = RDS_SESSION_STATE_FAILED; 1146 RDS_DPRINTF3("rds_session_create", 1147 "SP(%p) State RDS_SESSION_STATE_FAILED", newp); 1148 rw_exit(&newp->session_lock); 1149 return (NULL); 1150 } 1151 dataep->ep_hca_guid = hcap->hca_guid; 1152 ctrlep->ep_hca_guid = hcap->hca_guid; 1153 } 1154 rw_exit(&newp->session_lock); 1155 1156 RDS_DPRINTF2("rds_session_create", "Return SP(%p)", newp); 1157 1158 return (newp); 1159 } 1160 1161 void 1162 rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cpkt) 1163 { 1164 cpkt->rcp_port = cpkt->rcp_port; 1165 RDS_DPRINTF4("rds_handle_control_message", "Enter: SP(%p) code: %d " 1166 "port: %d", sp, cpkt->rcp_code, cpkt->rcp_port); 1167 1168 switch (cpkt->rcp_code) { 1169 case RDS_CTRL_CODE_STALL: 1170 RDS_INCR_STALLS_RCVD(); 1171 (void) rds_check_n_mark_port(sp, cpkt->rcp_port); 1172 break; 1173 case RDS_CTRL_CODE_UNSTALL: 1174 RDS_INCR_UNSTALLS_RCVD(); 1175 (void) rds_check_n_unmark_port(sp, cpkt->rcp_port); 1176 break; 1177 case RDS_CTRL_CODE_STALL_PORTS: 1178 rds_mark_all_ports(sp); 1179 break; 1180 case RDS_CTRL_CODE_UNSTALL_PORTS: 1181 rds_unmark_all_ports(sp); 1182 break; 1183 case RDS_CTRL_CODE_HEARTBEAT: 1184 break; 1185 default: 1186 RDS_DPRINTF2(LABEL, "ERROR: Invalid Control code: %d", 1187 cpkt->rcp_code); 1188 break; 1189 } 1190 1191 RDS_DPRINTF4("rds_handle_control_message", "Return"); 1192 } 1193 1194 void 1195 rds_post_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cpkt) 1196 { 1197 ibt_send_wr_t wr; 1198 rds_ep_t *ep; 1199 rds_buf_t *bp; 1200 rds_ctrl_pkt_t *cp; 1201 int ret; 1202 1203 RDS_DPRINTF4("rds_post_control_message", "Enter: SP(%p) Code: %d " 1204 "Port: %d", sp, cpkt->rcp_code, cpkt->rcp_port); 1205 1206 ep = &sp->session_ctrlep; 1207 1208 bp = rds_get_send_buf(ep, 1); 1209 if (bp == NULL) { 1210 RDS_DPRINTF2(LABEL, "No buffers available to send control " 1211 "message: SP(%p) Code: %d Port: %d", sp, cpkt->rcp_code, 1212 cpkt->rcp_port); 1213 return; 1214 } 1215 1216 cp = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va; 1217 cp->rcp_code = cpkt->rcp_code; 1218 cp->rcp_port = cpkt->rcp_port; 1219 bp->buf_ds.ds_len = RDS_CTRLPKT_SIZE; 1220 1221 wr.wr_id = (uintptr_t)bp; 1222 wr.wr_flags = IBT_WR_SEND_SOLICIT; 1223 wr.wr_trans = IBT_RC_SRV; 1224 wr.wr_opcode = IBT_WRC_SEND; 1225 wr.wr_nds = 1; 1226 wr.wr_sgl = &bp->buf_ds; 1227 RDS_DPRINTF5(LABEL, "ds_va %p ds_len %d ds_lkey 0x%llx", 1228 bp->buf_ds.ds_va, bp->buf_ds.ds_len, bp->buf_ds.ds_key); 1229 ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL); 1230 if (ret != IBT_SUCCESS) { 1231 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: " 1232 "%d", ep, ret); 1233 bp->buf_state = RDS_SNDBUF_FREE; 1234 rds_free_send_buf(ep, bp, NULL, 1, B_FALSE); 1235 return; 1236 } 1237 1238 RDS_DPRINTF4("rds_post_control_message", "Return SP(%p) Code: %d " 1239 "Port: %d", sp, cpkt->rcp_code, cpkt->rcp_port); 1240 } 1241 1242 void 1243 rds_send_control_message(void *arg) 1244 { 1245 rds_buf_t *bp; 1246 rds_ctrl_pkt_t *cp; 1247 rds_session_t *sp; 1248 uint_t ix; 1249 1250 RDS_DPRINTF4("rds_send_control_message", "Enter"); 1251 1252 bp = (rds_buf_t *)arg; 1253 cp = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va; 1254 1255 /* send the stall message on all sessions */ 1256 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 1257 1258 sp = rdsib_statep->rds_sessionlistp; 1259 for (ix = 0; ix < rdsib_statep->rds_nsessions; ix++) { 1260 ASSERT(sp != NULL); 1261 if (sp->session_state == RDS_SESSION_STATE_CONNECTED) { 1262 rds_post_control_message(sp, cp); 1263 } 1264 1265 sp = sp->session_nextp; 1266 } 1267 1268 rw_exit(&rdsib_statep->rds_sessionlock); 1269 1270 /* free the arg */ 1271 rds_free_buf(&rds_cpool, bp, 1); 1272 1273 RDS_DPRINTF4("rds_send_control_message", "Return"); 1274 } 1275 1276 void 1277 rds_stall_port(in_port_t port) 1278 { 1279 rds_ctrl_pkt_t *cpkt; 1280 rds_buf_t *bp; 1281 uint_t ix; 1282 1283 RDS_DPRINTF4("rds_stall_port", "Enter: Port %d", port); 1284 1285 RDS_INCR_STALLS_TRIGGERED(); 1286 if (!rds_check_n_mark_port(NULL, port)) { 1287 1288 bp = rds_get_buf(&rds_cpool, 1, &ix); 1289 if (bp == NULL) { 1290 RDS_DPRINTF2(LABEL, "No buffers available " 1291 "to send control message: Code: %d " 1292 "Local Port: %d", RDS_CTRL_CODE_STALL, port); 1293 (void) rds_check_n_unmark_port(NULL, port); 1294 return; 1295 } 1296 1297 cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va; 1298 cpkt->rcp_code = RDS_CTRL_CODE_STALL; 1299 cpkt->rcp_port = port; 1300 #if 0 1301 /* 1302 * Taskq runs at some later point in time and the port may 1303 * not be in stall state anymore at that time. 1304 */ 1305 (void) ddi_taskq_dispatch(rds_taskq, 1306 rds_send_control_message, (void *)bp, DDI_SLEEP); 1307 #else 1308 rds_send_control_message((void *)bp); 1309 #endif 1310 RDS_INCR_STALLS_SENT(); 1311 } else { 1312 RDS_DPRINTF3(LABEL, 1313 "Port %d is already in stall state", port); 1314 } 1315 1316 RDS_DPRINTF4("rds_stall_port", "Return: Port %d", port); 1317 } 1318 1319 void 1320 rds_resume_port(in_port_t port) 1321 { 1322 rds_ctrl_pkt_t *cpkt; 1323 rds_buf_t *bp; 1324 uint_t ix; 1325 1326 RDS_DPRINTF4("rds_resume_port", "Enter: Port %d", port); 1327 1328 RDS_INCR_UNSTALLS_TRIGGERED(); 1329 if (rds_check_n_unmark_port(NULL, port)) { 1330 1331 bp = rds_get_buf(&rds_cpool, 1, &ix); 1332 if (bp == NULL) { 1333 RDS_DPRINTF2(LABEL, "No buffers available " 1334 "to send control message: Code: %d " 1335 "Local Port: %d", RDS_CTRL_CODE_UNSTALL, port); 1336 (void) rds_check_n_mark_port(NULL, port); 1337 return; 1338 } 1339 1340 /* send control message to resume the port for remote traffic */ 1341 cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va; 1342 cpkt->rcp_code = RDS_CTRL_CODE_UNSTALL; 1343 cpkt->rcp_port = port; 1344 (void) ddi_taskq_dispatch(rds_taskq, 1345 rds_send_control_message, (void *)bp, DDI_SLEEP); 1346 RDS_INCR_UNSTALLS_SENT(); 1347 } else { 1348 RDS_DPRINTF5(LABEL, 1349 "Port %d is not stalled anymore", port); 1350 } 1351 1352 RDS_DPRINTF4("rds_resume_port", "Return: Port %d", port); 1353 } 1354 1355 static int 1356 rds_build_n_post_msg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport, 1357 in_port_t recvport) 1358 { 1359 ibt_send_wr_t *wrp, wr; 1360 rds_buf_t *bp, *bp1; 1361 rds_data_hdr_t *pktp; 1362 uint32_t msgsize, npkts, residual, pktno, ix; 1363 int ret; 1364 1365 RDS_DPRINTF4("rds_build_n_post_msg", "Enter: EP(%p) UIOP(%p)", 1366 ep, uiop); 1367 1368 /* how many pkts are needed to carry this msg */ 1369 msgsize = uiop->uio_resid; 1370 npkts = ((msgsize - 1) / UserBufferSize) + 1; 1371 residual = ((msgsize - 1) % UserBufferSize) + 1; 1372 1373 RDS_DPRINTF5(LABEL, "EP(%p) UIOP(%p) msg size: %d npkts: %d", ep, uiop, 1374 msgsize, npkts); 1375 1376 /* Get the buffers needed to post this message */ 1377 bp = rds_get_send_buf(ep, npkts); 1378 if (bp == NULL) { 1379 RDS_INCR_ENOBUFS(); 1380 return (ENOBUFS); 1381 } 1382 1383 if (npkts > 1) { 1384 /* 1385 * multi-pkt messages are posted at the same time as a list 1386 * of WRs 1387 */ 1388 wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) * 1389 npkts, KM_SLEEP); 1390 } 1391 1392 1393 pktno = 0; 1394 bp1 = bp; 1395 do { 1396 /* prepare the header */ 1397 pktp = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va; 1398 pktp->dh_datalen = UserBufferSize; 1399 pktp->dh_npkts = npkts - pktno; 1400 pktp->dh_psn = pktno; 1401 pktp->dh_sendport = sendport; 1402 pktp->dh_recvport = recvport; 1403 bp1->buf_ds.ds_len = RdsPktSize; 1404 1405 /* copy the data */ 1406 ret = uiomove((uint8_t *)pktp + RDS_DATA_HDR_SZ, 1407 UserBufferSize, UIO_WRITE, uiop); 1408 if (ret != 0) { 1409 break; 1410 } 1411 1412 if (uiop->uio_resid == 0) { 1413 pktp->dh_datalen = residual; 1414 bp1->buf_ds.ds_len = residual + RDS_DATA_HDR_SZ; 1415 break; 1416 } 1417 pktno++; 1418 bp1 = bp1->buf_nextp; 1419 } while (uiop->uio_resid); 1420 1421 if (ret) { 1422 /* uiomove failed */ 1423 RDS_DPRINTF2("rds_build_n_post_msg", "UIO(%p) Move FAILED: %d", 1424 uiop, ret); 1425 if (npkts > 1) { 1426 kmem_free(wrp, npkts * sizeof (ibt_send_wr_t)); 1427 } 1428 rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE); 1429 return (ret); 1430 } 1431 1432 if (npkts > 1) { 1433 /* multi-pkt message */ 1434 RDS_DPRINTF5(LABEL, "EP(%p) Sending Multiple Packets", ep); 1435 1436 bp1 = bp; 1437 for (ix = 0; ix < npkts; ix++) { 1438 wrp[ix].wr_id = (uintptr_t)bp1; 1439 wrp[ix].wr_flags = IBT_WR_NO_FLAGS; 1440 wrp[ix].wr_trans = IBT_RC_SRV; 1441 wrp[ix].wr_opcode = IBT_WRC_SEND; 1442 wrp[ix].wr_nds = 1; 1443 wrp[ix].wr_sgl = &bp1->buf_ds; 1444 bp1 = bp1->buf_nextp; 1445 } 1446 wrp[npkts - 1].wr_flags = IBT_WR_SEND_SOLICIT; 1447 1448 ret = ibt_post_send(ep->ep_chanhdl, wrp, npkts, &ix); 1449 if (ret != IBT_SUCCESS) { 1450 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: " 1451 "%d for %d pkts", ep, ret, npkts); 1452 rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE); 1453 kmem_free(wrp, npkts * sizeof (ibt_send_wr_t)); 1454 return (ret); 1455 } 1456 1457 kmem_free(wrp, npkts * sizeof (ibt_send_wr_t)); 1458 } else { 1459 /* single pkt */ 1460 RDS_DPRINTF5(LABEL, "EP(%p) Sending Single Packet", ep); 1461 wr.wr_id = (uintptr_t)bp; 1462 wr.wr_flags = IBT_WR_SEND_SOLICIT; 1463 wr.wr_trans = IBT_RC_SRV; 1464 wr.wr_opcode = IBT_WRC_SEND; 1465 wr.wr_nds = 1; 1466 wr.wr_sgl = &bp->buf_ds; 1467 RDS_DPRINTF5(LABEL, "ds_va %p ds_key 0x%llx ds_len %d ", 1468 bp->buf_ds.ds_va, bp->buf_ds.ds_key, bp->buf_ds.ds_len); 1469 ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL); 1470 if (ret != IBT_SUCCESS) { 1471 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: " 1472 "%d", ep, ret); 1473 rds_free_send_buf(ep, bp, NULL, 1, B_FALSE); 1474 return (ret); 1475 } 1476 } 1477 1478 RDS_INCR_TXPKTS(npkts); 1479 RDS_INCR_TXBYTES(msgsize); 1480 1481 RDS_DPRINTF4("rds_build_n_post_msg", "Return: EP(%p) UIOP(%p)", 1482 ep, uiop); 1483 1484 return (0); 1485 } 1486 1487 static int 1488 rds_deliver_loopback_msg(uio_t *uiop, ipaddr_t recvip, ipaddr_t sendip, 1489 in_port_t recvport, in_port_t sendport, zoneid_t zoneid) 1490 { 1491 mblk_t *mp; 1492 int ret; 1493 1494 RDS_DPRINTF4("rds_deliver_loopback_msg", "Enter"); 1495 1496 RDS_DPRINTF3(LABEL, "Loopback message: sendport: " 1497 "%d to recvport: %d", sendport, recvport); 1498 1499 mp = allocb(uiop->uio_resid, BPRI_MED); 1500 if (mp == NULL) { 1501 RDS_DPRINTF2(LABEL, "allocb failed, size: %d\n", 1502 uiop->uio_resid); 1503 return (ENOSPC); 1504 } 1505 mp->b_wptr = mp->b_rptr + uiop->uio_resid; 1506 1507 ret = uiomove(mp->b_rptr, uiop->uio_resid, UIO_WRITE, uiop); 1508 if (ret) { 1509 RDS_DPRINTF2(LABEL, "ERROR: uiomove returned: %d", ret); 1510 freeb(mp); 1511 return (ret); 1512 } 1513 1514 ret = rds_deliver_new_msg(mp, recvip, sendip, recvport, sendport, 1515 zoneid); 1516 if (ret != 0) { 1517 if (ret == ENOSPC) { 1518 /* 1519 * The message is delivered but cannot take more, 1520 * stall the port, if it is not already stalled 1521 */ 1522 RDS_DPRINTF2(LABEL, "Port %d NO SPACE", recvport); 1523 rds_stall_port(recvport); 1524 } else { 1525 RDS_DPRINTF2(LABEL, "Loopback message: port %d -> " 1526 "port %d failed: %d", sendport, recvport, ret); 1527 return (ret); 1528 } 1529 } 1530 1531 RDS_DPRINTF4("rds_deliver_loopback_msg", "Return"); 1532 return (0); 1533 } 1534 1535 static void 1536 rds_resend_messages(void *arg) 1537 { 1538 rds_session_t *sp = (rds_session_t *)arg; 1539 rds_ep_t *ep; 1540 rds_bufpool_t *spool; 1541 rds_buf_t *bp, *endp, *tmp; 1542 ibt_send_wr_t *wrp; 1543 uint_t nwr = 0, ix, jx; 1544 int ret; 1545 1546 RDS_DPRINTF2("rds_resend_messages", "Enter: SP(%p)", sp); 1547 1548 ep = &sp->session_dataep; 1549 1550 spool = &ep->ep_sndpool; 1551 mutex_enter(&spool->pool_lock); 1552 1553 ASSERT(spool->pool_nfree == spool->pool_nbuffers); 1554 1555 if (ep->ep_lbufid == NULL) { 1556 RDS_DPRINTF2("rds_resend_messages", 1557 "SP(%p) Remote session is cleaned up ", sp); 1558 /* 1559 * The remote end cleaned up its session. There may be loss 1560 * of messages. Mark all buffers as acknowledged. 1561 */ 1562 tmp = spool->pool_tailp; 1563 } else { 1564 tmp = (rds_buf_t *)ep->ep_lbufid; 1565 RDS_DPRINTF2("rds_resend_messages", 1566 "SP(%p) Last successful BP(%p) ", sp, tmp); 1567 } 1568 1569 endp = spool->pool_tailp; 1570 bp = spool->pool_headp; 1571 jx = 0; 1572 while ((bp != NULL) && (bp != tmp)) { 1573 bp->buf_state = RDS_SNDBUF_FREE; 1574 jx++; 1575 bp = bp->buf_nextp; 1576 } 1577 1578 if (bp == NULL) { 1579 mutex_exit(&spool->pool_lock); 1580 RDS_DPRINTF2("rds_resend_messages", "Alert: lbufid(%p) is not " 1581 "found in the list", tmp); 1582 1583 rw_enter(&sp->session_lock, RW_WRITER); 1584 if (sp->session_state == RDS_SESSION_STATE_INIT) { 1585 sp->session_state = RDS_SESSION_STATE_CONNECTED; 1586 } else { 1587 RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d " 1588 "Expected State: %d", sp, sp->session_state, 1589 RDS_SESSION_STATE_CONNECTED); 1590 } 1591 sp->session_failover--; 1592 rw_exit(&sp->session_lock); 1593 return; 1594 } 1595 1596 /* Found the match */ 1597 bp->buf_state = RDS_SNDBUF_FREE; 1598 jx++; 1599 1600 spool->pool_tailp = bp; 1601 bp = bp->buf_nextp; 1602 spool->pool_tailp->buf_nextp = NULL; 1603 nwr = spool->pool_nfree - jx; 1604 spool->pool_nfree = jx; 1605 mutex_exit(&spool->pool_lock); 1606 1607 RDS_DPRINTF2("rds_resend_messages", "SP(%p): Number of " 1608 "bufs (BP %p) to re-send: %d", sp, bp, nwr); 1609 1610 if (bp) { 1611 wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) * 100, 1612 KM_SLEEP); 1613 1614 while (nwr) { 1615 jx = (nwr > 100) ? 100 : nwr; 1616 1617 tmp = bp; 1618 for (ix = 0; ix < jx; ix++) { 1619 bp->buf_state = RDS_SNDBUF_PENDING; 1620 wrp[ix].wr_id = (uintptr_t)bp; 1621 wrp[ix].wr_flags = IBT_WR_SEND_SOLICIT; 1622 wrp[ix].wr_trans = IBT_RC_SRV; 1623 wrp[ix].wr_opcode = IBT_WRC_SEND; 1624 wrp[ix].wr_nds = 1; 1625 wrp[ix].wr_sgl = &bp->buf_ds; 1626 bp = bp->buf_nextp; 1627 } 1628 1629 ret = ibt_post_send(ep->ep_chanhdl, wrp, jx, &ix); 1630 if (ret != IBT_SUCCESS) { 1631 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send " 1632 "failed: %d for % pkts", ep, ret, jx); 1633 break; 1634 } 1635 1636 mutex_enter(&spool->pool_lock); 1637 spool->pool_nbusy += jx; 1638 mutex_exit(&spool->pool_lock); 1639 1640 nwr -= jx; 1641 } 1642 1643 kmem_free(wrp, sizeof (ibt_send_wr_t) * 100); 1644 1645 if (nwr != 0) { 1646 1647 /* 1648 * An error while failover is in progress. Some WRs are 1649 * posted while other remain. If any of the posted WRs 1650 * complete in error then they would dispatch a taskq to 1651 * do a failover. Getting the session lock will prevent 1652 * the taskq to wait until we are done here. 1653 */ 1654 rw_enter(&sp->session_lock, RW_READER); 1655 1656 /* 1657 * Wait until all the previous WRs are completed and 1658 * then queue the remaining, otherwise the order of 1659 * the messages may change. 1660 */ 1661 (void) rds_is_sendq_empty(ep, 1); 1662 1663 /* free the remaining buffers */ 1664 rds_free_send_buf(ep, tmp, endp, nwr, B_FALSE); 1665 1666 rw_exit(&sp->session_lock); 1667 return; 1668 } 1669 } 1670 1671 rw_enter(&sp->session_lock, RW_WRITER); 1672 if (sp->session_state == RDS_SESSION_STATE_INIT) { 1673 sp->session_state = RDS_SESSION_STATE_CONNECTED; 1674 } else { 1675 RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d " 1676 "Expected State: %d", sp, sp->session_state, 1677 RDS_SESSION_STATE_CONNECTED); 1678 } 1679 sp->session_failover--; 1680 rw_exit(&sp->session_lock); 1681 1682 RDS_DPRINTF2("rds_resend_messages", "Return: SP(%p)", sp); 1683 } 1684 1685 /* 1686 * This is called when a channel is connected. Transition the session to 1687 * CONNECTED state iff both channels are connected. 1688 */ 1689 void 1690 rds_session_active(rds_session_t *sp) 1691 { 1692 rds_ep_t *ep; 1693 uint_t failover; 1694 1695 RDS_DPRINTF2("rds_session_active", "Enter: 0x%p", sp); 1696 1697 rw_enter(&sp->session_lock, RW_READER); 1698 1699 failover = sp->session_failover; 1700 1701 /* 1702 * we establish the data channel first, so check the control channel 1703 * first but make sure it is initialized. 1704 */ 1705 ep = &sp->session_ctrlep; 1706 mutex_enter(&ep->ep_lock); 1707 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 1708 /* the session is not ready yet */ 1709 mutex_exit(&ep->ep_lock); 1710 rw_exit(&sp->session_lock); 1711 return; 1712 } 1713 mutex_exit(&ep->ep_lock); 1714 1715 /* control channel is connected, check the data channel */ 1716 ep = &sp->session_dataep; 1717 mutex_enter(&ep->ep_lock); 1718 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 1719 /* data channel is not yet connected */ 1720 mutex_exit(&ep->ep_lock); 1721 rw_exit(&sp->session_lock); 1722 return; 1723 } 1724 mutex_exit(&ep->ep_lock); 1725 1726 if (failover) { 1727 rw_exit(&sp->session_lock); 1728 1729 /* 1730 * The session has failed over. Previous msgs have to be 1731 * re-sent before the session is moved to the connected 1732 * state. 1733 */ 1734 RDS_DPRINTF2("rds_session_active", "SP(%p) Dispatching taskq " 1735 "to re-send messages", sp); 1736 (void) ddi_taskq_dispatch(rds_taskq, 1737 rds_resend_messages, (void *)sp, DDI_SLEEP); 1738 return; 1739 } 1740 1741 /* the session is ready */ 1742 sp->session_state = RDS_SESSION_STATE_CONNECTED; 1743 RDS_DPRINTF3("rds_session_active", 1744 "SP(%p) State RDS_SESSION_STATE_CONNECTED", sp); 1745 1746 rw_exit(&sp->session_lock); 1747 1748 RDS_DPRINTF2("rds_session_active", "Return: SP(%p) is CONNECTED", sp); 1749 } 1750 1751 static int 1752 rds_ep_sendmsg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport, 1753 in_port_t recvport) 1754 { 1755 int ret; 1756 1757 RDS_DPRINTF4("rds_ep_sendmsg", "Enter: EP(%p) sendport: %d recvport: " 1758 "%d", ep, sendport, recvport); 1759 1760 /* make sure the port is not stalled */ 1761 if (rds_is_port_marked(ep->ep_sp, recvport)) { 1762 RDS_DPRINTF2(LABEL, "SP(%p) Port:%d is in stall state", 1763 ep->ep_sp, recvport); 1764 RDS_INCR_EWOULDBLOCK(); 1765 ret = ENOMEM; 1766 } else { 1767 ret = rds_build_n_post_msg(ep, uiop, sendport, recvport); 1768 } 1769 1770 RDS_DPRINTF4("rds_ep_sendmsg", "Return: EP(%p)", ep); 1771 1772 return (ret); 1773 } 1774 1775 /* Send a message to a destination socket */ 1776 int 1777 rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, in_port_t sendport, 1778 in_port_t recvport, zoneid_t zoneid) 1779 { 1780 rds_session_t *sp; 1781 ib_gid_t lgid, rgid; 1782 rds_hca_t *hcap; 1783 int ret; 1784 1785 RDS_DPRINTF4("rds_sendmsg", "Enter: uiop: 0x%p, srcIP: 0x%x destIP: " 1786 "0x%x sndport: %d recvport: %d", uiop, sendip, recvip, 1787 sendport, recvport); 1788 1789 /* If msg length is 0, just return success */ 1790 if (uiop->uio_resid == 0) { 1791 RDS_DPRINTF2("rds_sendmsg", "Zero sized message"); 1792 return (0); 1793 } 1794 1795 /* Is there a session to the destination? */ 1796 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 1797 sp = rds_session_lkup(rdsib_statep, recvip, 0); 1798 rw_exit(&rdsib_statep->rds_sessionlock); 1799 1800 /* Is this a loopback message? */ 1801 if ((sp == NULL) && (rds_islocal(recvip))) { 1802 /* make sure the port is not stalled */ 1803 if (rds_is_port_marked(NULL, recvport)) { 1804 RDS_DPRINTF2(LABEL, "Local Port:%d is in stall state", 1805 recvport); 1806 RDS_INCR_EWOULDBLOCK(); 1807 return (ENOMEM); 1808 } 1809 ret = rds_deliver_loopback_msg(uiop, recvip, sendip, recvport, 1810 sendport, zoneid); 1811 return (ret); 1812 } 1813 1814 /* Not a loopback message */ 1815 if (sp == NULL) { 1816 /* There is no session to the destination, create one. */ 1817 RDS_DPRINTF3(LABEL, "There is no session to the destination " 1818 "IP: 0x%x", recvip); 1819 sp = rds_session_create(rdsib_statep, sendip, recvip, NULL, 1820 RDS_SESSION_ACTIVE); 1821 if (sp != NULL) { 1822 rw_enter(&sp->session_lock, RW_WRITER); 1823 if (sp->session_type == RDS_SESSION_ACTIVE) { 1824 ret = rds_session_init(sp); 1825 if (ret != 0) { 1826 RDS_DPRINTF2("rds_sendmsg", 1827 "SP(%p): rds_session_init failed", 1828 sp); 1829 sp->session_state = 1830 RDS_SESSION_STATE_FAILED; 1831 RDS_DPRINTF3("rds_sendmsg", 1832 "SP(%p) State " 1833 "RDS_SESSION_STATE_FAILED", sp); 1834 rw_exit(&sp->session_lock); 1835 return (EFAULT); 1836 } 1837 sp->session_state = RDS_SESSION_STATE_INIT; 1838 RDS_DPRINTF3("rds_sendmsg", 1839 "SP(%p) State " 1840 "RDS_SESSION_STATE_INIT", sp); 1841 rw_exit(&sp->session_lock); 1842 rds_session_open(sp); 1843 } else { 1844 rw_exit(&sp->session_lock); 1845 } 1846 } else { 1847 /* Is a session created for this destination */ 1848 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 1849 sp = rds_session_lkup(rdsib_statep, recvip, 0); 1850 rw_exit(&rdsib_statep->rds_sessionlock); 1851 if (sp == NULL) { 1852 return (EFAULT); 1853 } 1854 } 1855 } 1856 1857 /* There is a session to the destination */ 1858 rw_enter(&sp->session_lock, RW_READER); 1859 if (sp->session_state == RDS_SESSION_STATE_CONNECTED) { 1860 rw_exit(&sp->session_lock); 1861 1862 ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport, 1863 recvport); 1864 return (ret); 1865 } else if ((sp->session_state == RDS_SESSION_STATE_FAILED) || 1866 (sp->session_state == RDS_SESSION_STATE_FINI)) { 1867 ipaddr_t sendip1, recvip1; 1868 1869 RDS_DPRINTF3("rds_sendmsg", "SP(%p) is not connected, State: " 1870 "%d", sp); 1871 rw_exit(&sp->session_lock); 1872 rw_enter(&sp->session_lock, RW_WRITER); 1873 if ((sp->session_state == RDS_SESSION_STATE_FAILED) || 1874 (sp->session_state == RDS_SESSION_STATE_FINI)) { 1875 sp->session_state = RDS_SESSION_STATE_CREATED; 1876 sp->session_type = RDS_SESSION_ACTIVE; 1877 RDS_DPRINTF3("rds_sendmsg", "SP(%p) State " 1878 "RDS_SESSION_STATE_CREATED", sp); 1879 rw_exit(&sp->session_lock); 1880 1881 1882 /* The ipaddr should be in the network order */ 1883 sendip1 = sendip; 1884 recvip1 = recvip; 1885 ret = rds_sc_path_lookup(&sendip1, &recvip1); 1886 if (ret == 0) { 1887 RDS_DPRINTF2(LABEL, "Path not found " 1888 "(0x%x 0x%x)", sendip1, recvip1); 1889 } 1890 1891 /* Resolve the IP addresses */ 1892 ret = rds_get_ibaddr(htonl(sendip1), htonl(recvip1), 1893 &lgid, &rgid); 1894 if (ret != 0) { 1895 RDS_DPRINTF1(LABEL, "rds_get_ibaddr failed: %d", 1896 ret); 1897 rw_enter(&sp->session_lock, RW_WRITER); 1898 if (sp->session_type == RDS_SESSION_ACTIVE) { 1899 sp->session_state = 1900 RDS_SESSION_STATE_FAILED; 1901 RDS_DPRINTF3("rds_sendmsg", 1902 "SP(%p) State " 1903 "RDS_SESSION_STATE_FAILED", sp); 1904 rw_exit(&sp->session_lock); 1905 return (EFAULT); 1906 } else { 1907 rw_exit(&sp->session_lock); 1908 return (ENOMEM); 1909 } 1910 } 1911 1912 RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", 1913 lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, 1914 rgid.gid_guid); 1915 1916 rw_enter(&sp->session_lock, RW_WRITER); 1917 if (sp->session_type == RDS_SESSION_ACTIVE) { 1918 sp->session_lgid = lgid; 1919 sp->session_rgid = rgid; 1920 hcap = rds_gid_to_hcap(rdsib_statep, lgid); 1921 if (hcap == NULL) { 1922 RDS_DPRINTF1(LABEL, "REQ received on " 1923 "an uninitialized HCA: %llx", 1924 sp->session_lgid.gid_guid); 1925 sp->session_state = 1926 RDS_SESSION_STATE_FAILED; 1927 RDS_DPRINTF3("rds_sendmsg", 1928 "SP(%p) State " 1929 "RDS_SESSION_STATE_FAILED", sp); 1930 rw_exit(&sp->session_lock); 1931 return (ENOMEM); 1932 } 1933 1934 ret = rds_session_init(sp); 1935 if (ret != 0) { 1936 RDS_DPRINTF2("rds_sendmsg", 1937 "SP(%p): rds_session_init failed", 1938 sp); 1939 sp->session_state = 1940 RDS_SESSION_STATE_FAILED; 1941 RDS_DPRINTF3("rds_sendmsg", 1942 "SP(%p) State " 1943 "RDS_SESSION_STATE_FAILED", sp); 1944 rw_exit(&sp->session_lock); 1945 return (EFAULT); 1946 } 1947 sp->session_state = RDS_SESSION_STATE_INIT; 1948 rw_exit(&sp->session_lock); 1949 1950 rds_session_open(sp); 1951 1952 } else { 1953 RDS_DPRINTF2(LABEL, "SP(%p): state changed " 1954 "to %d", sp, sp->session_state); 1955 rw_exit(&sp->session_lock); 1956 return (ENOMEM); 1957 } 1958 } else { 1959 RDS_DPRINTF2(LABEL, "SP(%p): Session state %d changed", 1960 sp, sp->session_state); 1961 rw_exit(&sp->session_lock); 1962 return (ENOMEM); 1963 } 1964 } else { 1965 RDS_DPRINTF2(LABEL, "SP(%p): Session is in %d state", 1966 sp, sp->session_state); 1967 rw_exit(&sp->session_lock); 1968 return (ENOMEM); 1969 } 1970 1971 rw_enter(&sp->session_lock, RW_READER); 1972 if (sp->session_state == RDS_SESSION_STATE_CONNECTED) { 1973 rw_exit(&sp->session_lock); 1974 1975 ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport, 1976 recvport); 1977 } else { 1978 RDS_DPRINTF2(LABEL, "SP(%p): state(%d) not connected", 1979 sp, sp->session_state); 1980 rw_exit(&sp->session_lock); 1981 } 1982 1983 RDS_DPRINTF4("rds_sendmsg", "Return: SP(%p) ret: %d", sp, ret); 1984 1985 return (ret); 1986 } 1987 1988 /* Note: This is called on the CQ handler thread */ 1989 void 1990 rds_received_msg(rds_ep_t *ep, rds_buf_t *bp) 1991 { 1992 mblk_t *mp, *mp1; 1993 rds_data_hdr_t *pktp, *pktp1; 1994 uint8_t *datap; 1995 rds_buf_t *bp1; 1996 rds_bufpool_t *rpool; 1997 uint_t npkts, ix; 1998 int ret; 1999 2000 RDS_DPRINTF4("rds_received_msg", "Enter: EP(%p)", ep); 2001 2002 pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va; 2003 datap = ((uint8_t *)(uintptr_t)bp->buf_ds.ds_va) + RDS_DATA_HDR_SZ; 2004 npkts = pktp->dh_npkts; 2005 2006 /* increment rx pending here */ 2007 rpool = &ep->ep_rcvpool; 2008 mutex_enter(&rpool->pool_lock); 2009 rpool->pool_nbusy += npkts; 2010 mutex_exit(&rpool->pool_lock); 2011 2012 /* this will get freed by sockfs */ 2013 mp = esballoc(datap, pktp->dh_datalen, BPRI_HI, &bp->buf_frtn); 2014 if (mp == NULL) { 2015 RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed", 2016 ep, bp); 2017 rds_free_recv_buf(bp, npkts); 2018 return; 2019 } 2020 mp->b_wptr = datap + pktp->dh_datalen; 2021 mp->b_datap->db_type = M_DATA; 2022 2023 mp1 = mp; 2024 bp1 = bp->buf_nextp; 2025 while (bp1 != NULL) { 2026 pktp1 = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va; 2027 datap = ((uint8_t *)(uintptr_t)bp1->buf_ds.ds_va) + 2028 RDS_DATA_HDR_SZ; 2029 2030 mp1->b_cont = esballoc(datap, pktp1->dh_datalen, 2031 BPRI_HI, &bp1->buf_frtn); 2032 if (mp1->b_cont == NULL) { 2033 RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed", 2034 ep, bp1); 2035 freemsg(mp); 2036 rds_free_recv_buf(bp1, pktp1->dh_npkts); 2037 return; 2038 } 2039 mp1 = mp1->b_cont; 2040 mp1->b_wptr = datap + pktp1->dh_datalen; 2041 mp1->b_datap->db_type = M_DATA; 2042 2043 bp1 = bp1->buf_nextp; 2044 } 2045 2046 RDS_INCR_RXPKTS_PEND(npkts); 2047 RDS_INCR_RXPKTS(npkts); 2048 RDS_INCR_RXBYTES(msgdsize(mp)); 2049 2050 RDS_DPRINTF5(LABEL, "Deliver Message: sendIP: 0x%x recvIP: 0x%x " 2051 "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip, 2052 ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport, 2053 npkts, pktp->dh_psn); 2054 2055 /* store the last buffer id, no lock needed */ 2056 if (npkts > 1) { 2057 ep->ep_rbufid = pktp1->dh_bufid; 2058 } else { 2059 ep->ep_rbufid = pktp->dh_bufid; 2060 } 2061 2062 ret = rds_deliver_new_msg(mp, ep->ep_myip, ep->ep_remip, 2063 pktp->dh_recvport, pktp->dh_sendport, ALL_ZONES); 2064 if (ret != 0) { 2065 if (ret == ENOSPC) { 2066 /* 2067 * The message is delivered but cannot take more, 2068 * stall the port 2069 */ 2070 RDS_DPRINTF2(LABEL, "Port %d NO SPACE", 2071 pktp->dh_recvport); 2072 rds_stall_port(pktp->dh_recvport); 2073 } else { 2074 RDS_DPRINTF1(LABEL, "rds_deliver_new_msg returned: %d", 2075 ret); 2076 } 2077 } 2078 2079 mutex_enter(&ep->ep_lock); 2080 if (ep->ep_rdmacnt == 0) { 2081 ep->ep_rdmacnt++; 2082 *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid; 2083 mutex_exit(&ep->ep_lock); 2084 2085 /* send acknowledgement */ 2086 RDS_INCR_TXACKS(); 2087 ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix); 2088 if (ret != IBT_SUCCESS) { 2089 RDS_DPRINTF1(LABEL, "EP(%p): ibt_post_send for " 2090 "acknowledgement failed: %d, SQ depth: %d", 2091 ep, ret, ep->ep_sndpool.pool_nbusy); 2092 mutex_enter(&ep->ep_lock); 2093 ep->ep_rdmacnt--; 2094 mutex_exit(&ep->ep_lock); 2095 } 2096 } else { 2097 /* no room to send acknowledgement */ 2098 mutex_exit(&ep->ep_lock); 2099 } 2100 2101 RDS_DPRINTF4("rds_received_msg", "Return: EP(%p)", ep); 2102 } 2103