/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define	LINUXKPI_PARAM_PREFIX ibcore_

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/route.h>
#include <net/route/nhop.h>

#include <net/tcp.h>
#include <net/ipv6.h>

#include <netinet/in_fib.h>

#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>
#include <netinet6/ip6_var.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/rdma_sdp.h>
#include <rdma/ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

#include <sys/priv.h>

#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

#define	CMA_CM_RESPONSE_TIMEOUT 20
#define	CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define	CMA_MAX_CM_RETRIES 15
#define	CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define	CMA_IBOE_PACKET_LIFETIME 18

static const char * const cma_events[] = {
	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved ",
	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
};

const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
	size_t index = event;

	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
			cma_events[index] : "unrecognized event";
}
EXPORT_SYMBOL(rdma_event_msg);

const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
						int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return ibcm_reject_msg(reason);

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return iwcm_reject_msg(reason);

	WARN_ON_ONCE(1);
	return "unrecognized transport";
}
EXPORT_SYMBOL(rdma_reject_msg);

static int cma_check_linklocal(struct rdma_dev_addr *, struct sockaddr *);
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id);

static struct ib_client cma_client = {
	.name   = "cma",
	.add    = cma_add_one,
	.remove = cma_remove_one
};

static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;

struct cma_pernet {
	struct idr tcp_ps;
	struct idr udp_ps;
	struct idr ipoib_ps;
	struct idr ib_ps;
	struct idr sdp_ps;
};

VNET_DEFINE(struct cma_pernet, cma_pernet);

static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet)
{
	struct cma_pernet *retval;

	CURVNET_SET_QUIET(vnet);
	retval = &VNET(cma_pernet);
	CURVNET_RESTORE();

	return (retval);
}

static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps)
{
	struct cma_pernet *pernet = cma_pernet_ptr(net);

	switch (ps) {
	case RDMA_PS_TCP:
		return &pernet->tcp_ps;
	case RDMA_PS_UDP:
		return &pernet->udp_ps;
	case RDMA_PS_IPOIB:
		return &pernet->ipoib_ps;
	case RDMA_PS_IB:
		return &pernet->ib_ps;
	case RDMA_PS_SDP:
		return &pernet->sdp_ps;
	default:
		return NULL;
	}
}

struct cma_device {
	struct list_head	list;
	struct ib_device	*device;
	struct completion	comp;
	atomic_t		refcount;
	struct list_head	id_list;
	struct sysctl_ctx_list	sysctl_ctx;
	enum ib_gid_type	*default_gid_type;
};

struct rdma_bind_list {
	enum rdma_port_space	ps;
	struct hlist_head	owners;
	unsigned short		port;
};

struct class_port_info_context {
	struct ib_class_port_info	*class_port_info;
	struct ib_device		*device;
	struct completion		done;
	struct ib_sa_query		*sa_query;
	u8				port_num;
};

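/*
 * Port-space bookkeeping: each VNET carries one IDR per RDMA port space
 * (TCP, UDP, IPoIB, IB and SDP), mapping a port number to the
 * rdma_bind_list that owns it.  The cma_ps_alloc(), cma_ps_find() and
 * cma_ps_remove() helpers below wrap the IDR operations; cma_ps_alloc()
 * requests exactly the given port number (snum .. snum + 1).
 */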
static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps,
			struct rdma_bind_list *bind_list, int snum)
{
	struct idr *idr = cma_pernet_idr(vnet, ps);

	return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
}

static struct rdma_bind_list *cma_ps_find(struct vnet *net,
					  enum rdma_port_space ps, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	return idr_find(idr, snum);
}

static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	idr_remove(idr, snum);
}

enum {
	CMA_OPTION_AFONLY,
};

void cma_ref_dev(struct cma_device *cma_dev)
{
	atomic_inc(&cma_dev->refcount);
}

struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter	filter,
					     void		*cookie)
{
	struct cma_device *cma_dev;
	struct cma_device *found_cma_dev = NULL;

	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list)
		if (filter(cma_dev->device, cookie)) {
			found_cma_dev = cma_dev;
			break;
		}

	if (found_cma_dev)
		cma_ref_dev(found_cma_dev);
	mutex_unlock(&lock);
	return found_cma_dev;
}

int cma_get_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port,
			     enum ib_gid_type default_gid_type)
{
	unsigned long supported_gids;

	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

	if (!(supported_gids & 1 << default_gid_type))
		return -EINVAL;

	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
		default_gid_type;

	return 0;
}

struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
	return cma_dev->device;
}

/*
 * Device removal can occur at anytime, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
 */
struct rdma_id_private {
	struct rdma_cm_id	id;

	struct rdma_bind_list	*bind_list;
	struct hlist_node	node;
	struct list_head	list; /* listen_any_list or cma_device.list */
	struct list_head	listen_list; /* per device listens */
	struct cma_device	*cma_dev;
	struct list_head	mc_list;

	int			internal_id;
	enum rdma_cm_state	state;
	spinlock_t		lock;
	struct mutex		qp_mutex;

	struct completion	comp;
	atomic_t		refcount;
	struct mutex		handler_mutex;

	int			backlog;
	int			timeout_ms;
	struct ib_sa_query	*query;
	int			query_id;
	union {
		struct ib_cm_id	*ib;
		struct iw_cm_id	*iw;
	} cm_id;

	u32			seq_num;
	u32			qkey;
	u32			qp_num;
	pid_t			owner;
	u32			options;
	u8			srq;
	u8			tos;
	u8			timeout_set:1;
	u8			reuseaddr;
	u8			afonly;
	u8			timeout;
	enum ib_gid_type	gid_type;
};

struct cma_multicast {
	struct rdma_id_private *id_priv;
	union {
		struct ib_sa_multicast *ib;
	} multicast;
	struct list_head	list;
	void			*context;
	struct sockaddr_storage	addr;
	struct kref		mcref;
	bool			igmp_joined;
	u8			join_state;
};

struct cma_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	enum rdma_cm_state	old_state;
	enum rdma_cm_state	new_state;
	struct rdma_cm_event	event;
};

struct cma_ndev_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	struct rdma_cm_event	event;
};

struct iboe_mcast_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	struct cma_multicast	*mc;
};

struct cma_hdr {
	u8 cma_version;
	u8 ip_version;	/* IP version: 7:4 */
	__be16 port;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};

#define	CMA_VERSION 0x00
#define	SDP_MAJ_VERSION 0x2

struct cma_req_info {
	struct ib_device *device;
	int port;
	union ib_gid local_gid;
	__be64 service_id;
	u16 pkey;
	bool has_gid:1;
};

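/*
 * The id_priv state machine is protected by id_priv->lock.  cma_comp()
 * tests the current state, cma_comp_exch() is a compare-and-swap that
 * only moves to the new state when the expected one is found, and
 * cma_exch() swaps unconditionally and returns the previous state.
 * For example, the disconnect path later in this file uses
 * cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_DISCONNECT) so that
 * only one event handler performs the transition.
 */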
static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	ret = (id_priv->state == comp);
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
			 enum rdma_cm_state comp, enum rdma_cm_state exch)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	if ((ret = (id_priv->state == comp)))
		id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
				   enum rdma_cm_state exch)
{
	unsigned long flags;
	enum rdma_cm_state old;

	spin_lock_irqsave(&id_priv->lock, flags);
	old = id_priv->state;
	id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return old;
}

static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
	return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static inline u8 sdp_get_majv(u8 sdp_version)
{
	return sdp_version >> 4;
}

static inline u8 sdp_get_ip_ver(const struct sdp_hh *hh)
{
	return hh->ipv_cap >> 4;
}

static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
{
	hh->ipv_cap = (ip_ver << 4) | (hh->ipv_cap & 0xF);
}

static int cma_igmp_send(struct ifnet *ndev, const union ib_gid *mgid, bool join)
{
	int retval;

	if (ndev) {
		union rdma_sockaddr addr;

		rdma_gid2ip(&addr._sockaddr, mgid);

		CURVNET_SET_QUIET(ndev->if_vnet);
		if (join)
			retval = -if_addmulti(ndev, &addr._sockaddr, NULL);
		else
			retval = -if_delmulti(ndev, &addr._sockaddr);
		CURVNET_RESTORE();
	} else {
		retval = -ENODEV;
	}
	return retval;
}

static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
			       struct cma_device *cma_dev)
{
	cma_ref_dev(cma_dev);
	id_priv->cma_dev = cma_dev;
	id_priv->gid_type = 0;
	id_priv->id.device = cma_dev->device;
	id_priv->id.route.addr.dev_addr.transport =
		rdma_node_get_transport(cma_dev->device->node_type);
	list_add_tail(&id_priv->list, &cma_dev->id_list);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
			      struct cma_device *cma_dev)
{
	_cma_attach_to_dev(id_priv, cma_dev);
	id_priv->gid_type =
		cma_dev->default_gid_type[id_priv->id.port_num -
					  rdma_start_port(cma_dev->device)];
}

void cma_deref_dev(struct cma_device *cma_dev)
{
	if (atomic_dec_and_test(&cma_dev->refcount))
		complete(&cma_dev->comp);
}

static inline void release_mc(struct kref *kref)
{
	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);

	kfree(mc->multicast.ib);
	kfree(mc);
}

static void cma_release_dev(struct rdma_id_private *id_priv)
{
	mutex_lock(&lock);
	list_del(&id_priv->list);
	cma_deref_dev(id_priv->cma_dev);
	id_priv->cma_dev = NULL;
	mutex_unlock(&lock);
}

static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}

static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}

static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
	return id_priv->id.route.addr.src_addr.ss_family;
}

static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
	struct ib_sa_mcmember_rec rec;
	int ret = 0;

	if (id_priv->qkey) {
		if (qkey && id_priv->qkey != qkey)
			return -EINVAL;
		return 0;
	}

	if (qkey) {
		id_priv->qkey = qkey;
		return 0;
	}

	switch (id_priv->id.ps) {
	case RDMA_PS_UDP:
	case RDMA_PS_IB:
		id_priv->qkey = RDMA_UDP_QKEY;
		break;
	case RDMA_PS_IPOIB:
		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
					     id_priv->id.port_num, &rec.mgid,
					     &rec);
		if (!ret)
			id_priv->qkey = be32_to_cpu(rec.qkey);
		break;
	default:
		break;
	}
	return ret;
}

static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
	dev_addr->dev_type = ARPHRD_INFINIBAND;
	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
}

static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	int ret;

	if (addr->sa_family != AF_IB) {
		ret = rdma_translate_ip(addr, dev_addr);
	} else {
		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
		ret = 0;
	}

	return ret;
}

static inline int cma_validate_port(struct ib_device *device, u8 port,
				    enum ib_gid_type gid_type,
				    union ib_gid *gid,
				    const struct rdma_dev_addr *dev_addr)
{
	const int dev_type = dev_addr->dev_type;
	struct ifnet *ndev;
	int ret = -ENODEV;

	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
		return ret;

	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
		return ret;

	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	} else {
		ndev = NULL;
		gid_type = IB_GID_TYPE_IB;
	}

	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
					 ndev, NULL);

	if (ndev)
		dev_put(ndev);

	return ret;
}

static int cma_acquire_dev(struct rdma_id_private *id_priv,
			   struct rdma_id_private *listen_id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct cma_device *cma_dev;
	union ib_gid gid, iboe_gid, *gidp;
	int ret = -ENODEV;
	u8 port;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	mutex_lock(&lock);
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &iboe_gid);

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof gid);

	if (listen_id_priv) {
		cma_dev = listen_id_priv->cma_dev;
		port = listen_id_priv->id.port_num;

		if (rdma_is_port_valid(cma_dev->device, port)) {
			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;

			ret = cma_validate_port(cma_dev->device, port,
				rdma_protocol_ib(cma_dev->device, port) ?
				IB_GID_TYPE_IB :
				listen_id_priv->gid_type, gidp, dev_addr);
			if (!ret) {
				id_priv->id.port_num = port;
				goto out;
			}
		}
	}

	list_for_each_entry(cma_dev, &dev_list, list) {
		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
			if (listen_id_priv &&
			    listen_id_priv->cma_dev == cma_dev &&
			    listen_id_priv->id.port_num == port)
				continue;

			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;

			ret = cma_validate_port(cma_dev->device, port,
						rdma_protocol_ib(cma_dev->device, port) ?
						IB_GID_TYPE_IB :
						cma_dev->default_gid_type[port - 1],
						gidp, dev_addr);
			if (!ret) {
				id_priv->id.port_num = port;
				goto out;
			}
		}
	}

out:
	if (!ret)
		cma_attach_to_dev(id_priv, cma_dev);

	mutex_unlock(&lock);
	return ret;
}

/*
 * Select the source IB device and address to reach the destination IB address.
 */
static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
{
	struct cma_device *cma_dev, *cur_dev;
	struct sockaddr_ib *addr;
	union ib_gid gid, sgid, *dgid;
	u16 pkey, index;
	u8 p;
	int i;

	cma_dev = NULL;
	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
	dgid = (union ib_gid *) &addr->sib_addr;
	pkey = ntohs(addr->sib_pkey);

	list_for_each_entry(cur_dev, &dev_list, list) {
		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
			if (!rdma_cap_af_ib(cur_dev->device, p))
				continue;

			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
				continue;

			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
						       &gid, NULL);
			     i++) {
				if (!memcmp(&gid, dgid, sizeof(gid))) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
					goto found;
				}

				if (!cma_dev && (gid.global.subnet_prefix ==
						 dgid->global.subnet_prefix)) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
				}
			}
		}
	}

	if (!cma_dev)
		return -ENODEV;

found:
	cma_attach_to_dev(id_priv, cma_dev);
	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
	return 0;
}

static void cma_deref_id(struct rdma_id_private *id_priv)
{
	if (atomic_dec_and_test(&id_priv->refcount))
		complete(&id_priv->comp);
}

struct rdma_cm_id *rdma_create_id(struct vnet *net,
				  rdma_cm_event_handler event_handler,
				  void *context, enum rdma_port_space ps,
				  enum ib_qp_type qp_type)
{
	struct rdma_id_private *id_priv;

#ifdef VIMAGE
	if (net == NULL)
		return ERR_PTR(-EINVAL);
#endif
	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
	if (!id_priv)
		return ERR_PTR(-ENOMEM);

	id_priv->owner = task_pid_nr(current);
	id_priv->state = RDMA_CM_IDLE;
	id_priv->id.context = context;
	id_priv->id.event_handler = event_handler;
	id_priv->id.ps = ps;
	id_priv->id.qp_type = qp_type;
	id_priv->timeout_set = false;
	spin_lock_init(&id_priv->lock);
	mutex_init(&id_priv->qp_mutex);
	init_completion(&id_priv->comp);
	atomic_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
	id_priv->id.route.addr.dev_addr.net = net;

	return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);

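/*
 * Illustrative sketch only (not part of this driver): a typical consumer
 * creates an id in its own VNET and later releases it with
 * rdma_destroy_id().  The handler name and the 'net' variable below are
 * hypothetical placeholders for whatever the caller uses.
 *
 *	static int my_cm_handler(struct rdma_cm_id *id,
 *	    struct rdma_cm_event *event)
 *	{
 *		return (0);	// keep the id alive
 *	}
 *
 *	struct rdma_cm_id *id;
 *
 *	id = rdma_create_id(net, my_cm_handler, NULL, RDMA_PS_TCP, IB_QPT_RC);
 *	if (IS_ERR(id))
 *		return (PTR_ERR(id));
 *	...
 *	rdma_destroy_id(id);
 */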
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

	return ret;
}

static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
		   struct ib_qp_init_attr *qp_init_attr)
{
	struct rdma_id_private *id_priv;
	struct ib_qp *qp;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id->device != pd->device)
		return -EINVAL;

	qp_init_attr->port_num = id->port_num;
	qp = ib_create_qp(pd, qp_init_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);

	if (id->qp_type == IB_QPT_UD)
		ret = cma_init_ud_qp(id_priv, qp);
	else
		ret = cma_init_conn_qp(id_priv, qp);
	if (ret)
		goto err;

	id->qp = qp;
	id_priv->qp_num = qp->qp_num;
	id_priv->srq = (qp->srq != NULL);
	return 0;
err:
	ib_destroy_qp(qp);
	return ret;
}
EXPORT_SYMBOL(rdma_create_qp);

void rdma_destroy_qp(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	mutex_lock(&id_priv->qp_mutex);
	ib_destroy_qp(id_priv->id.qp);
	id_priv->id.qp = NULL;
	mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);

static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;
	union ib_gid sgid;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	/* Need to update QP attributes from default values. */
	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
	if (ret)
		goto out;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
			   qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
	if (ret)
		goto out;

	BUG_ON(id_priv->cma_dev->device != id_priv->id.device);

	if (conn_param)
		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	if (conn_param)
		qp_attr.max_rd_atomic = conn_param->initiator_depth;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
	struct ib_qp_attr qp_attr;
	int ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_ERR;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int ret;
	u16 pkey;

	if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
		pkey = 0xffff;
	else
		pkey = ib_addr_get_pkey(dev_addr);

	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
				  pkey, &qp_attr->pkey_index);
	if (ret)
		return ret;

	qp_attr->port_num = id_priv->id.port_num;
	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;

	if (id_priv->id.qp_type == IB_QPT_UD) {
		ret = cma_set_qkey(id_priv, 0);
		if (ret)
			return ret;

		qp_attr->qkey = id_priv->qkey;
		*qp_attr_mask |= IB_QP_QKEY;
	} else {
		qp_attr->qp_access_flags = 0;
		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
	}
	return 0;
}

int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
		      int *qp_attr_mask)
{
	struct rdma_id_private *id_priv;
	int ret = 0;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
		else
			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
						 qp_attr_mask);

		if (qp_attr->qp_state == IB_QPS_RTR)
			qp_attr->rq_psn = id_priv->seq_num;
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.iw) {
			qp_attr->qp_access_flags = 0;
			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		} else
			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
						 qp_attr_mask);
		qp_attr->port_num = id_priv->id.port_num;
		*qp_attr_mask |= IB_QP_PORT;
	} else
		ret = -ENOSYS;

	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
		qp_attr->timeout = id_priv->timeout;

	return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);

static inline int cma_zero_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_loopback_addr(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
	case AF_IB:
		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
	default:
		return 0;
	}
}

static inline int cma_any_addr(struct sockaddr *addr)
{
	return cma_zero_addr(addr) || cma_loopback_addr(addr);
}

static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
{
	if (src->sa_family != dst->sa_family)
		return -1;

	switch (src->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
	case AF_INET6:
		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
				     &((struct sockaddr_in6 *) dst)->sin6_addr);
	default:
		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
				   &((struct sockaddr_ib *) dst)->sib_addr);
	}
}

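/*
 * For AF_IB the port number is not stored separately: cma_port() recovers
 * it from the low 16 bits of the service ID (sib_sid) under the mask the
 * application supplied in sib_sid_mask.
 */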
static __be16 cma_port(struct sockaddr *addr)
{
	struct sockaddr_ib *sib;

	switch (addr->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) addr)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *) addr)->sin6_port;
	case AF_IB:
		sib = (struct sockaddr_ib *) addr;
		return htons((u16) (be64_to_cpu(sib->sib_sid) &
				    be64_to_cpu(sib->sib_sid_mask)));
	default:
		return 0;
	}
}

static inline int cma_any_port(struct sockaddr *addr)
{
	return !cma_port(addr);
}

static void cma_save_ib_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct ib_sa_path_rec *path)
{
	struct sockaddr_ib *listen_ib, *ib;

	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
	if (src_addr) {
		ib = (struct sockaddr_ib *)src_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->sgid, 16);
			ib->sib_sid = path->service_id;
			ib->sib_scope_id = 0;
		} else {
			ib->sib_pkey = listen_ib->sib_pkey;
			ib->sib_flowinfo = listen_ib->sib_flowinfo;
			ib->sib_addr = listen_ib->sib_addr;
			ib->sib_sid = listen_ib->sib_sid;
			ib->sib_scope_id = listen_ib->sib_scope_id;
		}
		ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
	}
	if (dst_addr) {
		ib = (struct sockaddr_ib *)dst_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->dgid, 16);
		}
	}
}

static void cma_save_ip4_info(struct sockaddr_in *src_addr,
			      struct sockaddr_in *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in) {
			.sin_len = sizeof(struct sockaddr_in),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
			.sin_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in) {
			.sin_len = sizeof(struct sockaddr_in),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
			.sin_port = hdr->port,
		};
	}
}

static void cma_ip6_clear_scope_id(struct in6_addr *addr)
{
	/* make sure link local scope ID gets zeroed */
	if (IN6_IS_SCOPE_LINKLOCAL(addr) ||
	    IN6_IS_ADDR_MC_INTFACELOCAL(addr)) {
		/* use byte-access to be alignment safe */
		addr->s6_addr[2] = 0;
		addr->s6_addr[3] = 0;
	}
}

static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
			      struct sockaddr_in6 *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in6) {
			.sin6_len = sizeof(struct sockaddr_in6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->dst_addr.ip6,
			.sin6_port = local_port,
		};
		cma_ip6_clear_scope_id(&src_addr->sin6_addr);
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in6) {
			.sin6_len = sizeof(struct sockaddr_in6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->src_addr.ip6,
			.sin6_port = hdr->port,
		};
		cma_ip6_clear_scope_id(&dst_addr->sin6_addr);
	}
}

static u16 cma_port_from_service_id(__be64 service_id)
{
	return (u16)be64_to_cpu(service_id);
}

static int sdp_save_ip_info(struct sockaddr *src_addr,
			    struct sockaddr *dst_addr,
			    const struct sdp_hh *hdr,
			    __be64 service_id)
{
	__be16 local_port;

	BUG_ON(src_addr == NULL || dst_addr == NULL);

	if (sdp_get_majv(hdr->majv_minv) != SDP_MAJ_VERSION)
		return -EINVAL;

	local_port = htons(cma_port_from_service_id(service_id));

	switch (sdp_get_ip_ver(hdr)) {
	case 4: {
		struct sockaddr_in *s4, *d4;

		s4 = (void *)src_addr;
		d4 = (void *)dst_addr;

		*s4 = (struct sockaddr_in) {
			.sin_len = sizeof(*s4),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
			.sin_port = local_port,
		};
		*d4 = (struct sockaddr_in) {
			.sin_len = sizeof(*d4),
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
			.sin_port = hdr->port,
		};
		break;
	}
	case 6: {
		struct sockaddr_in6 *s6, *d6;

		s6 = (void *)src_addr;
		d6 = (void *)dst_addr;

		*s6 = (struct sockaddr_in6) {
			.sin6_len = sizeof(*s6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->dst_addr.ip6,
			.sin6_port = local_port,
		};
		*d6 = (struct sockaddr_in6) {
			.sin6_len = sizeof(*d6),
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->src_addr.ip6,
			.sin6_port = hdr->port,
		};
		cma_ip6_clear_scope_id(&s6->sin6_addr);
		cma_ip6_clear_scope_id(&d6->sin6_addr);
		break;
	}
	default:
		return -EAFNOSUPPORT;
	}

	return 0;
}

static int cma_save_ip_info(struct sockaddr *src_addr,
			    struct sockaddr *dst_addr,
			    struct ib_cm_event *ib_event,
			    __be64 service_id)
{
	struct cma_hdr *hdr;
	__be16 port;

	if (rdma_ps_from_service_id(service_id) == RDMA_PS_SDP)
		return sdp_save_ip_info(src_addr, dst_addr,
		    ib_event->private_data, service_id);

	hdr = ib_event->private_data;
	if (hdr->cma_version != CMA_VERSION)
		return -EINVAL;

	port = htons(cma_port_from_service_id(service_id));

	switch (cma_get_ip_ver(hdr)) {
	case 4:
		cma_save_ip4_info((struct sockaddr_in *)src_addr,
				  (struct sockaddr_in *)dst_addr, hdr, port);
		break;
	case 6:
		cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
				  (struct sockaddr_in6 *)dst_addr, hdr, port);
		break;
	default:
		return -EAFNOSUPPORT;
	}

	return 0;
}

static int cma_save_net_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     struct rdma_cm_id *listen_id,
			     struct ib_cm_event *ib_event,
			     sa_family_t sa_family, __be64 service_id)
{
	if (sa_family == AF_IB) {
		if (ib_event->event == IB_CM_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id,
					 ib_event->param.req_rcvd.primary_path);
		else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
		return 0;
	}

	return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
}

static int cma_save_req_info(const struct ib_cm_event *ib_event,
			     struct cma_req_info *req)
{
	const struct ib_cm_req_event_param *req_param =
		&ib_event->param.req_rcvd;
	const struct ib_cm_sidr_req_event_param *sidr_param =
		&ib_event->param.sidr_req_rcvd;

	switch (ib_event->event) {
	case IB_CM_REQ_RECEIVED:
		req->device	= req_param->listen_id->device;
		req->port	= req_param->port;
		memcpy(&req->local_gid, &req_param->primary_path->sgid,
		       sizeof(req->local_gid));
		req->has_gid	= true;
		req->service_id = req_param->primary_path->service_id;
		req->pkey	= be16_to_cpu(req_param->primary_path->pkey);
		if (req->pkey != req_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    req_param->bth_pkey, req->pkey);
		break;
	case IB_CM_SIDR_REQ_RECEIVED:
		req->device	= sidr_param->listen_id->device;
		req->port	= sidr_param->port;
		req->has_gid	= false;
		req->service_id	= sidr_param->service_id;
		req->pkey	= sidr_param->pkey;
		if (req->pkey != sidr_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    sidr_param->bth_pkey, req->pkey);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static bool validate_ipv4_net_dev(struct ifnet *net_dev,
				  const struct sockaddr_in *dst_addr,
				  const struct sockaddr_in *src_addr)
{
#ifdef INET
	__be32 daddr = dst_addr->sin_addr.s_addr,
	       saddr = src_addr->sin_addr.s_addr;
	struct ifnet *dst_dev;
	struct nhop_object *nh;
	bool ret;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
	    ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
	    ipv4_is_loopback(saddr))
		return false;

	dst_dev = ip_ifp_find(net_dev->if_vnet, daddr);
	if (dst_dev != net_dev) {
		if (dst_dev != NULL)
			dev_put(dst_dev);
		return false;
	}
	dev_put(dst_dev);

	/*
	 * Check for loopback.
	 */
	if (saddr == daddr)
		return true;

	CURVNET_SET(net_dev->if_vnet);
	nh = fib4_lookup(RT_DEFAULT_FIB, src_addr->sin_addr, 0, NHR_NONE, 0);
	if (nh != NULL)
		ret = (nh->nh_ifp == net_dev);
	else
		ret = false;
	CURVNET_RESTORE();
	return ret;
#else
	return false;
#endif
}

static bool validate_ipv6_net_dev(struct ifnet *net_dev,
				  const struct sockaddr_in6 *dst_addr,
				  const struct sockaddr_in6 *src_addr)
{
#ifdef INET6
	struct sockaddr_in6 src_tmp = *src_addr;
	struct sockaddr_in6 dst_tmp = *dst_addr;
	struct ifnet *dst_dev;
	struct nhop_object *nh;
	bool ret;

	dst_dev = ip6_ifp_find(net_dev->if_vnet, dst_tmp.sin6_addr,
	    net_dev->if_index);
	if (dst_dev != net_dev) {
		if (dst_dev != NULL)
			dev_put(dst_dev);
		return false;
	}
	dev_put(dst_dev);

	CURVNET_SET(net_dev->if_vnet);

	/*
	 * Make sure the scope ID gets embedded.
	 */
	src_tmp.sin6_scope_id = net_dev->if_index;
	sa6_embedscope(&src_tmp, 0);

	dst_tmp.sin6_scope_id = net_dev->if_index;
	sa6_embedscope(&dst_tmp, 0);

	/*
	 * Check for loopback after scope ID
	 * has been embedded:
	 */
	if (memcmp(&src_tmp.sin6_addr, &dst_tmp.sin6_addr,
	    sizeof(dst_tmp.sin6_addr)) == 0) {
		ret = true;
	} else {
		/* non-loopback case */
		nh = fib6_lookup(RT_DEFAULT_FIB, &src_addr->sin6_addr,
		    net_dev->if_index, NHR_NONE, 0);
		if (nh != NULL)
			ret = (nh->nh_ifp == net_dev);
		else
			ret = false;
	}
	CURVNET_RESTORE();
	return ret;
#else
	return false;
#endif
}

static bool validate_net_dev(struct ifnet *net_dev,
			     const struct sockaddr *daddr,
			     const struct sockaddr *saddr)
{
	const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
	const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
	const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;

	switch (daddr->sa_family) {
	case AF_INET:
		return saddr->sa_family == AF_INET &&
		       validate_ipv4_net_dev(net_dev, daddr4, saddr4);

	case AF_INET6:
		return saddr->sa_family == AF_INET6 &&
		       validate_ipv6_net_dev(net_dev, daddr6, saddr6);

	default:
		return false;
	}
}

static struct ifnet *
roce_get_net_dev_by_cm_event(struct ib_device *device, u8 port_num,
    const struct ib_cm_event *ib_event)
{
	struct ib_gid_attr sgid_attr;
	union ib_gid sgid;
	int err = -EINVAL;

	if (ib_event->event == IB_CM_REQ_RECEIVED) {
		err = ib_get_cached_gid(device, port_num,
		    ib_event->param.req_rcvd.ppath_sgid_index, &sgid, &sgid_attr);
	} else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
		err = ib_get_cached_gid(device, port_num,
		    ib_event->param.sidr_req_rcvd.sgid_index, &sgid, &sgid_attr);
	}
	if (err)
		return (NULL);
	return (sgid_attr.ndev);
}

static struct ifnet *cma_get_net_dev(struct ib_cm_event *ib_event,
				     const struct cma_req_info *req)
{
	struct sockaddr_storage listen_addr_storage, src_addr_storage;
	struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
			*src_addr = (struct sockaddr *)&src_addr_storage;
	struct ifnet *net_dev;
	const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
	struct epoch_tracker et;
	int err;

	err = cma_save_ip_info(listen_addr, src_addr, ib_event,
			       req->service_id);
	if (err)
		return ERR_PTR(err);

	if (rdma_protocol_roce(req->device, req->port)) {
		net_dev = roce_get_net_dev_by_cm_event(req->device, req->port,
						       ib_event);
	} else {
		net_dev = ib_get_net_dev_by_params(req->device, req->port,
						   req->pkey,
						   gid, listen_addr);
	}
	if (!net_dev)
		return ERR_PTR(-ENODEV);

	NET_EPOCH_ENTER(et);
	if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
		NET_EPOCH_EXIT(et);
		dev_put(net_dev);
		return ERR_PTR(-EHOSTUNREACH);
	}
	NET_EPOCH_EXIT(et);

	return net_dev;
}

static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
{
	return (be64_to_cpu(service_id) >> 16) & 0xffff;
}

static bool sdp_match_private_data(struct rdma_id_private *id_priv,
				   const struct sdp_hh *hdr,
				   struct sockaddr *addr)
{
	__be32 ip4_addr;
	struct in6_addr ip6_addr;

	switch (addr->sa_family) {
	case AF_INET:
		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
		if (sdp_get_ip_ver(hdr) != 4)
			return false;
		if (!cma_any_addr(addr) &&
		    hdr->dst_addr.ip4.addr != ip4_addr)
			return false;
		break;
	case AF_INET6:
		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
		if (sdp_get_ip_ver(hdr) != 6)
			return false;
		cma_ip6_clear_scope_id(&ip6_addr);
		if (!cma_any_addr(addr) &&
		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
			return false;
		break;
	case AF_IB:
		return true;
	default:
		return false;
	}

	return true;
}

static bool cma_match_private_data(struct rdma_id_private *id_priv,
				   const void *vhdr)
{
	const struct cma_hdr *hdr = vhdr;
	struct sockaddr *addr = cma_src_addr(id_priv);
	__be32 ip4_addr;
	struct in6_addr ip6_addr;

	if (cma_any_addr(addr) && !id_priv->afonly)
		return true;

	if (id_priv->id.ps == RDMA_PS_SDP)
		return sdp_match_private_data(id_priv, vhdr, addr);

	switch (addr->sa_family) {
	case AF_INET:
		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
		if (cma_get_ip_ver(hdr) != 4)
			return false;
		if (!cma_any_addr(addr) &&
		    hdr->dst_addr.ip4.addr != ip4_addr)
			return false;
		break;
	case AF_INET6:
		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
		if (cma_get_ip_ver(hdr) != 6)
			return false;
		cma_ip6_clear_scope_id(&ip6_addr);
		if (!cma_any_addr(addr) &&
		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
			return false;
		break;
	case AF_IB:
		return true;
	default:
		return false;
	}

	return true;
}

static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
{
	enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
	enum rdma_transport_type transport =
		rdma_node_get_transport(device->node_type);

	return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
}

static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
	struct ib_device *device = id->device;
	const int port_num = id->port_num ?: rdma_start_port(device);

	return cma_protocol_roce_dev_port(device, port_num);
}

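/*
 * Incoming connect requests are matched against the listeners on a
 * rdma_bind_list in two steps: cma_match_private_data() compares the
 * destination address carried in the CMA/SDP private data header with
 * the listener's bound source address, and cma_match_net_dev() then
 * checks the device, the port and, for bound ids, the network interface.
 */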
rdma_cm_id *id, 1644 const struct ifnet *net_dev, 1645 u8 port_num) 1646 { 1647 const struct rdma_addr *addr = &id->route.addr; 1648 1649 if (!net_dev) { 1650 if (id->port_num && id->port_num != port_num) 1651 return false; 1652 1653 if (id->ps == RDMA_PS_SDP) { 1654 if (addr->src_addr.ss_family == AF_INET || 1655 addr->src_addr.ss_family == AF_INET6) 1656 return true; 1657 return false; 1658 } 1659 /* This request is an AF_IB request or a RoCE request */ 1660 return addr->src_addr.ss_family == AF_IB || 1661 cma_protocol_roce_dev_port(id->device, port_num); 1662 } 1663 1664 return !addr->dev_addr.bound_dev_if || 1665 (net_eq(dev_net(net_dev), addr->dev_addr.net) && 1666 addr->dev_addr.bound_dev_if == net_dev->if_index); 1667 } 1668 1669 static struct rdma_id_private *cma_find_listener( 1670 const struct rdma_bind_list *bind_list, 1671 const struct ib_cm_id *cm_id, 1672 const struct ib_cm_event *ib_event, 1673 const struct cma_req_info *req, 1674 const struct ifnet *net_dev) 1675 { 1676 struct rdma_id_private *id_priv, *id_priv_dev; 1677 1678 if (!bind_list) 1679 return ERR_PTR(-EINVAL); 1680 1681 hlist_for_each_entry(id_priv, &bind_list->owners, node) { 1682 if (cma_match_private_data(id_priv, ib_event->private_data)) { 1683 if (id_priv->id.device == cm_id->device && 1684 cma_match_net_dev(&id_priv->id, net_dev, req->port)) 1685 return id_priv; 1686 list_for_each_entry(id_priv_dev, 1687 &id_priv->listen_list, 1688 listen_list) { 1689 if (id_priv_dev->id.device == cm_id->device && 1690 cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) 1691 return id_priv_dev; 1692 } 1693 } 1694 } 1695 1696 return ERR_PTR(-EINVAL); 1697 } 1698 1699 static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, 1700 struct ib_cm_event *ib_event, 1701 struct ifnet **net_dev) 1702 { 1703 struct cma_req_info req; 1704 struct rdma_bind_list *bind_list; 1705 struct rdma_id_private *id_priv; 1706 int err; 1707 1708 err = cma_save_req_info(ib_event, &req); 1709 if (err) 1710 return ERR_PTR(err); 1711 1712 if (rdma_ps_from_service_id(cm_id->service_id) == RDMA_PS_SDP) { 1713 *net_dev = NULL; 1714 goto there_is_no_net_dev; 1715 } 1716 1717 *net_dev = cma_get_net_dev(ib_event, &req); 1718 if (IS_ERR(*net_dev)) { 1719 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1720 /* Assuming the protocol is AF_IB */ 1721 *net_dev = NULL; 1722 } else { 1723 return ERR_CAST(*net_dev); 1724 } 1725 } 1726 1727 there_is_no_net_dev: 1728 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, 1729 rdma_ps_from_service_id(req.service_id), 1730 cma_port_from_service_id(req.service_id)); 1731 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); 1732 if (IS_ERR(id_priv) && *net_dev) { 1733 dev_put(*net_dev); 1734 *net_dev = NULL; 1735 } 1736 1737 return id_priv; 1738 } 1739 1740 static inline int cma_user_data_offset(struct rdma_id_private *id_priv) 1741 { 1742 if (cma_family(id_priv) == AF_IB) 1743 return 0; 1744 if (id_priv->id.ps == RDMA_PS_SDP) 1745 return 0; 1746 return sizeof(struct cma_hdr); 1747 } 1748 1749 static void cma_cancel_route(struct rdma_id_private *id_priv) 1750 { 1751 if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { 1752 if (id_priv->query) 1753 ib_sa_cancel_query(id_priv->query_id, id_priv->query); 1754 } 1755 } 1756 1757 static void cma_cancel_listens(struct rdma_id_private *id_priv) 1758 { 1759 struct rdma_id_private *dev_id_priv; 1760 1761 /* 1762 * Remove from listen_any_list to prevent added devices from spawning 1763 * additional listen requests. 
1764 */ 1765 mutex_lock(&lock); 1766 list_del(&id_priv->list); 1767 1768 while (!list_empty(&id_priv->listen_list)) { 1769 dev_id_priv = list_entry(id_priv->listen_list.next, 1770 struct rdma_id_private, listen_list); 1771 /* sync with device removal to avoid duplicate destruction */ 1772 list_del_init(&dev_id_priv->list); 1773 list_del(&dev_id_priv->listen_list); 1774 mutex_unlock(&lock); 1775 1776 rdma_destroy_id(&dev_id_priv->id); 1777 mutex_lock(&lock); 1778 } 1779 mutex_unlock(&lock); 1780 } 1781 1782 static void cma_cancel_operation(struct rdma_id_private *id_priv, 1783 enum rdma_cm_state state) 1784 { 1785 switch (state) { 1786 case RDMA_CM_ADDR_QUERY: 1787 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); 1788 break; 1789 case RDMA_CM_ROUTE_QUERY: 1790 cma_cancel_route(id_priv); 1791 break; 1792 case RDMA_CM_LISTEN: 1793 if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) 1794 cma_cancel_listens(id_priv); 1795 break; 1796 default: 1797 break; 1798 } 1799 } 1800 1801 static void cma_release_port(struct rdma_id_private *id_priv) 1802 { 1803 struct rdma_bind_list *bind_list = id_priv->bind_list; 1804 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 1805 1806 if (!bind_list) 1807 return; 1808 1809 mutex_lock(&lock); 1810 hlist_del(&id_priv->node); 1811 if (hlist_empty(&bind_list->owners)) { 1812 cma_ps_remove(net, bind_list->ps, bind_list->port); 1813 kfree(bind_list); 1814 } 1815 mutex_unlock(&lock); 1816 } 1817 1818 static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 1819 { 1820 struct cma_multicast *mc; 1821 1822 while (!list_empty(&id_priv->mc_list)) { 1823 mc = container_of(id_priv->mc_list.next, 1824 struct cma_multicast, list); 1825 list_del(&mc->list); 1826 if (rdma_cap_ib_mcast(id_priv->cma_dev->device, 1827 id_priv->id.port_num)) { 1828 ib_sa_free_multicast(mc->multicast.ib); 1829 kfree(mc); 1830 } else { 1831 if (mc->igmp_joined) { 1832 struct rdma_dev_addr *dev_addr = 1833 &id_priv->id.route.addr.dev_addr; 1834 struct ifnet *ndev = NULL; 1835 1836 if (dev_addr->bound_dev_if) 1837 ndev = dev_get_by_index(dev_addr->net, 1838 dev_addr->bound_dev_if); 1839 if (ndev) { 1840 cma_igmp_send(ndev, 1841 &mc->multicast.ib->rec.mgid, 1842 false); 1843 dev_put(ndev); 1844 } 1845 } 1846 kref_put(&mc->mcref, release_mc); 1847 } 1848 } 1849 } 1850 1851 void rdma_destroy_id(struct rdma_cm_id *id) 1852 { 1853 struct rdma_id_private *id_priv; 1854 enum rdma_cm_state state; 1855 1856 id_priv = container_of(id, struct rdma_id_private, id); 1857 state = cma_exch(id_priv, RDMA_CM_DESTROYING); 1858 cma_cancel_operation(id_priv, state); 1859 1860 /* 1861 * Wait for any active callback to finish. New callbacks will find 1862 * the id_priv state set to destroying and abort. 
1863 */ 1864 mutex_lock(&id_priv->handler_mutex); 1865 mutex_unlock(&id_priv->handler_mutex); 1866 1867 if (id_priv->cma_dev) { 1868 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 1869 if (id_priv->cm_id.ib) 1870 ib_destroy_cm_id(id_priv->cm_id.ib); 1871 } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { 1872 if (id_priv->cm_id.iw) 1873 iw_destroy_cm_id(id_priv->cm_id.iw); 1874 } 1875 cma_leave_mc_groups(id_priv); 1876 cma_release_dev(id_priv); 1877 } 1878 1879 cma_release_port(id_priv); 1880 cma_deref_id(id_priv); 1881 wait_for_completion(&id_priv->comp); 1882 1883 if (id_priv->internal_id) 1884 cma_deref_id(id_priv->id.context); 1885 1886 kfree(id_priv->id.route.path_rec); 1887 kfree(id_priv); 1888 } 1889 EXPORT_SYMBOL(rdma_destroy_id); 1890 1891 static int cma_rep_recv(struct rdma_id_private *id_priv) 1892 { 1893 int ret; 1894 1895 ret = cma_modify_qp_rtr(id_priv, NULL); 1896 if (ret) 1897 goto reject; 1898 1899 ret = cma_modify_qp_rts(id_priv, NULL); 1900 if (ret) 1901 goto reject; 1902 1903 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); 1904 if (ret) 1905 goto reject; 1906 1907 return 0; 1908 reject: 1909 cma_modify_qp_err(id_priv); 1910 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 1911 NULL, 0, NULL, 0); 1912 return ret; 1913 } 1914 1915 static int sdp_verify_rep(const struct sdp_hah *data) 1916 { 1917 if (sdp_get_majv(data->majv_minv) != SDP_MAJ_VERSION) 1918 return -EINVAL; 1919 return 0; 1920 } 1921 1922 static void cma_set_rep_event_data(struct rdma_cm_event *event, 1923 struct ib_cm_rep_event_param *rep_data, 1924 void *private_data) 1925 { 1926 event->param.conn.private_data = private_data; 1927 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; 1928 event->param.conn.responder_resources = rep_data->responder_resources; 1929 event->param.conn.initiator_depth = rep_data->initiator_depth; 1930 event->param.conn.flow_control = rep_data->flow_control; 1931 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; 1932 event->param.conn.srq = rep_data->srq; 1933 event->param.conn.qp_num = rep_data->remote_qpn; 1934 } 1935 1936 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1937 { 1938 struct rdma_id_private *id_priv = cm_id->context; 1939 struct rdma_cm_event event; 1940 int ret = 0; 1941 1942 mutex_lock(&id_priv->handler_mutex); 1943 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && 1944 id_priv->state != RDMA_CM_CONNECT) || 1945 (ib_event->event == IB_CM_TIMEWAIT_EXIT && 1946 id_priv->state != RDMA_CM_DISCONNECT)) 1947 goto out; 1948 1949 memset(&event, 0, sizeof event); 1950 switch (ib_event->event) { 1951 case IB_CM_REQ_ERROR: 1952 case IB_CM_REP_ERROR: 1953 event.event = RDMA_CM_EVENT_UNREACHABLE; 1954 event.status = -ETIMEDOUT; 1955 break; 1956 case IB_CM_REP_RECEIVED: 1957 if (id_priv->id.ps == RDMA_PS_SDP) { 1958 event.status = sdp_verify_rep(ib_event->private_data); 1959 if (event.status) 1960 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 1961 else 1962 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1963 } else { 1964 if (id_priv->id.qp) { 1965 event.status = cma_rep_recv(id_priv); 1966 event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : 1967 RDMA_CM_EVENT_ESTABLISHED; 1968 } else { 1969 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1970 } 1971 } 1972 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, 1973 ib_event->private_data); 1974 break; 1975 case IB_CM_RTU_RECEIVED: 1976 case IB_CM_USER_ESTABLISHED: 1977 event.event = RDMA_CM_EVENT_ESTABLISHED; 1978 break; 1979 case IB_CM_DREQ_ERROR: 1980 event.status = -ETIMEDOUT; /* fall through */ 1981 case IB_CM_DREQ_RECEIVED: 1982 case IB_CM_DREP_RECEIVED: 1983 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, 1984 RDMA_CM_DISCONNECT)) 1985 goto out; 1986 event.event = RDMA_CM_EVENT_DISCONNECTED; 1987 break; 1988 case IB_CM_TIMEWAIT_EXIT: 1989 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; 1990 break; 1991 case IB_CM_MRA_RECEIVED: 1992 /* ignore event */ 1993 goto out; 1994 case IB_CM_REJ_RECEIVED: 1995 cma_modify_qp_err(id_priv); 1996 event.status = ib_event->param.rej_rcvd.reason; 1997 event.event = RDMA_CM_EVENT_REJECTED; 1998 event.param.conn.private_data = ib_event->private_data; 1999 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 2000 break; 2001 default: 2002 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 2003 ib_event->event); 2004 goto out; 2005 } 2006 2007 ret = id_priv->id.event_handler(&id_priv->id, &event); 2008 if (ret) { 2009 /* Destroy the CM ID by returning a non-zero value. */ 2010 id_priv->cm_id.ib = NULL; 2011 cma_exch(id_priv, RDMA_CM_DESTROYING); 2012 mutex_unlock(&id_priv->handler_mutex); 2013 rdma_destroy_id(&id_priv->id); 2014 return ret; 2015 } 2016 out: 2017 mutex_unlock(&id_priv->handler_mutex); 2018 return ret; 2019 } 2020 2021 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, 2022 struct ib_cm_event *ib_event, 2023 struct ifnet *net_dev) 2024 { 2025 struct rdma_id_private *id_priv; 2026 struct rdma_cm_id *id; 2027 struct rdma_route *rt; 2028 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 2029 const __be64 service_id = 2030 ib_event->param.req_rcvd.primary_path->service_id; 2031 int ret; 2032 2033 id = rdma_create_id(listen_id->route.addr.dev_addr.net, 2034 listen_id->event_handler, listen_id->context, 2035 listen_id->ps, ib_event->param.req_rcvd.qp_type); 2036 if (IS_ERR(id)) 2037 return NULL; 2038 2039 id_priv = container_of(id, struct rdma_id_private, id); 2040 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 2041 (struct sockaddr *)&id->route.addr.dst_addr, 2042 listen_id, ib_event, ss_family, service_id)) 2043 goto err; 2044 2045 rt = &id->route; 2046 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; 2047 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, 2048 GFP_KERNEL); 2049 if (!rt->path_rec) 2050 goto err; 2051 2052 rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; 2053 if (rt->num_paths == 2) 2054 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 2055 2056 if (net_dev) { 2057 ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); 2058 if (ret) 2059 goto err; 2060 } else { 2061 if (!cma_protocol_roce(listen_id) && 2062 cma_any_addr(cma_src_addr(id_priv))) { 2063 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 2064 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 2065 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 2066 } else if (!cma_any_addr(cma_src_addr(id_priv))) { 2067 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); 2068 if (ret) 2069 goto err; 2070 } 2071 } 2072 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 2073 2074 id_priv->state = RDMA_CM_CONNECT; 2075 return id_priv; 2076 2077 err: 2078 rdma_destroy_id(id); 2079 return NULL; 2080 } 2081 2082 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, 2083 struct ib_cm_event *ib_event, 2084 struct ifnet *net_dev) 2085 { 2086 struct rdma_id_private *id_priv; 2087 struct rdma_cm_id *id; 2088 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 2089 struct vnet *net = listen_id->route.addr.dev_addr.net; 2090 int ret; 2091 2092 id = rdma_create_id(net, listen_id->event_handler, listen_id->context, 2093 listen_id->ps, IB_QPT_UD); 2094 if (IS_ERR(id)) 2095 return NULL; 2096 2097 id_priv = container_of(id, struct rdma_id_private, id); 2098 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 2099 (struct sockaddr *)&id->route.addr.dst_addr, 2100 listen_id, ib_event, ss_family, 2101 ib_event->param.sidr_req_rcvd.service_id)) 2102 goto err; 2103 2104 if (net_dev) { 2105 ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); 2106 if (ret) 2107 goto err; 2108 } else { 2109 if (!cma_any_addr(cma_src_addr(id_priv))) { 2110 ret = cma_translate_addr(cma_src_addr(id_priv), 2111 &id->route.addr.dev_addr); 2112 if (ret) 2113 goto err; 2114 } 2115 } 2116 2117 id_priv->state = RDMA_CM_CONNECT; 2118 return id_priv; 2119 err: 2120 rdma_destroy_id(id); 2121 return NULL; 2122 } 2123 2124 static void cma_set_req_event_data(struct rdma_cm_event *event, 2125 struct ib_cm_req_event_param *req_data, 2126 void *private_data, int offset) 2127 { 2128 event->param.conn.private_data = (char *)private_data + offset; 2129 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; 2130 event->param.conn.responder_resources = req_data->responder_resources; 2131 event->param.conn.initiator_depth = req_data->initiator_depth; 2132 event->param.conn.flow_control = req_data->flow_control; 2133 event->param.conn.retry_count = req_data->retry_count; 2134 event->param.conn.rnr_retry_count = req_data->rnr_retry_count; 2135 event->param.conn.srq = req_data->srq; 2136 event->param.conn.qp_num = req_data->remote_qpn; 2137 } 2138 2139 static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) 2140 { 2141 return (((ib_event->event == IB_CM_REQ_RECEIVED) && 2142 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || 2143 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && 2144 (id->qp_type == IB_QPT_UD)) || 2145 (!id->qp_type)); 2146 } 2147 2148 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 2149 { 2150 struct rdma_id_private 
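/*
 * cma_new_conn_id() and cma_new_udp_id() above manufacture a child
 * rdma_cm_id that inherits the listener's event handler and context and is
 * already placed in the RDMA_CM_CONNECT state; cma_req_handler() then hands
 * it to the listener as RDMA_CM_EVENT_CONNECT_REQUEST.  A hypothetical
 * listener callback (the "my_create_qp" helper is an illustrative
 * assumption) would typically accept or reject that child id:
 *
 *	static int
 *	my_listen_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
 *	{
 *		struct rdma_conn_param param;
 *
 *		if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
 *			return (0);
 *
 *		if (my_create_qp(id) != 0) {
 *			rdma_reject(id, NULL, 0);
 *			return (1);	(non-zero: the core destroys the child id)
 *		}
 *		memset(&param, 0, sizeof(param));
 *		param.responder_resources = event->param.conn.responder_resources;
 *		param.initiator_depth = event->param.conn.initiator_depth;
 *		return (rdma_accept(id, &param));
 *	}
 */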
*listen_id, *conn_id = NULL; 2151 struct rdma_cm_event event; 2152 struct ifnet *net_dev; 2153 int offset, ret; 2154 2155 listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); 2156 if (IS_ERR(listen_id)) 2157 return PTR_ERR(listen_id); 2158 2159 if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { 2160 ret = -EINVAL; 2161 goto net_dev_put; 2162 } 2163 2164 mutex_lock(&listen_id->handler_mutex); 2165 if (listen_id->state != RDMA_CM_LISTEN) { 2166 ret = -ECONNABORTED; 2167 goto err1; 2168 } 2169 2170 memset(&event, 0, sizeof event); 2171 offset = cma_user_data_offset(listen_id); 2172 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2173 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 2174 conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); 2175 event.param.ud.private_data = (char *)ib_event->private_data + offset; 2176 event.param.ud.private_data_len = 2177 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 2178 } else { 2179 conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); 2180 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 2181 ib_event->private_data, offset); 2182 } 2183 if (!conn_id) { 2184 ret = -ENOMEM; 2185 goto err1; 2186 } 2187 2188 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2189 ret = cma_acquire_dev(conn_id, listen_id); 2190 if (ret) 2191 goto err2; 2192 2193 conn_id->cm_id.ib = cm_id; 2194 cm_id->context = conn_id; 2195 cm_id->cm_handler = cma_ib_handler; 2196 2197 /* 2198 * Protect against the user destroying conn_id from another thread 2199 * until we're done accessing it. 2200 */ 2201 atomic_inc(&conn_id->refcount); 2202 ret = conn_id->id.event_handler(&conn_id->id, &event); 2203 if (ret) 2204 goto err3; 2205 /* 2206 * Acquire mutex to prevent user executing rdma_destroy_id() 2207 * while we're accessing the cm_id. 2208 */ 2209 mutex_lock(&lock); 2210 if (cma_comp(conn_id, RDMA_CM_CONNECT) && 2211 (conn_id->id.qp_type != IB_QPT_UD)) 2212 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 2213 mutex_unlock(&lock); 2214 mutex_unlock(&conn_id->handler_mutex); 2215 mutex_unlock(&listen_id->handler_mutex); 2216 cma_deref_id(conn_id); 2217 if (net_dev) 2218 dev_put(net_dev); 2219 return 0; 2220 2221 err3: 2222 cma_deref_id(conn_id); 2223 /* Destroy the CM ID by returning a non-zero value. 
*/ 2224 conn_id->cm_id.ib = NULL; 2225 err2: 2226 cma_exch(conn_id, RDMA_CM_DESTROYING); 2227 mutex_unlock(&conn_id->handler_mutex); 2228 err1: 2229 mutex_unlock(&listen_id->handler_mutex); 2230 if (conn_id) 2231 rdma_destroy_id(&conn_id->id); 2232 2233 net_dev_put: 2234 if (net_dev) 2235 dev_put(net_dev); 2236 2237 return ret; 2238 } 2239 2240 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 2241 { 2242 if (addr->sa_family == AF_IB) 2243 return ((struct sockaddr_ib *) addr)->sib_sid; 2244 2245 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2246 } 2247 EXPORT_SYMBOL(rdma_get_service_id); 2248 2249 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2250 { 2251 struct rdma_id_private *id_priv = iw_id->context; 2252 struct rdma_cm_event event; 2253 int ret = 0; 2254 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2255 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2256 2257 mutex_lock(&id_priv->handler_mutex); 2258 if (id_priv->state != RDMA_CM_CONNECT) 2259 goto out; 2260 2261 memset(&event, 0, sizeof event); 2262 switch (iw_event->event) { 2263 case IW_CM_EVENT_CLOSE: 2264 event.event = RDMA_CM_EVENT_DISCONNECTED; 2265 break; 2266 case IW_CM_EVENT_CONNECT_REPLY: 2267 memcpy(cma_src_addr(id_priv), laddr, 2268 rdma_addr_size(laddr)); 2269 memcpy(cma_dst_addr(id_priv), raddr, 2270 rdma_addr_size(raddr)); 2271 switch (iw_event->status) { 2272 case 0: 2273 event.event = RDMA_CM_EVENT_ESTABLISHED; 2274 event.param.conn.initiator_depth = iw_event->ird; 2275 event.param.conn.responder_resources = iw_event->ord; 2276 break; 2277 case -ECONNRESET: 2278 case -ECONNREFUSED: 2279 event.event = RDMA_CM_EVENT_REJECTED; 2280 break; 2281 case -ETIMEDOUT: 2282 event.event = RDMA_CM_EVENT_UNREACHABLE; 2283 break; 2284 default: 2285 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2286 break; 2287 } 2288 break; 2289 case IW_CM_EVENT_ESTABLISHED: 2290 event.event = RDMA_CM_EVENT_ESTABLISHED; 2291 event.param.conn.initiator_depth = iw_event->ird; 2292 event.param.conn.responder_resources = iw_event->ord; 2293 break; 2294 default: 2295 BUG_ON(1); 2296 } 2297 2298 event.status = iw_event->status; 2299 event.param.conn.private_data = iw_event->private_data; 2300 event.param.conn.private_data_len = iw_event->private_data_len; 2301 ret = id_priv->id.event_handler(&id_priv->id, &event); 2302 if (ret) { 2303 /* Destroy the CM ID by returning a non-zero value. 
*/ 2304 id_priv->cm_id.iw = NULL; 2305 cma_exch(id_priv, RDMA_CM_DESTROYING); 2306 mutex_unlock(&id_priv->handler_mutex); 2307 rdma_destroy_id(&id_priv->id); 2308 return ret; 2309 } 2310 2311 out: 2312 mutex_unlock(&id_priv->handler_mutex); 2313 return ret; 2314 } 2315 2316 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2317 struct iw_cm_event *iw_event) 2318 { 2319 struct rdma_cm_id *new_cm_id; 2320 struct rdma_id_private *listen_id, *conn_id; 2321 struct rdma_cm_event event; 2322 int ret = -ECONNABORTED; 2323 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2324 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2325 2326 listen_id = cm_id->context; 2327 2328 mutex_lock(&listen_id->handler_mutex); 2329 if (listen_id->state != RDMA_CM_LISTEN) 2330 goto out; 2331 2332 /* Create a new RDMA id for the new IW CM ID */ 2333 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2334 listen_id->id.event_handler, 2335 listen_id->id.context, 2336 RDMA_PS_TCP, IB_QPT_RC); 2337 if (IS_ERR(new_cm_id)) { 2338 ret = -ENOMEM; 2339 goto out; 2340 } 2341 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2342 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2343 conn_id->state = RDMA_CM_CONNECT; 2344 2345 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2346 if (ret) { 2347 mutex_unlock(&conn_id->handler_mutex); 2348 rdma_destroy_id(new_cm_id); 2349 goto out; 2350 } 2351 2352 ret = cma_acquire_dev(conn_id, listen_id); 2353 if (ret) { 2354 mutex_unlock(&conn_id->handler_mutex); 2355 rdma_destroy_id(new_cm_id); 2356 goto out; 2357 } 2358 2359 conn_id->cm_id.iw = cm_id; 2360 cm_id->context = conn_id; 2361 cm_id->cm_handler = cma_iw_handler; 2362 2363 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2364 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2365 2366 memset(&event, 0, sizeof event); 2367 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2368 event.param.conn.private_data = iw_event->private_data; 2369 event.param.conn.private_data_len = iw_event->private_data_len; 2370 event.param.conn.initiator_depth = iw_event->ird; 2371 event.param.conn.responder_resources = iw_event->ord; 2372 2373 /* 2374 * Protect against the user destroying conn_id from another thread 2375 * until we're done accessing it. 
2376 */ 2377 atomic_inc(&conn_id->refcount); 2378 ret = conn_id->id.event_handler(&conn_id->id, &event); 2379 if (ret) { 2380 /* User wants to destroy the CM ID */ 2381 conn_id->cm_id.iw = NULL; 2382 cma_exch(conn_id, RDMA_CM_DESTROYING); 2383 mutex_unlock(&conn_id->handler_mutex); 2384 cma_deref_id(conn_id); 2385 rdma_destroy_id(&conn_id->id); 2386 goto out; 2387 } 2388 2389 mutex_unlock(&conn_id->handler_mutex); 2390 cma_deref_id(conn_id); 2391 2392 out: 2393 mutex_unlock(&listen_id->handler_mutex); 2394 return ret; 2395 } 2396 2397 static int cma_ib_listen(struct rdma_id_private *id_priv) 2398 { 2399 struct sockaddr *addr; 2400 struct ib_cm_id *id; 2401 __be64 svc_id; 2402 2403 addr = cma_src_addr(id_priv); 2404 svc_id = rdma_get_service_id(&id_priv->id, addr); 2405 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2406 if (IS_ERR(id)) 2407 return PTR_ERR(id); 2408 id_priv->cm_id.ib = id; 2409 2410 return 0; 2411 } 2412 2413 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2414 { 2415 int ret; 2416 struct iw_cm_id *id; 2417 2418 id = iw_create_cm_id(id_priv->id.device, 2419 iw_conn_req_handler, 2420 id_priv); 2421 if (IS_ERR(id)) 2422 return PTR_ERR(id); 2423 2424 id->tos = id_priv->tos; 2425 id_priv->cm_id.iw = id; 2426 2427 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2428 rdma_addr_size(cma_src_addr(id_priv))); 2429 2430 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2431 2432 if (ret) { 2433 iw_destroy_cm_id(id_priv->cm_id.iw); 2434 id_priv->cm_id.iw = NULL; 2435 } 2436 2437 return ret; 2438 } 2439 2440 static int cma_listen_handler(struct rdma_cm_id *id, 2441 struct rdma_cm_event *event) 2442 { 2443 struct rdma_id_private *id_priv = id->context; 2444 2445 id->context = id_priv->id.context; 2446 id->event_handler = id_priv->id.event_handler; 2447 return id_priv->id.event_handler(id, event); 2448 } 2449 2450 static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2451 struct cma_device *cma_dev) 2452 { 2453 struct rdma_id_private *dev_id_priv; 2454 struct rdma_cm_id *id; 2455 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 2456 int ret; 2457 2458 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2459 return; 2460 2461 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2462 id_priv->id.qp_type); 2463 if (IS_ERR(id)) 2464 return; 2465 2466 dev_id_priv = container_of(id, struct rdma_id_private, id); 2467 2468 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2469 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2470 rdma_addr_size(cma_src_addr(id_priv))); 2471 2472 _cma_attach_to_dev(dev_id_priv, cma_dev); 2473 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2474 atomic_inc(&id_priv->refcount); 2475 dev_id_priv->internal_id = 1; 2476 dev_id_priv->afonly = id_priv->afonly; 2477 2478 ret = rdma_listen(id, id_priv->backlog); 2479 if (ret) 2480 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2481 ret, cma_dev->device->name); 2482 } 2483 2484 static void cma_listen_on_all(struct rdma_id_private *id_priv) 2485 { 2486 struct cma_device *cma_dev; 2487 2488 mutex_lock(&lock); 2489 list_add_tail(&id_priv->list, &listen_any_list); 2490 list_for_each_entry(cma_dev, &dev_list, list) 2491 cma_listen_on_dev(id_priv, cma_dev); 2492 mutex_unlock(&lock); 2493 } 2494 2495 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2496 { 2497 struct rdma_id_private *id_priv; 2498 2499 id_priv = container_of(id, struct rdma_id_private, id); 2500 
id_priv->tos = (u8) tos; 2501 } 2502 EXPORT_SYMBOL(rdma_set_service_type); 2503 2504 /** 2505 * rdma_set_ack_timeout() - Set the ack timeout of QP associated 2506 * with a connection identifier. 2507 * @id: Communication identifier to associated with service type. 2508 * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec. 2509 * 2510 * This function should be called before rdma_connect() on active side, 2511 * and on passive side before rdma_accept(). It is applicable to primary 2512 * path only. The timeout will affect the local side of the QP, it is not 2513 * negotiated with remote side and zero disables the timer. 2514 * 2515 * Return: 0 for success 2516 */ 2517 int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout) 2518 { 2519 struct rdma_id_private *id_priv; 2520 2521 if (id->qp_type != IB_QPT_RC) 2522 return -EINVAL; 2523 2524 id_priv = container_of(id, struct rdma_id_private, id); 2525 id_priv->timeout = timeout; 2526 id_priv->timeout_set = true; 2527 2528 return 0; 2529 } 2530 EXPORT_SYMBOL(rdma_set_ack_timeout); 2531 2532 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, 2533 void *context) 2534 { 2535 struct cma_work *work = context; 2536 struct rdma_route *route; 2537 2538 route = &work->id->id.route; 2539 2540 if (!status) { 2541 route->num_paths = 1; 2542 *route->path_rec = *path_rec; 2543 } else { 2544 work->old_state = RDMA_CM_ROUTE_QUERY; 2545 work->new_state = RDMA_CM_ADDR_RESOLVED; 2546 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2547 work->event.status = status; 2548 } 2549 2550 queue_work(cma_wq, &work->work); 2551 } 2552 2553 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2554 struct cma_work *work) 2555 { 2556 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2557 struct ib_sa_path_rec path_rec; 2558 ib_sa_comp_mask comp_mask; 2559 struct sockaddr_in6 *sin6; 2560 struct sockaddr_ib *sib; 2561 2562 memset(&path_rec, 0, sizeof path_rec); 2563 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2564 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2565 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2566 path_rec.numb_path = 1; 2567 path_rec.reversible = 1; 2568 path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 2569 2570 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2571 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2572 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2573 2574 switch (cma_family(id_priv)) { 2575 case AF_INET: 2576 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2577 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2578 break; 2579 case AF_INET6: 2580 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2581 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2582 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2583 break; 2584 case AF_IB: 2585 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2586 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2587 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2588 break; 2589 } 2590 2591 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2592 id_priv->id.port_num, &path_rec, 2593 comp_mask, timeout_ms, 2594 GFP_KERNEL, cma_query_handler, 2595 work, &id_priv->query); 2596 2597 return (id_priv->query_id < 0) ? 
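/*
 * Worked example for rdma_set_ack_timeout() above (the values are
 * illustrative): the local ACK timeout is 4.096 usec * 2^timeout, so
 * timeout = 14 gives 4.096 usec * 16384, roughly 67 ms, while timeout = 0
 * disables the timer.  The call only applies to RC ids and has to precede
 * rdma_connect() on the active side or rdma_accept() on the passive side:
 *
 *	error = rdma_set_ack_timeout(id, 14);
 *	if (error == 0)
 *		error = rdma_connect(id, &conn_param);
 */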
id_priv->query_id : 0; 2598 } 2599 2600 static void cma_work_handler(struct work_struct *_work) 2601 { 2602 struct cma_work *work = container_of(_work, struct cma_work, work); 2603 struct rdma_id_private *id_priv = work->id; 2604 int destroy = 0; 2605 2606 mutex_lock(&id_priv->handler_mutex); 2607 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2608 goto out; 2609 2610 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2611 cma_exch(id_priv, RDMA_CM_DESTROYING); 2612 destroy = 1; 2613 } 2614 out: 2615 mutex_unlock(&id_priv->handler_mutex); 2616 cma_deref_id(id_priv); 2617 if (destroy) 2618 rdma_destroy_id(&id_priv->id); 2619 kfree(work); 2620 } 2621 2622 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2623 { 2624 struct rdma_route *route = &id_priv->id.route; 2625 struct cma_work *work; 2626 int ret; 2627 2628 work = kzalloc(sizeof *work, GFP_KERNEL); 2629 if (!work) 2630 return -ENOMEM; 2631 2632 work->id = id_priv; 2633 INIT_WORK(&work->work, cma_work_handler); 2634 work->old_state = RDMA_CM_ROUTE_QUERY; 2635 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2636 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2637 2638 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2639 if (!route->path_rec) { 2640 ret = -ENOMEM; 2641 goto err1; 2642 } 2643 2644 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2645 if (ret) 2646 goto err2; 2647 2648 return 0; 2649 err2: 2650 kfree(route->path_rec); 2651 route->path_rec = NULL; 2652 err1: 2653 kfree(work); 2654 return ret; 2655 } 2656 2657 int rdma_set_ib_paths(struct rdma_cm_id *id, 2658 struct ib_sa_path_rec *path_rec, int num_paths) 2659 { 2660 struct rdma_id_private *id_priv; 2661 int ret; 2662 2663 id_priv = container_of(id, struct rdma_id_private, id); 2664 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2665 RDMA_CM_ROUTE_RESOLVED)) 2666 return -EINVAL; 2667 2668 id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, 2669 GFP_KERNEL); 2670 if (!id->route.path_rec) { 2671 ret = -ENOMEM; 2672 goto err; 2673 } 2674 2675 id->route.num_paths = num_paths; 2676 return 0; 2677 err: 2678 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2679 return ret; 2680 } 2681 EXPORT_SYMBOL(rdma_set_ib_paths); 2682 2683 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2684 { 2685 struct cma_work *work; 2686 2687 work = kzalloc(sizeof *work, GFP_KERNEL); 2688 if (!work) 2689 return -ENOMEM; 2690 2691 work->id = id_priv; 2692 INIT_WORK(&work->work, cma_work_handler); 2693 work->old_state = RDMA_CM_ROUTE_QUERY; 2694 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2695 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2696 queue_work(cma_wq, &work->work); 2697 return 0; 2698 } 2699 2700 static int iboe_tos_to_sl(struct ifnet *ndev, int tos) 2701 { 2702 /* get service level, SL, from IPv4 type of service, TOS */ 2703 int sl = (tos >> 5) & 0x7; 2704 2705 /* final mappings are done by the vendor specific drivers */ 2706 return sl; 2707 } 2708 2709 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2710 unsigned long supported_gids, 2711 enum ib_gid_type default_gid) 2712 { 2713 if ((network_type == RDMA_NETWORK_IPV4 || 2714 network_type == RDMA_NETWORK_IPV6) && 2715 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2716 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2717 2718 return default_gid; 2719 } 2720 2721 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2722 { 2723 struct rdma_route *route = 
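/*
 * iboe_tos_to_sl() above keeps only the three IP precedence bits, so a ToS
 * byte of 0xb8 (DSCP EF) maps to (0xb8 >> 5) & 0x7 = 5, i.e. SL 5, with the
 * final mapping still left to the vendor driver.  Both this mapping and the
 * IB qos_class in cma_query_ib_route() read id_priv->tos during route
 * resolution, so a consumer sets the service type first (sketch only, the
 * ToS value and timeout are illustrative assumptions):
 *
 *	rdma_set_service_type(id, 0xb8);
 *	error = rdma_resolve_route(id, 2000);
 */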
&id_priv->id.route; 2724 struct rdma_addr *addr = &route->addr; 2725 struct cma_work *work; 2726 int ret; 2727 struct ifnet *ndev = NULL; 2728 2729 2730 work = kzalloc(sizeof *work, GFP_KERNEL); 2731 if (!work) 2732 return -ENOMEM; 2733 2734 work->id = id_priv; 2735 INIT_WORK(&work->work, cma_work_handler); 2736 2737 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2738 if (!route->path_rec) { 2739 ret = -ENOMEM; 2740 goto err1; 2741 } 2742 2743 route->num_paths = 1; 2744 2745 if (addr->dev_addr.bound_dev_if) { 2746 unsigned long supported_gids; 2747 2748 ndev = dev_get_by_index(addr->dev_addr.net, 2749 addr->dev_addr.bound_dev_if); 2750 if (!ndev) { 2751 ret = -ENODEV; 2752 goto err2; 2753 } 2754 2755 route->path_rec->net = ndev->if_vnet; 2756 route->path_rec->ifindex = ndev->if_index; 2757 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2758 id_priv->id.port_num); 2759 route->path_rec->gid_type = 2760 cma_route_gid_type(addr->dev_addr.network, 2761 supported_gids, 2762 id_priv->gid_type); 2763 } 2764 if (!ndev) { 2765 ret = -ENODEV; 2766 goto err2; 2767 } 2768 2769 memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); 2770 2771 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2772 &route->path_rec->sgid); 2773 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2774 &route->path_rec->dgid); 2775 2776 /* Use the hint from IP Stack to select GID Type */ 2777 if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2778 route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2779 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2780 /* TODO: get the hoplimit from the inet/inet6 device */ 2781 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2782 else 2783 route->path_rec->hop_limit = 1; 2784 route->path_rec->reversible = 1; 2785 route->path_rec->pkey = cpu_to_be16(0xffff); 2786 route->path_rec->mtu_selector = IB_SA_EQ; 2787 route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); 2788 route->path_rec->traffic_class = id_priv->tos; 2789 route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); 2790 route->path_rec->rate_selector = IB_SA_EQ; 2791 route->path_rec->rate = iboe_get_rate(ndev); 2792 dev_put(ndev); 2793 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2794 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2795 if (!route->path_rec->mtu) { 2796 ret = -EINVAL; 2797 goto err2; 2798 } 2799 2800 work->old_state = RDMA_CM_ROUTE_QUERY; 2801 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2802 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2803 work->event.status = 0; 2804 2805 queue_work(cma_wq, &work->work); 2806 2807 return 0; 2808 2809 err2: 2810 kfree(route->path_rec); 2811 route->path_rec = NULL; 2812 err1: 2813 kfree(work); 2814 return ret; 2815 } 2816 2817 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2818 { 2819 struct rdma_id_private *id_priv; 2820 int ret; 2821 2822 id_priv = container_of(id, struct rdma_id_private, id); 2823 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2824 return -EINVAL; 2825 2826 atomic_inc(&id_priv->refcount); 2827 if (rdma_cap_ib_sa(id->device, id->port_num)) 2828 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2829 else if (rdma_protocol_roce(id->device, id->port_num)) 2830 ret = cma_resolve_iboe_route(id_priv); 2831 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2832 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2833 else 2834 ret = 
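/*
 * For RoCE, cma_resolve_iboe_route() above builds the path record locally
 * instead of querying an SA: cma_route_gid_type() prefers a RoCEv2
 * (IB_GID_TYPE_ROCE_UDP_ENCAP) GID when the resolved address is plain
 * IPv4/IPv6 and the port advertises that GID type, otherwise it falls back
 * to the configured default.  The DMAC comes from the resolved dev_addr,
 * while the MTU and rate are taken from the bound ifnet, and completion is
 * still asynchronous, delivered as RDMA_CM_EVENT_ROUTE_RESOLVED through
 * cma_work_handler().
 */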
-ENOSYS; 2835 2836 if (ret) 2837 goto err; 2838 2839 return 0; 2840 err: 2841 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2842 cma_deref_id(id_priv); 2843 return ret; 2844 } 2845 EXPORT_SYMBOL(rdma_resolve_route); 2846 2847 static void cma_set_loopback(struct sockaddr *addr) 2848 { 2849 switch (addr->sa_family) { 2850 case AF_INET: 2851 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2852 break; 2853 case AF_INET6: 2854 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2855 0, 0, 0, htonl(1)); 2856 break; 2857 default: 2858 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2859 0, 0, 0, htonl(1)); 2860 break; 2861 } 2862 } 2863 2864 static int cma_bind_loopback(struct rdma_id_private *id_priv) 2865 { 2866 struct cma_device *cma_dev, *cur_dev; 2867 struct ib_port_attr port_attr; 2868 union ib_gid gid; 2869 u16 pkey; 2870 int ret; 2871 u8 p; 2872 2873 cma_dev = NULL; 2874 mutex_lock(&lock); 2875 list_for_each_entry(cur_dev, &dev_list, list) { 2876 if (cma_family(id_priv) == AF_IB && 2877 !rdma_cap_ib_cm(cur_dev->device, 1)) 2878 continue; 2879 2880 if (!cma_dev) 2881 cma_dev = cur_dev; 2882 2883 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2884 if (!ib_query_port(cur_dev->device, p, &port_attr) && 2885 port_attr.state == IB_PORT_ACTIVE) { 2886 cma_dev = cur_dev; 2887 goto port_found; 2888 } 2889 } 2890 } 2891 2892 if (!cma_dev) { 2893 ret = -ENODEV; 2894 goto out; 2895 } 2896 2897 p = 1; 2898 2899 port_found: 2900 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2901 if (ret) 2902 goto out; 2903 2904 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2905 if (ret) 2906 goto out; 2907 2908 id_priv->id.route.addr.dev_addr.dev_type = 2909 (rdma_protocol_ib(cma_dev->device, p)) ? 
2910 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2911 2912 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2913 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2914 id_priv->id.port_num = p; 2915 cma_attach_to_dev(id_priv, cma_dev); 2916 cma_set_loopback(cma_src_addr(id_priv)); 2917 out: 2918 mutex_unlock(&lock); 2919 return ret; 2920 } 2921 2922 static void addr_handler(int status, struct sockaddr *src_addr, 2923 struct rdma_dev_addr *dev_addr, void *context) 2924 { 2925 struct rdma_id_private *id_priv = context; 2926 struct rdma_cm_event event; 2927 2928 memset(&event, 0, sizeof event); 2929 mutex_lock(&id_priv->handler_mutex); 2930 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2931 RDMA_CM_ADDR_RESOLVED)) 2932 goto out; 2933 2934 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2935 if (!status && !id_priv->cma_dev) 2936 status = cma_acquire_dev(id_priv, NULL); 2937 2938 if (status) { 2939 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2940 RDMA_CM_ADDR_BOUND)) 2941 goto out; 2942 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2943 event.status = status; 2944 } else 2945 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2946 2947 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2948 cma_exch(id_priv, RDMA_CM_DESTROYING); 2949 mutex_unlock(&id_priv->handler_mutex); 2950 cma_deref_id(id_priv); 2951 rdma_destroy_id(&id_priv->id); 2952 return; 2953 } 2954 out: 2955 mutex_unlock(&id_priv->handler_mutex); 2956 cma_deref_id(id_priv); 2957 } 2958 2959 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2960 { 2961 struct cma_work *work; 2962 union ib_gid gid; 2963 int ret; 2964 2965 work = kzalloc(sizeof *work, GFP_KERNEL); 2966 if (!work) 2967 return -ENOMEM; 2968 2969 if (!id_priv->cma_dev) { 2970 ret = cma_bind_loopback(id_priv); 2971 if (ret) 2972 goto err; 2973 } 2974 2975 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2976 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2977 2978 work->id = id_priv; 2979 INIT_WORK(&work->work, cma_work_handler); 2980 work->old_state = RDMA_CM_ADDR_QUERY; 2981 work->new_state = RDMA_CM_ADDR_RESOLVED; 2982 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2983 queue_work(cma_wq, &work->work); 2984 return 0; 2985 err: 2986 kfree(work); 2987 return ret; 2988 } 2989 2990 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2991 { 2992 struct cma_work *work; 2993 int ret; 2994 2995 work = kzalloc(sizeof *work, GFP_KERNEL); 2996 if (!work) 2997 return -ENOMEM; 2998 2999 if (!id_priv->cma_dev) { 3000 ret = cma_resolve_ib_dev(id_priv); 3001 if (ret) 3002 goto err; 3003 } 3004 3005 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 3006 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 3007 3008 work->id = id_priv; 3009 INIT_WORK(&work->work, cma_work_handler); 3010 work->old_state = RDMA_CM_ADDR_QUERY; 3011 work->new_state = RDMA_CM_ADDR_RESOLVED; 3012 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 3013 queue_work(cma_wq, &work->work); 3014 return 0; 3015 err: 3016 kfree(work); 3017 return ret; 3018 } 3019 3020 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 3021 struct sockaddr *dst_addr) 3022 { 3023 if (!src_addr || !src_addr->sa_family) { 3024 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 3025 src_addr->sa_family = dst_addr->sa_family; 3026 if (dst_addr->sa_family == AF_INET6) { 3027 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 3028 struct sockaddr_in6 *dst_addr6 = (struct 
sockaddr_in6 *) dst_addr; 3029 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 3030 if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) || 3031 IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr)) 3032 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 3033 } else if (dst_addr->sa_family == AF_IB) { 3034 ((struct sockaddr_ib *) src_addr)->sib_pkey = 3035 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 3036 } 3037 } 3038 return rdma_bind_addr(id, src_addr); 3039 } 3040 3041 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 3042 struct sockaddr *dst_addr, int timeout_ms) 3043 { 3044 struct rdma_id_private *id_priv; 3045 int ret; 3046 3047 id_priv = container_of(id, struct rdma_id_private, id); 3048 if (id_priv->state == RDMA_CM_IDLE) { 3049 ret = cma_bind_addr(id, src_addr, dst_addr); 3050 if (ret) 3051 return ret; 3052 } 3053 3054 if (cma_family(id_priv) != dst_addr->sa_family) 3055 return -EINVAL; 3056 3057 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) 3058 return -EINVAL; 3059 3060 atomic_inc(&id_priv->refcount); 3061 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 3062 if (cma_any_addr(dst_addr)) { 3063 ret = cma_resolve_loopback(id_priv); 3064 } else { 3065 if (dst_addr->sa_family == AF_IB) { 3066 ret = cma_resolve_ib_addr(id_priv); 3067 } else { 3068 ret = cma_check_linklocal(&id->route.addr.dev_addr, dst_addr); 3069 if (ret) 3070 goto err; 3071 3072 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 3073 dst_addr, &id->route.addr.dev_addr, 3074 timeout_ms, addr_handler, id_priv); 3075 } 3076 } 3077 if (ret) 3078 goto err; 3079 3080 return 0; 3081 err: 3082 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 3083 cma_deref_id(id_priv); 3084 return ret; 3085 } 3086 EXPORT_SYMBOL(rdma_resolve_addr); 3087 3088 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 3089 { 3090 struct rdma_id_private *id_priv; 3091 unsigned long flags; 3092 int ret; 3093 3094 id_priv = container_of(id, struct rdma_id_private, id); 3095 spin_lock_irqsave(&id_priv->lock, flags); 3096 if (reuse || id_priv->state == RDMA_CM_IDLE) { 3097 id_priv->reuseaddr = reuse; 3098 ret = 0; 3099 } else { 3100 ret = -EINVAL; 3101 } 3102 spin_unlock_irqrestore(&id_priv->lock, flags); 3103 return ret; 3104 } 3105 EXPORT_SYMBOL(rdma_set_reuseaddr); 3106 3107 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 3108 { 3109 struct rdma_id_private *id_priv; 3110 unsigned long flags; 3111 int ret; 3112 3113 id_priv = container_of(id, struct rdma_id_private, id); 3114 spin_lock_irqsave(&id_priv->lock, flags); 3115 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 3116 id_priv->options |= (1 << CMA_OPTION_AFONLY); 3117 id_priv->afonly = afonly; 3118 ret = 0; 3119 } else { 3120 ret = -EINVAL; 3121 } 3122 spin_unlock_irqrestore(&id_priv->lock, flags); 3123 return ret; 3124 } 3125 EXPORT_SYMBOL(rdma_set_afonly); 3126 3127 static void cma_bind_port(struct rdma_bind_list *bind_list, 3128 struct rdma_id_private *id_priv) 3129 { 3130 struct sockaddr *addr; 3131 struct sockaddr_ib *sib; 3132 u64 sid, mask; 3133 __be16 port; 3134 3135 addr = cma_src_addr(id_priv); 3136 port = htons(bind_list->port); 3137 3138 switch (addr->sa_family) { 3139 case AF_INET: 3140 ((struct sockaddr_in *) addr)->sin_port = port; 3141 break; 3142 case AF_INET6: 3143 ((struct sockaddr_in6 *) addr)->sin6_port = port; 3144 break; 3145 case AF_IB: 3146 sib = (struct sockaddr_ib *) addr; 3147 sid = be64_to_cpu(sib->sib_sid); 3148 mask = 
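/*
 * rdma_resolve_addr() above is the first asynchronous step of an active-side
 * connection.  A hypothetical kernel consumer (the handler, context, vnet
 * pointer, destination and timeouts are illustrative assumptions, and the
 * sleep/wakeup synchronization between steps is elided) chains the calls off
 * its event handler:
 *
 *	id = rdma_create_id(net, my_cm_handler, ctx, RDMA_PS_TCP, IB_QPT_RC);
 *	if (IS_ERR(id))
 *		return (PTR_ERR(id));
 *	error = rdma_resolve_addr(id, NULL, (struct sockaddr *)&dst, 2000);
 *	...wait for RDMA_CM_EVENT_ADDR_RESOLVED...
 *	error = rdma_resolve_route(id, 2000);
 *	...wait for RDMA_CM_EVENT_ROUTE_RESOLVED...
 *	error = rdma_connect(id, &conn_param);
 *	...wait for RDMA_CM_EVENT_ESTABLISHED...
 */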
be64_to_cpu(sib->sib_sid_mask); 3149 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 3150 sib->sib_sid_mask = cpu_to_be64(~0ULL); 3151 break; 3152 } 3153 id_priv->bind_list = bind_list; 3154 hlist_add_head(&id_priv->node, &bind_list->owners); 3155 } 3156 3157 static int cma_alloc_port(enum rdma_port_space ps, 3158 struct rdma_id_private *id_priv, unsigned short snum) 3159 { 3160 struct rdma_bind_list *bind_list; 3161 int ret; 3162 3163 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 3164 if (!bind_list) 3165 return -ENOMEM; 3166 3167 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 3168 snum); 3169 if (ret < 0) 3170 goto err; 3171 3172 bind_list->ps = ps; 3173 bind_list->port = (unsigned short)ret; 3174 cma_bind_port(bind_list, id_priv); 3175 return 0; 3176 err: 3177 kfree(bind_list); 3178 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 3179 } 3180 3181 static int cma_alloc_any_port(enum rdma_port_space ps, 3182 struct rdma_id_private *id_priv) 3183 { 3184 static unsigned int last_used_port; 3185 int low, high, remaining; 3186 unsigned int rover; 3187 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 3188 u32 rand; 3189 3190 inet_get_local_port_range(net, &low, &high); 3191 remaining = (high - low) + 1; 3192 get_random_bytes(&rand, sizeof(rand)); 3193 rover = rand % remaining + low; 3194 retry: 3195 if (last_used_port != rover && 3196 !cma_ps_find(net, ps, (unsigned short)rover)) { 3197 int ret = cma_alloc_port(ps, id_priv, rover); 3198 /* 3199 * Remember previously used port number in order to avoid 3200 * re-using same port immediately after it is closed. 3201 */ 3202 if (!ret) 3203 last_used_port = rover; 3204 if (ret != -EADDRNOTAVAIL) 3205 return ret; 3206 } 3207 if (--remaining) { 3208 rover++; 3209 if ((rover < low) || (rover > high)) 3210 rover = low; 3211 goto retry; 3212 } 3213 return -EADDRNOTAVAIL; 3214 } 3215 3216 /* 3217 * Check that the requested port is available. This is called when trying to 3218 * bind to a specific port, or when trying to listen on a bound port. In 3219 * the latter case, the provided id_priv may already be on the bind_list, but 3220 * we still need to check that it's okay to start listening. 
3221 */ 3222 static int cma_check_port(struct rdma_bind_list *bind_list, 3223 struct rdma_id_private *id_priv, uint8_t reuseaddr) 3224 { 3225 struct rdma_id_private *cur_id; 3226 struct sockaddr *addr, *cur_addr; 3227 3228 addr = cma_src_addr(id_priv); 3229 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3230 if (id_priv == cur_id) 3231 continue; 3232 3233 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 3234 cur_id->reuseaddr) 3235 continue; 3236 3237 cur_addr = cma_src_addr(cur_id); 3238 if (id_priv->afonly && cur_id->afonly && 3239 (addr->sa_family != cur_addr->sa_family)) 3240 continue; 3241 3242 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3243 return -EADDRNOTAVAIL; 3244 3245 if (!cma_addr_cmp(addr, cur_addr)) 3246 return -EADDRINUSE; 3247 } 3248 return 0; 3249 } 3250 3251 static int cma_use_port(enum rdma_port_space ps, 3252 struct rdma_id_private *id_priv) 3253 { 3254 struct rdma_bind_list *bind_list; 3255 unsigned short snum; 3256 int ret; 3257 3258 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3259 if (snum < IPPORT_RESERVED && 3260 priv_check(curthread, PRIV_NETINET_BINDANY) != 0) 3261 return -EACCES; 3262 3263 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3264 if (!bind_list) { 3265 ret = cma_alloc_port(ps, id_priv, snum); 3266 } else { 3267 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3268 if (!ret) 3269 cma_bind_port(bind_list, id_priv); 3270 } 3271 return ret; 3272 } 3273 3274 static int cma_bind_listen(struct rdma_id_private *id_priv) 3275 { 3276 struct rdma_bind_list *bind_list = id_priv->bind_list; 3277 int ret = 0; 3278 3279 mutex_lock(&lock); 3280 if (bind_list->owners.first->next) 3281 ret = cma_check_port(bind_list, id_priv, 0); 3282 mutex_unlock(&lock); 3283 return ret; 3284 } 3285 3286 static enum rdma_port_space cma_select_inet_ps( 3287 struct rdma_id_private *id_priv) 3288 { 3289 switch (id_priv->id.ps) { 3290 case RDMA_PS_TCP: 3291 case RDMA_PS_UDP: 3292 case RDMA_PS_IPOIB: 3293 case RDMA_PS_IB: 3294 case RDMA_PS_SDP: 3295 return id_priv->id.ps; 3296 default: 3297 3298 return 0; 3299 } 3300 } 3301 3302 static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) 3303 { 3304 enum rdma_port_space ps = 0; 3305 struct sockaddr_ib *sib; 3306 u64 sid_ps, mask, sid; 3307 3308 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3309 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3310 sid = be64_to_cpu(sib->sib_sid) & mask; 3311 3312 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3313 sid_ps = RDMA_IB_IP_PS_IB; 3314 ps = RDMA_PS_IB; 3315 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3316 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3317 sid_ps = RDMA_IB_IP_PS_TCP; 3318 ps = RDMA_PS_TCP; 3319 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3320 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3321 sid_ps = RDMA_IB_IP_PS_UDP; 3322 ps = RDMA_PS_UDP; 3323 } 3324 3325 if (ps) { 3326 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3327 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3328 be64_to_cpu(sib->sib_sid_mask)); 3329 } 3330 return ps; 3331 } 3332 3333 static int cma_get_port(struct rdma_id_private *id_priv) 3334 { 3335 enum rdma_port_space ps; 3336 int ret; 3337 3338 if (cma_family(id_priv) != AF_IB) 3339 ps = cma_select_inet_ps(id_priv); 3340 else 3341 ps = cma_select_ib_ps(id_priv); 3342 if (!ps) 3343 return -EPROTONOSUPPORT; 3344 3345 mutex_lock(&lock); 3346 
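/*
 * Port spaces and service IDs: for IP addresses rdma_get_service_id()
 * earlier packs the port space into the upper bits and the port into the
 * low 16 bits, so, assuming RDMA_PS_TCP is 0x0106 as defined in rdma_cm.h,
 * binding TCP port 5000 produces service ID (0x0106ULL << 16) | 0x1388 =
 * 0x01061388.  For AF_IB addresses cma_select_ib_ps() above works in the
 * other direction: it masks sib_sid with RDMA_IB_IP_PS_MASK to recover the
 * embedded port space, and cma_bind_port() then ORs the selected port
 * number into the low 16 bits of the service ID.
 */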
if (cma_any_port(cma_src_addr(id_priv))) 3347 ret = cma_alloc_any_port(ps, id_priv); 3348 else 3349 ret = cma_use_port(ps, id_priv); 3350 mutex_unlock(&lock); 3351 3352 return ret; 3353 } 3354 3355 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3356 struct sockaddr *addr) 3357 { 3358 #ifdef INET6 3359 struct sockaddr_in6 sin6; 3360 3361 if (addr->sa_family != AF_INET6) 3362 return 0; 3363 3364 sin6 = *(struct sockaddr_in6 *)addr; 3365 3366 if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) || 3367 IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) { 3368 bool failure; 3369 3370 CURVNET_SET_QUIET(dev_addr->net); 3371 failure = sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0; 3372 CURVNET_RESTORE(); 3373 3374 /* check if IPv6 scope ID is not set */ 3375 if (failure) 3376 return -EINVAL; 3377 dev_addr->bound_dev_if = sin6.sin6_scope_id; 3378 } 3379 #endif 3380 return 0; 3381 } 3382 3383 int rdma_listen(struct rdma_cm_id *id, int backlog) 3384 { 3385 struct rdma_id_private *id_priv; 3386 int ret; 3387 3388 id_priv = container_of(id, struct rdma_id_private, id); 3389 if (id_priv->state == RDMA_CM_IDLE) { 3390 id->route.addr.src_addr.ss_family = AF_INET; 3391 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3392 if (ret) 3393 return ret; 3394 } 3395 3396 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3397 return -EINVAL; 3398 3399 if (id_priv->reuseaddr) { 3400 ret = cma_bind_listen(id_priv); 3401 if (ret) 3402 goto err; 3403 } 3404 3405 id_priv->backlog = backlog; 3406 if (id->device) { 3407 if (rdma_cap_ib_cm(id->device, 1)) { 3408 ret = cma_ib_listen(id_priv); 3409 if (ret) 3410 goto err; 3411 } else if (rdma_cap_iw_cm(id->device, 1)) { 3412 ret = cma_iw_listen(id_priv, backlog); 3413 if (ret) 3414 goto err; 3415 } else { 3416 ret = -ENOSYS; 3417 goto err; 3418 } 3419 } else 3420 cma_listen_on_all(id_priv); 3421 3422 return 0; 3423 err: 3424 id_priv->backlog = 0; 3425 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3426 return ret; 3427 } 3428 EXPORT_SYMBOL(rdma_listen); 3429 3430 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3431 { 3432 struct rdma_id_private *id_priv; 3433 int ret; 3434 3435 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3436 addr->sa_family != AF_IB) 3437 return -EAFNOSUPPORT; 3438 3439 id_priv = container_of(id, struct rdma_id_private, id); 3440 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3441 return -EINVAL; 3442 3443 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3444 if (ret) 3445 goto err1; 3446 3447 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3448 if (!cma_any_addr(addr)) { 3449 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3450 if (ret) 3451 goto err1; 3452 3453 ret = cma_acquire_dev(id_priv, NULL); 3454 if (ret) 3455 goto err1; 3456 } 3457 3458 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3459 if (addr->sa_family == AF_INET) 3460 id_priv->afonly = 1; 3461 #ifdef INET6 3462 else if (addr->sa_family == AF_INET6) { 3463 CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net); 3464 id_priv->afonly = V_ip6_v6only; 3465 CURVNET_RESTORE(); 3466 } 3467 #endif 3468 } 3469 ret = cma_get_port(id_priv); 3470 if (ret) 3471 goto err2; 3472 3473 return 0; 3474 err2: 3475 if (id_priv->cma_dev) 3476 cma_release_dev(id_priv); 3477 err1: 3478 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3479 return ret; 3480 } 3481 EXPORT_SYMBOL(rdma_bind_addr); 3482 3483 static int sdp_format_hdr(struct sdp_hh *sdp_hdr, struct rdma_id_private 
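/*
 * rdma_bind_addr() and rdma_listen() above form the passive-side setup.  A
 * hypothetical server (the handler name, context, vnet pointer and port are
 * illustrative assumptions) binds a wildcard address and listens; with no
 * device attached to the id, cma_listen_on_all() mirrors the listen onto
 * every cma_device:
 *
 *	struct sockaddr_in sin;
 *
 *	memset(&sin, 0, sizeof(sin));
 *	sin.sin_len = sizeof(sin);
 *	sin.sin_family = AF_INET;
 *	sin.sin_port = htons(7471);
 *	id = rdma_create_id(net, my_listen_handler, ctx, RDMA_PS_TCP,
 *	    IB_QPT_RC);
 *	error = rdma_bind_addr(id, (struct sockaddr *)&sin);
 *	if (error == 0)
 *		error = rdma_listen(id, 10);
 */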
*id_priv) 3484 { 3485 /* 3486 * XXXCEM: CMA just sets the version itself rather than relying on 3487 * passed in packet to have the major version set. Should we? 3488 */ 3489 if (sdp_get_majv(sdp_hdr->majv_minv) != SDP_MAJ_VERSION) 3490 return -EINVAL; 3491 3492 if (cma_family(id_priv) == AF_INET) { 3493 struct sockaddr_in *src4, *dst4; 3494 3495 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3496 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3497 3498 sdp_set_ip_ver(sdp_hdr, 4); 3499 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3500 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3501 sdp_hdr->port = src4->sin_port; 3502 } else if (cma_family(id_priv) == AF_INET6) { 3503 struct sockaddr_in6 *src6, *dst6; 3504 3505 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3506 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3507 3508 sdp_set_ip_ver(sdp_hdr, 6); 3509 sdp_hdr->src_addr.ip6 = src6->sin6_addr; 3510 sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; 3511 sdp_hdr->port = src6->sin6_port; 3512 cma_ip6_clear_scope_id(&sdp_hdr->src_addr.ip6); 3513 cma_ip6_clear_scope_id(&sdp_hdr->dst_addr.ip6); 3514 } else 3515 return -EAFNOSUPPORT; 3516 return 0; 3517 } 3518 3519 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3520 { 3521 struct cma_hdr *cma_hdr; 3522 3523 if (id_priv->id.ps == RDMA_PS_SDP) 3524 return sdp_format_hdr(hdr, id_priv); 3525 3526 cma_hdr = hdr; 3527 cma_hdr->cma_version = CMA_VERSION; 3528 if (cma_family(id_priv) == AF_INET) { 3529 struct sockaddr_in *src4, *dst4; 3530 3531 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3532 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3533 3534 cma_set_ip_ver(cma_hdr, 4); 3535 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3536 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3537 cma_hdr->port = src4->sin_port; 3538 } else if (cma_family(id_priv) == AF_INET6) { 3539 struct sockaddr_in6 *src6, *dst6; 3540 3541 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3542 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3543 3544 cma_set_ip_ver(cma_hdr, 6); 3545 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3546 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3547 cma_hdr->port = src6->sin6_port; 3548 cma_ip6_clear_scope_id(&cma_hdr->src_addr.ip6); 3549 cma_ip6_clear_scope_id(&cma_hdr->dst_addr.ip6); 3550 } 3551 return 0; 3552 } 3553 3554 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3555 struct ib_cm_event *ib_event) 3556 { 3557 struct rdma_id_private *id_priv = cm_id->context; 3558 struct rdma_cm_event event; 3559 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3560 int ret = 0; 3561 3562 mutex_lock(&id_priv->handler_mutex); 3563 if (id_priv->state != RDMA_CM_CONNECT) 3564 goto out; 3565 3566 memset(&event, 0, sizeof event); 3567 switch (ib_event->event) { 3568 case IB_CM_SIDR_REQ_ERROR: 3569 event.event = RDMA_CM_EVENT_UNREACHABLE; 3570 event.status = -ETIMEDOUT; 3571 break; 3572 case IB_CM_SIDR_REP_RECEIVED: 3573 event.param.ud.private_data = ib_event->private_data; 3574 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3575 if (rep->status != IB_SIDR_SUCCESS) { 3576 event.event = RDMA_CM_EVENT_UNREACHABLE; 3577 event.status = ib_event->param.sidr_rep_rcvd.status; 3578 break; 3579 } 3580 ret = cma_set_qkey(id_priv, rep->qkey); 3581 if (ret) { 3582 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3583 event.status = ret; 3584 break; 3585 } 3586 ret = ib_init_ah_from_path(id_priv->id.device, 3587 id_priv->id.port_num, 3588 
id_priv->id.route.path_rec, 3589 &event.param.ud.ah_attr); 3590 if (ret) { 3591 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3592 event.status = ret; 3593 break; 3594 } 3595 event.param.ud.qp_num = rep->qpn; 3596 event.param.ud.qkey = rep->qkey; 3597 event.event = RDMA_CM_EVENT_ESTABLISHED; 3598 event.status = 0; 3599 break; 3600 default: 3601 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3602 ib_event->event); 3603 goto out; 3604 } 3605 3606 ret = id_priv->id.event_handler(&id_priv->id, &event); 3607 if (ret) { 3608 /* Destroy the CM ID by returning a non-zero value. */ 3609 id_priv->cm_id.ib = NULL; 3610 cma_exch(id_priv, RDMA_CM_DESTROYING); 3611 mutex_unlock(&id_priv->handler_mutex); 3612 rdma_destroy_id(&id_priv->id); 3613 return ret; 3614 } 3615 out: 3616 mutex_unlock(&id_priv->handler_mutex); 3617 return ret; 3618 } 3619 3620 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3621 struct rdma_conn_param *conn_param) 3622 { 3623 struct ib_cm_sidr_req_param req; 3624 struct ib_cm_id *id; 3625 void *private_data; 3626 int offset, ret; 3627 3628 memset(&req, 0, sizeof req); 3629 offset = cma_user_data_offset(id_priv); 3630 req.private_data_len = offset + conn_param->private_data_len; 3631 if (req.private_data_len < conn_param->private_data_len) 3632 return -EINVAL; 3633 3634 if (req.private_data_len) { 3635 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3636 if (!private_data) 3637 return -ENOMEM; 3638 } else { 3639 private_data = NULL; 3640 } 3641 3642 if (conn_param->private_data && conn_param->private_data_len) 3643 memcpy((char *)private_data + offset, conn_param->private_data, 3644 conn_param->private_data_len); 3645 3646 if (private_data) { 3647 ret = cma_format_hdr(private_data, id_priv); 3648 if (ret) 3649 goto out; 3650 req.private_data = private_data; 3651 } 3652 3653 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3654 id_priv); 3655 if (IS_ERR(id)) { 3656 ret = PTR_ERR(id); 3657 goto out; 3658 } 3659 id_priv->cm_id.ib = id; 3660 3661 req.path = id_priv->id.route.path_rec; 3662 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3663 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3664 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3665 3666 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3667 if (ret) { 3668 ib_destroy_cm_id(id_priv->cm_id.ib); 3669 id_priv->cm_id.ib = NULL; 3670 } 3671 out: 3672 kfree(private_data); 3673 return ret; 3674 } 3675 3676 static int cma_connect_ib(struct rdma_id_private *id_priv, 3677 struct rdma_conn_param *conn_param) 3678 { 3679 struct ib_cm_req_param req; 3680 struct rdma_route *route; 3681 void *private_data; 3682 struct ib_cm_id *id; 3683 int offset, ret; 3684 3685 memset(&req, 0, sizeof req); 3686 offset = cma_user_data_offset(id_priv); 3687 req.private_data_len = offset + conn_param->private_data_len; 3688 if (req.private_data_len < conn_param->private_data_len) 3689 return -EINVAL; 3690 3691 if (req.private_data_len) { 3692 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3693 if (!private_data) 3694 return -ENOMEM; 3695 } else { 3696 private_data = NULL; 3697 } 3698 3699 if (conn_param->private_data && conn_param->private_data_len) 3700 memcpy((char *)private_data + offset, conn_param->private_data, 3701 conn_param->private_data_len); 3702 3703 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3704 if (IS_ERR(id)) { 3705 ret = PTR_ERR(id); 3706 goto out; 3707 } 3708 id_priv->cm_id.ib = id; 3709 3710 route = &id_priv->id.route; 3711 if (private_data) 
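/*
 * Both cma_resolve_ib_udp() above and cma_connect_ib() here reserve
 * cma_user_data_offset() bytes at the front of the CM private data and have
 * cma_format_hdr() fill that space with the addressing header, so the
 * consumer's payload shrinks by that amount.  Assuming a 36-byte struct
 * cma_hdr and an IB_CM_REQ_PRIVATE_DATA_SIZE of 92, an IP-addressed RC
 * connect request can carry at most 56 bytes of conn_param->private_data;
 * anything larger is rejected rather than truncated.
 */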
{ 3712 ret = cma_format_hdr(private_data, id_priv); 3713 if (ret) 3714 goto out; 3715 req.private_data = private_data; 3716 } 3717 3718 req.primary_path = &route->path_rec[0]; 3719 if (route->num_paths == 2) 3720 req.alternate_path = &route->path_rec[1]; 3721 3722 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3723 req.qp_num = id_priv->qp_num; 3724 req.qp_type = id_priv->id.qp_type; 3725 req.starting_psn = id_priv->seq_num; 3726 req.responder_resources = conn_param->responder_resources; 3727 req.initiator_depth = conn_param->initiator_depth; 3728 req.flow_control = conn_param->flow_control; 3729 req.retry_count = min_t(u8, 7, conn_param->retry_count); 3730 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3731 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3732 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3733 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3734 req.srq = id_priv->srq ? 1 : 0; 3735 3736 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 3737 out: 3738 if (ret && !IS_ERR(id)) { 3739 ib_destroy_cm_id(id); 3740 id_priv->cm_id.ib = NULL; 3741 } 3742 3743 kfree(private_data); 3744 return ret; 3745 } 3746 3747 static int cma_connect_iw(struct rdma_id_private *id_priv, 3748 struct rdma_conn_param *conn_param) 3749 { 3750 struct iw_cm_id *cm_id; 3751 int ret; 3752 struct iw_cm_conn_param iw_param; 3753 3754 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 3755 if (IS_ERR(cm_id)) 3756 return PTR_ERR(cm_id); 3757 3758 cm_id->tos = id_priv->tos; 3759 id_priv->cm_id.iw = cm_id; 3760 3761 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 3762 rdma_addr_size(cma_src_addr(id_priv))); 3763 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 3764 rdma_addr_size(cma_dst_addr(id_priv))); 3765 3766 ret = cma_modify_qp_rtr(id_priv, conn_param); 3767 if (ret) 3768 goto out; 3769 3770 if (conn_param) { 3771 iw_param.ord = conn_param->initiator_depth; 3772 iw_param.ird = conn_param->responder_resources; 3773 iw_param.private_data = conn_param->private_data; 3774 iw_param.private_data_len = conn_param->private_data_len; 3775 iw_param.qpn = id_priv->id.qp ? 
id_priv->qp_num : conn_param->qp_num; 3776 } else { 3777 memset(&iw_param, 0, sizeof iw_param); 3778 iw_param.qpn = id_priv->qp_num; 3779 } 3780 ret = iw_cm_connect(cm_id, &iw_param); 3781 out: 3782 if (ret) { 3783 iw_destroy_cm_id(cm_id); 3784 id_priv->cm_id.iw = NULL; 3785 } 3786 return ret; 3787 } 3788 3789 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3790 { 3791 struct rdma_id_private *id_priv; 3792 int ret; 3793 3794 id_priv = container_of(id, struct rdma_id_private, id); 3795 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3796 return -EINVAL; 3797 3798 if (!id->qp) { 3799 id_priv->qp_num = conn_param->qp_num; 3800 id_priv->srq = conn_param->srq; 3801 } 3802 3803 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3804 if (id->qp_type == IB_QPT_UD) 3805 ret = cma_resolve_ib_udp(id_priv, conn_param); 3806 else 3807 ret = cma_connect_ib(id_priv, conn_param); 3808 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3809 ret = cma_connect_iw(id_priv, conn_param); 3810 else 3811 ret = -ENOSYS; 3812 if (ret) 3813 goto err; 3814 3815 return 0; 3816 err: 3817 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3818 return ret; 3819 } 3820 EXPORT_SYMBOL(rdma_connect); 3821 3822 static int cma_accept_ib(struct rdma_id_private *id_priv, 3823 struct rdma_conn_param *conn_param) 3824 { 3825 struct ib_cm_rep_param rep; 3826 int ret; 3827 3828 ret = cma_modify_qp_rtr(id_priv, conn_param); 3829 if (ret) 3830 goto out; 3831 3832 ret = cma_modify_qp_rts(id_priv, conn_param); 3833 if (ret) 3834 goto out; 3835 3836 memset(&rep, 0, sizeof rep); 3837 rep.qp_num = id_priv->qp_num; 3838 rep.starting_psn = id_priv->seq_num; 3839 rep.private_data = conn_param->private_data; 3840 rep.private_data_len = conn_param->private_data_len; 3841 rep.responder_resources = conn_param->responder_resources; 3842 rep.initiator_depth = conn_param->initiator_depth; 3843 rep.failover_accepted = 0; 3844 rep.flow_control = conn_param->flow_control; 3845 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3846 rep.srq = id_priv->srq ? 
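/*
 * rdma_connect() above requires a route-resolved id and, when the consumer
 * created its own QP through rdma_create_qp(), takes the QP number and SRQ
 * setting from the id rather than from conn_param.  A minimal RC setup
 * (the parameter values are illustrative assumptions) looks like:
 *
 *	struct rdma_conn_param conn_param;
 *
 *	memset(&conn_param, 0, sizeof(conn_param));
 *	conn_param.responder_resources = 1;
 *	conn_param.initiator_depth = 1;
 *	conn_param.retry_count = 7;
 *	conn_param.rnr_retry_count = 7;
 *	error = rdma_connect(id, &conn_param);
 */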
1 : 0; 3847 3848 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3849 out: 3850 return ret; 3851 } 3852 3853 static int cma_accept_iw(struct rdma_id_private *id_priv, 3854 struct rdma_conn_param *conn_param) 3855 { 3856 struct iw_cm_conn_param iw_param; 3857 int ret; 3858 3859 ret = cma_modify_qp_rtr(id_priv, conn_param); 3860 if (ret) 3861 return ret; 3862 3863 iw_param.ord = conn_param->initiator_depth; 3864 iw_param.ird = conn_param->responder_resources; 3865 iw_param.private_data = conn_param->private_data; 3866 iw_param.private_data_len = conn_param->private_data_len; 3867 if (id_priv->id.qp) { 3868 iw_param.qpn = id_priv->qp_num; 3869 } else 3870 iw_param.qpn = conn_param->qp_num; 3871 3872 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3873 } 3874 3875 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3876 enum ib_cm_sidr_status status, u32 qkey, 3877 const void *private_data, int private_data_len) 3878 { 3879 struct ib_cm_sidr_rep_param rep; 3880 int ret; 3881 3882 memset(&rep, 0, sizeof rep); 3883 rep.status = status; 3884 if (status == IB_SIDR_SUCCESS) { 3885 ret = cma_set_qkey(id_priv, qkey); 3886 if (ret) 3887 return ret; 3888 rep.qp_num = id_priv->qp_num; 3889 rep.qkey = id_priv->qkey; 3890 } 3891 rep.private_data = private_data; 3892 rep.private_data_len = private_data_len; 3893 3894 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3895 } 3896 3897 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3898 { 3899 struct rdma_id_private *id_priv; 3900 int ret; 3901 3902 id_priv = container_of(id, struct rdma_id_private, id); 3903 3904 id_priv->owner = task_pid_nr(current); 3905 3906 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3907 return -EINVAL; 3908 3909 if (!id->qp && conn_param) { 3910 id_priv->qp_num = conn_param->qp_num; 3911 id_priv->srq = conn_param->srq; 3912 } 3913 3914 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3915 if (id->qp_type == IB_QPT_UD) { 3916 if (conn_param) 3917 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3918 conn_param->qkey, 3919 conn_param->private_data, 3920 conn_param->private_data_len); 3921 else 3922 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3923 0, NULL, 0); 3924 } else { 3925 if (conn_param) 3926 ret = cma_accept_ib(id_priv, conn_param); 3927 else 3928 ret = cma_rep_recv(id_priv); 3929 } 3930 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3931 ret = cma_accept_iw(id_priv, conn_param); 3932 else 3933 ret = -ENOSYS; 3934 3935 if (ret) 3936 goto reject; 3937 3938 return 0; 3939 reject: 3940 cma_modify_qp_err(id_priv); 3941 rdma_reject(id, NULL, 0); 3942 return ret; 3943 } 3944 EXPORT_SYMBOL(rdma_accept); 3945 3946 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3947 { 3948 struct rdma_id_private *id_priv; 3949 int ret; 3950 3951 id_priv = container_of(id, struct rdma_id_private, id); 3952 if (!id_priv->cm_id.ib) 3953 return -EINVAL; 3954 3955 switch (id->device->node_type) { 3956 case RDMA_NODE_IB_CA: 3957 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3958 break; 3959 default: 3960 ret = 0; 3961 break; 3962 } 3963 return ret; 3964 } 3965 EXPORT_SYMBOL(rdma_notify); 3966 3967 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3968 u8 private_data_len) 3969 { 3970 struct rdma_id_private *id_priv; 3971 int ret; 3972 3973 id_priv = container_of(id, struct rdma_id_private, id); 3974 if (!id_priv->cm_id.ib) 3975 return -EINVAL; 3976 3977 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3978 if (id->qp_type == IB_QPT_UD) 3979 ret = cma_send_sidr_rep(id_priv, 
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
		u8 private_data_len)
{
	struct rdma_id_private *id_priv;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!id_priv->cm_id.ib)
		return -EINVAL;

	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		if (id->qp_type == IB_QPT_UD)
			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
						private_data, private_data_len);
		else
			ret = ib_send_cm_rej(id_priv->cm_id.ib,
					     IB_CM_REJ_CONSUMER_DEFINED, NULL,
					     0, private_data, private_data_len);
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		ret = iw_cm_reject(id_priv->cm_id.iw,
				   private_data, private_data_len);
	} else
		ret = -ENOSYS;

	return ret;
}
EXPORT_SYMBOL(rdma_reject);

int rdma_disconnect(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!id_priv->cm_id.ib)
		return -EINVAL;

	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		ret = cma_modify_qp_err(id_priv);
		if (ret)
			goto out;
		/* Initiate or respond to a disconnect. */
		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
	} else
		ret = -EINVAL;

out:
	return ret;
}
EXPORT_SYMBOL(rdma_disconnect);

static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc = multicast->context;
	struct rdma_cm_event event;
	int ret = 0;

	id_priv = mc->id_priv;
	mutex_lock(&id_priv->handler_mutex);
	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
		goto out;

	if (!status)
		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
	mutex_lock(&id_priv->qp_mutex);
	if (!status && id_priv->id.qp)
		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
					 be16_to_cpu(multicast->rec.mlid));
	mutex_unlock(&id_priv->qp_mutex);

	memset(&event, 0, sizeof event);
	event.status = status;
	event.param.ud.private_data = mc->context;
	if (!status) {
		struct rdma_dev_addr *dev_addr =
			&id_priv->id.route.addr.dev_addr;
		struct ifnet *ndev =
			dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
		enum ib_gid_type gid_type =
			id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
			rdma_start_port(id_priv->cma_dev->device)];

		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
		ret = ib_init_ah_from_mcmember(id_priv->id.device,
					       id_priv->id.port_num,
					       &multicast->rec,
					       ndev, gid_type,
					       &event.param.ud.ah_attr);
		if (ret)
			event.event = RDMA_CM_EVENT_MULTICAST_ERROR;

		event.param.ud.qp_num = 0xFFFFFF;
		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
		if (ndev)
			dev_put(ndev);
	} else
		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;

	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		cma_exch(id_priv, RDMA_CM_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return 0;
	}

out:
	mutex_unlock(&id_priv->handler_mutex);
	return 0;
}

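/*
 * Derive the multicast GID for an IB join from the consumer supplied
 * socket address: a wildcard address maps to the zero MGID, an IPv6
 * address in the SA-assigned ff1x:a01b::/32 range is used verbatim,
 * an AF_IB address carries the MGID directly, and other IPv4/IPv6
 * addresses are mapped IPoIB-style, with byte 7 set to the RDMA CM
 * signature for the UDP port space.
 */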
static void cma_set_mgid(struct rdma_id_private *id_priv,
			 struct sockaddr *addr, union ib_gid *mgid)
{
	unsigned char mc_map[MAX_ADDR_LEN];
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if ((addr->sa_family == AF_INET6) &&
		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
								 0xFF10A01B)) {
		/* IPv6 address is an SA assigned MGID. */
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else if (addr->sa_family == AF_IB) {
		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	} else {
		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	}
}

static void cma_query_sa_classport_info_cb(int status,
					   struct ib_class_port_info *rec,
					   void *context)
{
	struct class_port_info_context *cb_ctx = context;

	WARN_ON(!context);

	if (status || !rec) {
		pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
			 cb_ctx->device->name, cb_ctx->port_num, status);
		goto out;
	}

	memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));

out:
	complete(&cb_ctx->done);
}

static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
				       struct ib_class_port_info *class_port_info)
{
	struct class_port_info_context *cb_ctx;
	int ret;

	cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
	if (!cb_ctx)
		return -ENOMEM;

	cb_ctx->device = device;
	cb_ctx->class_port_info = class_port_info;
	cb_ctx->port_num = port_num;
	init_completion(&cb_ctx->done);

	ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
					     CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
					     GFP_KERNEL, cma_query_sa_classport_info_cb,
					     cb_ctx, &cb_ctx->sa_query);
	if (ret < 0) {
		pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
		       device->name, port_num, ret);
		goto out;
	}

	wait_for_completion(&cb_ctx->done);

out:
	kfree(cb_ctx);
	return ret;
}

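/*
 * IB (non-RoCE) multicast join: build an MCMemberRecord from the
 * resolved device address and the MGID computed by cma_set_mgid(),
 * then hand the join to the SA client.  A send-only full-member join
 * is attempted only when the SM advertises support for it in its
 * ClassPortInfo capability mask.
 */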
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
				 struct cma_multicast *mc)
{
	struct ib_sa_mcmember_rec rec;
	struct ib_class_port_info class_port_info;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	ib_sa_comp_mask comp_mask;
	int ret;

	ib_addr_get_mgid(dev_addr, &rec.mgid);
	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
				     &rec.mgid, &rec);
	if (ret)
		return ret;

	ret = cma_set_qkey(id_priv, 0);
	if (ret)
		return ret;

	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
	rec.qkey = cpu_to_be32(id_priv->qkey);
	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
	rec.join_state = mc->join_state;

	if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
		ret = cma_query_sa_classport_info(id_priv->id.device,
						  id_priv->id.port_num,
						  &class_port_info);

		if (ret)
			return ret;

		if (!(ib_get_cpi_capmask2(&class_port_info) &
		      IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
			pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
				"RDMA CM: SM doesn't support Send Only Full Member option\n",
				id_priv->id.device->name, id_priv->id.port_num);
			return -EOPNOTSUPP;
		}
	}

	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;

	if (id_priv->id.ps == RDMA_PS_IPOIB)
		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
			     IB_SA_MCMEMBER_REC_RATE_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU_SELECTOR |
			     IB_SA_MCMEMBER_REC_MTU |
			     IB_SA_MCMEMBER_REC_HOP_LIMIT;

	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
						id_priv->id.port_num, &rec,
						comp_mask, GFP_KERNEL,
						cma_ib_mc_handler, mc);
	return PTR_ERR_OR_ZERO(mc->multicast.ib);
}

static void iboe_mcast_work_handler(struct work_struct *work)
{
	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
	struct cma_multicast *mc = mw->mc;
	struct ib_sa_multicast *m = mc->multicast.ib;

	mc->multicast.ib->context = mc;
	cma_ib_mc_handler(0, m);
	kref_put(&mc->mcref, release_mc);
	kfree(mw);
}

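/*
 * RoCE multicast GIDs are synthesized locally instead of being
 * obtained from an SA: a wildcard address maps to the zero MGID, an
 * IPv6 address is copied verbatim, and an IPv4 address is embedded
 * in a v4-mapped style GID whose first two bytes depend on the GID
 * type (0xff, 0x0e for RoCE v1; zero for RoCE v2/UDP encapsulation).
 */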
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
			      enum ib_gid_type gid_type)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else {
		mgid->raw[0] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
		mgid->raw[1] =
			(gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
		mgid->raw[2] = 0;
		mgid->raw[3] = 0;
		mgid->raw[4] = 0;
		mgid->raw[5] = 0;
		mgid->raw[6] = 0;
		mgid->raw[7] = 0;
		mgid->raw[8] = 0;
		mgid->raw[9] = 0;
		mgid->raw[10] = 0xff;
		mgid->raw[11] = 0xff;
		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
	}
}

static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
				   struct cma_multicast *mc)
{
	struct iboe_mcast_work *work;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int err = 0;
	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
	struct ifnet *ndev = NULL;
	enum ib_gid_type gid_type;
	bool send_only;

	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);

	if (cma_zero_addr((struct sockaddr *)&mc->addr))
		return -EINVAL;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
	if (!mc->multicast.ib) {
		err = -ENOMEM;
		goto out1;
	}

	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
		   rdma_start_port(id_priv->cma_dev->device)];
	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);

	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
	if (id_priv->id.ps == RDMA_PS_UDP)
		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);

	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	if (!ndev) {
		err = -ENODEV;
		goto out2;
	}
	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
	mc->multicast.ib->rec.hop_limit = 1;
	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);

	if (addr->sa_family == AF_INET || addr->sa_family == AF_INET6) {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			if (!send_only) {
				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
						    true);
				if (!err)
					mc->igmp_joined = true;
			}
		}
	} else {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}

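/*
 * rdma_join_multicast() below requires an id that is bound to a
 * device (address bound or resolved) and dispatches to the RoCE or
 * IB join path above.  A hypothetical UD consumer might drive it
 * roughly as follows (illustrative only, not part of this file;
 * mcast_addr, join_state and ctx are placeholders):
 *
 *	ret = rdma_join_multicast(id, mcast_addr, join_state, ctx);
 *	// wait for RDMA_CM_EVENT_MULTICAST_JOIN, then send to the
 *	// address handle, QPN and Q_Key reported in event.param.ud
 *	rdma_leave_multicast(id, mcast_addr);
 */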
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	if (!id->device)
		return -EINVAL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
		return -EINVAL;

	mc = kmalloc(sizeof *mc, GFP_KERNEL);
	if (!mc)
		return -ENOMEM;

	memcpy(&mc->addr, addr, rdma_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;
	mc->igmp_joined = false;
	mc->join_state = join_state;
	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);
	spin_unlock(&id_priv->lock);

	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&mc->mcref);
		ret = cma_iboe_join_multicast(id_priv, mc);
	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
		ret = cma_join_ib_multicast(id_priv, mc);
	else
		ret = -ENOSYS;

	if (ret) {
		spin_lock_irq(&id_priv->lock);
		list_del(&mc->list);
		spin_unlock_irq(&id_priv->lock);
		kfree(mc);
	}
	return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);

void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	spin_lock_irq(&id_priv->lock);
	list_for_each_entry(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
			list_del(&mc->list);
			spin_unlock_irq(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						be16_to_cpu(mc->multicast.ib->rec.mlid));

			BUG_ON(id_priv->cma_dev->device != id->device);

			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
				ib_sa_free_multicast(mc->multicast.ib);
				kfree(mc);
			} else if (rdma_protocol_roce(id->device, id->port_num)) {
				if (mc->igmp_joined) {
					struct rdma_dev_addr *dev_addr =
						&id->route.addr.dev_addr;
					struct ifnet *ndev = NULL;

					if (dev_addr->bound_dev_if)
						ndev = dev_get_by_index(dev_addr->net,
									dev_addr->bound_dev_if);
					if (ndev) {
						cma_igmp_send(ndev,
							      &mc->multicast.ib->rec.mgid,
							      false);
						dev_put(ndev);
					}
					mc->igmp_joined = false;
				}
				kref_put(&mc->mcref, release_mc);
			}
			return;
		}
	}
	spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);

static int
sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS)
{
	struct cma_device *cma_dev = arg1;
	const int port = arg2;
	char buf[64];
	int error;

	strlcpy(buf, ib_cache_gid_type_str(
	    cma_get_default_gid_type(cma_dev, port)), sizeof(buf));

	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		goto done;

	error = ib_cache_gid_parse_type_str(buf);
	if (error < 0) {
		error = EINVAL;
		goto done;
	}

	cma_set_default_gid_type(cma_dev, port, error);
	error = 0;
done:
	return (error);
}

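/*
 * Device hot-plug: for each port of a newly registered ib_device
 * pick a default GID type (preferring RoCE v2/UDP encapsulation when
 * the port supports it), start any wildcard listeners on the device,
 * and create a "default_roce_mode_port<N>" sysctl string, backed by
 * the handler above, so the default can be changed at run time.
 */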
static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	sysctl_ctx_init(&cma_dev->sysctl_ctx);

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type) {
		kfree(cma_dev);
		return;
	}
	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		unsigned long supported_gids;
		unsigned int default_gid_type;

		supported_gids = roce_gid_type_mask_support(device, i);

		if (WARN_ON(!supported_gids)) {
			/* set something valid */
			default_gid_type = 0;
		} else if (test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) {
			/* prefer RoCEv2, if supported */
			default_gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP;
		} else {
			default_gid_type = find_first_bit(&supported_gids,
							  BITS_PER_LONG);
		}
		cma_dev->default_gid_type[i - rdma_start_port(device)] =
			default_gid_type;
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		char buf[64];

		snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i);

		(void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx,
		    SYSCTL_CHILDREN(device->ports_parent->parent->oidp),
		    OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
		    cma_dev, i, &sysctl_cma_default_roce_mode, "A",
		    "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2");
	}
}

static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event;
	enum rdma_cm_state state;
	int ret = 0;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
	if (state == RDMA_CM_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mutex_lock(&id_priv->handler_mutex);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
		goto out;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

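/*
 * Device removal: detach every id bound to the departing cma_device,
 * deliver RDMA_CM_EVENT_DEVICE_REMOVAL to each consumer (internal
 * listen ids are destroyed directly), then drop the device reference
 * and wait for all outstanding references before the caller frees it.
 */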
static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}

static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	sysctl_ctx_free(&cma_dev->sysctl_ctx);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}

static void cma_init_vnet(void *arg)
{
	struct cma_pernet *pernet = &VNET(cma_pernet);

	idr_init(&pernet->tcp_ps);
	idr_init(&pernet->udp_ps);
	idr_init(&pernet->ipoib_ps);
	idr_init(&pernet->ib_ps);
	idr_init(&pernet->sdp_ps);
}
VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL);

static void cma_destroy_vnet(void *arg)
{
	struct cma_pernet *pernet = &VNET(cma_pernet);

	idr_destroy(&pernet->tcp_ps);
	idr_destroy(&pernet->udp_ps);
	idr_destroy(&pernet->ipoib_ps);
	idr_destroy(&pernet->ib_ps);
	idr_destroy(&pernet->sdp_ps);
}
VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL);

static int __init cma_init(void)
{
	int ret;

	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
	if (!cma_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);
	rdma_addr_register_client(&addr_client);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;

	cma_configfs_init();

	return 0;

err:
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(cma_wq);
	return ret;
}

static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	ib_unregister_client(&cma_client);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(cma_wq);
}

module_init_order(cma_init, SI_ORDER_FOURTH);
module_exit_order(cma_cleanup, SI_ORDER_FOURTH);