/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define	LINUXKPI_PARAM_PREFIX ibcore_

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/random.h>
#include <linux/rbtree.h>

#include <rdma/ib_cache.h>
#include "sa.h"

static void mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);

static struct ib_client mcast_client = {
	.name   = "ib_multicast",
	.add    = mcast_add_one,
	.remove = mcast_remove_one
};

static struct ib_sa_client	sa_client;
static struct workqueue_struct	*mcast_wq;
static union ib_gid mgid0;

struct mcast_device;

struct mcast_port {
	struct mcast_device	*dev;
	spinlock_t		lock;
	struct rb_root		table;
	atomic_t		refcount;
	struct completion	comp;
	u8			port_num;
};

struct mcast_device {
	struct ib_device	*device;
	struct ib_event_handler	event_handler;
	int			start_port;
	int			end_port;
	struct mcast_port	port[0];
};

enum mcast_state {
	MCAST_JOINING,
	MCAST_MEMBER,
	MCAST_ERROR,
};

enum mcast_group_state {
	MCAST_IDLE,
	MCAST_BUSY,
	MCAST_GROUP_ERROR,
	MCAST_PKEY_EVENT
};

enum {
	MCAST_INVALID_PKEY_INDEX = 0xFFFF
};

struct mcast_member;

struct mcast_group {
	struct ib_sa_mcmember_rec rec;
	struct rb_node		node;
	struct mcast_port	*port;
	spinlock_t		lock;
	struct work_struct	work;
	struct list_head	pending_list;
	struct list_head	active_list;
	struct mcast_member	*last_join;
	int			members[NUM_JOIN_MEMBERSHIP_TYPES];
	atomic_t		refcount;
	enum mcast_group_state	state;
	struct ib_sa_query	*query;
	u16			pkey_index;
	u8			leave_state;
	int			retries;
};

struct mcast_member {
	struct ib_sa_multicast	multicast;
	struct ib_sa_client	*client;
	struct mcast_group	*group;
	struct list_head	list;
	enum mcast_state	state;
	atomic_t		refcount;
	struct completion	comp;
};

static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
			 void *context);
static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
			  void *context);

static struct mcast_group *mcast_find(struct mcast_port *port,
				      union ib_gid *mgid)
{
	struct rb_node *node = port->table.rb_node;
	struct mcast_group *group;
	int ret;

	while (node) {
		group = rb_entry(node, struct mcast_group, node);
		ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
		if (!ret)
			return group;

		if (ret < 0)
			node = node->rb_left;
		else
			node = node->rb_right;
	}
	return NULL;
}

static struct mcast_group *mcast_insert(struct mcast_port *port,
					struct mcast_group *group,
					int allow_duplicates)
{
	struct rb_node **link = &port->table.rb_node;
	struct rb_node *parent = NULL;
	struct mcast_group *cur_group;
	int ret;

	while (*link) {
		parent = *link;
		cur_group = rb_entry(parent, struct mcast_group, node);

		ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
			     sizeof group->rec.mgid);
		if (ret < 0)
			link = &(*link)->rb_left;
		else if (ret > 0)
			link = &(*link)->rb_right;
		else if (allow_duplicates)
			link = &(*link)->rb_left;
		else
			return cur_group;
	}
	rb_link_node(&group->node, parent, link);
	rb_insert_color(&group->node, &port->table);
	return NULL;
}

static void deref_port(struct mcast_port *port)
{
	if (atomic_dec_and_test(&port->refcount))
		complete(&port->comp);
}

static void release_group(struct mcast_group *group)
{
	struct mcast_port *port = group->port;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	if (atomic_dec_and_test(&group->refcount)) {
		rb_erase(&group->node, &port->table);
		spin_unlock_irqrestore(&port->lock, flags);
		kfree(group);
		deref_port(port);
	} else
		spin_unlock_irqrestore(&port->lock, flags);
}

static void deref_member(struct mcast_member *member)
{
	if (atomic_dec_and_test(&member->refcount))
		complete(&member->comp);
}

static void queue_join(struct mcast_member *member)
{
	struct mcast_group *group = member->group;
	unsigned long flags;

	spin_lock_irqsave(&group->lock, flags);
	list_add_tail(&member->list, &group->pending_list);
	if (group->state == MCAST_IDLE) {
		group->state = MCAST_BUSY;
		atomic_inc(&group->refcount);
		queue_work(mcast_wq, &group->work);
	}
	spin_unlock_irqrestore(&group->lock, flags);
}

/*
 * A multicast group has four types of members: full member, non member,
 * send-only non member, and send-only full member.
 * We need to keep track of the number of members of each type based on
 * their join state.  Adjust the number of members that belong to the
 * specified join states.
 */
static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{
	int i;

	for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1)
		if (join_state & 0x1)
			group->members[i] += inc;
}

/*
 * If a multicast group has zero members left for a particular join state, but
 * the group is still a member with the SA, we need to leave that join state.
 * Determine which join states we still belong to, but that do not have any
 * active members.
 */
static u8 get_leave_state(struct mcast_group *group)
{
	u8 leave_state = 0;
	int i;

	for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++)
		if (!group->members[i])
			leave_state |= (0x1 << i);

	return leave_state & group->rec.join_state;
}
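
/*
 * Illustration: join_state is a bit mask, with bit i tracked in
 * group->members[i].  Per the IBA MCMemberRecord JoinState layout, bit 0 is
 * FullMember, bit 1 NonMember, bit 2 SendOnlyNonMember, and bit 3
 * SendOnlyFullMember.  For example, if one member joins with join_state 0x1
 * and another with 0x4, members[] is {1, 0, 1, 0} and rec.join_state is 0x5.
 * When the full member leaves, adjust_membership() drops members[0] to 0 and
 * get_leave_state() returns 0x1, so only that join state is removed from the
 * SA registration while the send-only membership is retained.
 */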

/*
 * Compare src_value against dst_value according to the given selector
 * (IB_SA_GT, IB_SA_LT, or IB_SA_EQ).  Returns 0 if the comparison passes,
 * or if the selector/value are not present in comp_mask; nonzero otherwise.
 */
static int check_selector(ib_sa_comp_mask comp_mask,
			  ib_sa_comp_mask selector_mask,
			  ib_sa_comp_mask value_mask,
			  u8 selector, u8 src_value, u8 dst_value)
{
	int err;

	if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
		return 0;

	switch (selector) {
	case IB_SA_GT:
		err = (src_value <= dst_value);
		break;
	case IB_SA_LT:
		err = (src_value >= dst_value);
		break;
	case IB_SA_EQ:
		err = (src_value != dst_value);
		break;
	default:
		err = 0;
		break;
	}

	return err;
}

static int cmp_rec(struct ib_sa_mcmember_rec *src,
		   struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
{
	/* MGID must already match */

	if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
	    memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
		return -EINVAL;
	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
			   IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
			   src->mtu, dst->mtu))
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
	    src->traffic_class != dst->traffic_class)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
		return -EINVAL;
	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
			   IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
			   src->rate, dst->rate))
		return -EINVAL;
	if (check_selector(comp_mask,
			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
			   dst->packet_life_time_selector,
			   src->packet_life_time, dst->packet_life_time))
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
	    src->flow_label != dst->flow_label)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
	    src->hop_limit != dst->hop_limit)
		return -EINVAL;
	if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
		return -EINVAL;

	/* join_state checked separately, proxy_join ignored */

	return 0;
}

static int send_join(struct mcast_group *group, struct mcast_member *member)
{
	struct mcast_port *port = group->port;
	int ret;

	group->last_join = member;
	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
				       port->port_num, IB_MGMT_METHOD_SET,
				       &member->multicast.rec,
				       member->multicast.comp_mask,
				       3000, GFP_KERNEL, join_handler, group,
				       &group->query);
	return (ret > 0) ? 0 : ret;
}
static int send_leave(struct mcast_group *group, u8 leave_state)
{
	struct mcast_port *port = group->port;
	struct ib_sa_mcmember_rec rec;
	int ret;

	rec = group->rec;
	rec.join_state = leave_state;
	group->leave_state = leave_state;

	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
				       port->port_num, IB_SA_METHOD_DELETE, &rec,
				       IB_SA_MCMEMBER_REC_MGID |
				       IB_SA_MCMEMBER_REC_PORT_GID |
				       IB_SA_MCMEMBER_REC_JOIN_STATE,
				       3000, GFP_KERNEL, leave_handler,
				       group, &group->query);
	return (ret > 0) ? 0 : ret;
}

static void join_group(struct mcast_group *group, struct mcast_member *member,
		       u8 join_state)
{
	member->state = MCAST_MEMBER;
	adjust_membership(group, join_state, 1);
	group->rec.join_state |= join_state;
	member->multicast.rec = group->rec;
	member->multicast.rec.join_state = join_state;
	list_move(&member->list, &group->active_list);
}

static int fail_join(struct mcast_group *group, struct mcast_member *member,
		     int status)
{
	spin_lock_irq(&group->lock);
	list_del_init(&member->list);
	spin_unlock_irq(&group->lock);
	return member->multicast.callback(status, &member->multicast);
}

static void process_group_error(struct mcast_group *group)
{
	struct mcast_member *member;
	int ret = 0;
	u16 pkey_index;

	if (group->state == MCAST_PKEY_EVENT)
		ret = ib_find_pkey(group->port->dev->device,
				   group->port->port_num,
				   be16_to_cpu(group->rec.pkey), &pkey_index);

	spin_lock_irq(&group->lock);
	if (group->state == MCAST_PKEY_EVENT && !ret &&
	    group->pkey_index == pkey_index)
		goto out;

	while (!list_empty(&group->active_list)) {
		member = list_entry(group->active_list.next,
				    struct mcast_member, list);
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		adjust_membership(group, member->multicast.rec.join_state, -1);
		member->state = MCAST_ERROR;
		spin_unlock_irq(&group->lock);

		ret = member->multicast.callback(-ENETRESET,
						 &member->multicast);
		deref_member(member);
		if (ret)
			ib_sa_free_multicast(&member->multicast);
		spin_lock_irq(&group->lock);
	}

	group->rec.join_state = 0;
out:
	group->state = MCAST_BUSY;
	spin_unlock_irq(&group->lock);
}
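
/*
 * Per-group state machine, run from the mcast_wq work item and re-entered
 * directly by join_handler()/leave_handler() when an SA query completes.
 * Pending members are processed one at a time: if the requested join state
 * is already covered by the group's SA registration, the member is attached
 * (or failed if its record conflicts); otherwise a new SA join is sent and
 * the handler returns until the query completes.  Group-wide errors are
 * funneled through process_group_error().  When the pending list drains,
 * any join states left without members are removed via send_leave(), or the
 * group goes back to MCAST_IDLE and its work reference is released.
 */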
static void mcast_work_handler(struct work_struct *work)
{
	struct mcast_group *group;
	struct mcast_member *member;
	struct ib_sa_multicast *multicast;
	int status, ret;
	u8 join_state;

	group = container_of(work, typeof(*group), work);
retest:
	spin_lock_irq(&group->lock);
	while (!list_empty(&group->pending_list) ||
	       (group->state != MCAST_BUSY)) {

		if (group->state != MCAST_BUSY) {
			spin_unlock_irq(&group->lock);
			process_group_error(group);
			goto retest;
		}

		member = list_entry(group->pending_list.next,
				    struct mcast_member, list);
		multicast = &member->multicast;
		join_state = multicast->rec.join_state;
		atomic_inc(&member->refcount);

		if (join_state == (group->rec.join_state & join_state)) {
			status = cmp_rec(&group->rec, &multicast->rec,
					 multicast->comp_mask);
			if (!status)
				join_group(group, member, join_state);
			else
				list_del_init(&member->list);
			spin_unlock_irq(&group->lock);
			ret = multicast->callback(status, multicast);
		} else {
			spin_unlock_irq(&group->lock);
			status = send_join(group, member);
			if (!status) {
				deref_member(member);
				return;
			}
			ret = fail_join(group, member, status);
		}

		deref_member(member);
		if (ret)
			ib_sa_free_multicast(&member->multicast);
		spin_lock_irq(&group->lock);
	}

	join_state = get_leave_state(group);
	if (join_state) {
		group->rec.join_state &= ~join_state;
		spin_unlock_irq(&group->lock);
		if (send_leave(group, join_state))
			goto retest;
	} else {
		group->state = MCAST_IDLE;
		spin_unlock_irq(&group->lock);
		release_group(group);
	}
}

/*
 * Fail a join request if it is still active - at the head of the pending queue.
 */
static void process_join_error(struct mcast_group *group, int status)
{
	struct mcast_member *member;
	int ret;

	spin_lock_irq(&group->lock);
	member = list_entry(group->pending_list.next,
			    struct mcast_member, list);
	if (group->last_join == member) {
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		spin_unlock_irq(&group->lock);
		ret = member->multicast.callback(status, &member->multicast);
		deref_member(member);
		if (ret)
			ib_sa_free_multicast(&member->multicast);
	} else
		spin_unlock_irq(&group->lock);
}

static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
			 void *context)
{
	struct mcast_group *group = context;
	u16 pkey_index = MCAST_INVALID_PKEY_INDEX;

	if (status)
		process_join_error(group, status);
	else {
		int mgids_changed, is_mgid0;

		ib_find_pkey(group->port->dev->device, group->port->port_num,
			     be16_to_cpu(rec->pkey), &pkey_index);

		spin_lock_irq(&group->port->lock);
		if (group->state == MCAST_BUSY &&
		    group->pkey_index == MCAST_INVALID_PKEY_INDEX)
			group->pkey_index = pkey_index;
		mgids_changed = memcmp(&rec->mgid, &group->rec.mgid,
				       sizeof(group->rec.mgid));
		group->rec = *rec;
		if (mgids_changed) {
			rb_erase(&group->node, &group->port->table);
			is_mgid0 = !memcmp(&mgid0, &group->rec.mgid,
					   sizeof(mgid0));
			mcast_insert(group->port, group, is_mgid0);
		}
		spin_unlock_irq(&group->port->lock);
	}
	mcast_work_handler(&group->work);
}

static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
			  void *context)
{
	struct mcast_group *group = context;

	if (status && group->retries > 0 &&
	    !send_leave(group, group->leave_state))
		group->retries--;
	else
		mcast_work_handler(&group->work);
}

static struct mcast_group *acquire_group(struct mcast_port *port,
					 union ib_gid *mgid, gfp_t gfp_mask)
{
	struct mcast_group *group, *cur_group;
	unsigned long flags;
	int is_mgid0;

	is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
	if (!is_mgid0) {
		spin_lock_irqsave(&port->lock, flags);
		group = mcast_find(port, mgid);
		if (group)
			goto found;
		spin_unlock_irqrestore(&port->lock, flags);
	}

	group = kzalloc(sizeof *group, gfp_mask);
	if (!group)
		return NULL;

	group->retries = 3;
	group->port = port;
	group->rec.mgid = *mgid;
	group->pkey_index = MCAST_INVALID_PKEY_INDEX;
	INIT_LIST_HEAD(&group->pending_list);
	INIT_LIST_HEAD(&group->active_list);
	INIT_WORK(&group->work, mcast_work_handler);
	spin_lock_init(&group->lock);

	spin_lock_irqsave(&port->lock, flags);
	cur_group = mcast_insert(port, group, is_mgid0);
	if (cur_group) {
		kfree(group);
		group = cur_group;
	} else
		atomic_inc(&port->refcount);
found:
	atomic_inc(&group->refcount);
	spin_unlock_irqrestore(&port->lock, flags);
	return group;
}

/*
 * We serialize all join requests to a single group to make our lives much
 * easier.  Otherwise, two users could try to join the same group
 * simultaneously, with different configurations, one could leave while the
 * join is in progress, etc., which makes locking around error recovery
 * difficult.
 */
struct ib_sa_multicast *
ib_sa_join_multicast(struct ib_sa_client *client,
		     struct ib_device *device, u8 port_num,
		     struct ib_sa_mcmember_rec *rec,
		     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
		     int (*callback)(int status,
				     struct ib_sa_multicast *multicast),
		     void *context)
{
	struct mcast_device *dev;
	struct mcast_member *member;
	struct ib_sa_multicast *multicast;
	int ret;

	dev = ib_get_client_data(device, &mcast_client);
	if (!dev)
		return ERR_PTR(-ENODEV);

	member = kmalloc(sizeof *member, gfp_mask);
	if (!member)
		return ERR_PTR(-ENOMEM);

	ib_sa_client_get(client);
	member->client = client;
	member->multicast.rec = *rec;
	member->multicast.comp_mask = comp_mask;
	member->multicast.callback = callback;
	member->multicast.context = context;
	init_completion(&member->comp);
	atomic_set(&member->refcount, 1);
	member->state = MCAST_JOINING;

	member->group = acquire_group(&dev->port[port_num - dev->start_port],
				      &rec->mgid, gfp_mask);
	if (!member->group) {
		ret = -ENOMEM;
		goto err;
	}

	/*
	 * The user will get the multicast structure in their callback.  They
	 * could then free the multicast structure before we can return from
	 * this routine.  So we save the pointer to return before queuing
	 * any callback.
	 */
	multicast = &member->multicast;
	queue_join(member);
	return multicast;

err:
	ib_sa_client_put(client);
	kfree(member);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_sa_join_multicast);
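
/*
 * Usage sketch (illustrative comment only; "my_sa_client", "my_callback",
 * "mgid", "port_gid", and the hard-coded port number are placeholders, not
 * part of this file).  A consumer fills in an MCMemberRecord, joins, and
 * later releases the membership with ib_sa_free_multicast():
 *
 *	static int my_callback(int status, struct ib_sa_multicast *mc)
 *	{
 *		if (!status)
 *			pr_info("joined, mlid 0x%x\n",
 *				be16_to_cpu(mc->rec.mlid));
 *		return 0;	// nonzero tells the core to free the membership
 *	}
 *
 *	struct ib_sa_mcmember_rec rec = {};
 *	struct ib_sa_multicast *mc;
 *
 *	rec.mgid = mgid;
 *	rec.port_gid = port_gid;
 *	rec.join_state = 1;	// full member
 *	mc = ib_sa_join_multicast(&my_sa_client, device, 1, &rec,
 *				  IB_SA_MCMEMBER_REC_MGID |
 *				  IB_SA_MCMEMBER_REC_PORT_GID |
 *				  IB_SA_MCMEMBER_REC_JOIN_STATE,
 *				  GFP_KERNEL, my_callback, NULL);
 *	if (IS_ERR(mc))
 *		return PTR_ERR(mc);
 *	...
 *	ib_sa_free_multicast(mc);
 */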
void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
{
	struct mcast_member *member;
	struct mcast_group *group;

	member = container_of(multicast, struct mcast_member, multicast);
	group = member->group;

	spin_lock_irq(&group->lock);
	if (member->state == MCAST_MEMBER)
		adjust_membership(group, multicast->rec.join_state, -1);

	list_del_init(&member->list);

	if (group->state == MCAST_IDLE) {
		group->state = MCAST_BUSY;
		spin_unlock_irq(&group->lock);
		/* Continue to hold reference on group until callback */
		queue_work(mcast_wq, &group->work);
	} else {
		spin_unlock_irq(&group->lock);
		release_group(group);
	}

	deref_member(member);
	wait_for_completion(&member->comp);
	ib_sa_client_put(member->client);
	kfree(member);
}
EXPORT_SYMBOL(ib_sa_free_multicast);

int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
{
	struct mcast_device *dev;
	struct mcast_port *port;
	struct mcast_group *group;
	unsigned long flags;
	int ret = 0;

	dev = ib_get_client_data(device, &mcast_client);
	if (!dev)
		return -ENODEV;

	port = &dev->port[port_num - dev->start_port];
	spin_lock_irqsave(&port->lock, flags);
	group = mcast_find(port, mgid);
	if (group)
		*rec = group->rec;
	else
		ret = -EADDRNOTAVAIL;
	spin_unlock_irqrestore(&port->lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_sa_get_mcmember_rec);

int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
			     struct ib_sa_mcmember_rec *rec,
			     struct net_device *ndev,
			     enum ib_gid_type gid_type,
			     struct ib_ah_attr *ah_attr)
{
	int ret;
	u16 gid_index;

	/* GID table is not based on the netdevice for IB link layer,
	 * so ignore ndev during search.
	 */
	if (rdma_protocol_ib(device, port_num))
		ndev = NULL;
	else if (!rdma_protocol_roce(device, port_num))
		return -EINVAL;

	ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
					 gid_type, port_num,
					 ndev,
					 &gid_index);
	if (ret)
		return ret;

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->dlid = be16_to_cpu(rec->mlid);
	ah_attr->sl = rec->sl;
	ah_attr->port_num = port_num;
	ah_attr->static_rate = rec->rate;

	ah_attr->ah_flags = IB_AH_GRH;
	ah_attr->grh.dgid = rec->mgid;

	ah_attr->grh.sgid_index = (u8) gid_index;
	ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
	ah_attr->grh.hop_limit = rec->hop_limit;
	ah_attr->grh.traffic_class = rec->traffic_class;

	return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
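
/*
 * Usage sketch (illustrative comment only; "pd" and the IB_GID_TYPE_IB choice
 * are assumptions about the caller's context).  After a successful join, the
 * record delivered in the join callback can be turned into an address handle
 * for posting sends to the group:
 *
 *	struct ib_ah_attr ah_attr;
 *	struct ib_ah *ah;
 *
 *	if (!ib_init_ah_from_mcmember(device, port_num, &mc->rec, NULL,
 *				      IB_GID_TYPE_IB, &ah_attr)) {
 *		ah = ib_create_ah(pd, &ah_attr);
 *		if (IS_ERR(ah))
 *			return PTR_ERR(ah);
 *	}
 */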
static void mcast_groups_event(struct mcast_port *port,
			       enum mcast_group_state state)
{
	struct mcast_group *group;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	for (node = rb_first(&port->table); node; node = rb_next(node)) {
		group = rb_entry(node, struct mcast_group, node);
		spin_lock(&group->lock);
		if (group->state == MCAST_IDLE) {
			atomic_inc(&group->refcount);
			queue_work(mcast_wq, &group->work);
		}
		if (group->state != MCAST_GROUP_ERROR)
			group->state = state;
		spin_unlock(&group->lock);
	}
	spin_unlock_irqrestore(&port->lock, flags);
}

static void mcast_event_handler(struct ib_event_handler *handler,
				struct ib_event *event)
{
	struct mcast_device *dev;
	int index;

	dev = container_of(handler, struct mcast_device, event_handler);
	if (!rdma_cap_ib_mcast(dev->device, event->element.port_num))
		return;

	index = event->element.port_num - dev->start_port;

	switch (event->event) {
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
		break;
	case IB_EVENT_PKEY_CHANGE:
		mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT);
		break;
	default:
		break;
	}
}

static void mcast_add_one(struct ib_device *device)
{
	struct mcast_device *dev;
	struct mcast_port *port;
	int i;
	int count = 0;

	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
		      GFP_KERNEL);
	if (!dev)
		return;

	dev->start_port = rdma_start_port(device);
	dev->end_port = rdma_end_port(device);

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		if (!rdma_cap_ib_mcast(device, dev->start_port + i))
			continue;
		port = &dev->port[i];
		port->dev = dev;
		port->port_num = dev->start_port + i;
		spin_lock_init(&port->lock);
		port->table = RB_ROOT;
		init_completion(&port->comp);
		atomic_set(&port->refcount, 1);
		++count;
	}

	if (!count) {
		kfree(dev);
		return;
	}

	dev->device = device;
	ib_set_client_data(device, &mcast_client, dev);

	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
	ib_register_event_handler(&dev->event_handler);
}

static void mcast_remove_one(struct ib_device *device, void *client_data)
{
	struct mcast_device *dev = client_data;
	struct mcast_port *port;
	int i;

	if (!dev)
		return;

	ib_unregister_event_handler(&dev->event_handler);
	flush_workqueue(mcast_wq);

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		if (rdma_cap_ib_mcast(device, dev->start_port + i)) {
			port = &dev->port[i];
			deref_port(port);
			wait_for_completion(&port->comp);
		}
	}

	kfree(dev);
}

int mcast_init(void)
{
	int ret;

	mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM);
	if (!mcast_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);

	ret = ib_register_client(&mcast_client);
	if (ret)
		goto err;
	return 0;

err:
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(mcast_wq);
	return ret;
}

void mcast_cleanup(void)
{
	ib_unregister_client(&mcast_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(mcast_wq);
}