/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/* Copyright (c) 1990 Mentat Inc. */

/*
 * Internet Group Management Protocol (IGMP) routines.
 * Multicast Listener Discovery Protocol (MLD) routines.
 *
 * Written by Steve Deering, Stanford, May 1988.
 * Modified by Rosen Sharma, Stanford, Aug 1994.
 * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
 *
 * MULTICAST 3.5.1.1
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/zone.h>
#include <sys/callb.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <inet/ipclassifier.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/ipsec_impl.h>

#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/tunables.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_multi.h>
#include <inet/ip_listutils.h>

#include <netinet/igmp.h>
#include <inet/ip_ndp.h>
#include <inet/ip_if.h>

static uint_t	igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
static uint_t	igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
static uint_t	mld_query_in(mld_hdr_t *mldh, ill_t *ill);
static uint_t	mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
static void	igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
static void	mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
static void	igmpv3_sendrpt(ill_t *ill, mrec_t *reclist);
static void	mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
static mrec_t	*mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
		    slist_t *srclist, mrec_t *next);
static void	mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
		    mcast_record_t rtype, slist_t *flist);
static mrec_t	*mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);

/*
 * Macros used to do timer len conversions.  Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)
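
/*
 * For example, an IGMPv2 Max Response Time carried on the wire as 100
 * deciseconds converts as DSEC_TO_MSEC(100) == 10000 ms, and a report
 * delay of IGMP_MAX_HOST_REPORT_DELAY seconds becomes
 * SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY) ms, matching the units the
 * timer functions below expect.
 */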

/*
 * A running timer (scheduled thru timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out.  When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, i.e. the interval
 * between the time the timer was scheduled and the time it was
 * cancelled.  This can cause delays in sending out multicast membership
 * reports.  To resolve this problem, wallclock time (absolute time) is
 * used instead of deltas (relative time) to track timers.
 *
 * The macro below gets the lbolt value, which is used for proper timer
 * scheduling and firing, so that multicast membership reports are sent
 * on time.  The timer does not fire at exactly the time it was scheduled
 * to fire; a difference of a few milliseconds has been observed.  The
 * offset below accounts for that difference.
 */

#define	CURRENT_MSTIME	((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define	CURRENT_OFFSET	(999)
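
/*
 * Under this scheme a timer stored in an ilm or ill is an absolute
 * deadline in ms (e.g. ilm_timer = CURRENT_MSTIME + delay), and the
 * timeout handlers below treat a timer as expired once its deadline
 * is within CURRENT_OFFSET ms of CURRENT_MSTIME.
 */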

/*
 * The first multicast join will trigger the igmp timers / mld timers.
 * The unit for next is milliseconds.
 */
void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;
	timeout_id_t id;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_igmp_timer_lock);

	if (ipst->ips_igmp_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	} else {
		ipst->ips_igmp_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_igmp_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer
		 */
		ipst->ips_igmp_time_to_next = next;
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'igmp_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout
	 */
	time_left = ipst->ips_igmp_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}
	id = ipst->ips_igmp_timeout_id;

	mutex_exit(&ipst->ips_igmp_timer_lock);
	ret = untimeout(id);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(ipst->ips_igmp_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_igmp_timeout_id != 0);
		ipst->ips_igmp_timeout_id = 0;
	}
	if (ipst->ips_igmp_time_to_next != 0) {
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_igmp_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_igmp_timer_lock);
}

/*
 * mld_start_timers:
 * The unit for next is milliseconds.
 */
void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;
	timeout_id_t id;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_mld_timer_lock);
	if (ipst->ips_mld_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	} else {
		ipst->ips_mld_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_mld_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer
		 */
		ipst->ips_mld_time_to_next = next;
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'mld_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout
	 */
	time_left = ipst->ips_mld_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}
	id = ipst->ips_mld_timeout_id;

	mutex_exit(&ipst->ips_mld_timer_lock);
	ret = untimeout(id);
	mutex_enter(&ipst->ips_mld_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(ipst->ips_mld_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_mld_timeout_id != 0);
		ipst->ips_mld_timeout_id = 0;
	}
	if (ipst->ips_mld_time_to_next != 0) {
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_mld_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_mld_timer_lock);
}

/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls msgpullup().
 */
mblk_t *
igmp_input(mblk_t *mp, ip_recv_attr_t *ira)
{
	igmpa_t		*igmpa;
	ipha_t		*ipha = (ipha_t *)(mp->b_rptr);
	int		iphlen, igmplen, mblklen;
	ilm_t		*ilm;
	uint32_t	src, dst;
	uint32_t	group;
	in6_addr_t	v6group;
	uint_t		next;
	ipif_t		*ipif;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(!ill->ill_isv6);
	++ipst->ips_igmpstat.igps_rcv_total;

	mblklen = MBLKL(mp);
	iphlen = ira->ira_ip_hdr_length;
	if (mblklen < 1 || mblklen < iphlen) {
		++ipst->ips_igmpstat.igps_rcv_tooshort;
		goto bad_pkt;
	}
	igmplen = ira->ira_pktlen - iphlen;
	/*
	 * Since msg sizes are more variable with v3, just pullup the
	 * whole thing now.
	 */
	if (MBLKL(mp) < (igmplen + iphlen)) {
		mblk_t *mp1;
		if ((mp1 = msgpullup(mp, -1)) == NULL) {
			++ipst->ips_igmpstat.igps_rcv_tooshort;
			goto bad_pkt;
		}
		freemsg(mp);
		mp = mp1;
		ipha = (ipha_t *)(mp->b_rptr);
	}

	/*
	 * Validate lengths
	 */
	if (igmplen < IGMP_MINLEN) {
		++ipst->ips_igmpstat.igps_rcv_tooshort;
		goto bad_pkt;
	}

	igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
	src = ipha->ipha_src;
	dst = ipha->ipha_dst;
	if (ip_debug > 1)
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: src 0x%x, dst 0x%x on %s\n",
		    (int)ntohl(src), (int)ntohl(dst),
		    ill->ill_name);

	switch (igmpa->igmpa_type) {
	case IGMP_MEMBERSHIP_QUERY:
		/*
		 * packet length differentiates between v1/v2 and v3
		 * v1/v2 should be exactly 8 octets long; v3 is >= 12
		 */
		if ((igmplen == IGMP_MINLEN) ||
		    (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
			next = igmp_query_in(ipha, igmpa, ill);
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
			    igmplen);
		} else {
			++ipst->ips_igmpstat.igps_rcv_tooshort;
			goto bad_pkt;
		}
		if (next == 0)
			goto bad_pkt;

		if (next != INFINITY)
			igmp_start_timers(next, ipst);

		break;

	case IGMP_V1_MEMBERSHIP_REPORT:
	case IGMP_V2_MEMBERSHIP_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group. Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == src) {
				if (ip_debug > 1) {
					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "igmp_input: we are only "
					    "member src 0x%x\n",
					    (int)ntohl(src));
				}
				mutex_exit(&ill->ill_lock);
				return (mp);
			}
		}
		mutex_exit(&ill->ill_lock);

		++ipst->ips_igmpstat.igps_rcv_reports;
		group = igmpa->igmpa_group;
		if (!CLASSD(group)) {
			++ipst->ips_igmpstat.igps_rcv_badreports;
			goto bad_pkt;
		}

		/*
		 * KLUDGE: if the IP source address of the report has an
		 * unspecified (i.e., zero) subnet number, as is allowed for
		 * a booting host, replace it with the correct subnet number
		 * so that a process-level multicast routing daemon can
		 * determine which subnet it arrived from. This is necessary
		 * to compensate for the lack of any way for a process to
		 * determine the arrival interface of an incoming packet.
		 *
		 * Requires that a copy of *this* message is passed up
		 * to the raw interface which is done by our caller.
		 */
		if ((src & htonl(0xFF000000U)) == 0) {	/* Minimum net mask */
			/* Pick the first ipif on this ill */
			mutex_enter(&ill->ill_lock);
			src = ill->ill_ipif->ipif_subnet;
			mutex_exit(&ill->ill_lock);
			ip1dbg(("igmp_input: changed src to 0x%x\n",
			    (int)ntohl(src)));
			ipha->ipha_src = src;
		}

		/*
		 * If our ill has ILMs that belong to the group being
		 * reported, and we are a 'Delaying Member' in the RFC
		 * terminology, stop our timer for that group and 'clear
		 * flag' i.e. mark as IGMP_OTHERMEMBER.
		 */
		rw_enter(&ill->ill_mcast_lock, RW_WRITER);
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
		for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
				continue;

			++ipst->ips_igmpstat.igps_rcv_ourreports;
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_OTHERMEMBER;
		} /* for */
		rw_exit(&ill->ill_mcast_lock);
		ill_mcast_timer_start(ill->ill_ipst);
		break;

	case IGMP_V3_MEMBERSHIP_REPORT:
		/*
		 * Currently nothing to do here; IGMP router is not
		 * implemented in ip, and v3 hosts don't pay attention
		 * to membership reports.
		 */
		break;
	}
	/*
	 * Pass all valid IGMP packets up to any process(es) listening
	 * on a raw IGMP socket. Do not free the packet.
	 */
	return (mp);

bad_pkt:
	freemsg(mp);
	return (NULL);
}

static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
	ilm_t	*ilm;
	int	timer;
	uint_t	next, current;
	ip_stack_t	*ipst;

	ipst = ill->ill_ipst;
	++ipst->ips_igmpstat.igps_rcv_queries;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	/*
	 * In the IGMPv2 specification, there are 3 states and a flag.
	 *
	 * In Non-Member state, we simply don't have a membership record.
	 * In Delaying Member state, our timer is running (ilm->ilm_timer
	 * < INFINITY). In Idle Member state, our timer is not running
	 * (ilm->ilm_timer == INFINITY).
	 *
	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
	 * if I sent the last report.
	 */
	if ((igmpa->igmpa_code == 0) ||
	    (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
		/*
		 * Query from an old router.
		 * Remember that the querier on this interface is old,
		 * and set the timer to the value in RFC 1112.
		 */
		ill->ill_mcast_v1_time = 0;
		ill->ill_mcast_v1_tset = 1;
		if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
			ip1dbg(("Received IGMPv1 Query on %s, switching mode "
			    "to IGMP_V1_ROUTER\n", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
			ill->ill_mcast_type = IGMP_V1_ROUTER;
		}

		timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);

		if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
		    igmpa->igmpa_group != 0) {
			++ipst->ips_igmpstat.igps_rcv_badqueries;
			rw_exit(&ill->ill_mcast_lock);
			ill_mcast_timer_start(ill->ill_ipst);
			return (0);
		}

	} else {
		in_addr_t group;

		/*
		 * Query from a new router
		 * Simply do a validity check
		 */
		group = igmpa->igmpa_group;
		if (group != 0 && (!CLASSD(group))) {
			++ipst->ips_igmpstat.igps_rcv_badqueries;
			rw_exit(&ill->ill_mcast_lock);
			ill_mcast_timer_start(ill->ill_ipst);
			return (0);
		}

		/*
		 * Switch interface state to v2 on receipt of a v2 query
		 * ONLY IF current state is v3.  Leave things be if the
		 * current state is v1, but do reset the v2-querier-present
		 * timer.
		 */
		if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			ip1dbg(("Received IGMPv2 Query on %s, switching mode "
			    "to IGMP_V2_ROUTER", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1);
			ill->ill_mcast_type = IGMP_V2_ROUTER;
		}
		ill->ill_mcast_v2_time = 0;
		ill->ill_mcast_v2_tset = 1;

		timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
		    (int)ntohs(igmpa->igmpa_code),
		    (int)ntohs(igmpa->igmpa_type));
	}

	/*
	 * -Start the timers in all of our membership records
	 *  for the physical interface on which the query
	 *  arrived, excluding those that belong to the "all
	 *  hosts" group (224.0.0.1).
	 *
	 * -Restart any timer that is already running but has
	 *  a value longer than the requested timeout.
	 *
	 * -Use the value specified in the query message as
	 *  the maximum timeout.
	 */
	next = (unsigned)INFINITY;

	current = CURRENT_MSTIME;
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {

		/*
		 * A multicast router joins INADDR_ANY address
		 * to enable promiscuous reception of all
		 * mcasts from the interface. This INADDR_ANY
		 * is stored in the ilm_v6addr as V6 unspec addr
		 */
		if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
			continue;
		if (ilm->ilm_addr == htonl(INADDR_ANY))
			continue;
		if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
		    ((igmpa->igmpa_group == 0) ||
		    (igmpa->igmpa_group == ilm->ilm_addr))) {
			if (ilm->ilm_timer > timer) {
				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
				if (ilm->ilm_timer < next)
					next = ilm->ilm_timer;
				ilm->ilm_timer += current;
			}
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/*
	 * No packets have been sent above - no
	 * ill_mcast_send_queued is needed.
	 */
	ill_mcast_timer_start(ill->ill_ipst);

	return (next);
}

static uint_t
igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
{
	uint_t		i, next, mrd, qqi, timer, delay, numsrc;
	uint_t		current;
	ilm_t		*ilm;
	ipaddr_t	*src_array;
	uint8_t		qrv;
	ip_stack_t	*ipst;

	ipst = ill->ill_ipst;
	/* make sure numsrc matches packet size */
	numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
	if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
		++ipst->ips_igmpstat.igps_rcv_tooshort;
		return (0);
	}
	src_array = (ipaddr_t *)&igmp3qa[1];

	++ipst->ips_igmpstat.igps_rcv_queries;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);

	if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
		mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
		exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
		mrd = (mant | 0x10) << (exp + 3);
	}
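	/*
	 * Per RFC 3376 section 4.1.1, a Max Resp Code of 128 or greater
	 * is a fixed-point value, |1|exp(3)|mant(4)|, meaning
	 * (mant | 0x10) << (exp + 3) tenths of a second; e.g. 0x8a
	 * (exp 0, mant 0xa) decodes to 0x1a << 3 == 208 dsec, i.e.
	 * 20.8 seconds.  The QQIC field handled below uses the same
	 * encoding (RFC 3376 section 4.1.7).
	 */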
	if (mrd == 0)
		mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
	timer = DSEC_TO_MSEC(mrd);
	MCAST_RANDOM_DELAY(delay, timer);
	next = (unsigned)INFINITY;
	current = CURRENT_MSTIME;

	if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
		mant = hdrval & IGMP_V3_QQI_MANT_MASK;
		exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (RFC3376 section 5.2 rule 1)
	 */
	if (ill->ill_global_timer < (current + delay)) {
		rw_exit(&ill->ill_mcast_lock);
		ill_mcast_timer_start(ill->ill_ipst);
		return (next);
	}

	/*
	 * Now take action depending upon query type:
	 * general, group specific, or group/source specific.
	 */
	if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time]).
		 */
		ill->ill_global_timer = current + delay;
		next = delay;
	} else {
		/* group or group/source specific query */
		for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
			if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
			    (ilm->ilm_addr == htonl(INADDR_ANY)) ||
			    (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
			    (igmp3qa->igmp3qa_group != ilm->ilm_addr))
				continue;
			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty). Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					IN6_IPADDR_TO_V4MAPPED(src_array[i],
					    &(pktl->sl_addr[i]));
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}

			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
			    INFINITY : (ilm->ilm_timer - current);
			/* choose soonest timer */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			ilm->ilm_timer += current;
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/*
	 * No packets have been sent above - no
	 * ill_mcast_send_queued is needed.
	 */
	ill_mcast_timer_start(ill->ill_ipst);

	return (next);
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_joingroup(ilm_t *ilm)
{
	uint_t	timer;
	ill_t	*ill;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ill = ilm->ilm_ill;

	ASSERT(!ill->ill_isv6);
	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
	} else {
		ip1dbg(("Querier mode %d, sending report, group %x\n",
		    ill->ill_mcast_type, htonl(ilm->ilm_addr)));
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
		} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough.
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			igmpv3_sendrpt(ill, rp);
			/*
			 * Set up retransmission state.  Timer is set below,
			 * for both v3 and older versions.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		timer = ilm->ilm_rtx.rtx_timer;
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		ilm->ilm_state = IGMP_IREPORTEDLAST;

		/*
		 * We are holding ill_mcast_lock here and the timeout
		 * handler (igmp_timeout_handler_per_ill) acquires that
		 * lock. Hence we can't call igmp_start_timers since it could
		 * deadlock in untimeout().
		 * Instead the thread which drops ill_mcast_lock will have
		 * to call ill_mcast_timer_start().
		 */
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next = MIN(timer,
		    ipst->ips_igmp_deferred_next);
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
		    "igmp_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ill->ill_mcast_type),
		    (int)ntohl(timer));
	}
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_joingroup(ilm_t *ilm)
{
	uint_t	timer;
	ill_t	*ill;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ill = ilm->ilm_ill;

	ASSERT(ill->ill_isv6);

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
	} else {
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
		} else {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			mldv2_sendrpt(ill, rp);
			/*
			 * Set up retransmission state.  Timer is set below,
			 * for both v2 and v1.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
		    ilm->ilm_rtx.rtx_cnt > 0);

		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		timer = ilm->ilm_rtx.rtx_timer;
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		ilm->ilm_state = IGMP_IREPORTEDLAST;

		/*
		 * We are holding ill_mcast_lock here and the timeout
		 * handler (mld_timeout_handler_per_ill) acquires that
		 * lock. Hence we can't call mld_start_timers since it could
		 * deadlock in untimeout().
		 * Instead the thread which drops ill_mcast_lock will have
		 * to call ill_mcast_timer_start().
		 */
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next = MIN(timer,
		    ipst->ips_mld_deferred_next);
		mutex_exit(&ipst->ips_mld_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
		    "mld_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ill->ill_mcast_type),
		    (int)ntohl(timer));
	}
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	ASSERT(!ill->ill_isv6);

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
		    (htonl(INADDR_ALLRTRS_GROUP)));
		return;
	}
	if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		igmpv3_sendrpt(ill, rp);
		return;
	}
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	ASSERT(ill->ill_isv6);

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == MLD_V1_ROUTER &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
		return;
	}
	if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mldv2_sendrpt(ill, rp);
		return;
	}
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	/* state change reports should only be sent if the router is v3 */
	if (ilm->ilm_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	ill = ilm->ilm_ill;
	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/*
	 * Compare existing(old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */

	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, the caller will start it when dropping ill_mcast_lock.
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next =
		    MIN(ipst->ips_igmp_deferred_next, ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	igmpv3_sendrpt(ill, rp);
}

/*
 * Caller holds ill_mcast_lock. We queue the packet using ill_mcast_queue
 * and it gets sent after the lock is dropped.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp = NULL;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;
	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* only need to send if we have an mldv2-capable router */
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, the caller will start it when dropping ill_mcast_lock.
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next =
		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_mld_timer_lock);
	}

	mldv2_sendrpt(ill, rp);
}

uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
	uint_t	next = INFINITY, current;
	ilm_t	*ilm;
	mrec_t	*rp = NULL;
	mrec_t	*rtxrp = NULL;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);

	current = CURRENT_MSTIME;
	/* First check the global timer on this interface */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v3 general
		 * query), need to skip the all hosts addr (224.0.0.1), per
		 * RFC 3376 section 5.
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		igmpv3_sendrpt(ill, rp);
		rp = NULL;
	} else {
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			igmpv3_sendrpt(ill, rp);
			rp = NULL;
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			continue;
		}
		if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			ASSERT(rtxp->rtx_timer == INFINITY);
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		igmpv3_sendrpt(ill, rtxrp);
		rtxrp = NULL;
	}

	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued IP packets */
	ill_mcast_send_queued(ill);
	/* Defer ill_mcast_timer_start() until the caller is done */

	return (next);
}

/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_mcast_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp timer fires, and the timeout handler igmp_timeout_handler()
 * performs the action exclusively after acquiring ill_mcast_lock.
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id.
 */
void
igmp_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timeout_id = 0;
	ipst->ips_igmp_timer_scheduled_last = 0;
	ipst->ips_igmp_time_to_next = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V4(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(!ill->ill_isv6);
		/* Make sure the ill isn't going away. */
		if (!ill_check_and_refhold(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		next = igmp_timeout_handler_per_ill(ill);
		if (next < global_next)
			global_next = next;
		ill_refrele(ill);
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	}
	rw_exit(&ipst->ips_ill_g_lock);
	if (global_next != INFINITY)
		igmp_start_timers(global_next, ipst);
}

/*
 * mld_timeout_handler_per_ill:
 * Called when there are timeout events, every next (tick).
 * Returns the number of milliseconds until the next event (or INFINITY
 * if none).
 */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
	ilm_t	*ilm;
	uint_t	next = INFINITY, current;
	mrec_t	*rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);

	current = CURRENT_MSTIME;
	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		mldv2_sendrpt(ill, rp);
	} else {
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			ASSERT(rtxp->rtx_timer == INFINITY);
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued IP packets */
	ill_mcast_send_queued(ill);
	/* Defer ill_mcast_timer_start() until the caller is done */

	return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
 * MT issues are same as igmp_timeout_handler
 */
void
mld_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timeout_id = 0;
	ipst->ips_mld_timer_scheduled_last = 0;
	ipst->ips_mld_time_to_next = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/* Make sure the ill isn't going away. */
		if (!ill_check_and_refhold(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		next = mld_timeout_handler_per_ill(ill);
		if (next < global_next)
			global_next = next;
		ill_refrele(ill);
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	}
	rw_exit(&ipst->ips_ill_g_lock);
	if (global_next != INFINITY)
		mld_start_timers(global_next, ipst);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)
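
/*
 * This is the RFC 3376/3810 Older Version Querier Present Timeout:
 * (Robustness Variable * Query Interval) + Query Response Interval.
 * The factor of 1000 converts seconds to milliseconds to match
 * MCAST_SLOWTIMO_INTERVAL, which is in ms; e.g. with a robustness of 2
 * and a 125-second query interval the first term alone contributes
 * 250000 ms worth of slowtimo intervals.  (This assumes
 * MCAST_QUERY_RESP_INTERVAL is expressed in seconds, as the grouping
 * in the macro implies.)
 */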

/*
 * igmp_slowtimo:
 * - Reverts to a newer version if we didn't hear from the older version
 *   router in IGMP_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * The check of ips_igmp_max_version ensures that we don't revert to a
 * higher IGMP version than configured.
 */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	for (ifp = IP_V4_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics. If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			/* Make sure the ill isn't going away. */
			if (!ill_check_and_refhold(ill))
				continue;
			rw_exit(&ipst->ips_ill_g_lock);
			rw_enter(&ill->ill_mcast_lock, RW_WRITER);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				if ((ill->ill_mcast_v2_tset > 0) ||
				    (ipst->ips_igmp_max_version ==
				    IGMP_V2_ROUTER)) {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V2_ROUTER;
				} else {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V3_ROUTER;
				}
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v1, -1);
			}
			if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
			    (ill->ill_mcast_v2_time >= OVQP(ill))) {
				ip1dbg(("V2 query timer expired on "
				    "%s; switching mode to IGMP_V3\n",
				    ill->ill_name));
				ill->ill_mcast_type = IGMP_V3_ROUTER;
				ill->ill_mcast_v2_time = 0;
				ill->ill_mcast_v2_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v2, -1);
			}
			rw_exit(&ill->ill_mcast_lock);
			ill_refrele(ill);
			rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	ill_mcast_timer_start(ipst);
	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Reverts to a newer version if we didn't hear from the older version
 *   router in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * The check of ips_mld_max_version ensures that we don't revert to a higher
 * MLD version than configured.
 */
void
mld_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			/* Make sure the ill isn't going away. */
			if (!ill_check_and_refhold(ill))
				continue;
			rw_exit(&ipst->ips_ill_g_lock);
			rw_enter(&ill->ill_mcast_lock, RW_WRITER);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
			    (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				ip1dbg(("MLD query timer expired on"
				    " %s; switching mode to MLD_V2\n",
				    ill->ill_name));
				ill->ill_mcast_type = MLD_V2_ROUTER;
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v1, -1);
			}
			rw_exit(&ill->ill_mcast_lock);
			ill_refrele(ill);
			rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	ill_mcast_timer_start(ipst);
	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}
1803
1804 /*
1805 * igmp_sendpkt:
1806  * Sends the packet via ip_output_simple, just like icmp_inbound does.
1807 */
1808 static void
1809 igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
1810 {
1811 mblk_t *mp;
1812 igmpa_t *igmpa;
1813 uint8_t *rtralert;
1814 ipha_t *ipha;
1815 int hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
1816 size_t size = hdrlen + sizeof (igmpa_t);
1817 ill_t *ill = ilm->ilm_ill;
1818 ip_stack_t *ipst = ill->ill_ipst;
1819
1820 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1821
1822 mp = allocb(size, BPRI_HI);
1823 if (mp == NULL) {
1824 return;
1825 }
1826 mp->b_wptr = mp->b_rptr + size;
1827
1828 ipha = (ipha_t *)mp->b_rptr;
1829 rtralert = (uint8_t *)&(ipha[1]);
1830 igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
1831 igmpa->igmpa_type = type;
1832 igmpa->igmpa_code = 0;
1833 igmpa->igmpa_group = ilm->ilm_addr;
1834 igmpa->igmpa_cksum = 0;
1835 igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);
1836
1837 rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
1838 rtralert[1] = RTRALERT_LEN;
1839 rtralert[2] = 0;
1840 rtralert[3] = 0;
1841
1842 ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
1843 | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
1844 ipha->ipha_type_of_service = 0;
1845 ipha->ipha_length = htons(size);
1846 ipha->ipha_ident = 0;
1847 ipha->ipha_fragment_offset_and_flags = 0;
1848 ipha->ipha_ttl = IGMP_TTL;
1849 ipha->ipha_protocol = IPPROTO_IGMP;
1850 ipha->ipha_hdr_checksum = 0;
1851 ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
1852 ipha->ipha_src = INADDR_ANY;
1853
1854 ill_mcast_queue(ill, mp);
1855
1856 ++ipst->ips_igmpstat.igps_snd_reports;
1857 }
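/*
 * The IP_CSUM() call above computes the standard 16-bit ones'-
 * complement checksum (RFC 1071) over the IGMP message, skipping the
 * IP header and router alert option via the hdrlen offset. A minimal
 * userland sketch of that checksum over a flat buffer (illustrative
 * only, excluded from compilation):
 */
#if 0
static uint16_t
inet_cksum(const void *buf, size_t len)
{
	const uint16_t *p = buf;
	uint32_t sum = 0;

	while (len > 1) {			/* sum 16-bit words */
		sum += *p++;
		len -= 2;
	}
	if (len == 1)				/* odd trailing byte */
		sum += *(const uint8_t *)p;
	sum = (sum >> 16) + (sum & 0xffff);	/* fold the carries */
	sum += (sum >> 16);
	return ((uint16_t)~sum);
}
#endif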
1858
1859 /*
1860 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill.
1861 * The report will contain one group record
1862 * for each element of reclist. If this causes packet length to
1863 * exceed ill->ill_mc_mtu, multiple reports are sent.
1864 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
1865 * and those buffers are freed here.
1866 */
1867 static void
1868 igmpv3_sendrpt(ill_t *ill, mrec_t *reclist)
1869 {
1870 igmp3ra_t *igmp3ra;
1871 grphdra_t *grphdr;
1872 mblk_t *mp;
1873 ipha_t *ipha;
1874 uint8_t *rtralert;
1875 ipaddr_t *src_array;
1876 int i, j, numrec, more_src_cnt;
1877 size_t hdrsize, size, rsize;
1878 mrec_t *rp, *cur_reclist;
1879 mrec_t *next_reclist = reclist;
1880 boolean_t morepkts;
1881 ip_stack_t *ipst = ill->ill_ipst;
1882
1883 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1884
1885 /* if there aren't any records, there's nothing to send */
1886 if (reclist == NULL)
1887 return;
1888
1889 hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
1890 nextpkt:
1891 size = hdrsize + sizeof (igmp3ra_t);
1892 morepkts = B_FALSE;
1893 more_src_cnt = 0;
1894 cur_reclist = next_reclist;
1895 numrec = 0;
1896 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
1897 rsize = sizeof (grphdra_t) +
1898 (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
1899 if (size + rsize > ill->ill_mc_mtu) {
1900 if (rp == cur_reclist) {
1901 /*
1902 * If the first mrec we looked at is too big
1903 			 * to fit in a single packet (i.e., the source
1904 * list is too big), we must either truncate
1905 * the list (if TO_EX or IS_EX), or send
1906 * multiple reports for the same group (all
1907 * other types).
1908 */
1909 int srcspace, srcsperpkt;
1910 srcspace = ill->ill_mc_mtu - (size +
1911 sizeof (grphdra_t));
1912
1913 /*
1914 * Skip if there's not even enough room in
1915 * a single packet to send something useful.
1916 */
1917 				if (srcspace <= (int)sizeof (ipaddr_t))
1918 continue;
1919
1920 srcsperpkt = srcspace / sizeof (ipaddr_t);
1921 /*
1922 * Increment size and numrec, because we will
1923 * be sending a record for the mrec we're
1924 * looking at now.
1925 */
1926 size += sizeof (grphdra_t) +
1927 (srcsperpkt * sizeof (ipaddr_t));
1928 numrec++;
1929 if (rp->mrec_type == MODE_IS_EXCLUDE ||
1930 rp->mrec_type == CHANGE_TO_EXCLUDE) {
1931 rp->mrec_srcs.sl_numsrc = srcsperpkt;
1932 if (rp->mrec_next == NULL) {
1933 /* no more packets to send */
1934 break;
1935 } else {
1936 /*
1937 * more packets, but we're
1938 * done with this mrec.
1939 */
1940 next_reclist = rp->mrec_next;
1941 }
1942 } else {
1943 more_src_cnt = rp->mrec_srcs.sl_numsrc
1944 - srcsperpkt;
1945 rp->mrec_srcs.sl_numsrc = srcsperpkt;
1946 /*
1947 * We'll fix up this mrec (remove the
1948 * srcs we've already sent) before
1949 * returning to nextpkt above.
1950 */
1951 next_reclist = rp;
1952 }
1953 } else {
1954 next_reclist = rp;
1955 }
1956 morepkts = B_TRUE;
1957 break;
1958 }
1959 size += rsize;
1960 numrec++;
1961 }
1962
1963 mp = allocb(size, BPRI_HI);
1964 if (mp == NULL) {
1965 goto free_reclist;
1966 }
1967 bzero((char *)mp->b_rptr, size);
1968 mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
1969
1970 ipha = (ipha_t *)mp->b_rptr;
1971 rtralert = (uint8_t *)&(ipha[1]);
1972 igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
1973 grphdr = (grphdra_t *)&(igmp3ra[1]);
1974
1975 rp = cur_reclist;
1976 for (i = 0; i < numrec; i++) {
1977 grphdr->grphdra_type = rp->mrec_type;
1978 grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
1979 grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
1980 src_array = (ipaddr_t *)&(grphdr[1]);
1981
1982 for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
1983 src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);
1984
1985 grphdr = (grphdra_t *)&(src_array[j]);
1986 rp = rp->mrec_next;
1987 }
1988
1989 igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
1990 igmp3ra->igmp3ra_numrec = htons(numrec);
1991 igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);
1992
1993 rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
1994 rtralert[1] = RTRALERT_LEN;
1995 rtralert[2] = 0;
1996 rtralert[3] = 0;
1997
1998 ipha->ipha_version_and_hdr_length = IP_VERSION << 4
1999 | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
2000 ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
2001 ipha->ipha_length = htons(size);
2002 ipha->ipha_ttl = IGMP_TTL;
2003 ipha->ipha_protocol = IPPROTO_IGMP;
2004 ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
2005 ipha->ipha_src = INADDR_ANY;
2006
2007 ill_mcast_queue(ill, mp);
2008
2009 ++ipst->ips_igmpstat.igps_snd_reports;
2010
2011 if (morepkts) {
2012 if (more_src_cnt > 0) {
2013 int index, mvsize;
2014 slist_t *sl = &next_reclist->mrec_srcs;
2015 index = sl->sl_numsrc;
2016 mvsize = more_src_cnt * sizeof (in6_addr_t);
2017 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
2018 mvsize);
2019 sl->sl_numsrc = more_src_cnt;
2020 }
2021 goto nextpkt;
2022 }
2023
2024 free_reclist:
2025 while (reclist != NULL) {
2026 rp = reclist->mrec_next;
2027 mi_free(reclist);
2028 reclist = rp;
2029 }
2030 }
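/*
 * Worked size example for the splitting logic above, assuming a
 * typical 1500-byte ill_mc_mtu: the fixed overhead is a 20-byte IPv4
 * header + 4-byte router alert + 8-byte igmp3ra_t = 32 bytes, leaving
 * 1468 bytes for group records. A single record with its 8-byte
 * grphdra_t header can therefore carry at most (1468 - 8) / 4 = 365
 * IPv4 sources before the source list must be truncated (TO_EX/IS_EX)
 * or split across multiple reports.
 */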
2031
2032 /*
2033 * mld_input:
2034 * Return NULL for a bad packet that is discarded here.
2035 * Return mp if the message is OK and should be handed to "raw" receivers.
2036 * Callers of mld_input() may need to reinitialize variables that were copied
2037 * from the mblk as this calls pullupmsg().
2038 */
2039 mblk_t *
2040 mld_input(mblk_t *mp, ip_recv_attr_t *ira)
2041 {
2042 ip6_t *ip6h = (ip6_t *)(mp->b_rptr);
2043 mld_hdr_t *mldh;
2044 ilm_t *ilm;
2045 ipif_t *ipif;
2046 uint16_t hdr_length, exthdr_length;
2047 in6_addr_t *v6group_ptr;
2048 uint_t next;
2049 int mldlen;
2050 ill_t *ill = ira->ira_ill;
2051 ip_stack_t *ipst = ill->ill_ipst;
2052
2053 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);
2054
2055 /* Make sure the src address of the packet is link-local */
2056 if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
2057 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2058 freemsg(mp);
2059 return (NULL);
2060 }
2061
2062 if (ip6h->ip6_hlim != 1) {
2063 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
2064 freemsg(mp);
2065 return (NULL);
2066 }
2067
2068 /* Get to the icmp header part */
2069 hdr_length = ira->ira_ip_hdr_length;
2070 exthdr_length = hdr_length - IPV6_HDR_LEN;
2071
2072 mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;
2073
2074 	/* An MLD packet must be at least 24 octets long to be valid */
2075 if (mldlen < MLD_MINLEN) {
2076 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2077 freemsg(mp);
2078 return (NULL);
2079 }
2080
2081 mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);
2082
2083 switch (mldh->mld_type) {
2084 case MLD_LISTENER_QUERY:
2085 /*
2086 		 * Packet length differentiates between v1 and v2: a v1
2087 		 * query is exactly 24 octets long; v2 is >= 28.
2088 */
2089 if ((mldlen == MLD_MINLEN) ||
2090 (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
2091 next = mld_query_in(mldh, ill);
2092 } else if (mldlen >= MLD_V2_QUERY_MINLEN) {
2093 next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
2094 } else {
2095 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2096 freemsg(mp);
2097 return (NULL);
2098 }
2099 if (next == 0) {
2100 return (mp);
2101 }
2102
2103 if (next != INFINITY)
2104 mld_start_timers(next, ipst);
2105 break;
2106
2107 case MLD_LISTENER_REPORT:
2108 /*
2109 * For fast leave to work, we have to know that we are the
2110 * last person to send a report for this group. Reports
2111 * generated by us are looped back since we could potentially
2112 		 * be a multicast router, so discard reports sourced by us.
2113 */
2114 mutex_enter(&ill->ill_lock);
2115 for (ipif = ill->ill_ipif; ipif != NULL;
2116 ipif = ipif->ipif_next) {
2117 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
2118 &ip6h->ip6_src)) {
2119 if (ip_debug > 1) {
2120 char buf1[INET6_ADDRSTRLEN];
2121
2122 (void) mi_strlog(ill->ill_rq,
2123 1,
2124 SL_TRACE,
2125 "mld_input: we are only "
2126 "member src %s\n",
2127 inet_ntop(AF_INET6, &ip6h->ip6_src,
2128 buf1, sizeof (buf1)));
2129 }
2130 mutex_exit(&ill->ill_lock);
2131 return (mp);
2132 }
2133 }
2134 mutex_exit(&ill->ill_lock);
2135 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);
2136
2137 v6group_ptr = &mldh->mld_addr;
2138 if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
2139 BUMP_MIB(ill->ill_icmp6_mib,
2140 ipv6IfIcmpInGroupMembBadReports);
2141 freemsg(mp);
2142 return (NULL);
2143 }
2144
2145
2146 /*
2147 * If we belong to the group being reported, and we are a
2148 * 'Delaying member' per the RFC terminology, stop our timer
2149 		 * for that group and 'clear flag', i.e., mark ilm_state as
2150 * IGMP_OTHERMEMBER. With zones, there can be multiple group
2151 * membership entries for the same group address (one per zone)
2152 * so we need to walk the ill_ilm list.
2153 */
2154 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
2155 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
2156 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
2157 continue;
2158 BUMP_MIB(ill->ill_icmp6_mib,
2159 ipv6IfIcmpInGroupMembOurReports);
2160
2161 ilm->ilm_timer = INFINITY;
2162 ilm->ilm_state = IGMP_OTHERMEMBER;
2163 }
2164 rw_exit(&ill->ill_mcast_lock);
2165 /*
2166 		 * No packets have been sent above, so no
2167 		 * ill_mcast_send_queued() call is needed.
2168 */
2169 ill_mcast_timer_start(ill->ill_ipst);
2170 break;
2171
2172 case MLD_LISTENER_REDUCTION:
2173 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
2174 break;
2175 }
2176 return (mp);
2177 }
2178
2179 /*
2180 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate
2181 * (non-zero, unsigned) timer value to be set on success.
2182 */
2183 static uint_t
2184 mld_query_in(mld_hdr_t *mldh, ill_t *ill)
2185 {
2186 ilm_t *ilm;
2187 int timer;
2188 uint_t next, current;
2189 in6_addr_t *v6group;
2190
2191 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
2192
2193 /*
2194 * In the MLD specification, there are 3 states and a flag.
2195 *
2196 * In Non-Listener state, we simply don't have a membership record.
2197 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
2198 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
2199 * INFINITY)
2200 *
2201 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
2202 * we have heard a report from another member, or IGMP_IREPORTEDLAST
2203 * if I sent the last report.
2204 */
2205 v6group = &mldh->mld_addr;
2206 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
2207 ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
2208 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
2209 return (0);
2210 }
2211
2212 /* Need to do compatibility mode checking */
2213 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
2214 ill->ill_mcast_v1_time = 0;
2215 ill->ill_mcast_v1_tset = 1;
2216 if (ill->ill_mcast_type == MLD_V2_ROUTER) {
2217 ip1dbg(("Received MLDv1 Query on %s, switching mode to "
2218 "MLD_V1_ROUTER\n", ill->ill_name));
2219 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
2220 ill->ill_mcast_type = MLD_V1_ROUTER;
2221 }
2222
2223 timer = (int)ntohs(mldh->mld_maxdelay);
2224 if (ip_debug > 1) {
2225 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
2226 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
2227 timer, (int)mldh->mld_type);
2228 }
2229
2230 /*
2231 * -Start the timers in all of our membership records for
2232 * the physical interface on which the query arrived,
2233 	 * excluding:
2234 * 1. those that belong to the "all hosts" group,
2235 * 2. those with 0 scope, or 1 node-local scope.
2236 *
2237 * -Restart any timer that is already running but has a value
2238 	 * longer than the requested timeout.
2239 * -Use the value specified in the query message as the
2240 * maximum timeout.
2241 */
2242 next = INFINITY;
2243
2244 current = CURRENT_MSTIME;
2245 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
2246 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));
2247
2248 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
2249 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
2250 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
2251 continue;
2252 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
2253 &ipv6_all_hosts_mcast)) &&
2254 (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
2255 (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
2256 if (timer == 0) {
2257 /* Respond immediately */
2258 ilm->ilm_timer = INFINITY;
2259 ilm->ilm_state = IGMP_IREPORTEDLAST;
2260 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
2261 break;
2262 }
2263 if (ilm->ilm_timer > timer) {
2264 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
2265 if (ilm->ilm_timer < next)
2266 next = ilm->ilm_timer;
2267 ilm->ilm_timer += current;
2268 }
2269 break;
2270 }
2271 }
2272 rw_exit(&ill->ill_mcast_lock);
2273 /* Send any deferred/queued IP packets */
2274 ill_mcast_send_queued(ill);
2275 ill_mcast_timer_start(ill->ill_ipst);
2276
2277 return (next);
2278 }
2279
2280 /*
2281 * Handles an MLDv2 Listener Query. On error, returns 0; on success,
2282 * returns the appropriate (non-zero, unsigned) timer value (which may
2283 * be INFINITY) to be set.
2284 */
2285 static uint_t
2286 mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
2287 {
2288 ilm_t *ilm;
2289 in6_addr_t *v6group, *src_array;
2290 uint_t next, numsrc, i, mrd, delay, qqi, current;
2291 uint8_t qrv;
2292
2293 v6group = &mld2q->mld2q_addr;
2294 numsrc = ntohs(mld2q->mld2q_numsrc);
2295
2296 /* make sure numsrc matches packet size */
2297 if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
2298 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2299 return (0);
2300 }
2301 src_array = (in6_addr_t *)&mld2q[1];
2302
2303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
2304
2305 /* extract Maximum Response Delay from code in header */
2306 mrd = ntohs(mld2q->mld2q_mxrc);
2307 if (mrd >= MLD_V2_MAXRT_FPMIN) {
2308 uint_t hdrval, mant, exp;
2309 hdrval = mrd;
2310 mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
2311 exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
2312 mrd = (mant | 0x1000) << (exp + 3);
2313 }
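	/*
	 * Worked example of the fixed-point decode above: an mld2q_mxrc
	 * of 0x9000 has mant = 0x000 and exp = 1, giving
	 * mrd = (0x000 | 0x1000) << (1 + 3) = 65536 ms. Values below
	 * MLD_V2_MAXRT_FPMIN are already plain milliseconds.
	 */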
2314 if (mrd == 0)
2315 mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);
2316
2317 MCAST_RANDOM_DELAY(delay, mrd);
2318 next = (unsigned)INFINITY;
2319 current = CURRENT_MSTIME;
2320
2321 if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
2322 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
2323 else
2324 ill->ill_mcast_rv = qrv;
2325
2326 if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
2327 uint_t mant, exp;
2328 mant = qqi & MLD_V2_QQI_MANT_MASK;
2329 exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
2330 qqi = (mant | 0x10) << (exp + 3);
2331 }
2332 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
2333
2334 /*
2335 * If we have a pending general query response that's scheduled
2336 * sooner than the delay we calculated for this response, then
2337 	 * no action is required (MLDv2 draft section 6.2 rule 1).
2338 */
2339 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
2340 if (ill->ill_global_timer < (current + delay)) {
2341 rw_exit(&ill->ill_mcast_lock);
2342 return (next);
2343 }
2344
2345 /*
2346 * Now take action depending on query type: general,
2347 * group specific, or group/source specific.
2348 */
2349 if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
2350 /*
2351 * general query
2352 * We know global timer is either not running or is
2353 * greater than our calculated delay, so reset it to
2354 * our delay (random value in range [0, response time])
2355 */
2356 ill->ill_global_timer = current + delay;
2357 next = delay;
2358 } else {
2359 /* group or group/source specific query */
2360 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
2361 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
2362 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
2363 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
2364 !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
2365 continue;
2366
2367 /*
2368 * If the query is group specific or we have a
2369 * pending group specific query, the response is
2370 * group specific (pending sources list should be
2371 * empty). Otherwise, need to update the pending
2372 * sources list for the group and source specific
2373 * response.
2374 */
2375 if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
2376 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
2377 group_query:
2378 FREE_SLIST(ilm->ilm_pendsrcs);
2379 ilm->ilm_pendsrcs = NULL;
2380 } else {
2381 boolean_t overflow;
2382 slist_t *pktl;
2383 if (numsrc > MAX_FILTER_SIZE ||
2384 (ilm->ilm_pendsrcs == NULL &&
2385 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
2386 /*
2387 * We've been sent more sources than
2388 * we can deal with; or we can't deal
2389 * with a source list at all. Revert
2390 * to a group specific query.
2391 */
2392 goto group_query;
2393 }
2394 if ((pktl = l_alloc()) == NULL)
2395 goto group_query;
2396 pktl->sl_numsrc = numsrc;
2397 for (i = 0; i < numsrc; i++)
2398 pktl->sl_addr[i] = src_array[i];
2399 l_union_in_a(ilm->ilm_pendsrcs, pktl,
2400 &overflow);
2401 l_free(pktl);
2402 if (overflow)
2403 goto group_query;
2404 }
2405 ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
2406 INFINITY : (ilm->ilm_timer - current);
2407 /* set timer to soonest value */
2408 ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
2409 if (ilm->ilm_timer < next)
2410 next = ilm->ilm_timer;
2411 ilm->ilm_timer += current;
2412 break;
2413 }
2414 }
2415 rw_exit(&ill->ill_mcast_lock);
2416 /*
2417 	 * No packets have been sent above, so no
2418 	 * ill_mcast_send_queued() call is needed.
2419 */
2420 ill_mcast_timer_start(ill->ill_ipst);
2421
2422 return (next);
2423 }
2424
2425 /*
2426 * Send MLDv1 response packet with hoplimit 1
2427 */
2428 static void
2429 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
2430 {
2431 mblk_t *mp;
2432 mld_hdr_t *mldh;
2433 ip6_t *ip6h;
2434 ip6_hbh_t *ip6hbh;
2435 struct ip6_opt_router *ip6router;
2436 size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
2437 ill_t *ill = ilm->ilm_ill;
2438
2439 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
2440
2441 /*
2442 * We need to place a router alert option in this packet. The length
2443 * of the options must be a multiple of 8. The hbh option header is 2
2444 * bytes followed by the 4 byte router alert option. That leaves
2445 * 2 bytes of pad for a total of 8 bytes.
2446 */
2447 const int router_alert_length = 8;
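	/*
	 * On the wire, those 8 bytes (hbh header, router alert option
	 * per RFC 2711, and two Pad1 bytes) look like this:
	 *
	 *	nxt   len   type  olen  value[0]  value[1]  pad   pad
	 *	0x3a  0x00  0x05  0x02  0x00      0x00      0x00  0x00
	 */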
2448
2449 ASSERT(ill->ill_isv6);
2450
2451 size += router_alert_length;
2452 mp = allocb(size, BPRI_HI);
2453 if (mp == NULL)
2454 return;
2455 bzero(mp->b_rptr, size);
2456 mp->b_wptr = mp->b_rptr + size;
2457
2458 ip6h = (ip6_t *)mp->b_rptr;
2459 ip6hbh = (struct ip6_hbh *)&ip6h[1];
2460 ip6router = (struct ip6_opt_router *)&ip6hbh[1];
2461 /*
2462 	 * A zero byte is a pad option of length 1 (Pad1). The bzero of the
2463 	 * whole packet above will pad between ip6router and mldh.
2464 */
2465 mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);
2466
2467 mldh->mld_type = type;
2468 mldh->mld_addr = ilm->ilm_v6addr;
2469
2470 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
2471 ip6router->ip6or_len = 2;
2472 ip6router->ip6or_value[0] = 0;
2473 ip6router->ip6or_value[1] = IP6_ALERT_MLD;
2474
2475 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
2476 ip6hbh->ip6h_len = 0;
2477
2478 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2479 ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
2480 ip6h->ip6_nxt = IPPROTO_HOPOPTS;
2481 ip6h->ip6_hops = MLD_HOP_LIMIT;
2482 if (v6addr == NULL)
2483 ip6h->ip6_dst = ilm->ilm_v6addr;
2484 else
2485 ip6h->ip6_dst = *v6addr;
2486
2487 ip6h->ip6_src = ipv6_all_zeros;
2488 /*
2489 * Prepare for checksum by putting icmp length in the icmp
2490 * checksum field. The checksum is calculated in ip_output.
2491 */
2492 mldh->mld_cksum = htons(sizeof (*mldh));
2493
2494 ill_mcast_queue(ill, mp);
2495 }
2496
2497 /*
2498 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The
2499 * report will contain one multicast address record for each element of
2500 * reclist. If this causes packet length to exceed ill->ill_mc_mtu,
2501 * multiple reports are sent. reclist is assumed to be made up of
2502 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
2503 */
2504 static void
2505 mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
2506 {
2507 mblk_t *mp;
2508 mld2r_t *mld2r;
2509 mld2mar_t *mld2mar;
2510 in6_addr_t *srcarray;
2511 ip6_t *ip6h;
2512 ip6_hbh_t *ip6hbh;
2513 struct ip6_opt_router *ip6router;
2514 size_t size, optlen, padlen, icmpsize, rsize;
2515 int i, numrec, more_src_cnt;
2516 mrec_t *rp, *cur_reclist;
2517 mrec_t *next_reclist = reclist;
2518 boolean_t morepkts;
2519
2520 /* If there aren't any records, there's nothing to send */
2521 if (reclist == NULL)
2522 return;
2523
2524 ASSERT(ill->ill_isv6);
2525 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
2526
2527 /*
2528 * Total option length (optlen + padlen) must be a multiple of
2529 * 8 bytes. We assume here that optlen <= 8, so the total option
2530 * length will be 8. Assert this in case anything ever changes.
2531 */
2532 optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
2533 ASSERT(optlen <= 8);
2534 padlen = 8 - optlen;
2535 nextpkt:
2536 icmpsize = sizeof (mld2r_t);
2537 size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
2538 morepkts = B_FALSE;
2539 more_src_cnt = 0;
2540 for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
2541 rp = rp->mrec_next, numrec++) {
2542 rsize = sizeof (mld2mar_t) +
2543 (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
2544 if (size + rsize > ill->ill_mc_mtu) {
2545 if (rp == cur_reclist) {
2546 /*
2547 * If the first mrec we looked at is too big
2548 				 * to fit in a single packet (i.e., the source
2549 * list is too big), we must either truncate
2550 * the list (if TO_EX or IS_EX), or send
2551 * multiple reports for the same group (all
2552 * other types).
2553 */
2554 int srcspace, srcsperpkt;
2555 srcspace = ill->ill_mc_mtu -
2556 (size + sizeof (mld2mar_t));
2557
2558 /*
2559 * Skip if there's not even enough room in
2560 * a single packet to send something useful.
2561 */
2562 				if (srcspace <= (int)sizeof (in6_addr_t))
2563 continue;
2564
2565 srcsperpkt = srcspace / sizeof (in6_addr_t);
2566 /*
2567 * Increment icmpsize and size, because we will
2568 * be sending a record for the mrec we're
2569 * looking at now.
2570 */
2571 rsize = sizeof (mld2mar_t) +
2572 (srcsperpkt * sizeof (in6_addr_t));
2573 icmpsize += rsize;
2574 size += rsize;
2575 if (rp->mrec_type == MODE_IS_EXCLUDE ||
2576 rp->mrec_type == CHANGE_TO_EXCLUDE) {
2577 rp->mrec_srcs.sl_numsrc = srcsperpkt;
2578 if (rp->mrec_next == NULL) {
2579 /* no more packets to send */
2580 break;
2581 } else {
2582 /*
2583 * more packets, but we're
2584 * done with this mrec.
2585 */
2586 next_reclist = rp->mrec_next;
2587 }
2588 } else {
2589 more_src_cnt = rp->mrec_srcs.sl_numsrc
2590 - srcsperpkt;
2591 rp->mrec_srcs.sl_numsrc = srcsperpkt;
2592 /*
2593 * We'll fix up this mrec (remove the
2594 * srcs we've already sent) before
2595 * returning to nextpkt above.
2596 */
2597 next_reclist = rp;
2598 }
2599 } else {
2600 next_reclist = rp;
2601 }
2602 morepkts = B_TRUE;
2603 break;
2604 }
2605 icmpsize += rsize;
2606 size += rsize;
2607 }
2608
2609 mp = allocb(size, BPRI_HI);
2610 if (mp == NULL)
2611 goto free_reclist;
2612 bzero(mp->b_rptr, size);
2613 mp->b_wptr = mp->b_rptr + size;
2614
2615 ip6h = (ip6_t *)mp->b_rptr;
2616 ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
2617 ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
2618 mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
2619 mld2mar = (mld2mar_t *)&(mld2r[1]);
2620
2621 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2622 ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
2623 ip6h->ip6_nxt = IPPROTO_HOPOPTS;
2624 ip6h->ip6_hops = MLD_HOP_LIMIT;
2625 ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
2626 ip6h->ip6_src = ipv6_all_zeros;
2627
2628 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
2629 /*
2630 * ip6h_len is the number of 8-byte words, not including the first
2631 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
2632 */
2633 ip6hbh->ip6h_len = 0;
2634
2635 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
2636 ip6router->ip6or_len = 2;
2637 ip6router->ip6or_value[0] = 0;
2638 ip6router->ip6or_value[1] = IP6_ALERT_MLD;
2639
2640 mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
2641 mld2r->mld2r_nummar = htons(numrec);
2642 /*
2643 * Prepare for the checksum by putting icmp length in the icmp
2644 * checksum field. The checksum is calculated in ip_output_simple.
2645 */
2646 mld2r->mld2r_cksum = htons(icmpsize);
2647
2648 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
2649 mld2mar->mld2mar_type = rp->mrec_type;
2650 mld2mar->mld2mar_auxlen = 0;
2651 mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
2652 mld2mar->mld2mar_group = rp->mrec_group;
2653 srcarray = (in6_addr_t *)&(mld2mar[1]);
2654
2655 for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
2656 srcarray[i] = rp->mrec_srcs.sl_addr[i];
2657
2658 mld2mar = (mld2mar_t *)&(srcarray[i]);
2659 }
2660
2661 ill_mcast_queue(ill, mp);
2662
2663 if (morepkts) {
2664 if (more_src_cnt > 0) {
2665 int index, mvsize;
2666 slist_t *sl = &next_reclist->mrec_srcs;
2667 index = sl->sl_numsrc;
2668 mvsize = more_src_cnt * sizeof (in6_addr_t);
2669 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
2670 mvsize);
2671 sl->sl_numsrc = more_src_cnt;
2672 }
2673 goto nextpkt;
2674 }
2675
2676 free_reclist:
2677 while (reclist != NULL) {
2678 rp = reclist->mrec_next;
2679 mi_free(reclist);
2680 reclist = rp;
2681 }
2682 }
2683
2684 static mrec_t *
2685 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
2686 mrec_t *next)
2687 {
2688 mrec_t *rp;
2689 int i;
2690
2691 if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
2692 SLIST_IS_EMPTY(srclist))
2693 return (next);
2694
2695 rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
2696 if (rp == NULL)
2697 return (next);
2698
2699 rp->mrec_next = next;
2700 rp->mrec_type = type;
2701 rp->mrec_auxlen = 0;
2702 rp->mrec_group = *grp;
2703 if (srclist == NULL) {
2704 rp->mrec_srcs.sl_numsrc = 0;
2705 } else {
2706 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
2707 for (i = 0; i < srclist->sl_numsrc; i++)
2708 rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
2709 }
2710
2711 return (rp);
2712 }
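/*
 * Callers build up report lists by chaining mcast_bldmrec() calls,
 * prepending each new record through the next argument. A hypothetical
 * sketch (illustrative only, excluded from compilation; grp, alist and
 * blist stand in for a caller's group address and source lists):
 */
#if 0
	mrec_t *rl = NULL;

	rl = mcast_bldmrec(BLOCK_OLD_SOURCES, &grp, blist, rl);
	rl = mcast_bldmrec(ALLOW_NEW_SOURCES, &grp, alist, rl);
	/* rl now heads a two-record list that mldv2_sendrpt() frees. */
#endif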
2713
2714 /*
2715 * Set up initial retransmit state. If memory cannot be allocated for
2716 * the source lists, simply create as much state as is possible; memory
2717 * allocation failures are considered one type of transient error that
2718 * the retransmissions are designed to overcome (and if they aren't
2719 * transient, there are bigger problems than failing to notify the
2720 * router about multicast group membership state changes).
2721 */
2722 static void
2723 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
2724 slist_t *flist)
2725 {
2726 /*
2727 * There are only three possibilities for rtype:
2728 * New join, transition from INCLUDE {} to INCLUDE {flist}
2729 * => rtype is ALLOW_NEW_SOURCES
2730 * New join, transition from INCLUDE {} to EXCLUDE {flist}
2731 * => rtype is CHANGE_TO_EXCLUDE
2732 * State change that involves a filter mode change
2733 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
2734 */
2735 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
2736 rtype == ALLOW_NEW_SOURCES);
2737
2738 rtxp->rtx_cnt = ill->ill_mcast_rv;
2739
2740 switch (rtype) {
2741 case CHANGE_TO_EXCLUDE:
2742 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
2743 CLEAR_SLIST(rtxp->rtx_allow);
2744 COPY_SLIST(flist, rtxp->rtx_block);
2745 break;
2746 case ALLOW_NEW_SOURCES:
2747 case CHANGE_TO_INCLUDE:
2748 rtxp->rtx_fmode_cnt =
2749 rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
2750 CLEAR_SLIST(rtxp->rtx_block);
2751 COPY_SLIST(flist, rtxp->rtx_allow);
2752 break;
2753 }
2754 }
2755
2756 /*
2757 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
2758 * RFC 3376 section 5.1, covers three cases:
2759 * * The current state change is a filter mode change
2760 * Set filter mode retransmit counter; set retransmit allow or
2761 * block list to new source list as appropriate, and clear the
2762 * retransmit list that was not set; send TO_IN or TO_EX with
2763 * new source list.
2764 * * The current state change is a source list change, but the filter
2765 * mode retransmit counter is > 0
2766 * Decrement filter mode retransmit counter; set retransmit
2767 * allow or block list to new source list as appropriate,
2768 * and clear the retransmit list that was not set; send TO_IN
2769 * or TO_EX with new source list.
2770 * * The current state change is a source list change, and the filter
2771 * mode retransmit counter is 0.
2772 * Merge existing rtx allow and block lists with new state:
2773 * rtx_allow = (new allow + rtx_allow) - new block
2774 * rtx_block = (new block + rtx_block) - new allow
2775 * Send ALLOW and BLOCK records for new retransmit lists;
2776 * decrement retransmit counter.
2777 *
2778 * As is the case for mcast_init_rtx(), memory allocation failures are
2779 * acceptable; we just create as much state as we can.
2780 */
2781 static mrec_t *
2782 mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
2783 {
2784 ill_t *ill;
2785 rtx_state_t *rtxp = &ilm->ilm_rtx;
2786 mcast_record_t txtype;
2787 mrec_t *rp, *rpnext, *rtnmrec;
2788 boolean_t ovf;
2789
2790 ill = ilm->ilm_ill;
2791
2792 if (mreclist == NULL)
2793 return (mreclist);
2794
2795 /*
2796 * A filter mode change is indicated by a single mrec, which is
2797 * either TO_IN or TO_EX. In this case, we just need to set new
2798 * retransmit state as if this were an initial join. There is
2799 * no change to the mrec list.
2800 */
2801 if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
2802 mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
2803 mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
2804 &mreclist->mrec_srcs);
2805 return (mreclist);
2806 }
2807
2808 /*
2809 * Only the source list has changed
2810 */
2811 rtxp->rtx_cnt = ill->ill_mcast_rv;
2812 if (rtxp->rtx_fmode_cnt > 0) {
2813 /* but we're still sending filter mode change reports */
2814 rtxp->rtx_fmode_cnt--;
2815 if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
2816 CLEAR_SLIST(rtxp->rtx_block);
2817 COPY_SLIST(flist, rtxp->rtx_allow);
2818 txtype = CHANGE_TO_INCLUDE;
2819 } else {
2820 CLEAR_SLIST(rtxp->rtx_allow);
2821 COPY_SLIST(flist, rtxp->rtx_block);
2822 txtype = CHANGE_TO_EXCLUDE;
2823 }
2824 /* overwrite first mrec with new info */
2825 mreclist->mrec_type = txtype;
2826 l_copy(flist, &mreclist->mrec_srcs);
2827 /* then free any remaining mrecs */
2828 for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
2829 rpnext = rp->mrec_next;
2830 mi_free(rp);
2831 }
2832 mreclist->mrec_next = NULL;
2833 rtnmrec = mreclist;
2834 } else {
2835 mrec_t *allow_mrec, *block_mrec;
2836 /*
2837 * Just send the source change reports; but we need to
2838 * recalculate the ALLOW and BLOCK lists based on previous
2839 * state and new changes.
2840 */
2841 rtnmrec = mreclist;
2842 allow_mrec = block_mrec = NULL;
2843 for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
2844 ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
2845 rp->mrec_type == BLOCK_OLD_SOURCES);
2846 if (rp->mrec_type == ALLOW_NEW_SOURCES)
2847 allow_mrec = rp;
2848 else
2849 block_mrec = rp;
2850 }
2851 /*
2852 * Perform calculations:
2853 * new_allow = mrec_allow + (rtx_allow - mrec_block)
2854 * new_block = mrec_block + (rtx_block - mrec_allow)
2855 *
2856 * Each calc requires two steps, for example:
2857 * rtx_allow = rtx_allow - mrec_block;
2858 * new_allow = mrec_allow + rtx_allow;
2859 *
2860 * Store results in mrec lists, and then copy into rtx lists.
2861 * We do it in this order in case the rtx list hasn't been
2862 		 * alloc'd yet; if it hasn't and our alloc fails, that's okay.
2863 * Overflows are also okay.
2864 */
2865 if (block_mrec != NULL) {
2866 l_difference_in_a(rtxp->rtx_allow,
2867 &block_mrec->mrec_srcs);
2868 }
2869 if (allow_mrec != NULL) {
2870 l_difference_in_a(rtxp->rtx_block,
2871 &allow_mrec->mrec_srcs);
2872 l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
2873 &ovf);
2874 }
2875 if (block_mrec != NULL) {
2876 l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
2877 &ovf);
2878 COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
2879 } else {
2880 rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
2881 &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
2882 }
2883 if (allow_mrec != NULL) {
2884 COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
2885 } else {
2886 rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
2887 &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
2888 }
2889 }
2890
2891 return (rtnmrec);
2892 }
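/*
 * A minimal userland sketch of the two-step set arithmetic used above,
 * with small integer arrays standing in for slist_t source lists
 * (illustrative only, excluded from compilation):
 */
#if 0
#include <stddef.h>

/* Return nonzero if v is among the n elements of s. */
static int
in_set(const int *s, size_t n, int v)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (s[i] == v)
			return (1);
	}
	return (0);
}

/*
 * new_allow = mrec_allow + (rtx_allow - mrec_block), with duplicates
 * suppressed; returns the number of elements written to out.
 */
static size_t
merge_allow(const int *mrec_allow, size_t na, const int *rtx_allow,
    size_t nr, const int *mrec_block, size_t nb, int *out)
{
	size_t i, n = 0;

	for (i = 0; i < na; i++)
		out[n++] = mrec_allow[i];
	for (i = 0; i < nr; i++) {
		if (!in_set(mrec_block, nb, rtx_allow[i]) &&
		    !in_set(out, n, rtx_allow[i]))
			out[n++] = rtx_allow[i];
	}
	return (n);
}
#endif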
2893