xref: /netbsd-src/sys/netinet6/mld6.c (revision f89f6560d453f5e37386cc7938c072d2f528b9fa)
1 /*	$NetBSD: mld6.c,v 1.62 2015/01/20 21:27:36 roy Exp $	*/
2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1992, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Stephen Deering of Stanford University.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
65  */
66 
67 /*
68  * Copyright (c) 1988 Stephen Deering.
69  *
70  * This code is derived from software contributed to Berkeley by
71  * Stephen Deering of Stanford University.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
102  */
103 
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.62 2015/01/20 21:27:36 roy Exp $");
106 
107 #include "opt_inet.h"
108 
109 #include <sys/param.h>
110 #include <sys/systm.h>
111 #include <sys/mbuf.h>
112 #include <sys/socket.h>
113 #include <sys/socketvar.h>
114 #include <sys/protosw.h>
115 #include <sys/syslog.h>
116 #include <sys/sysctl.h>
117 #include <sys/kernel.h>
118 #include <sys/callout.h>
119 #include <sys/cprng.h>
120 
121 #include <net/if.h>
122 
123 #include <netinet/in.h>
124 #include <netinet/in_var.h>
125 #include <netinet6/in6_var.h>
126 #include <netinet/ip6.h>
127 #include <netinet6/ip6_var.h>
128 #include <netinet6/scope6_var.h>
129 #include <netinet/icmp6.h>
130 #include <netinet6/icmp6_private.h>
131 #include <netinet6/mld6_var.h>
132 
133 #include <net/net_osdep.h>
134 
135 
136 /*
137  * This structure is used to keep track of in6_multi chains which belong to
138  * deleted interface addresses.
139  */
140 static LIST_HEAD(, multi6_kludge) in6_mk = LIST_HEAD_INITIALIZER(in6_mk);
141 
142 struct multi6_kludge {
143 	LIST_ENTRY(multi6_kludge) mk_entry;
144 	struct ifnet *mk_ifp;
145 	struct in6_multihead mk_head;
146 };
147 
148 
149 /*
150  * Protocol constants
151  */
152 
153 /*
154  * time between repetitions of a node's initial report of interest in a
155  * multicast address(in seconds)
156  */
157 #define MLD_UNSOLICITED_REPORT_INTERVAL	10
158 
159 static struct ip6_pktopts ip6_opts;
160 
161 static void mld_start_listening(struct in6_multi *);
162 static void mld_stop_listening(struct in6_multi *);
163 
164 static struct mld_hdr * mld_allocbuf(struct mbuf **, int, struct in6_multi *,
165 	int);
166 static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
167 static void mld_starttimer(struct in6_multi *);
168 static void mld_stoptimer(struct in6_multi *);
169 static u_long mld_timerresid(struct in6_multi *);
170 
171 void
172 mld_init(void)
173 {
174 	static u_int8_t hbh_buf[8];
175 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
176 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
177 
178 	/* ip6h_nxt will be fill in later */
179 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
180 
181 	/* XXX: grotty hard coding... */
182 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
183 	hbh_buf[3] = 0;
184 	hbh_buf[4] = IP6OPT_RTALERT;
185 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
186 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
187 
188 	ip6_opts.ip6po_hbh = hbh;
189 	/* We will specify the hoplimit by a multicast option. */
190 	ip6_opts.ip6po_hlim = -1;
191 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
192 }
193 
194 static void
195 mld_starttimer(struct in6_multi *in6m)
196 {
197 	struct timeval now;
198 
199 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
200 
201 	microtime(&now);
202 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
203 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
204 	    (in6m->in6m_timer % hz) * (1000000 / hz);
205 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
206 		in6m->in6m_timer_expire.tv_sec++;
207 		in6m->in6m_timer_expire.tv_usec -= 1000000;
208 	}
209 
210 	/* start or restart the timer */
211 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
212 }
213 
214 static void
215 mld_stoptimer(struct in6_multi *in6m)
216 {
217 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
218 		return;
219 
220 	callout_stop(&in6m->in6m_timer_ch);
221 
222 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
223 }
224 
225 static void
226 mld_timeo(void *arg)
227 {
228 	struct in6_multi *in6m = arg;
229 
230 	mutex_enter(softnet_lock);
231 	KERNEL_LOCK(1, NULL);
232 
233 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
234 		goto out;
235 
236 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
237 
238 	switch (in6m->in6m_state) {
239 	case MLD_REPORTPENDING:
240 		mld_start_listening(in6m);
241 		break;
242 	default:
243 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
244 		break;
245 	}
246 
247 out:
248 	KERNEL_UNLOCK_ONE(NULL);
249 	mutex_exit(softnet_lock);
250 }
251 
252 static u_long
253 mld_timerresid(struct in6_multi *in6m)
254 {
255 	struct timeval now, diff;
256 
257 	microtime(&now);
258 
259 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
260 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
261 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
262 		return (0);
263 	}
264 	diff = in6m->in6m_timer_expire;
265 	diff.tv_sec -= now.tv_sec;
266 	diff.tv_usec -= now.tv_usec;
267 	if (diff.tv_usec < 0) {
268 		diff.tv_sec--;
269 		diff.tv_usec += 1000000;
270 	}
271 
272 	/* return the remaining time in milliseconds */
273 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
274 }
275 
276 static void
277 mld_start_listening(struct in6_multi *in6m)
278 {
279 	struct in6_addr all_in6;
280 
281 	/*
282 	 * RFC2710 page 10:
283 	 * The node never sends a Report or Done for the link-scope all-nodes
284 	 * address.
285 	 * MLD messages are never sent for multicast addresses whose scope is 0
286 	 * (reserved) or 1 (node-local).
287 	 */
288 	all_in6 = in6addr_linklocal_allnodes;
289 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
290 		/* XXX: this should not happen! */
291 		in6m->in6m_timer = 0;
292 		in6m->in6m_state = MLD_OTHERLISTENER;
293 	}
294 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
295 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
296 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
297 		in6m->in6m_state = MLD_OTHERLISTENER;
298 	} else {
299 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
300 		in6m->in6m_timer = cprng_fast32() %
301 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
302 		in6m->in6m_state = MLD_IREPORTEDLAST;
303 
304 		mld_starttimer(in6m);
305 	}
306 }
307 
308 static void
309 mld_stop_listening(struct in6_multi *in6m)
310 {
311 	struct in6_addr allnode, allrouter;
312 
313 	allnode = in6addr_linklocal_allnodes;
314 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
315 		/* XXX: this should not happen! */
316 		return;
317 	}
318 	allrouter = in6addr_linklocal_allrouters;
319 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
320 		/* XXX impossible */
321 		return;
322 	}
323 
324 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
325 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
326 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
327 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
328 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
329 	}
330 }
331 
332 void
333 mld_input(struct mbuf *m, int off)
334 {
335 	struct ip6_hdr *ip6;
336 	struct mld_hdr *mldh;
337 	struct ifnet *ifp = m->m_pkthdr.rcvif;
338 	struct in6_multi *in6m = NULL;
339 	struct in6_addr mld_addr, all_in6;
340 	struct in6_ifaddr *ia;
341 	u_long timer = 0;	/* timer value in the MLD query header */
342 
343 	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
344 	if (mldh == NULL) {
345 		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
346 		return;
347 	}
348 
349 	/* source address validation */
350 	ip6 = mtod(m, struct ip6_hdr *);/* in case mpullup */
351 	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
352 		/*
353 		 * RFC3590 allows the IPv6 unspecified address as the source
354 		 * address of MLD report and done messages.  However, as this
355 		 * same document says, this special rule is for snooping
356 		 * switches and the RFC requires routers to discard MLD packets
357 		 * with the unspecified source address.  The RFC only talks
358 		 * about hosts receiving an MLD query or report in Security
359 		 * Considerations, but this is probably the correct intention.
360 		 * RFC3590 does not talk about other cases than link-local and
361 		 * the unspecified source addresses, but we believe the same
362 		 * rule should be applied.
363 		 * As a result, we only allow link-local addresses as the
364 		 * source address; otherwise, simply discard the packet.
365 		 */
366 #if 0
367 		/*
368 		 * XXX: do not log in an input path to avoid log flooding,
369 		 * though RFC3590 says "SHOULD log" if the source of a query
370 		 * is the unspecified address.
371 		 */
372 		log(LOG_INFO,
373 		    "mld_input: src %s is not link-local (grp=%s)\n",
374 		    ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&mldh->mld_addr));
375 #endif
376 		m_freem(m);
377 		return;
378 	}
379 
380 	/*
381 	 * make a copy for local work (in6_setscope() may modify the 1st arg)
382 	 */
383 	mld_addr = mldh->mld_addr;
384 	if (in6_setscope(&mld_addr, ifp, NULL)) {
385 		/* XXX: this should not happen! */
386 		m_free(m);
387 		return;
388 	}
389 
390 	/*
391 	 * In the MLD specification, there are 3 states and a flag.
392 	 *
393 	 * In Non-Listener state, we simply don't have a membership record.
394 	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
395 	 * In Idle Listener state, our timer is not running
396 	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
397 	 *
398 	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
399 	 * we have heard a report from another member, or MLD_IREPORTEDLAST
400 	 * if we sent the last report.
401 	 */
402 	switch (mldh->mld_type) {
403 	case MLD_LISTENER_QUERY:
404 		if (ifp->if_flags & IFF_LOOPBACK)
405 			break;
406 
407 		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
408 		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
409 			break;	/* print error or log stat? */
410 
411 		all_in6 = in6addr_linklocal_allnodes;
412 		if (in6_setscope(&all_in6, ifp, NULL)) {
413 			/* XXX: this should not happen! */
414 			break;
415 		}
416 
417 		/*
418 		 * - Start the timers in all of our membership records
419 		 *   that the query applies to for the interface on
420 		 *   which the query arrived excl. those that belong
421 		 *   to the "all-nodes" group (ff02::1).
422 		 * - Restart any timer that is already running but has
423 		 *   a value longer than the requested timeout.
424 		 * - Use the value specified in the query message as
425 		 *   the maximum timeout.
426 		 */
427 		timer = ntohs(mldh->mld_maxdelay);
428 
429 		IFP_TO_IA6(ifp, ia);
430 		if (ia == NULL)
431 			break;
432 
433 		LIST_FOREACH(in6m, &ia->ia6_multiaddrs, in6m_entry) {
434 			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
435 			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
436 			    IPV6_ADDR_SCOPE_LINKLOCAL)
437 				continue;
438 
439 			if (in6m->in6m_state == MLD_REPORTPENDING)
440 				continue; /* we are not yet ready */
441 
442 			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
443 			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
444 				continue;
445 
446 			if (timer == 0) {
447 				/* send a report immediately */
448 				mld_stoptimer(in6m);
449 				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
450 				in6m->in6m_state = MLD_IREPORTEDLAST;
451 			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
452 			    mld_timerresid(in6m) > timer) {
453 				in6m->in6m_timer =
454 				   1 + (cprng_fast32() % timer) * hz / 1000;
455 				mld_starttimer(in6m);
456 			}
457 		}
458 		break;
459 
460 	case MLD_LISTENER_REPORT:
461 		/*
462 		 * For fast leave to work, we have to know that we are the
463 		 * last person to send a report for this group.  Reports
464 		 * can potentially get looped back if we are a multicast
465 		 * router, so discard reports sourced by me.
466 		 * Note that it is impossible to check IFF_LOOPBACK flag of
467 		 * ifp for this purpose, since ip6_mloopback pass the physical
468 		 * interface to looutput.
469 		 */
470 		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
471 			break;
472 
473 		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
474 			break;
475 
476 		/*
477 		 * If we belong to the group being reported, stop
478 		 * our timer for that group.
479 		 */
480 		IN6_LOOKUP_MULTI(mld_addr, ifp, in6m);
481 		if (in6m) {
482 			mld_stoptimer(in6m); /* transit to idle state */
483 			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
484 		}
485 		break;
486 	default:		/* this is impossible */
487 #if 0
488 		/*
489 		 * this case should be impossible because of filtering in
490 		 * icmp6_input().  But we explicitly disabled this part
491 		 * just in case.
492 		 */
493 		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
494 #endif
495 		break;
496 	}
497 
498 	m_freem(m);
499 }
500 
501 static void
502 mld_sendpkt(struct in6_multi *in6m, int type,
503 	const struct in6_addr *dst)
504 {
505 	struct mbuf *mh;
506 	struct mld_hdr *mldh;
507 	struct ip6_hdr *ip6 = NULL;
508 	struct ip6_moptions im6o;
509 	struct in6_ifaddr *ia = NULL;
510 	struct ifnet *ifp = in6m->in6m_ifp;
511 	int ignflags;
512 
513 	/*
514 	 * At first, find a link local address on the outgoing interface
515 	 * to use as the source address of the MLD packet.
516 	 * We do not reject tentative addresses for MLD report to deal with
517 	 * the case where we first join a link-local address.
518 	 */
519 	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
520 	if ((ia = in6ifa_ifpforlinklocal(ifp, ignflags)) == NULL)
521 		return;
522 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE))
523 		ia = NULL;
524 
525 	/* Allocate two mbufs to store IPv6 header and MLD header */
526 	mldh = mld_allocbuf(&mh, sizeof(struct mld_hdr), in6m, type);
527 	if (mldh == NULL)
528 		return;
529 
530 	/* fill src/dst here */
531  	ip6 = mtod(mh, struct ip6_hdr *);
532  	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
533  	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
534 
535 	mldh->mld_addr = in6m->in6m_addr;
536 	in6_clearscope(&mldh->mld_addr); /* XXX */
537 	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
538 	    sizeof(struct mld_hdr));
539 
540 	/* construct multicast option */
541 	memset(&im6o, 0, sizeof(im6o));
542 	im6o.im6o_multicast_ifp = ifp;
543 	im6o.im6o_multicast_hlim = 1;
544 
545 	/*
546 	 * Request loopback of the report if we are acting as a multicast
547 	 * router, so that the process-level routing daemon can hear it.
548 	 */
549 	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
550 
551 	/* increment output statictics */
552 	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
553 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
554 	switch (type) {
555 	case MLD_LISTENER_QUERY:
556 		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
557 		break;
558 	case MLD_LISTENER_REPORT:
559 		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
560 		break;
561 	case MLD_LISTENER_DONE:
562 		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
563 		break;
564 	}
565 
566 	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
567 	    &im6o, NULL, NULL);
568 }
569 
570 static struct mld_hdr *
571 mld_allocbuf(struct mbuf **mh, int len, struct in6_multi *in6m,
572     int type)
573 {
574 	struct mbuf *md;
575 	struct mld_hdr *mldh;
576 	struct ip6_hdr *ip6;
577 
578 	/*
579 	 * Allocate mbufs to store ip6 header and MLD header.
580 	 * We allocate 2 mbufs and make chain in advance because
581 	 * it is more convenient when inserting the hop-by-hop option later.
582 	 */
583 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
584 	if (*mh == NULL)
585 		return NULL;
586 	MGET(md, M_DONTWAIT, MT_DATA);
587 	if (md == NULL) {
588 		m_free(*mh);
589 		*mh = NULL;
590 		return NULL;
591 	}
592 	(*mh)->m_next = md;
593 	md->m_next = NULL;
594 
595 	(*mh)->m_pkthdr.rcvif = NULL;
596 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + len;
597 	(*mh)->m_len = sizeof(struct ip6_hdr);
598 	MH_ALIGN(*mh, sizeof(struct ip6_hdr));
599 
600 	/* fill in the ip6 header */
601 	ip6 = mtod(*mh, struct ip6_hdr *);
602 	memset(ip6, 0, sizeof(*ip6));
603 	ip6->ip6_flow = 0;
604 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
605 	ip6->ip6_vfc |= IPV6_VERSION;
606 	/* ip6_plen will be set later */
607 	ip6->ip6_nxt = IPPROTO_ICMPV6;
608 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
609 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
610 
611 	/* fill in the MLD header as much as possible */
612 	md->m_len = len;
613 	mldh = mtod(md, struct mld_hdr *);
614 	memset(mldh, 0, len);
615 	mldh->mld_type = type;
616 	return mldh;
617 }
618 
619 /*
620  * Add an address to the list of IP6 multicast addresses for a given interface.
621  */
622 struct	in6_multi *
623 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp,
624 	int *errorp, int timer)
625 {
626 	struct	in6_ifaddr *ia;
627 	struct	sockaddr_in6 sin6;
628 	struct	in6_multi *in6m;
629 	int	s = splsoftnet();
630 
631 	*errorp = 0;
632 
633 	/*
634 	 * See if address already in list.
635 	 */
636 	IN6_LOOKUP_MULTI(*maddr6, ifp, in6m);
637 	if (in6m != NULL) {
638 		/*
639 		 * Found it; just increment the refrence count.
640 		 */
641 		in6m->in6m_refcount++;
642 	} else {
643 		/*
644 		 * New address; allocate a new multicast record
645 		 * and link it into the interface's multicast list.
646 		 */
647 		in6m = (struct in6_multi *)
648 			malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
649 		if (in6m == NULL) {
650 			splx(s);
651 			*errorp = ENOBUFS;
652 			return (NULL);
653 		}
654 
655 		in6m->in6m_addr = *maddr6;
656 		in6m->in6m_ifp = ifp;
657 		in6m->in6m_refcount = 1;
658 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
659 		IFP_TO_IA6(ifp, ia);
660 		if (ia == NULL) {
661 			free(in6m, M_IPMADDR);
662 			splx(s);
663 			*errorp = EADDRNOTAVAIL; /* appropriate? */
664 			return (NULL);
665 		}
666 		in6m->in6m_ia = ia;
667 		ifaref(&ia->ia_ifa); /* gain a reference */
668 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
669 
670 		/*
671 		 * Ask the network driver to update its multicast reception
672 		 * filter appropriately for the new address.
673 		 */
674 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
675 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
676 		if (*errorp) {
677 			LIST_REMOVE(in6m, in6m_entry);
678 			free(in6m, M_IPMADDR);
679 			ifafree(&ia->ia_ifa);
680 			splx(s);
681 			return (NULL);
682 		}
683 
684 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
685 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
686 		in6m->in6m_timer = timer;
687 		if (in6m->in6m_timer > 0) {
688 			in6m->in6m_state = MLD_REPORTPENDING;
689 			mld_starttimer(in6m);
690 
691 			splx(s);
692 			return (in6m);
693 		}
694 
695 		/*
696 		 * Let MLD6 know that we have joined a new IP6 multicast
697 		 * group.
698 		 */
699 		mld_start_listening(in6m);
700 	}
701 	splx(s);
702 	return (in6m);
703 }
704 
705 /*
706  * Delete a multicast address record.
707  */
708 void
709 in6_delmulti(struct in6_multi *in6m)
710 {
711 	struct	sockaddr_in6 sin6;
712 	struct	in6_ifaddr *ia;
713 	int	s = splsoftnet();
714 
715 	mld_stoptimer(in6m);
716 
717 	if (--in6m->in6m_refcount == 0) {
718 		/*
719 		 * No remaining claims to this record; let MLD6 know
720 		 * that we are leaving the multicast group.
721 		 */
722 		mld_stop_listening(in6m);
723 
724 		/*
725 		 * Unlink from list.
726 		 */
727 		LIST_REMOVE(in6m, in6m_entry);
728 		if (in6m->in6m_ia != NULL) {
729 			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
730 			in6m->in6m_ia = NULL;
731 		}
732 
733 		/*
734 		 * Delete all references of this multicasting group from
735 		 * the membership arrays
736 		 */
737 		for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
738 			struct in6_multi_mship *imm;
739 			LIST_FOREACH(imm, &ia->ia6_memberships, i6mm_chain) {
740 				if (imm->i6mm_maddr == in6m)
741 					imm->i6mm_maddr = NULL;
742 			}
743 		}
744 
745 		/*
746 		 * Notify the network driver to update its multicast
747 		 * reception filter.
748 		 */
749 		sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
750 		if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
751 
752 		/* Tell mld_timeo we're halting the timer */
753 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
754 		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
755 		callout_destroy(&in6m->in6m_timer_ch);
756 
757 		free(in6m, M_IPMADDR);
758 	}
759 	splx(s);
760 }
761 
762 
763 struct in6_multi_mship *
764 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
765 	int *errorp, int timer)
766 {
767 	struct in6_multi_mship *imm;
768 
769 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
770 	if (imm == NULL) {
771 		*errorp = ENOBUFS;
772 		return NULL;
773 	}
774 
775 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
776 	if (!imm->i6mm_maddr) {
777 		/* *errorp is already set */
778 		free(imm, M_IPMADDR);
779 		return NULL;
780 	}
781 	return imm;
782 }
783 
784 int
785 in6_leavegroup(struct in6_multi_mship *imm)
786 {
787 
788 	if (imm->i6mm_maddr) {
789 		in6_delmulti(imm->i6mm_maddr);
790 	}
791 	free(imm, M_IPMADDR);
792 	return 0;
793 }
794 
795 
796 /*
797  * Multicast address kludge:
798  * If there were any multicast addresses attached to this interface address,
799  * either move them to another address on this interface, or save them until
800  * such time as this interface is reconfigured for IPv6.
801  */
802 void
803 in6_savemkludge(struct in6_ifaddr *oia)
804 {
805 	struct in6_ifaddr *ia;
806 	struct in6_multi *in6m;
807 
808 	IFP_TO_IA6(oia->ia_ifp, ia);
809 	if (ia) {	/* there is another address */
810 		KASSERT(ia != oia);
811 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
812 			LIST_REMOVE(in6m, in6m_entry);
813 			ifaref(&ia->ia_ifa);
814 			ifafree(&in6m->in6m_ia->ia_ifa);
815 			in6m->in6m_ia = ia;
816 			LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
817 		}
818 	} else {	/* last address on this if deleted, save */
819 		struct multi6_kludge *mk;
820 
821 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
822 			if (mk->mk_ifp == oia->ia_ifp)
823 				break;
824 		}
825 		if (mk == NULL) /* this should not happen! */
826 			panic("in6_savemkludge: no kludge space");
827 
828 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
829 			LIST_REMOVE(in6m, in6m_entry);
830 			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
831 			in6m->in6m_ia = NULL;
832 			LIST_INSERT_HEAD(&mk->mk_head, in6m, in6m_entry);
833 		}
834 	}
835 }
836 
837 /*
838  * Continuation of multicast address hack:
839  * If there was a multicast group list previously saved for this interface,
840  * then we re-attach it to the first address configured on the i/f.
841  */
842 void
843 in6_restoremkludge(struct in6_ifaddr *ia, struct ifnet *ifp)
844 {
845 	struct multi6_kludge *mk;
846 	struct in6_multi *in6m;
847 
848 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
849 		if (mk->mk_ifp == ifp)
850 			break;
851 	}
852 	if (mk == NULL)
853 		return;
854 	while ((in6m = LIST_FIRST(&mk->mk_head)) != NULL) {
855 		LIST_REMOVE(in6m, in6m_entry);
856 		in6m->in6m_ia = ia;
857 		ifaref(&ia->ia_ifa);
858 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
859 	}
860 }
861 
862 /*
863  * Allocate space for the kludge at interface initialization time.
864  * Formerly, we dynamically allocated the space in in6_savemkludge() with
865  * malloc(M_WAITOK).  However, it was wrong since the function could be called
866  * under an interrupt context (software timer on address lifetime expiration).
867  * Also, we cannot just give up allocating the strucutre, since the group
868  * membership structure is very complex and we need to keep it anyway.
869  * Of course, this function MUST NOT be called under an interrupt context.
870  * Specifically, it is expected to be called only from in6_ifattach(), though
871  * it is a global function.
872  */
873 void
874 in6_createmkludge(struct ifnet *ifp)
875 {
876 	struct multi6_kludge *mk;
877 
878 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
879 		/* If we've already had one, do not allocate. */
880 		if (mk->mk_ifp == ifp)
881 			return;
882 	}
883 
884 	mk = malloc(sizeof(*mk), M_IPMADDR, M_ZERO|M_WAITOK);
885 
886 	LIST_INIT(&mk->mk_head);
887 	mk->mk_ifp = ifp;
888 	LIST_INSERT_HEAD(&in6_mk, mk, mk_entry);
889 }
890 
891 void
892 in6_purgemkludge(struct ifnet *ifp)
893 {
894 	struct multi6_kludge *mk;
895 	struct in6_multi *in6m, *next;
896 
897 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
898 		if (mk->mk_ifp == ifp)
899 			break;
900 	}
901 	if (mk == NULL)
902 		return;
903 
904 	/* leave from all multicast groups joined */
905 	for (in6m = LIST_FIRST(&mk->mk_head); in6m != NULL; in6m = next) {
906 		next = LIST_NEXT(in6m, in6m_entry);
907 		in6_delmulti(in6m);
908 	}
909 	LIST_REMOVE(mk, mk_entry);
910 	free(mk, M_IPMADDR);
911 }
912 
913 static int
914 in6_mkludge_sysctl(SYSCTLFN_ARGS)
915 {
916 	struct multi6_kludge *mk;
917 	struct in6_multi *in6m;
918 	int error;
919 	uint32_t tmp;
920 	size_t written;
921 
922 	if (namelen != 1)
923 		return EINVAL;
924 
925 	if (oldp == NULL) {
926 		*oldlenp = 0;
927 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
928 			if (mk->mk_ifp->if_index == name[0])
929 				continue;
930 			LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
931 				*oldlenp += sizeof(struct in6_addr) +
932 				    sizeof(uint32_t);
933 			}
934 		}
935 		return 0;
936 	}
937 
938 	error = 0;
939 	written = 0;
940 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
941 		if (mk->mk_ifp->if_index == name[0])
942 			continue;
943 		LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
944 			if (written + sizeof(struct in6_addr) +
945 			    sizeof(uint32_t) > *oldlenp)
946 				goto done;
947 			error = sysctl_copyout(l, &in6m->in6m_addr,
948 			    oldp, sizeof(struct in6_addr));
949 			if (error)
950 				goto done;
951 			oldp = (char *)oldp + sizeof(struct in6_addr);
952 			written += sizeof(struct in6_addr);
953 			tmp = in6m->in6m_refcount;
954 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
955 			if (error)
956 				goto done;
957 			oldp = (char *)oldp + sizeof(tmp);
958 			written += sizeof(tmp);
959 		}
960 	}
961 
962 done:
963 	*oldlenp = written;
964 	return error;
965 }
966 
967 static int
968 in6_multicast_sysctl(SYSCTLFN_ARGS)
969 {
970 	struct ifnet *ifp;
971 	struct ifaddr *ifa;
972 	struct in6_ifaddr *ifa6;
973 	struct in6_multi *in6m;
974 	uint32_t tmp;
975 	int error;
976 	size_t written;
977 
978 	if (namelen != 1)
979 		return EINVAL;
980 
981 	ifp = if_byindex(name[0]);
982 	if (ifp == NULL)
983 		return ENODEV;
984 
985 	if (oldp == NULL) {
986 		*oldlenp = 0;
987 		IFADDR_FOREACH(ifa, ifp) {
988 			if (ifa->ifa_addr == NULL)
989 				continue;
990 			if (ifa->ifa_addr->sa_family != AF_INET6)
991 				continue;
992 			ifa6 = (struct in6_ifaddr *)ifa;
993 			LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
994 				*oldlenp += 2 * sizeof(struct in6_addr) +
995 				    sizeof(uint32_t);
996 			}
997 		}
998 		return 0;
999 	}
1000 
1001 	error = 0;
1002 	written = 0;
1003 	IFADDR_FOREACH(ifa, ifp) {
1004 		if (ifa->ifa_addr == NULL)
1005 			continue;
1006 		if (ifa->ifa_addr->sa_family != AF_INET6)
1007 			continue;
1008 		ifa6 = (struct in6_ifaddr *)ifa;
1009 		LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
1010 			if (written + 2 * sizeof(struct in6_addr) +
1011 			    sizeof(uint32_t) > *oldlenp)
1012 				goto done;
1013 			error = sysctl_copyout(l, &ifa6->ia_addr.sin6_addr,
1014 			    oldp, sizeof(struct in6_addr));
1015 			if (error)
1016 				goto done;
1017 			oldp = (char *)oldp + sizeof(struct in6_addr);
1018 			written += sizeof(struct in6_addr);
1019 			error = sysctl_copyout(l, &in6m->in6m_addr,
1020 			    oldp, sizeof(struct in6_addr));
1021 			if (error)
1022 				goto done;
1023 			oldp = (char *)oldp + sizeof(struct in6_addr);
1024 			written += sizeof(struct in6_addr);
1025 			tmp = in6m->in6m_refcount;
1026 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1027 			if (error)
1028 				goto done;
1029 			oldp = (char *)oldp + sizeof(tmp);
1030 			written += sizeof(tmp);
1031 		}
1032 	}
1033 done:
1034 	*oldlenp = written;
1035 	return error;
1036 }
1037 
1038 SYSCTL_SETUP(sysctl_in6_mklude_setup, "sysctl net.inet6.multicast_kludge subtree setup")
1039 {
1040 
1041 	sysctl_createv(clog, 0, NULL, NULL,
1042 		       CTLFLAG_PERMANENT,
1043 		       CTLTYPE_NODE, "inet6", NULL,
1044 		       NULL, 0, NULL, 0,
1045 		       CTL_NET, PF_INET6, CTL_EOL);
1046 
1047 	sysctl_createv(clog, 0, NULL, NULL,
1048 		       CTLFLAG_PERMANENT,
1049 		       CTLTYPE_NODE, "multicast",
1050 		       SYSCTL_DESCR("Multicast information"),
1051 		       in6_multicast_sysctl, 0, NULL, 0,
1052 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1053 
1054 	sysctl_createv(clog, 0, NULL, NULL,
1055 		       CTLFLAG_PERMANENT,
1056 		       CTLTYPE_NODE, "multicast_kludge",
1057 		       SYSCTL_DESCR("multicast kludge information"),
1058 		       in6_mkludge_sysctl, 0, NULL, 0,
1059 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1060 }
1061