xref: /netbsd-src/sys/netinet6/mld6.c (revision 2e2322c9c07009df921d11b1268f8506affbb8ba)
1 /*	$NetBSD: mld6.c,v 1.75 2016/11/18 06:50:04 knakahara Exp $	*/
2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1992, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Stephen Deering of Stanford University.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
65  */
66 
67 /*
68  * Copyright (c) 1988 Stephen Deering.
69  *
70  * This code is derived from software contributed to Berkeley by
71  * Stephen Deering of Stanford University.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
102  */
103 
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.75 2016/11/18 06:50:04 knakahara Exp $");
106 
107 #ifdef _KERNEL_OPT
108 #include "opt_inet.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/mbuf.h>
115 #include <sys/socket.h>
116 #include <sys/socketvar.h>
117 #include <sys/protosw.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/kernel.h>
121 #include <sys/callout.h>
122 #include <sys/cprng.h>
123 
124 #include <net/if.h>
125 
126 #include <netinet/in.h>
127 #include <netinet/in_var.h>
128 #include <netinet6/in6_var.h>
129 #include <netinet/ip6.h>
130 #include <netinet6/ip6_var.h>
131 #include <netinet6/scope6_var.h>
132 #include <netinet/icmp6.h>
133 #include <netinet6/icmp6_private.h>
134 #include <netinet6/mld6_var.h>
135 
136 #include <net/net_osdep.h>
137 
138 
139 /*
140  * This structure is used to keep track of in6_multi chains which belong to
141  * deleted interface addresses.
142  */
/* Global list of kludge records, searched by interface (mk_ifp). */
static LIST_HEAD(, multi6_kludge) in6_mk = LIST_HEAD_INITIALIZER(in6_mk);

struct multi6_kludge {
	/* linkage on the global in6_mk list */
	LIST_ENTRY(multi6_kludge) mk_entry;
	/* interface whose multicast records are parked here */
	struct ifnet *mk_ifp;
	/* in6_multi records saved while the interface has no address */
	struct in6_multihead mk_head;
};
150 
151 
152 /*
153  * Protocol constants
154  */
155 
156 /*
157  * time between repetitions of a node's initial report of interest in a
158  * multicast address(in seconds)
159  */
#define MLD_UNSOLICITED_REPORT_INTERVAL	10

/*
 * Packet options handed to ip6_output() for every MLD message sent from
 * this file; filled in once by mld_init().
 */
static struct ip6_pktopts ip6_opts;

/* Group join/leave notifications (send Report / Done as appropriate). */
static void mld_start_listening(struct in6_multi *);
static void mld_stop_listening(struct in6_multi *);

/* Packet construction and transmission helpers. */
static struct mld_hdr * mld_allocbuf(struct mbuf **, int, struct in6_multi *,
	int);
static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
/* Per-membership report timer helpers. */
static void mld_starttimer(struct in6_multi *);
static void mld_stoptimer(struct in6_multi *);
static u_long mld_timerresid(struct in6_multi *);
173 
174 void
175 mld_init(void)
176 {
177 	static u_int8_t hbh_buf[8];
178 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
179 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
180 
181 	/* ip6h_nxt will be fill in later */
182 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
183 
184 	/* XXX: grotty hard coding... */
185 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
186 	hbh_buf[3] = 0;
187 	hbh_buf[4] = IP6OPT_RTALERT;
188 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
189 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
190 
191 	ip6_opts.ip6po_hbh = hbh;
192 	/* We will specify the hoplimit by a multicast option. */
193 	ip6_opts.ip6po_hlim = -1;
194 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
195 }
196 
197 static void
198 mld_starttimer(struct in6_multi *in6m)
199 {
200 	struct timeval now;
201 
202 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
203 
204 	microtime(&now);
205 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
206 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
207 	    (in6m->in6m_timer % hz) * (1000000 / hz);
208 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
209 		in6m->in6m_timer_expire.tv_sec++;
210 		in6m->in6m_timer_expire.tv_usec -= 1000000;
211 	}
212 
213 	/* start or restart the timer */
214 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
215 }
216 
217 static void
218 mld_stoptimer(struct in6_multi *in6m)
219 {
220 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
221 		return;
222 
223 	callout_stop(&in6m->in6m_timer_ch);
224 
225 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
226 }
227 
228 static void
229 mld_timeo(void *arg)
230 {
231 	struct in6_multi *in6m = arg;
232 
233 	mutex_enter(softnet_lock);
234 	KERNEL_LOCK(1, NULL);
235 
236 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
237 		goto out;
238 
239 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
240 
241 	switch (in6m->in6m_state) {
242 	case MLD_REPORTPENDING:
243 		mld_start_listening(in6m);
244 		break;
245 	default:
246 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
247 		break;
248 	}
249 
250 out:
251 	KERNEL_UNLOCK_ONE(NULL);
252 	mutex_exit(softnet_lock);
253 }
254 
255 static u_long
256 mld_timerresid(struct in6_multi *in6m)
257 {
258 	struct timeval now, diff;
259 
260 	microtime(&now);
261 
262 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
263 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
264 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
265 		return (0);
266 	}
267 	diff = in6m->in6m_timer_expire;
268 	diff.tv_sec -= now.tv_sec;
269 	diff.tv_usec -= now.tv_usec;
270 	if (diff.tv_usec < 0) {
271 		diff.tv_sec--;
272 		diff.tv_usec += 1000000;
273 	}
274 
275 	/* return the remaining time in milliseconds */
276 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
277 }
278 
279 static void
280 mld_start_listening(struct in6_multi *in6m)
281 {
282 	struct in6_addr all_in6;
283 
284 	/*
285 	 * RFC2710 page 10:
286 	 * The node never sends a Report or Done for the link-scope all-nodes
287 	 * address.
288 	 * MLD messages are never sent for multicast addresses whose scope is 0
289 	 * (reserved) or 1 (node-local).
290 	 */
291 	all_in6 = in6addr_linklocal_allnodes;
292 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
293 		/* XXX: this should not happen! */
294 		in6m->in6m_timer = 0;
295 		in6m->in6m_state = MLD_OTHERLISTENER;
296 	}
297 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
298 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
299 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
300 		in6m->in6m_state = MLD_OTHERLISTENER;
301 	} else {
302 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
303 		in6m->in6m_timer = cprng_fast32() %
304 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
305 		in6m->in6m_state = MLD_IREPORTEDLAST;
306 
307 		mld_starttimer(in6m);
308 	}
309 }
310 
311 static void
312 mld_stop_listening(struct in6_multi *in6m)
313 {
314 	struct in6_addr allnode, allrouter;
315 
316 	allnode = in6addr_linklocal_allnodes;
317 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
318 		/* XXX: this should not happen! */
319 		return;
320 	}
321 	allrouter = in6addr_linklocal_allrouters;
322 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
323 		/* XXX impossible */
324 		return;
325 	}
326 
327 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
328 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
329 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
330 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
331 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
332 	}
333 }
334 
335 void
336 mld_input(struct mbuf *m, int off)
337 {
338 	struct ip6_hdr *ip6;
339 	struct mld_hdr *mldh;
340 	struct ifnet *ifp;
341 	struct in6_multi *in6m = NULL;
342 	struct in6_addr mld_addr, all_in6;
343 	struct in6_ifaddr *ia;
344 	u_long timer = 0;	/* timer value in the MLD query header */
345 	int s;
346 
347 	ifp = m_get_rcvif(m, &s);
348 	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
349 	if (mldh == NULL) {
350 		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
351 		goto out_nodrop;
352 	}
353 
354 	/* source address validation */
355 	ip6 = mtod(m, struct ip6_hdr *);/* in case mpullup */
356 	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
357 		/*
358 		 * RFC3590 allows the IPv6 unspecified address as the source
359 		 * address of MLD report and done messages.  However, as this
360 		 * same document says, this special rule is for snooping
361 		 * switches and the RFC requires routers to discard MLD packets
362 		 * with the unspecified source address.  The RFC only talks
363 		 * about hosts receiving an MLD query or report in Security
364 		 * Considerations, but this is probably the correct intention.
365 		 * RFC3590 does not talk about other cases than link-local and
366 		 * the unspecified source addresses, but we believe the same
367 		 * rule should be applied.
368 		 * As a result, we only allow link-local addresses as the
369 		 * source address; otherwise, simply discard the packet.
370 		 */
371 #if 0
372 		/*
373 		 * XXX: do not log in an input path to avoid log flooding,
374 		 * though RFC3590 says "SHOULD log" if the source of a query
375 		 * is the unspecified address.
376 		 */
377 		log(LOG_INFO,
378 		    "mld_input: src %s is not link-local (grp=%s)\n",
379 		    ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&mldh->mld_addr));
380 #endif
381 		goto out;
382 	}
383 
384 	/*
385 	 * make a copy for local work (in6_setscope() may modify the 1st arg)
386 	 */
387 	mld_addr = mldh->mld_addr;
388 	if (in6_setscope(&mld_addr, ifp, NULL)) {
389 		/* XXX: this should not happen! */
390 		goto out;
391 	}
392 
393 	/*
394 	 * In the MLD specification, there are 3 states and a flag.
395 	 *
396 	 * In Non-Listener state, we simply don't have a membership record.
397 	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
398 	 * In Idle Listener state, our timer is not running
399 	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
400 	 *
401 	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
402 	 * we have heard a report from another member, or MLD_IREPORTEDLAST
403 	 * if we sent the last report.
404 	 */
405 	switch (mldh->mld_type) {
406 	case MLD_LISTENER_QUERY: {
407 		struct psref psref;
408 
409 		if (ifp->if_flags & IFF_LOOPBACK)
410 			break;
411 
412 		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
413 		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
414 			break;	/* print error or log stat? */
415 
416 		all_in6 = in6addr_linklocal_allnodes;
417 		if (in6_setscope(&all_in6, ifp, NULL)) {
418 			/* XXX: this should not happen! */
419 			break;
420 		}
421 
422 		/*
423 		 * - Start the timers in all of our membership records
424 		 *   that the query applies to for the interface on
425 		 *   which the query arrived excl. those that belong
426 		 *   to the "all-nodes" group (ff02::1).
427 		 * - Restart any timer that is already running but has
428 		 *   a value longer than the requested timeout.
429 		 * - Use the value specified in the query message as
430 		 *   the maximum timeout.
431 		 */
432 		timer = ntohs(mldh->mld_maxdelay);
433 
434 		ia = in6_get_ia_from_ifp_psref(ifp, &psref);
435 		if (ia == NULL)
436 			break;
437 
438 		/* The following operations may sleep */
439 		m_put_rcvif(ifp, &s);
440 		ifp = NULL;
441 
442 		LIST_FOREACH(in6m, &ia->ia6_multiaddrs, in6m_entry) {
443 			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
444 			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
445 			    IPV6_ADDR_SCOPE_LINKLOCAL)
446 				continue;
447 
448 			if (in6m->in6m_state == MLD_REPORTPENDING)
449 				continue; /* we are not yet ready */
450 
451 			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
452 			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
453 				continue;
454 
455 			if (timer == 0) {
456 				/* send a report immediately */
457 				mld_stoptimer(in6m);
458 				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
459 				in6m->in6m_state = MLD_IREPORTEDLAST;
460 			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
461 			    mld_timerresid(in6m) > timer) {
462 				in6m->in6m_timer =
463 				   1 + (cprng_fast32() % timer) * hz / 1000;
464 				mld_starttimer(in6m);
465 			}
466 		}
467 		ia6_release(ia, &psref);
468 		break;
469 	    }
470 
471 	case MLD_LISTENER_REPORT:
472 		/*
473 		 * For fast leave to work, we have to know that we are the
474 		 * last person to send a report for this group.  Reports
475 		 * can potentially get looped back if we are a multicast
476 		 * router, so discard reports sourced by me.
477 		 * Note that it is impossible to check IFF_LOOPBACK flag of
478 		 * ifp for this purpose, since ip6_mloopback pass the physical
479 		 * interface to looutput.
480 		 */
481 		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
482 			break;
483 
484 		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
485 			break;
486 
487 		/*
488 		 * If we belong to the group being reported, stop
489 		 * our timer for that group.
490 		 */
491 		IN6_LOOKUP_MULTI(mld_addr, ifp, in6m);
492 		if (in6m) {
493 			mld_stoptimer(in6m); /* transit to idle state */
494 			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
495 		}
496 		break;
497 	default:		/* this is impossible */
498 #if 0
499 		/*
500 		 * this case should be impossible because of filtering in
501 		 * icmp6_input().  But we explicitly disabled this part
502 		 * just in case.
503 		 */
504 		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
505 #endif
506 		break;
507 	}
508 
509 out:
510 	m_freem(m);
511 out_nodrop:
512 	m_put_rcvif(ifp, &s);
513 }
514 
515 static void
516 mld_sendpkt(struct in6_multi *in6m, int type,
517 	const struct in6_addr *dst)
518 {
519 	struct mbuf *mh;
520 	struct mld_hdr *mldh;
521 	struct ip6_hdr *ip6 = NULL;
522 	struct ip6_moptions im6o;
523 	struct in6_ifaddr *ia = NULL;
524 	struct ifnet *ifp = in6m->in6m_ifp;
525 	int ignflags;
526 	struct psref psref;
527 	int bound;
528 
529 	/*
530 	 * At first, find a link local address on the outgoing interface
531 	 * to use as the source address of the MLD packet.
532 	 * We do not reject tentative addresses for MLD report to deal with
533 	 * the case where we first join a link-local address.
534 	 */
535 	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
536 	bound = curlwp_bind();
537 	ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
538 	if (ia == NULL) {
539 		curlwp_bindx(bound);
540 		return;
541 	}
542 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
543 		ia6_release(ia, &psref);
544 		ia = NULL;
545 	}
546 
547 	/* Allocate two mbufs to store IPv6 header and MLD header */
548 	mldh = mld_allocbuf(&mh, sizeof(struct mld_hdr), in6m, type);
549 	if (mldh == NULL) {
550 		ia6_release(ia, &psref);
551 		curlwp_bindx(bound);
552 		return;
553 	}
554 
555 	/* fill src/dst here */
556  	ip6 = mtod(mh, struct ip6_hdr *);
557  	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
558  	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
559 	ia6_release(ia, &psref);
560 	curlwp_bindx(bound);
561 
562 	mldh->mld_addr = in6m->in6m_addr;
563 	in6_clearscope(&mldh->mld_addr); /* XXX */
564 	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
565 	    sizeof(struct mld_hdr));
566 
567 	/* construct multicast option */
568 	memset(&im6o, 0, sizeof(im6o));
569 	im6o.im6o_multicast_if_index = if_get_index(ifp);
570 	im6o.im6o_multicast_hlim = 1;
571 
572 	/*
573 	 * Request loopback of the report if we are acting as a multicast
574 	 * router, so that the process-level routing daemon can hear it.
575 	 */
576 	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
577 
578 	/* increment output statictics */
579 	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
580 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
581 	switch (type) {
582 	case MLD_LISTENER_QUERY:
583 		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
584 		break;
585 	case MLD_LISTENER_REPORT:
586 		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
587 		break;
588 	case MLD_LISTENER_DONE:
589 		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
590 		break;
591 	}
592 
593 	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
594 	    &im6o, NULL, NULL);
595 }
596 
597 static struct mld_hdr *
598 mld_allocbuf(struct mbuf **mh, int len, struct in6_multi *in6m,
599     int type)
600 {
601 	struct mbuf *md;
602 	struct mld_hdr *mldh;
603 	struct ip6_hdr *ip6;
604 
605 	/*
606 	 * Allocate mbufs to store ip6 header and MLD header.
607 	 * We allocate 2 mbufs and make chain in advance because
608 	 * it is more convenient when inserting the hop-by-hop option later.
609 	 */
610 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
611 	if (*mh == NULL)
612 		return NULL;
613 	MGET(md, M_DONTWAIT, MT_DATA);
614 	if (md == NULL) {
615 		m_free(*mh);
616 		*mh = NULL;
617 		return NULL;
618 	}
619 	(*mh)->m_next = md;
620 	md->m_next = NULL;
621 
622 	m_reset_rcvif((*mh));
623 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + len;
624 	(*mh)->m_len = sizeof(struct ip6_hdr);
625 	MH_ALIGN(*mh, sizeof(struct ip6_hdr));
626 
627 	/* fill in the ip6 header */
628 	ip6 = mtod(*mh, struct ip6_hdr *);
629 	memset(ip6, 0, sizeof(*ip6));
630 	ip6->ip6_flow = 0;
631 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
632 	ip6->ip6_vfc |= IPV6_VERSION;
633 	/* ip6_plen will be set later */
634 	ip6->ip6_nxt = IPPROTO_ICMPV6;
635 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
636 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
637 
638 	/* fill in the MLD header as much as possible */
639 	md->m_len = len;
640 	mldh = mtod(md, struct mld_hdr *);
641 	memset(mldh, 0, len);
642 	mldh->mld_type = type;
643 	return mldh;
644 }
645 
646 /*
647  * Add an address to the list of IP6 multicast addresses for a given interface.
648  */
649 struct	in6_multi *
650 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp,
651 	int *errorp, int timer)
652 {
653 	struct	in6_ifaddr *ia;
654 	struct	sockaddr_in6 sin6;
655 	struct	in6_multi *in6m;
656 	int	s = splsoftnet();
657 
658 	*errorp = 0;
659 
660 	/*
661 	 * See if address already in list.
662 	 */
663 	IN6_LOOKUP_MULTI(*maddr6, ifp, in6m);
664 	if (in6m != NULL) {
665 		/*
666 		 * Found it; just increment the refrence count.
667 		 */
668 		in6m->in6m_refcount++;
669 	} else {
670 		int _s;
671 		/*
672 		 * New address; allocate a new multicast record
673 		 * and link it into the interface's multicast list.
674 		 */
675 		in6m = (struct in6_multi *)
676 			malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
677 		if (in6m == NULL) {
678 			splx(s);
679 			*errorp = ENOBUFS;
680 			return (NULL);
681 		}
682 
683 		in6m->in6m_addr = *maddr6;
684 		in6m->in6m_ifp = ifp;
685 		in6m->in6m_refcount = 1;
686 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
687 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
688 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
689 
690 		_s = pserialize_read_enter();
691 		ia = in6_get_ia_from_ifp(ifp);
692 		if (ia == NULL) {
693 			pserialize_read_exit(_s);
694 			callout_destroy(&in6m->in6m_timer_ch);
695 			free(in6m, M_IPMADDR);
696 			splx(s);
697 			*errorp = EADDRNOTAVAIL; /* appropriate? */
698 			return (NULL);
699 		}
700 		in6m->in6m_ia = ia;
701 		ifaref(&ia->ia_ifa); /* gain a reference */
702 		/* FIXME NOMPSAFE: need to lock */
703 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
704 		pserialize_read_exit(_s);
705 
706 		/*
707 		 * Ask the network driver to update its multicast reception
708 		 * filter appropriately for the new address.
709 		 */
710 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
711 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
712 		if (*errorp) {
713 			callout_destroy(&in6m->in6m_timer_ch);
714 			LIST_REMOVE(in6m, in6m_entry);
715 			free(in6m, M_IPMADDR);
716 			ifafree(&ia->ia_ifa);
717 			splx(s);
718 			return (NULL);
719 		}
720 
721 		in6m->in6m_timer = timer;
722 		if (in6m->in6m_timer > 0) {
723 			in6m->in6m_state = MLD_REPORTPENDING;
724 			mld_starttimer(in6m);
725 
726 			splx(s);
727 			return (in6m);
728 		}
729 
730 		/*
731 		 * Let MLD6 know that we have joined a new IP6 multicast
732 		 * group.
733 		 */
734 		mld_start_listening(in6m);
735 	}
736 	splx(s);
737 	return (in6m);
738 }
739 
740 /*
741  * Delete a multicast address record.
742  */
743 void
744 in6_delmulti(struct in6_multi *in6m)
745 {
746 	struct	sockaddr_in6 sin6;
747 	struct	in6_ifaddr *ia;
748 	int	s = splsoftnet();
749 
750 	mld_stoptimer(in6m);
751 
752 	if (--in6m->in6m_refcount == 0) {
753 		int _s;
754 
755 		/*
756 		 * No remaining claims to this record; let MLD6 know
757 		 * that we are leaving the multicast group.
758 		 */
759 		mld_stop_listening(in6m);
760 
761 		/*
762 		 * Unlink from list.
763 		 */
764 		LIST_REMOVE(in6m, in6m_entry);
765 		if (in6m->in6m_ia != NULL) {
766 			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
767 			in6m->in6m_ia = NULL;
768 		}
769 
770 		/*
771 		 * Delete all references of this multicasting group from
772 		 * the membership arrays
773 		 */
774 		_s = pserialize_read_enter();
775 		IN6_ADDRLIST_READER_FOREACH(ia) {
776 			struct in6_multi_mship *imm;
777 			LIST_FOREACH(imm, &ia->ia6_memberships, i6mm_chain) {
778 				if (imm->i6mm_maddr == in6m)
779 					imm->i6mm_maddr = NULL;
780 			}
781 		}
782 		pserialize_read_exit(_s);
783 
784 		/*
785 		 * Notify the network driver to update its multicast
786 		 * reception filter.
787 		 */
788 		sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
789 		if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
790 
791 		/* Tell mld_timeo we're halting the timer */
792 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
793 #ifdef NET_MPSAFE
794 		callout_halt(&in6m->in6m_timer_ch, NULL);
795 #else
796 		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
797 #endif
798 		callout_destroy(&in6m->in6m_timer_ch);
799 
800 		free(in6m, M_IPMADDR);
801 	}
802 	splx(s);
803 }
804 
805 
806 struct in6_multi_mship *
807 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
808 	int *errorp, int timer)
809 {
810 	struct in6_multi_mship *imm;
811 
812 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
813 	if (imm == NULL) {
814 		*errorp = ENOBUFS;
815 		return NULL;
816 	}
817 
818 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
819 	if (!imm->i6mm_maddr) {
820 		/* *errorp is already set */
821 		free(imm, M_IPMADDR);
822 		return NULL;
823 	}
824 	return imm;
825 }
826 
827 int
828 in6_leavegroup(struct in6_multi_mship *imm)
829 {
830 
831 	if (imm->i6mm_maddr) {
832 		in6_delmulti(imm->i6mm_maddr);
833 	}
834 	free(imm, M_IPMADDR);
835 	return 0;
836 }
837 
838 
839 /*
840  * Multicast address kludge:
841  * If there were any multicast addresses attached to this interface address,
842  * either move them to another address on this interface, or save them until
843  * such time as this interface is reconfigured for IPv6.
844  */
845 void
846 in6_savemkludge(struct in6_ifaddr *oia)
847 {
848 	struct in6_ifaddr *ia;
849 	struct in6_multi *in6m;
850 	int s;
851 
852 	s = pserialize_read_enter();
853 	ia = in6_get_ia_from_ifp(oia->ia_ifp);
854 	if (ia) {	/* there is another address */
855 		KASSERT(ia != oia);
856 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
857 			LIST_REMOVE(in6m, in6m_entry);
858 			ifaref(&ia->ia_ifa);
859 			ifafree(&in6m->in6m_ia->ia_ifa);
860 			in6m->in6m_ia = ia;
861 			/* FIXME NOMPSAFE: need to lock */
862 			LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
863 		}
864 	} else {	/* last address on this if deleted, save */
865 		struct multi6_kludge *mk;
866 
867 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
868 			if (mk->mk_ifp == oia->ia_ifp)
869 				break;
870 		}
871 		if (mk == NULL) /* this should not happen! */
872 			panic("in6_savemkludge: no kludge space");
873 
874 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
875 			LIST_REMOVE(in6m, in6m_entry);
876 			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
877 			in6m->in6m_ia = NULL;
878 			LIST_INSERT_HEAD(&mk->mk_head, in6m, in6m_entry);
879 		}
880 	}
881 	pserialize_read_exit(s);
882 }
883 
884 /*
885  * Continuation of multicast address hack:
886  * If there was a multicast group list previously saved for this interface,
887  * then we re-attach it to the first address configured on the i/f.
888  */
889 void
890 in6_restoremkludge(struct in6_ifaddr *ia, struct ifnet *ifp)
891 {
892 	struct multi6_kludge *mk;
893 	struct in6_multi *in6m;
894 
895 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
896 		if (mk->mk_ifp == ifp)
897 			break;
898 	}
899 	if (mk == NULL)
900 		return;
901 	while ((in6m = LIST_FIRST(&mk->mk_head)) != NULL) {
902 		LIST_REMOVE(in6m, in6m_entry);
903 		in6m->in6m_ia = ia;
904 		ifaref(&ia->ia_ifa);
905 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
906 	}
907 }
908 
909 /*
910  * Allocate space for the kludge at interface initialization time.
911  * Formerly, we dynamically allocated the space in in6_savemkludge() with
912  * malloc(M_WAITOK).  However, it was wrong since the function could be called
913  * under an interrupt context (software timer on address lifetime expiration).
 * Also, we cannot just give up allocating the structure, since the group
915  * membership structure is very complex and we need to keep it anyway.
916  * Of course, this function MUST NOT be called under an interrupt context.
917  * Specifically, it is expected to be called only from in6_ifattach(), though
918  * it is a global function.
919  */
920 void
921 in6_createmkludge(struct ifnet *ifp)
922 {
923 	struct multi6_kludge *mk;
924 
925 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
926 		/* If we've already had one, do not allocate. */
927 		if (mk->mk_ifp == ifp)
928 			return;
929 	}
930 
931 	mk = malloc(sizeof(*mk), M_IPMADDR, M_ZERO|M_WAITOK);
932 
933 	LIST_INIT(&mk->mk_head);
934 	mk->mk_ifp = ifp;
935 	LIST_INSERT_HEAD(&in6_mk, mk, mk_entry);
936 }
937 
938 void
939 in6_purgemkludge(struct ifnet *ifp)
940 {
941 	struct multi6_kludge *mk;
942 	struct in6_multi *in6m, *next;
943 
944 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
945 		if (mk->mk_ifp == ifp)
946 			break;
947 	}
948 	if (mk == NULL)
949 		return;
950 
951 	/* leave from all multicast groups joined */
952 	for (in6m = LIST_FIRST(&mk->mk_head); in6m != NULL; in6m = next) {
953 		next = LIST_NEXT(in6m, in6m_entry);
954 		in6_delmulti(in6m);
955 	}
956 	LIST_REMOVE(mk, mk_entry);
957 	free(mk, M_IPMADDR);
958 }
959 
960 static int
961 in6_mkludge_sysctl(SYSCTLFN_ARGS)
962 {
963 	struct multi6_kludge *mk;
964 	struct in6_multi *in6m;
965 	int error;
966 	uint32_t tmp;
967 	size_t written;
968 
969 	if (namelen != 1)
970 		return EINVAL;
971 
972 	if (oldp == NULL) {
973 		*oldlenp = 0;
974 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
975 			if (mk->mk_ifp->if_index == name[0])
976 				continue;
977 			LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
978 				*oldlenp += sizeof(struct in6_addr) +
979 				    sizeof(uint32_t);
980 			}
981 		}
982 		return 0;
983 	}
984 
985 	error = 0;
986 	written = 0;
987 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
988 		if (mk->mk_ifp->if_index == name[0])
989 			continue;
990 		LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
991 			if (written + sizeof(struct in6_addr) +
992 			    sizeof(uint32_t) > *oldlenp)
993 				goto done;
994 			error = sysctl_copyout(l, &in6m->in6m_addr,
995 			    oldp, sizeof(struct in6_addr));
996 			if (error)
997 				goto done;
998 			oldp = (char *)oldp + sizeof(struct in6_addr);
999 			written += sizeof(struct in6_addr);
1000 			tmp = in6m->in6m_refcount;
1001 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1002 			if (error)
1003 				goto done;
1004 			oldp = (char *)oldp + sizeof(tmp);
1005 			written += sizeof(tmp);
1006 		}
1007 	}
1008 
1009 done:
1010 	*oldlenp = written;
1011 	return error;
1012 }
1013 
1014 static int
1015 in6_multicast_sysctl(SYSCTLFN_ARGS)
1016 {
1017 	struct ifnet *ifp;
1018 	struct ifaddr *ifa;
1019 	struct in6_ifaddr *ifa6;
1020 	struct in6_multi *in6m;
1021 	uint32_t tmp;
1022 	int error;
1023 	size_t written;
1024 	struct psref psref, psref_ia;
1025 	int bound, s;
1026 
1027 	if (namelen != 1)
1028 		return EINVAL;
1029 
1030 	bound = curlwp_bind();
1031 	ifp = if_get_byindex(name[0], &psref);
1032 	if (ifp == NULL) {
1033 		curlwp_bindx(bound);
1034 		return ENODEV;
1035 	}
1036 
1037 	if (oldp == NULL) {
1038 		*oldlenp = 0;
1039 		s = pserialize_read_enter();
1040 		IFADDR_READER_FOREACH(ifa, ifp) {
1041 			if (ifa->ifa_addr->sa_family != AF_INET6)
1042 				continue;
1043 			ifa6 = (struct in6_ifaddr *)ifa;
1044 			LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
1045 				*oldlenp += 2 * sizeof(struct in6_addr) +
1046 				    sizeof(uint32_t);
1047 			}
1048 		}
1049 		pserialize_read_exit(s);
1050 		if_put(ifp, &psref);
1051 		curlwp_bindx(bound);
1052 		return 0;
1053 	}
1054 
1055 	error = 0;
1056 	written = 0;
1057 	s = pserialize_read_enter();
1058 	IFADDR_READER_FOREACH(ifa, ifp) {
1059 		if (ifa->ifa_addr->sa_family != AF_INET6)
1060 			continue;
1061 
1062 		ifa_acquire(ifa, &psref_ia);
1063 		pserialize_read_exit(s);
1064 
1065 		ifa6 = (struct in6_ifaddr *)ifa;
1066 		LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
1067 			if (written + 2 * sizeof(struct in6_addr) +
1068 			    sizeof(uint32_t) > *oldlenp)
1069 				goto done;
1070 			error = sysctl_copyout(l, &ifa6->ia_addr.sin6_addr,
1071 			    oldp, sizeof(struct in6_addr));
1072 			if (error)
1073 				goto done;
1074 			oldp = (char *)oldp + sizeof(struct in6_addr);
1075 			written += sizeof(struct in6_addr);
1076 			error = sysctl_copyout(l, &in6m->in6m_addr,
1077 			    oldp, sizeof(struct in6_addr));
1078 			if (error)
1079 				goto done;
1080 			oldp = (char *)oldp + sizeof(struct in6_addr);
1081 			written += sizeof(struct in6_addr);
1082 			tmp = in6m->in6m_refcount;
1083 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1084 			if (error)
1085 				goto done;
1086 			oldp = (char *)oldp + sizeof(tmp);
1087 			written += sizeof(tmp);
1088 		}
1089 
1090 		s = pserialize_read_enter();
1091 		ifa_release(ifa, &psref_ia);
1092 	}
1093 	pserialize_read_exit(s);
1094 done:
1095 	ifa_release(ifa, &psref_ia);
1096 	if_put(ifp, &psref);
1097 	curlwp_bindx(bound);
1098 	*oldlenp = written;
1099 	return error;
1100 }
1101 
/*
 * Register the sysctl nodes served by this file: the "inet6" node under
 * CTL_NET/PF_INET6, and below it the dynamically numbered (CTL_CREATE)
 * nodes "multicast" and "multicast_kludge".
 */
SYSCTL_SETUP(sysctl_in6_mklude_setup, "sysctl net.inet6.multicast_kludge subtree setup")
{

	/* parent node: net.inet6 */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "inet6", NULL,
		       NULL, 0, NULL, 0,
		       CTL_NET, PF_INET6, CTL_EOL);

	/* "multicast": handled by in6_multicast_sysctl() */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "multicast",
		       SYSCTL_DESCR("Multicast information"),
		       in6_multicast_sysctl, 0, NULL, 0,
		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);

	/* "multicast_kludge": handled by in6_mkludge_sysctl() */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "multicast_kludge",
		       SYSCTL_DESCR("multicast kludge information"),
		       in6_mkludge_sysctl, 0, NULL, 0,
		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
}
1125