xref: /netbsd-src/sys/netinet6/mld6.c (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)
1 /*	$NetBSD: mld6.c,v 1.81 2017/02/07 02:38:08 ozaki-r Exp $	*/
2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1992, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Stephen Deering of Stanford University.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
65  */
66 
67 /*
68  * Copyright (c) 1988 Stephen Deering.
69  *
70  * This code is derived from software contributed to Berkeley by
71  * Stephen Deering of Stanford University.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
102  */
103 
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.81 2017/02/07 02:38:08 ozaki-r Exp $");
106 
107 #ifdef _KERNEL_OPT
108 #include "opt_inet.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/mbuf.h>
115 #include <sys/socket.h>
116 #include <sys/socketvar.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119 #include <sys/kernel.h>
120 #include <sys/callout.h>
121 #include <sys/cprng.h>
122 
123 #include <net/if.h>
124 
125 #include <netinet/in.h>
126 #include <netinet/in_var.h>
127 #include <netinet6/in6_var.h>
128 #include <netinet/ip6.h>
129 #include <netinet6/ip6_var.h>
130 #include <netinet6/scope6_var.h>
131 #include <netinet/icmp6.h>
132 #include <netinet6/icmp6_private.h>
133 #include <netinet6/mld6_var.h>
134 
135 #include <net/net_osdep.h>
136 
137 
/*
 * This structure is used to keep track of in6_multi chains which belong to
 * deleted interface addresses.
 */
/* Global list of kludge records; in6_createmkludge() adds one per interface. */
static LIST_HEAD(, multi6_kludge) in6_mk = LIST_HEAD_INITIALIZER(in6_mk);

struct multi6_kludge {
	LIST_ENTRY(multi6_kludge) mk_entry;	/* linkage on the in6_mk list */
	struct ifnet *mk_ifp;	/* interface this record was created for */
	struct in6_multihead mk_head;	/* in6_multi records parked by in6_savemkludge() */
};
149 
150 
151 /*
152  * Protocol constants
153  */
154 
155 /*
156  * time between repetitions of a node's initial report of interest in a
157  * multicast address(in seconds)
158  */
159 #define MLD_UNSOLICITED_REPORT_INTERVAL	10
160 
/*
 * Packet options handed to ip6_output() for every MLD message sent by
 * mld_sendpkt(); filled in once by mld_init().
 */
static struct ip6_pktopts ip6_opts;

/* Group join/leave notifications (send Report / Done as appropriate). */
static void mld_start_listening(struct in6_multi *);
static void mld_stop_listening(struct in6_multi *);

/* Packet construction and transmission. */
static struct mld_hdr * mld_allocbuf(struct mbuf **, int, struct in6_multi *,
	int);
static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
/* Per-record report timer management. */
static void mld_starttimer(struct in6_multi *);
static void mld_stoptimer(struct in6_multi *);
static u_long mld_timerresid(struct in6_multi *);
172 
173 void
174 mld_init(void)
175 {
176 	static u_int8_t hbh_buf[8];
177 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
178 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
179 
180 	/* ip6h_nxt will be fill in later */
181 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
182 
183 	/* XXX: grotty hard coding... */
184 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
185 	hbh_buf[3] = 0;
186 	hbh_buf[4] = IP6OPT_RTALERT;
187 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
188 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
189 
190 	ip6_opts.ip6po_hbh = hbh;
191 	/* We will specify the hoplimit by a multicast option. */
192 	ip6_opts.ip6po_hlim = -1;
193 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
194 }
195 
196 static void
197 mld_starttimer(struct in6_multi *in6m)
198 {
199 	struct timeval now;
200 
201 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
202 
203 	microtime(&now);
204 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
205 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
206 	    (in6m->in6m_timer % hz) * (1000000 / hz);
207 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
208 		in6m->in6m_timer_expire.tv_sec++;
209 		in6m->in6m_timer_expire.tv_usec -= 1000000;
210 	}
211 
212 	/* start or restart the timer */
213 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
214 }
215 
216 static void
217 mld_stoptimer(struct in6_multi *in6m)
218 {
219 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
220 		return;
221 
222 	callout_stop(&in6m->in6m_timer_ch);
223 
224 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
225 }
226 
227 static void
228 mld_timeo(void *arg)
229 {
230 	struct in6_multi *in6m = arg;
231 
232 	/* XXX NOMPSAFE still need softnet_lock */
233 	mutex_enter(softnet_lock);
234 	KERNEL_LOCK(1, NULL);
235 
236 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
237 		goto out;
238 
239 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
240 
241 	switch (in6m->in6m_state) {
242 	case MLD_REPORTPENDING:
243 		mld_start_listening(in6m);
244 		break;
245 	default:
246 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
247 		break;
248 	}
249 
250 out:
251 	KERNEL_UNLOCK_ONE(NULL);
252 	mutex_exit(softnet_lock);
253 }
254 
255 static u_long
256 mld_timerresid(struct in6_multi *in6m)
257 {
258 	struct timeval now, diff;
259 
260 	microtime(&now);
261 
262 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
263 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
264 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
265 		return (0);
266 	}
267 	diff = in6m->in6m_timer_expire;
268 	diff.tv_sec -= now.tv_sec;
269 	diff.tv_usec -= now.tv_usec;
270 	if (diff.tv_usec < 0) {
271 		diff.tv_sec--;
272 		diff.tv_usec += 1000000;
273 	}
274 
275 	/* return the remaining time in milliseconds */
276 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
277 }
278 
279 static void
280 mld_start_listening(struct in6_multi *in6m)
281 {
282 	struct in6_addr all_in6;
283 
284 	/*
285 	 * RFC2710 page 10:
286 	 * The node never sends a Report or Done for the link-scope all-nodes
287 	 * address.
288 	 * MLD messages are never sent for multicast addresses whose scope is 0
289 	 * (reserved) or 1 (node-local).
290 	 */
291 	all_in6 = in6addr_linklocal_allnodes;
292 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
293 		/* XXX: this should not happen! */
294 		in6m->in6m_timer = 0;
295 		in6m->in6m_state = MLD_OTHERLISTENER;
296 	}
297 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
298 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
299 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
300 		in6m->in6m_state = MLD_OTHERLISTENER;
301 	} else {
302 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
303 		in6m->in6m_timer = cprng_fast32() %
304 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
305 		in6m->in6m_state = MLD_IREPORTEDLAST;
306 
307 		mld_starttimer(in6m);
308 	}
309 }
310 
311 static void
312 mld_stop_listening(struct in6_multi *in6m)
313 {
314 	struct in6_addr allnode, allrouter;
315 
316 	allnode = in6addr_linklocal_allnodes;
317 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
318 		/* XXX: this should not happen! */
319 		return;
320 	}
321 	allrouter = in6addr_linklocal_allrouters;
322 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
323 		/* XXX impossible */
324 		return;
325 	}
326 
327 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
328 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
329 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
330 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
331 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
332 	}
333 }
334 
/*
 * Process a received MLD message.
 *
 * m holds the packet and off is the offset of the MLD header within it.
 * Only Listener Query and Listener Report are acted upon:
 *  - Query: every matching membership record on the receiving interface
 *    either reports immediately (maximum delay of 0) or has its timer
 *    (re)started with a random delay bounded by the query's maximum delay.
 *  - Report: if we belong to the reported group, our timer for it is
 *    stopped and the record is flagged MLD_OTHERLISTENER.
 * Packets whose source address is not link-local are dropped.
 * The mbuf is freed on every path through "out".
 */
void
mld_input(struct mbuf *m, int off)
{
	struct ip6_hdr *ip6;
	struct mld_hdr *mldh;
	struct ifnet *ifp;
	struct in6_multi *in6m = NULL;
	struct in6_addr mld_addr, all_in6;
	struct in6_ifaddr *ia;
	u_long timer = 0;	/* timer value in the MLD query header */
	int s;

	ifp = m_get_rcvif(m, &s);
	if (__predict_false(ifp == NULL))
		goto out;
	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
	if (mldh == NULL) {
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		/*
		 * m is not freed on this path; presumably IP6_EXTHDR_GET
		 * has already disposed of it on failure -- TODO confirm.
		 */
		goto out_nodrop;
	}

	/* source address validation */
	ip6 = mtod(m, struct ip6_hdr *);/* in case mpullup */
	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
		/*
		 * RFC3590 allows the IPv6 unspecified address as the source
		 * address of MLD report and done messages.  However, as this
		 * same document says, this special rule is for snooping
		 * switches and the RFC requires routers to discard MLD packets
		 * with the unspecified source address.  The RFC only talks
		 * about hosts receiving an MLD query or report in Security
		 * Considerations, but this is probably the correct intention.
		 * RFC3590 does not talk about other cases than link-local and
		 * the unspecified source addresses, but we believe the same
		 * rule should be applied.
		 * As a result, we only allow link-local addresses as the
		 * source address; otherwise, simply discard the packet.
		 */
#if 0
		/*
		 * XXX: do not log in an input path to avoid log flooding,
		 * though RFC3590 says "SHOULD log" if the source of a query
		 * is the unspecified address.
		 */
		char ip6bufs[INET6_ADDRSTRLEN];
		char ip6bufm[INET6_ADDRSTRLEN];
		log(LOG_INFO,
		    "mld_input: src %s is not link-local (grp=%s)\n",
		    IN6_PRINT(ip6bufs,&ip6->ip6_src),
		    IN6_PRINT(ip6bufm, &mldh->mld_addr));
#endif
		goto out;
	}

	/*
	 * make a copy for local work (in6_setscope() may modify the 1st arg)
	 */
	mld_addr = mldh->mld_addr;
	if (in6_setscope(&mld_addr, ifp, NULL)) {
		/* XXX: this should not happen! */
		goto out;
	}

	/*
	 * In the MLD specification, there are 3 states and a flag.
	 *
	 * In Non-Listener state, we simply don't have a membership record.
	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
	 * In Idle Listener state, our timer is not running
	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
	 *
	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
	 * we have heard a report from another member, or MLD_IREPORTEDLAST
	 * if we sent the last report.
	 */
	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY: {
		struct psref psref;

		/* queries received on a loopback interface are ignored */
		if (ifp->if_flags & IFF_LOOPBACK)
			break;

		/* the queried address must be unspecified or multicast */
		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
			break;	/* print error or log stat? */

		all_in6 = in6addr_linklocal_allnodes;
		if (in6_setscope(&all_in6, ifp, NULL)) {
			/* XXX: this should not happen! */
			break;
		}

		/*
		 * - Start the timers in all of our membership records
		 *   that the query applies to for the interface on
		 *   which the query arrived excl. those that belong
		 *   to the "all-nodes" group (ff02::1).
		 * - Restart any timer that is already running but has
		 *   a value longer than the requested timeout.
		 * - Use the value specified in the query message as
		 *   the maximum timeout.
		 */
		timer = ntohs(mldh->mld_maxdelay);

		ia = in6_get_ia_from_ifp_psref(ifp, &psref);
		if (ia == NULL)
			break;

		/* The following operations may sleep */
		m_put_rcvif(ifp, &s);
		/*
		 * Clear ifp so that the m_put_rcvif() at out_nodrop is
		 * handed NULL instead of the interface released above.
		 */
		ifp = NULL;

		LIST_FOREACH(in6m, &ia->ia6_multiaddrs, in6m_entry) {
			/* never report all-nodes or narrower-than-link scopes */
			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
			    IPV6_ADDR_SCOPE_LINKLOCAL)
				continue;

			if (in6m->in6m_state == MLD_REPORTPENDING)
				continue; /* we are not yet ready */

			/* a group-specific query only concerns that group */
			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
				continue;

			if (timer == 0) {
				/* send a report immediately */
				mld_stoptimer(in6m);
				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
				in6m->in6m_state = MLD_IREPORTEDLAST;
			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
			    mld_timerresid(in6m) > timer) {
				/*
				 * timer is compared with mld_timerresid(),
				 * which returns milliseconds; the random
				 * delay is scaled by hz / 1000 into ticks,
				 * with 1 added so it is never zero.
				 */
				in6m->in6m_timer =
				   1 + (cprng_fast32() % timer) * hz / 1000;
				mld_starttimer(in6m);
			}
		}
		ia6_release(ia, &psref);
		break;
	    }

	case MLD_LISTENER_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group.  Reports
		 * can potentially get looped back if we are a multicast
		 * router, so discard reports sourced by me.
		 * Note that it is impossible to check IFF_LOOPBACK flag of
		 * ifp for this purpose, since ip6_mloopback pass the physical
		 * interface to looutput.
		 */
		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
			break;

		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
			break;

		/*
		 * If we belong to the group being reported, stop
		 * our timer for that group.
		 */
		IN6_LOOKUP_MULTI(mld_addr, ifp, in6m);
		if (in6m) {
			mld_stoptimer(in6m); /* transit to idle state */
			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
		}
		break;
	default:		/* this is impossible */
#if 0
		/*
		 * this case should be impossible because of filtering in
		 * icmp6_input().  But we explicitly disabled this part
		 * just in case.
		 */
		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
#endif
		break;
	}

out:
	m_freem(m);
out_nodrop:
	/* ifp may be NULL here (lookup failure or the Query path above) */
	m_put_rcvif(ifp, &s);
}
519 
520 static void
521 mld_sendpkt(struct in6_multi *in6m, int type,
522 	const struct in6_addr *dst)
523 {
524 	struct mbuf *mh;
525 	struct mld_hdr *mldh;
526 	struct ip6_hdr *ip6 = NULL;
527 	struct ip6_moptions im6o;
528 	struct in6_ifaddr *ia = NULL;
529 	struct ifnet *ifp = in6m->in6m_ifp;
530 	int ignflags;
531 	struct psref psref;
532 	int bound;
533 
534 	/*
535 	 * At first, find a link local address on the outgoing interface
536 	 * to use as the source address of the MLD packet.
537 	 * We do not reject tentative addresses for MLD report to deal with
538 	 * the case where we first join a link-local address.
539 	 */
540 	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
541 	bound = curlwp_bind();
542 	ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
543 	if (ia == NULL) {
544 		curlwp_bindx(bound);
545 		return;
546 	}
547 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
548 		ia6_release(ia, &psref);
549 		ia = NULL;
550 	}
551 
552 	/* Allocate two mbufs to store IPv6 header and MLD header */
553 	mldh = mld_allocbuf(&mh, sizeof(struct mld_hdr), in6m, type);
554 	if (mldh == NULL) {
555 		ia6_release(ia, &psref);
556 		curlwp_bindx(bound);
557 		return;
558 	}
559 
560 	/* fill src/dst here */
561  	ip6 = mtod(mh, struct ip6_hdr *);
562  	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
563  	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
564 	ia6_release(ia, &psref);
565 	curlwp_bindx(bound);
566 
567 	mldh->mld_addr = in6m->in6m_addr;
568 	in6_clearscope(&mldh->mld_addr); /* XXX */
569 	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
570 	    sizeof(struct mld_hdr));
571 
572 	/* construct multicast option */
573 	memset(&im6o, 0, sizeof(im6o));
574 	im6o.im6o_multicast_if_index = if_get_index(ifp);
575 	im6o.im6o_multicast_hlim = 1;
576 
577 	/*
578 	 * Request loopback of the report if we are acting as a multicast
579 	 * router, so that the process-level routing daemon can hear it.
580 	 */
581 	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
582 
583 	/* increment output statictics */
584 	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
585 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
586 	switch (type) {
587 	case MLD_LISTENER_QUERY:
588 		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
589 		break;
590 	case MLD_LISTENER_REPORT:
591 		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
592 		break;
593 	case MLD_LISTENER_DONE:
594 		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
595 		break;
596 	}
597 
598 	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
599 	    &im6o, NULL, NULL);
600 }
601 
602 static struct mld_hdr *
603 mld_allocbuf(struct mbuf **mh, int len, struct in6_multi *in6m,
604     int type)
605 {
606 	struct mbuf *md;
607 	struct mld_hdr *mldh;
608 	struct ip6_hdr *ip6;
609 
610 	/*
611 	 * Allocate mbufs to store ip6 header and MLD header.
612 	 * We allocate 2 mbufs and make chain in advance because
613 	 * it is more convenient when inserting the hop-by-hop option later.
614 	 */
615 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
616 	if (*mh == NULL)
617 		return NULL;
618 	MGET(md, M_DONTWAIT, MT_DATA);
619 	if (md == NULL) {
620 		m_free(*mh);
621 		*mh = NULL;
622 		return NULL;
623 	}
624 	(*mh)->m_next = md;
625 	md->m_next = NULL;
626 
627 	m_reset_rcvif((*mh));
628 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + len;
629 	(*mh)->m_len = sizeof(struct ip6_hdr);
630 	MH_ALIGN(*mh, sizeof(struct ip6_hdr));
631 
632 	/* fill in the ip6 header */
633 	ip6 = mtod(*mh, struct ip6_hdr *);
634 	memset(ip6, 0, sizeof(*ip6));
635 	ip6->ip6_flow = 0;
636 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
637 	ip6->ip6_vfc |= IPV6_VERSION;
638 	/* ip6_plen will be set later */
639 	ip6->ip6_nxt = IPPROTO_ICMPV6;
640 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
641 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
642 
643 	/* fill in the MLD header as much as possible */
644 	md->m_len = len;
645 	mldh = mtod(md, struct mld_hdr *);
646 	memset(mldh, 0, len);
647 	mldh->mld_type = type;
648 	return mldh;
649 }
650 
651 /*
652  * Add an address to the list of IP6 multicast addresses for a given interface.
653  */
654 struct	in6_multi *
655 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp,
656 	int *errorp, int timer)
657 {
658 	struct	in6_ifaddr *ia;
659 	struct	sockaddr_in6 sin6;
660 	struct	in6_multi *in6m;
661 	int	s = splsoftnet();
662 
663 	*errorp = 0;
664 
665 	/*
666 	 * See if address already in list.
667 	 */
668 	IN6_LOOKUP_MULTI(*maddr6, ifp, in6m);
669 	if (in6m != NULL) {
670 		/*
671 		 * Found it; just increment the refrence count.
672 		 */
673 		in6m->in6m_refcount++;
674 	} else {
675 		int _s;
676 		/*
677 		 * New address; allocate a new multicast record
678 		 * and link it into the interface's multicast list.
679 		 */
680 		in6m = (struct in6_multi *)
681 			malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
682 		if (in6m == NULL) {
683 			splx(s);
684 			*errorp = ENOBUFS;
685 			return (NULL);
686 		}
687 
688 		in6m->in6m_addr = *maddr6;
689 		in6m->in6m_ifp = ifp;
690 		in6m->in6m_refcount = 1;
691 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
692 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
693 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
694 
695 		_s = pserialize_read_enter();
696 		ia = in6_get_ia_from_ifp(ifp);
697 		if (ia == NULL) {
698 			pserialize_read_exit(_s);
699 			callout_destroy(&in6m->in6m_timer_ch);
700 			free(in6m, M_IPMADDR);
701 			splx(s);
702 			*errorp = EADDRNOTAVAIL; /* appropriate? */
703 			return (NULL);
704 		}
705 		in6m->in6m_ia = ia;
706 		ifaref(&ia->ia_ifa); /* gain a reference */
707 		/* FIXME NOMPSAFE: need to lock */
708 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
709 		pserialize_read_exit(_s);
710 
711 		/*
712 		 * Ask the network driver to update its multicast reception
713 		 * filter appropriately for the new address.
714 		 */
715 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
716 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
717 		if (*errorp) {
718 			callout_destroy(&in6m->in6m_timer_ch);
719 			LIST_REMOVE(in6m, in6m_entry);
720 			free(in6m, M_IPMADDR);
721 			ifafree(&ia->ia_ifa);
722 			splx(s);
723 			return (NULL);
724 		}
725 
726 		in6m->in6m_timer = timer;
727 		if (in6m->in6m_timer > 0) {
728 			in6m->in6m_state = MLD_REPORTPENDING;
729 			mld_starttimer(in6m);
730 
731 			splx(s);
732 			return (in6m);
733 		}
734 
735 		/*
736 		 * Let MLD6 know that we have joined a new IP6 multicast
737 		 * group.
738 		 */
739 		mld_start_listening(in6m);
740 	}
741 	splx(s);
742 	return (in6m);
743 }
744 
745 /*
746  * Delete a multicast address record.
747  */
748 void
749 in6_delmulti(struct in6_multi *in6m)
750 {
751 	struct	sockaddr_in6 sin6;
752 	struct	in6_ifaddr *ia;
753 	int	s = splsoftnet();
754 
755 	mld_stoptimer(in6m);
756 
757 	if (--in6m->in6m_refcount == 0) {
758 		int _s;
759 
760 		/*
761 		 * No remaining claims to this record; let MLD6 know
762 		 * that we are leaving the multicast group.
763 		 */
764 		mld_stop_listening(in6m);
765 
766 		/*
767 		 * Unlink from list.
768 		 */
769 		LIST_REMOVE(in6m, in6m_entry);
770 		if (in6m->in6m_ia != NULL) {
771 			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
772 			in6m->in6m_ia = NULL;
773 		}
774 
775 		/*
776 		 * Delete all references of this multicasting group from
777 		 * the membership arrays
778 		 */
779 		_s = pserialize_read_enter();
780 		IN6_ADDRLIST_READER_FOREACH(ia) {
781 			struct in6_multi_mship *imm;
782 			LIST_FOREACH(imm, &ia->ia6_memberships, i6mm_chain) {
783 				if (imm->i6mm_maddr == in6m)
784 					imm->i6mm_maddr = NULL;
785 			}
786 		}
787 		pserialize_read_exit(_s);
788 
789 		/*
790 		 * Notify the network driver to update its multicast
791 		 * reception filter.
792 		 */
793 		sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
794 		if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
795 
796 		/* Tell mld_timeo we're halting the timer */
797 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
798 		if (mutex_owned(softnet_lock))
799 			callout_halt(&in6m->in6m_timer_ch, softnet_lock);
800 		else
801 			callout_halt(&in6m->in6m_timer_ch, NULL);
802 		callout_destroy(&in6m->in6m_timer_ch);
803 
804 		free(in6m, M_IPMADDR);
805 	}
806 	splx(s);
807 }
808 
809 
810 struct in6_multi_mship *
811 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
812 	int *errorp, int timer)
813 {
814 	struct in6_multi_mship *imm;
815 
816 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
817 	if (imm == NULL) {
818 		*errorp = ENOBUFS;
819 		return NULL;
820 	}
821 
822 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
823 	if (!imm->i6mm_maddr) {
824 		/* *errorp is already set */
825 		free(imm, M_IPMADDR);
826 		return NULL;
827 	}
828 	return imm;
829 }
830 
831 int
832 in6_leavegroup(struct in6_multi_mship *imm)
833 {
834 
835 	if (imm->i6mm_maddr) {
836 		in6_delmulti(imm->i6mm_maddr);
837 	}
838 	free(imm, M_IPMADDR);
839 	return 0;
840 }
841 
842 
843 /*
844  * Multicast address kludge:
845  * If there were any multicast addresses attached to this interface address,
846  * either move them to another address on this interface, or save them until
847  * such time as this interface is reconfigured for IPv6.
848  */
849 void
850 in6_savemkludge(struct in6_ifaddr *oia)
851 {
852 	struct in6_ifaddr *ia;
853 	struct in6_multi *in6m;
854 	int s;
855 
856 	s = pserialize_read_enter();
857 	ia = in6_get_ia_from_ifp(oia->ia_ifp);
858 	if (ia) {	/* there is another address */
859 		KASSERT(ia != oia);
860 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
861 			LIST_REMOVE(in6m, in6m_entry);
862 			ifaref(&ia->ia_ifa);
863 			ifafree(&in6m->in6m_ia->ia_ifa);
864 			in6m->in6m_ia = ia;
865 			/* FIXME NOMPSAFE: need to lock */
866 			LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
867 		}
868 	} else {	/* last address on this if deleted, save */
869 		struct multi6_kludge *mk;
870 
871 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
872 			if (mk->mk_ifp == oia->ia_ifp)
873 				break;
874 		}
875 		if (mk == NULL) /* this should not happen! */
876 			panic("in6_savemkludge: no kludge space");
877 
878 		while ((in6m = LIST_FIRST(&oia->ia6_multiaddrs)) != NULL) {
879 			LIST_REMOVE(in6m, in6m_entry);
880 			ifafree(&in6m->in6m_ia->ia_ifa); /* release reference */
881 			in6m->in6m_ia = NULL;
882 			LIST_INSERT_HEAD(&mk->mk_head, in6m, in6m_entry);
883 		}
884 	}
885 	pserialize_read_exit(s);
886 }
887 
888 /*
889  * Continuation of multicast address hack:
890  * If there was a multicast group list previously saved for this interface,
891  * then we re-attach it to the first address configured on the i/f.
892  */
893 void
894 in6_restoremkludge(struct in6_ifaddr *ia, struct ifnet *ifp)
895 {
896 	struct multi6_kludge *mk;
897 	struct in6_multi *in6m;
898 
899 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
900 		if (mk->mk_ifp == ifp)
901 			break;
902 	}
903 	if (mk == NULL)
904 		return;
905 	while ((in6m = LIST_FIRST(&mk->mk_head)) != NULL) {
906 		LIST_REMOVE(in6m, in6m_entry);
907 		in6m->in6m_ia = ia;
908 		ifaref(&ia->ia_ifa);
909 		LIST_INSERT_HEAD(&ia->ia6_multiaddrs, in6m, in6m_entry);
910 	}
911 }
912 
913 /*
914  * Allocate space for the kludge at interface initialization time.
915  * Formerly, we dynamically allocated the space in in6_savemkludge() with
916  * malloc(M_WAITOK).  However, it was wrong since the function could be called
917  * under an interrupt context (software timer on address lifetime expiration).
 * Also, we cannot just give up allocating the structure, since the group
919  * membership structure is very complex and we need to keep it anyway.
920  * Of course, this function MUST NOT be called under an interrupt context.
921  * Specifically, it is expected to be called only from in6_ifattach(), though
922  * it is a global function.
923  */
924 void
925 in6_createmkludge(struct ifnet *ifp)
926 {
927 	struct multi6_kludge *mk;
928 
929 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
930 		/* If we've already had one, do not allocate. */
931 		if (mk->mk_ifp == ifp)
932 			return;
933 	}
934 
935 	mk = malloc(sizeof(*mk), M_IPMADDR, M_ZERO|M_WAITOK);
936 
937 	LIST_INIT(&mk->mk_head);
938 	mk->mk_ifp = ifp;
939 	LIST_INSERT_HEAD(&in6_mk, mk, mk_entry);
940 }
941 
942 void
943 in6_purgemkludge(struct ifnet *ifp)
944 {
945 	struct multi6_kludge *mk;
946 	struct in6_multi *in6m, *next;
947 
948 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
949 		if (mk->mk_ifp == ifp)
950 			break;
951 	}
952 	if (mk == NULL)
953 		return;
954 
955 	/* leave from all multicast groups joined */
956 	for (in6m = LIST_FIRST(&mk->mk_head); in6m != NULL; in6m = next) {
957 		next = LIST_NEXT(in6m, in6m_entry);
958 		in6_delmulti(in6m);
959 	}
960 	LIST_REMOVE(mk, mk_entry);
961 	free(mk, M_IPMADDR);
962 }
963 
964 static int
965 in6_mkludge_sysctl(SYSCTLFN_ARGS)
966 {
967 	struct multi6_kludge *mk;
968 	struct in6_multi *in6m;
969 	int error;
970 	uint32_t tmp;
971 	size_t written;
972 
973 	if (namelen != 1)
974 		return EINVAL;
975 
976 	if (oldp == NULL) {
977 		*oldlenp = 0;
978 		LIST_FOREACH(mk, &in6_mk, mk_entry) {
979 			if (mk->mk_ifp->if_index == name[0])
980 				continue;
981 			LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
982 				*oldlenp += sizeof(struct in6_addr) +
983 				    sizeof(uint32_t);
984 			}
985 		}
986 		return 0;
987 	}
988 
989 	error = 0;
990 	written = 0;
991 	LIST_FOREACH(mk, &in6_mk, mk_entry) {
992 		if (mk->mk_ifp->if_index == name[0])
993 			continue;
994 		LIST_FOREACH(in6m, &mk->mk_head, in6m_entry) {
995 			if (written + sizeof(struct in6_addr) +
996 			    sizeof(uint32_t) > *oldlenp)
997 				goto done;
998 			error = sysctl_copyout(l, &in6m->in6m_addr,
999 			    oldp, sizeof(struct in6_addr));
1000 			if (error)
1001 				goto done;
1002 			oldp = (char *)oldp + sizeof(struct in6_addr);
1003 			written += sizeof(struct in6_addr);
1004 			tmp = in6m->in6m_refcount;
1005 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1006 			if (error)
1007 				goto done;
1008 			oldp = (char *)oldp + sizeof(tmp);
1009 			written += sizeof(tmp);
1010 		}
1011 	}
1012 
1013 done:
1014 	*oldlenp = written;
1015 	return error;
1016 }
1017 
1018 static int
1019 in6_multicast_sysctl(SYSCTLFN_ARGS)
1020 {
1021 	struct ifnet *ifp;
1022 	struct ifaddr *ifa;
1023 	struct in6_ifaddr *ifa6;
1024 	struct in6_multi *in6m;
1025 	uint32_t tmp;
1026 	int error;
1027 	size_t written;
1028 	struct psref psref, psref_ia;
1029 	int bound, s;
1030 
1031 	if (namelen != 1)
1032 		return EINVAL;
1033 
1034 	bound = curlwp_bind();
1035 	ifp = if_get_byindex(name[0], &psref);
1036 	if (ifp == NULL) {
1037 		curlwp_bindx(bound);
1038 		return ENODEV;
1039 	}
1040 
1041 	if (oldp == NULL) {
1042 		*oldlenp = 0;
1043 		s = pserialize_read_enter();
1044 		IFADDR_READER_FOREACH(ifa, ifp) {
1045 			if (ifa->ifa_addr->sa_family != AF_INET6)
1046 				continue;
1047 			ifa6 = (struct in6_ifaddr *)ifa;
1048 			LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
1049 				*oldlenp += 2 * sizeof(struct in6_addr) +
1050 				    sizeof(uint32_t);
1051 			}
1052 		}
1053 		pserialize_read_exit(s);
1054 		if_put(ifp, &psref);
1055 		curlwp_bindx(bound);
1056 		return 0;
1057 	}
1058 
1059 	error = 0;
1060 	written = 0;
1061 	s = pserialize_read_enter();
1062 	IFADDR_READER_FOREACH(ifa, ifp) {
1063 		if (ifa->ifa_addr->sa_family != AF_INET6)
1064 			continue;
1065 
1066 		ifa_acquire(ifa, &psref_ia);
1067 		pserialize_read_exit(s);
1068 
1069 		ifa6 = (struct in6_ifaddr *)ifa;
1070 		LIST_FOREACH(in6m, &ifa6->ia6_multiaddrs, in6m_entry) {
1071 			if (written + 2 * sizeof(struct in6_addr) +
1072 			    sizeof(uint32_t) > *oldlenp)
1073 				goto done;
1074 			error = sysctl_copyout(l, &ifa6->ia_addr.sin6_addr,
1075 			    oldp, sizeof(struct in6_addr));
1076 			if (error)
1077 				goto done;
1078 			oldp = (char *)oldp + sizeof(struct in6_addr);
1079 			written += sizeof(struct in6_addr);
1080 			error = sysctl_copyout(l, &in6m->in6m_addr,
1081 			    oldp, sizeof(struct in6_addr));
1082 			if (error)
1083 				goto done;
1084 			oldp = (char *)oldp + sizeof(struct in6_addr);
1085 			written += sizeof(struct in6_addr);
1086 			tmp = in6m->in6m_refcount;
1087 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1088 			if (error)
1089 				goto done;
1090 			oldp = (char *)oldp + sizeof(tmp);
1091 			written += sizeof(tmp);
1092 		}
1093 
1094 		s = pserialize_read_enter();
1095 		ifa_release(ifa, &psref_ia);
1096 	}
1097 	pserialize_read_exit(s);
1098 done:
1099 	ifa_release(ifa, &psref_ia);
1100 	if_put(ifp, &psref);
1101 	curlwp_bindx(bound);
1102 	*oldlenp = written;
1103 	return error;
1104 }
1105 
1106 void
1107 in6_sysctl_multicast_setup(struct sysctllog **clog)
1108 {
1109 
1110 	sysctl_createv(clog, 0, NULL, NULL,
1111 		       CTLFLAG_PERMANENT,
1112 		       CTLTYPE_NODE, "inet6", NULL,
1113 		       NULL, 0, NULL, 0,
1114 		       CTL_NET, PF_INET6, CTL_EOL);
1115 
1116 	sysctl_createv(clog, 0, NULL, NULL,
1117 		       CTLFLAG_PERMANENT,
1118 		       CTLTYPE_NODE, "multicast",
1119 		       SYSCTL_DESCR("Multicast information"),
1120 		       in6_multicast_sysctl, 0, NULL, 0,
1121 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1122 
1123 	sysctl_createv(clog, 0, NULL, NULL,
1124 		       CTLFLAG_PERMANENT,
1125 		       CTLTYPE_NODE, "multicast_kludge",
1126 		       SYSCTL_DESCR("multicast kludge information"),
1127 		       in6_mkludge_sysctl, 0, NULL, 0,
1128 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1129 }
1130