xref: /netbsd-src/sys/netinet6/mld6.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: mld6.c,v 1.99 2018/05/29 04:39:26 ozaki-r Exp $	*/
2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1992, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Stephen Deering of Stanford University.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
65  */
66 
67 /*
68  * Copyright (c) 1988 Stephen Deering.
69  *
70  * This code is derived from software contributed to Berkeley by
71  * Stephen Deering of Stanford University.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
102  */
103 
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.99 2018/05/29 04:39:26 ozaki-r Exp $");
106 
107 #ifdef _KERNEL_OPT
108 #include "opt_inet.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/mbuf.h>
115 #include <sys/socket.h>
116 #include <sys/socketvar.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119 #include <sys/kernel.h>
120 #include <sys/callout.h>
121 #include <sys/cprng.h>
122 #include <sys/rwlock.h>
123 
124 #include <net/if.h>
125 
126 #include <netinet/in.h>
127 #include <netinet/in_var.h>
128 #include <netinet6/in6_var.h>
129 #include <netinet/ip6.h>
130 #include <netinet6/ip6_var.h>
131 #include <netinet6/scope6_var.h>
132 #include <netinet/icmp6.h>
133 #include <netinet6/icmp6_private.h>
134 #include <netinet6/mld6_var.h>
135 
136 static krwlock_t	in6_multilock __cacheline_aligned;
137 
138 /*
139  * Protocol constants
140  */
141 
142 /*
143  * time between repetitions of a node's initial report of interest in a
144  * multicast address(in seconds)
145  */
146 #define MLD_UNSOLICITED_REPORT_INTERVAL	10
147 
148 static struct ip6_pktopts ip6_opts;
149 
150 static void mld_start_listening(struct in6_multi *);
151 static void mld_stop_listening(struct in6_multi *);
152 
153 static struct mld_hdr *mld_allocbuf(struct mbuf **, struct in6_multi *, int);
154 static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
155 static void mld_starttimer(struct in6_multi *);
156 static void mld_stoptimer(struct in6_multi *);
157 static u_long mld_timerresid(struct in6_multi *);
158 
159 static void in6m_ref(struct in6_multi *);
160 static void in6m_unref(struct in6_multi *);
161 static void in6m_destroy(struct in6_multi *);
162 
163 void
164 mld_init(void)
165 {
166 	static u_int8_t hbh_buf[8];
167 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
168 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
169 
170 	/* ip6h_nxt will be fill in later */
171 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
172 
173 	/* XXX: grotty hard coding... */
174 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
175 	hbh_buf[3] = 0;
176 	hbh_buf[4] = IP6OPT_RTALERT;
177 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
178 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
179 
180 	ip6_opts.ip6po_hbh = hbh;
181 	/* We will specify the hoplimit by a multicast option. */
182 	ip6_opts.ip6po_hlim = -1;
183 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
184 
185 	rw_init(&in6_multilock);
186 }
187 
188 static void
189 mld_starttimer(struct in6_multi *in6m)
190 {
191 	struct timeval now;
192 
193 	KASSERT(rw_write_held(&in6_multilock));
194 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
195 
196 	microtime(&now);
197 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
198 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
199 	    (in6m->in6m_timer % hz) * (1000000 / hz);
200 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
201 		in6m->in6m_timer_expire.tv_sec++;
202 		in6m->in6m_timer_expire.tv_usec -= 1000000;
203 	}
204 
205 	/* start or restart the timer */
206 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
207 }
208 
209 /*
210  * mld_stoptimer releases in6_multilock when calling callout_halt.
211  * The caller must ensure in6m won't be freed while releasing the lock.
212  */
213 static void
214 mld_stoptimer(struct in6_multi *in6m)
215 {
216 
217 	KASSERT(rw_write_held(&in6_multilock));
218 
219 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
220 		return;
221 
222 	rw_exit(&in6_multilock);
223 
224 	callout_halt(&in6m->in6m_timer_ch, NULL);
225 
226 	rw_enter(&in6_multilock, RW_WRITER);
227 
228 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
229 }
230 
231 static void
232 mld_timeo(void *arg)
233 {
234 	struct in6_multi *in6m = arg;
235 
236 	KASSERT(in6m->in6m_refcount > 0);
237 
238 	KERNEL_LOCK_UNLESS_NET_MPSAFE();
239 	rw_enter(&in6_multilock, RW_WRITER);
240 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
241 		goto out;
242 
243 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
244 
245 	switch (in6m->in6m_state) {
246 	case MLD_REPORTPENDING:
247 		mld_start_listening(in6m);
248 		break;
249 	default:
250 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
251 		break;
252 	}
253 
254 out:
255 	rw_exit(&in6_multilock);
256 	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
257 }
258 
259 static u_long
260 mld_timerresid(struct in6_multi *in6m)
261 {
262 	struct timeval now, diff;
263 
264 	microtime(&now);
265 
266 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
267 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
268 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
269 		return (0);
270 	}
271 	diff = in6m->in6m_timer_expire;
272 	diff.tv_sec -= now.tv_sec;
273 	diff.tv_usec -= now.tv_usec;
274 	if (diff.tv_usec < 0) {
275 		diff.tv_sec--;
276 		diff.tv_usec += 1000000;
277 	}
278 
279 	/* return the remaining time in milliseconds */
280 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
281 }
282 
283 static void
284 mld_start_listening(struct in6_multi *in6m)
285 {
286 	struct in6_addr all_in6;
287 
288 	KASSERT(rw_write_held(&in6_multilock));
289 
290 	/*
291 	 * RFC2710 page 10:
292 	 * The node never sends a Report or Done for the link-scope all-nodes
293 	 * address.
294 	 * MLD messages are never sent for multicast addresses whose scope is 0
295 	 * (reserved) or 1 (node-local).
296 	 */
297 	all_in6 = in6addr_linklocal_allnodes;
298 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
299 		/* XXX: this should not happen! */
300 		in6m->in6m_timer = 0;
301 		in6m->in6m_state = MLD_OTHERLISTENER;
302 	}
303 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
304 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
305 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
306 		in6m->in6m_state = MLD_OTHERLISTENER;
307 	} else {
308 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
309 		in6m->in6m_timer = cprng_fast32() %
310 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
311 		in6m->in6m_state = MLD_IREPORTEDLAST;
312 
313 		mld_starttimer(in6m);
314 	}
315 }
316 
317 static void
318 mld_stop_listening(struct in6_multi *in6m)
319 {
320 	struct in6_addr allnode, allrouter;
321 
322 	KASSERT(rw_lock_held(&in6_multilock));
323 
324 	allnode = in6addr_linklocal_allnodes;
325 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
326 		/* XXX: this should not happen! */
327 		return;
328 	}
329 	allrouter = in6addr_linklocal_allrouters;
330 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
331 		/* XXX impossible */
332 		return;
333 	}
334 
335 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
336 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
337 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
338 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
339 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
340 	}
341 }
342 
343 void
344 mld_input(struct mbuf *m, int off)
345 {
346 	struct ip6_hdr *ip6;
347 	struct mld_hdr *mldh;
348 	struct ifnet *ifp;
349 	struct in6_multi *in6m = NULL;
350 	struct in6_addr mld_addr, all_in6;
351 	u_long timer = 0;	/* timer value in the MLD query header */
352 	struct psref psref;
353 
354 	ifp = m_get_rcvif_psref(m, &psref);
355 	if (__predict_false(ifp == NULL))
356 		goto out;
357 	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
358 	if (mldh == NULL) {
359 		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
360 		goto out_nodrop;
361 	}
362 
363 	ip6 = mtod(m, struct ip6_hdr *);
364 
365 	/* source address validation */
366 	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
367 		/*
368 		 * RFC3590 allows the IPv6 unspecified address as the source
369 		 * address of MLD report and done messages.  However, as this
370 		 * same document says, this special rule is for snooping
371 		 * switches and the RFC requires routers to discard MLD packets
372 		 * with the unspecified source address.  The RFC only talks
373 		 * about hosts receiving an MLD query or report in Security
374 		 * Considerations, but this is probably the correct intention.
375 		 * RFC3590 does not talk about other cases than link-local and
376 		 * the unspecified source addresses, but we believe the same
377 		 * rule should be applied.
378 		 * As a result, we only allow link-local addresses as the
379 		 * source address; otherwise, simply discard the packet.
380 		 */
381 #if 0
382 		/*
383 		 * XXX: do not log in an input path to avoid log flooding,
384 		 * though RFC3590 says "SHOULD log" if the source of a query
385 		 * is the unspecified address.
386 		 */
387 		char ip6bufs[INET6_ADDRSTRLEN];
388 		char ip6bufm[INET6_ADDRSTRLEN];
389 		log(LOG_INFO,
390 		    "mld_input: src %s is not link-local (grp=%s)\n",
391 		    IN6_PRINT(ip6bufs,&ip6->ip6_src),
392 		    IN6_PRINT(ip6bufm, &mldh->mld_addr));
393 #endif
394 		goto out;
395 	}
396 
397 	/*
398 	 * make a copy for local work (in6_setscope() may modify the 1st arg)
399 	 */
400 	mld_addr = mldh->mld_addr;
401 	if (in6_setscope(&mld_addr, ifp, NULL)) {
402 		/* XXX: this should not happen! */
403 		goto out;
404 	}
405 
406 	/*
407 	 * In the MLD specification, there are 3 states and a flag.
408 	 *
409 	 * In Non-Listener state, we simply don't have a membership record.
410 	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
411 	 * In Idle Listener state, our timer is not running
412 	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
413 	 *
414 	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
415 	 * we have heard a report from another member, or MLD_IREPORTEDLAST
416 	 * if we sent the last report.
417 	 */
418 	switch (mldh->mld_type) {
419 	case MLD_LISTENER_QUERY: {
420 		struct in6_multi *next;
421 
422 		if (ifp->if_flags & IFF_LOOPBACK)
423 			break;
424 
425 		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
426 		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
427 			break;	/* print error or log stat? */
428 
429 		all_in6 = in6addr_linklocal_allnodes;
430 		if (in6_setscope(&all_in6, ifp, NULL)) {
431 			/* XXX: this should not happen! */
432 			break;
433 		}
434 
435 		/*
436 		 * - Start the timers in all of our membership records
437 		 *   that the query applies to for the interface on
438 		 *   which the query arrived excl. those that belong
439 		 *   to the "all-nodes" group (ff02::1).
440 		 * - Restart any timer that is already running but has
441 		 *   a value longer than the requested timeout.
442 		 * - Use the value specified in the query message as
443 		 *   the maximum timeout.
444 		 */
445 		timer = ntohs(mldh->mld_maxdelay);
446 
447 		rw_enter(&in6_multilock, RW_WRITER);
448 		/*
449 		 * mld_stoptimer and mld_sendpkt release in6_multilock
450 		 * temporarily, so we have to prevent in6m from being freed
451 		 * while releasing the lock by having an extra reference to it.
452 		 *
453 		 * Also in6_purge_multi might remove items from the list of the
454 		 * ifp while releasing the lock. Fortunately in6_purge_multi is
455 		 * never executed as long as we have a psref of the ifp.
456 		 */
457 		LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
458 			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
459 			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
460 			    IPV6_ADDR_SCOPE_LINKLOCAL)
461 				continue;
462 
463 			if (in6m->in6m_state == MLD_REPORTPENDING)
464 				continue; /* we are not yet ready */
465 
466 			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
467 			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
468 				continue;
469 
470 			if (timer == 0) {
471 				in6m_ref(in6m);
472 
473 				/* send a report immediately */
474 				mld_stoptimer(in6m);
475 				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
476 				in6m->in6m_state = MLD_IREPORTEDLAST;
477 
478 				in6m_unref(in6m); /* May free in6m */
479 			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
480 			    mld_timerresid(in6m) > timer) {
481 				in6m->in6m_timer =
482 				   1 + (cprng_fast32() % timer) * hz / 1000;
483 				mld_starttimer(in6m);
484 			}
485 		}
486 		rw_exit(&in6_multilock);
487 		break;
488 	    }
489 
490 	case MLD_LISTENER_REPORT:
491 		/*
492 		 * For fast leave to work, we have to know that we are the
493 		 * last person to send a report for this group.  Reports
494 		 * can potentially get looped back if we are a multicast
495 		 * router, so discard reports sourced by me.
496 		 * Note that it is impossible to check IFF_LOOPBACK flag of
497 		 * ifp for this purpose, since ip6_mloopback pass the physical
498 		 * interface to looutput.
499 		 */
500 		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
501 			break;
502 
503 		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
504 			break;
505 
506 		/*
507 		 * If we belong to the group being reported, stop
508 		 * our timer for that group.
509 		 */
510 		rw_enter(&in6_multilock, RW_WRITER);
511 		in6m = in6_lookup_multi(&mld_addr, ifp);
512 		if (in6m) {
513 			in6m_ref(in6m);
514 			mld_stoptimer(in6m); /* transit to idle state */
515 			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
516 			in6m_unref(in6m);
517 			in6m = NULL; /* in6m might be freed */
518 		}
519 		rw_exit(&in6_multilock);
520 		break;
521 	default:		/* this is impossible */
522 #if 0
523 		/*
524 		 * this case should be impossible because of filtering in
525 		 * icmp6_input().  But we explicitly disabled this part
526 		 * just in case.
527 		 */
528 		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
529 #endif
530 		break;
531 	}
532 
533 out:
534 	m_freem(m);
535 out_nodrop:
536 	m_put_rcvif_psref(ifp, &psref);
537 }
538 
539 /*
540  * XXX mld_sendpkt must be called with in6_multilock held and
541  * will release in6_multilock before calling ip6_output and
542  * returning to avoid locking against myself in ip6_output.
543  */
544 static void
545 mld_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
546 {
547 	struct mbuf *mh;
548 	struct mld_hdr *mldh;
549 	struct ip6_hdr *ip6 = NULL;
550 	struct ip6_moptions im6o;
551 	struct in6_ifaddr *ia = NULL;
552 	struct ifnet *ifp = in6m->in6m_ifp;
553 	int ignflags;
554 	struct psref psref;
555 	int bound;
556 
557 	KASSERT(rw_write_held(&in6_multilock));
558 
559 	/*
560 	 * At first, find a link local address on the outgoing interface
561 	 * to use as the source address of the MLD packet.
562 	 * We do not reject tentative addresses for MLD report to deal with
563 	 * the case where we first join a link-local address.
564 	 */
565 	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
566 	bound = curlwp_bind();
567 	ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
568 	if (ia == NULL) {
569 		curlwp_bindx(bound);
570 		return;
571 	}
572 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
573 		ia6_release(ia, &psref);
574 		ia = NULL;
575 	}
576 
577 	/* Allocate two mbufs to store IPv6 header and MLD header */
578 	mldh = mld_allocbuf(&mh, in6m, type);
579 	if (mldh == NULL) {
580 		ia6_release(ia, &psref);
581 		curlwp_bindx(bound);
582 		return;
583 	}
584 
585 	/* fill src/dst here */
586 	ip6 = mtod(mh, struct ip6_hdr *);
587 	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
588 	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
589 	ia6_release(ia, &psref);
590 	curlwp_bindx(bound);
591 
592 	mldh->mld_addr = in6m->in6m_addr;
593 	in6_clearscope(&mldh->mld_addr); /* XXX */
594 	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
595 	    sizeof(struct mld_hdr));
596 
597 	/* construct multicast option */
598 	memset(&im6o, 0, sizeof(im6o));
599 	im6o.im6o_multicast_if_index = if_get_index(ifp);
600 	im6o.im6o_multicast_hlim = 1;
601 
602 	/*
603 	 * Request loopback of the report if we are acting as a multicast
604 	 * router, so that the process-level routing daemon can hear it.
605 	 */
606 	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
607 
608 	/* increment output statistics */
609 	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
610 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
611 	switch (type) {
612 	case MLD_LISTENER_QUERY:
613 		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
614 		break;
615 	case MLD_LISTENER_REPORT:
616 		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
617 		break;
618 	case MLD_LISTENER_DONE:
619 		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
620 		break;
621 	}
622 
623 	/* XXX we cannot call ip6_output with holding in6_multilock */
624 	rw_exit(&in6_multilock);
625 
626 	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
627 	    &im6o, NULL, NULL);
628 
629 	rw_enter(&in6_multilock, RW_WRITER);
630 }
631 
632 static struct mld_hdr *
633 mld_allocbuf(struct mbuf **mh, struct in6_multi *in6m, int type)
634 {
635 	struct mbuf *md;
636 	struct mld_hdr *mldh;
637 	struct ip6_hdr *ip6;
638 
639 	/*
640 	 * Allocate mbufs to store ip6 header and MLD header.
641 	 * We allocate 2 mbufs and make chain in advance because
642 	 * it is more convenient when inserting the hop-by-hop option later.
643 	 */
644 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
645 	if (*mh == NULL)
646 		return NULL;
647 	MGET(md, M_DONTWAIT, MT_DATA);
648 	if (md == NULL) {
649 		m_free(*mh);
650 		*mh = NULL;
651 		return NULL;
652 	}
653 	(*mh)->m_next = md;
654 	md->m_next = NULL;
655 
656 	m_reset_rcvif((*mh));
657 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
658 	(*mh)->m_len = sizeof(struct ip6_hdr);
659 	MH_ALIGN(*mh, sizeof(struct ip6_hdr));
660 
661 	/* fill in the ip6 header */
662 	ip6 = mtod(*mh, struct ip6_hdr *);
663 	memset(ip6, 0, sizeof(*ip6));
664 	ip6->ip6_flow = 0;
665 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
666 	ip6->ip6_vfc |= IPV6_VERSION;
667 	/* ip6_plen will be set later */
668 	ip6->ip6_nxt = IPPROTO_ICMPV6;
669 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
670 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
671 
672 	/* fill in the MLD header as much as possible */
673 	md->m_len = sizeof(struct mld_hdr);
674 	mldh = mtod(md, struct mld_hdr *);
675 	memset(mldh, 0, sizeof(struct mld_hdr));
676 	mldh->mld_type = type;
677 	return mldh;
678 }
679 
680 static void
681 in6m_ref(struct in6_multi *in6m)
682 {
683 
684 	KASSERT(rw_write_held(&in6_multilock));
685 	in6m->in6m_refcount++;
686 }
687 
688 static void
689 in6m_unref(struct in6_multi *in6m)
690 {
691 
692 	KASSERT(rw_write_held(&in6_multilock));
693 	if (--in6m->in6m_refcount == 0)
694 		in6m_destroy(in6m);
695 }
696 
697 /*
698  * Add an address to the list of IP6 multicast addresses for a given interface.
699  */
700 struct	in6_multi *
701 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp, int *errorp,
702     int timer)
703 {
704 	struct	sockaddr_in6 sin6;
705 	struct	in6_multi *in6m;
706 
707 	*errorp = 0;
708 
709 	rw_enter(&in6_multilock, RW_WRITER);
710 	/*
711 	 * See if address already in list.
712 	 */
713 	in6m = in6_lookup_multi(maddr6, ifp);
714 	if (in6m != NULL) {
715 		/*
716 		 * Found it; just increment the reference count.
717 		 */
718 		in6m->in6m_refcount++;
719 	} else {
720 		/*
721 		 * New address; allocate a new multicast record
722 		 * and link it into the interface's multicast list.
723 		 */
724 		in6m = malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
725 		if (in6m == NULL) {
726 			*errorp = ENOBUFS;
727 			goto out;
728 		}
729 
730 		in6m->in6m_addr = *maddr6;
731 		in6m->in6m_ifp = ifp;
732 		in6m->in6m_refcount = 1;
733 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
734 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
735 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
736 
737 		LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
738 
739 		/*
740 		 * Ask the network driver to update its multicast reception
741 		 * filter appropriately for the new address.
742 		 */
743 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
744 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
745 		if (*errorp) {
746 			callout_destroy(&in6m->in6m_timer_ch);
747 			LIST_REMOVE(in6m, in6m_entry);
748 			free(in6m, M_IPMADDR);
749 			in6m = NULL;
750 			goto out;
751 		}
752 
753 		in6m->in6m_timer = timer;
754 		if (in6m->in6m_timer > 0) {
755 			in6m->in6m_state = MLD_REPORTPENDING;
756 			mld_starttimer(in6m);
757 			goto out;
758 		}
759 
760 		/*
761 		 * Let MLD6 know that we have joined a new IP6 multicast
762 		 * group.
763 		 */
764 		mld_start_listening(in6m);
765 	}
766 out:
767 	rw_exit(&in6_multilock);
768 	return in6m;
769 }
770 
771 static void
772 in6m_destroy(struct in6_multi *in6m)
773 {
774 	struct sockaddr_in6 sin6;
775 
776 	KASSERT(rw_write_held(&in6_multilock));
777 	KASSERT(in6m->in6m_refcount == 0);
778 
779 	/*
780 	 * Unlink from list if it's listed.  This must be done before
781 	 * mld_stop_listening because it releases in6_multilock and that allows
782 	 * someone to look up the removing in6m from the list and add a
783 	 * reference to the entry unexpectedly.
784 	 */
785 	if (in6_lookup_multi(&in6m->in6m_addr, in6m->in6m_ifp) != NULL)
786 		LIST_REMOVE(in6m, in6m_entry);
787 
788 	/*
789 	 * No remaining claims to this record; let MLD6 know
790 	 * that we are leaving the multicast group.
791 	 */
792 	mld_stop_listening(in6m);
793 
794 	/*
795 	 * Delete all references of this multicasting group from
796 	 * the membership arrays
797 	 */
798 	in6_purge_mcast_references(in6m);
799 
800 	/*
801 	 * Notify the network driver to update its multicast
802 	 * reception filter.
803 	 */
804 	sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
805 	if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
806 
807 	/* Tell mld_timeo we're halting the timer */
808 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
809 
810 	rw_exit(&in6_multilock);
811 	callout_halt(&in6m->in6m_timer_ch, NULL);
812 	callout_destroy(&in6m->in6m_timer_ch);
813 
814 	free(in6m, M_IPMADDR);
815 	rw_enter(&in6_multilock, RW_WRITER);
816 }
817 
818 /*
819  * Delete a multicast address record.
820  */
821 void
822 in6_delmulti_locked(struct in6_multi *in6m)
823 {
824 
825 	KASSERT(rw_write_held(&in6_multilock));
826 	KASSERT(in6m->in6m_refcount > 0);
827 
828 	/*
829 	 * The caller should have a reference to in6m. So we don't need to care
830 	 * of releasing the lock in mld_stoptimer.
831 	 */
832 	mld_stoptimer(in6m);
833 	if (--in6m->in6m_refcount == 0)
834 		in6m_destroy(in6m);
835 }
836 
837 void
838 in6_delmulti(struct in6_multi *in6m)
839 {
840 
841 	rw_enter(&in6_multilock, RW_WRITER);
842 	in6_delmulti_locked(in6m);
843 	rw_exit(&in6_multilock);
844 }
845 
846 /*
847  * Look up the in6_multi record for a given IP6 multicast address
848  * on a given interface. If no matching record is found, "in6m"
849  * returns NULL.
850  */
851 struct in6_multi *
852 in6_lookup_multi(const struct in6_addr *addr, const struct ifnet *ifp)
853 {
854 	struct in6_multi *in6m;
855 
856 	KASSERT(rw_lock_held(&in6_multilock));
857 
858 	LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
859 		if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
860 			break;
861 	}
862 	return in6m;
863 }
864 
865 void
866 in6_lookup_and_delete_multi(const struct in6_addr *addr,
867     const struct ifnet *ifp)
868 {
869 	struct in6_multi *in6m;
870 
871 	rw_enter(&in6_multilock, RW_WRITER);
872 	in6m = in6_lookup_multi(addr, ifp);
873 	if (in6m != NULL)
874 		in6_delmulti_locked(in6m);
875 	rw_exit(&in6_multilock);
876 }
877 
878 bool
879 in6_multi_group(const struct in6_addr *addr, const struct ifnet *ifp)
880 {
881 	bool ingroup;
882 
883 	rw_enter(&in6_multilock, RW_READER);
884 	ingroup = in6_lookup_multi(addr, ifp) != NULL;
885 	rw_exit(&in6_multilock);
886 
887 	return ingroup;
888 }
889 
890 /*
891  * Purge in6_multi records associated to the interface.
892  */
893 void
894 in6_purge_multi(struct ifnet *ifp)
895 {
896 	struct in6_multi *in6m, *next;
897 
898 	rw_enter(&in6_multilock, RW_WRITER);
899 	LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
900 		LIST_REMOVE(in6m, in6m_entry);
901 		/*
902 		 * Normally multicast addresses are already purged at this
903 		 * point. Remaining references aren't accessible via ifp,
904 		 * so what we can do here is to prevent ifp from being
905 		 * accessed via in6m by removing it from the list of ifp.
906 		 */
907 		mld_stoptimer(in6m);
908 	}
909 	rw_exit(&in6_multilock);
910 }
911 
912 void
913 in6_multi_lock(int op)
914 {
915 
916 	rw_enter(&in6_multilock, op);
917 }
918 
919 void
920 in6_multi_unlock(void)
921 {
922 
923 	rw_exit(&in6_multilock);
924 }
925 
926 bool
927 in6_multi_locked(int op)
928 {
929 
930 	switch (op) {
931 	case RW_READER:
932 		return rw_read_held(&in6_multilock);
933 	case RW_WRITER:
934 		return rw_write_held(&in6_multilock);
935 	default:
936 		return rw_lock_held(&in6_multilock);
937 	}
938 }
939 
940 struct in6_multi_mship *
941 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr, int *errorp, int timer)
942 {
943 	struct in6_multi_mship *imm;
944 
945 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
946 	if (imm == NULL) {
947 		*errorp = ENOBUFS;
948 		return NULL;
949 	}
950 
951 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
952 	if (!imm->i6mm_maddr) {
953 		/* *errorp is already set */
954 		free(imm, M_IPMADDR);
955 		return NULL;
956 	}
957 	return imm;
958 }
959 
960 int
961 in6_leavegroup(struct in6_multi_mship *imm)
962 {
963 	struct in6_multi *in6m;
964 
965 	rw_enter(&in6_multilock, RW_WRITER);
966 	in6m = imm->i6mm_maddr;
967 	imm->i6mm_maddr = NULL;
968 	if (in6m != NULL) {
969 		in6_delmulti_locked(in6m);
970 	}
971 	rw_exit(&in6_multilock);
972 	free(imm, M_IPMADDR);
973 	return 0;
974 }
975 
976 /*
977  * DEPRECATED: keep it just to avoid breaking old sysctl users.
978  */
979 static int
980 in6_mkludge_sysctl(SYSCTLFN_ARGS)
981 {
982 
983 	if (namelen != 1)
984 		return EINVAL;
985 	*oldlenp = 0;
986 	return 0;
987 }
988 
989 static int
990 in6_multicast_sysctl(SYSCTLFN_ARGS)
991 {
992 	struct ifnet *ifp;
993 	struct ifaddr *ifa;
994 	struct in6_ifaddr *ia6;
995 	struct in6_multi *in6m;
996 	uint32_t tmp;
997 	int error;
998 	size_t written;
999 	struct psref psref, psref_ia;
1000 	int bound, s;
1001 
1002 	if (namelen != 1)
1003 		return EINVAL;
1004 
1005 	rw_enter(&in6_multilock, RW_READER);
1006 
1007 	bound = curlwp_bind();
1008 	ifp = if_get_byindex(name[0], &psref);
1009 	if (ifp == NULL) {
1010 		curlwp_bindx(bound);
1011 		rw_exit(&in6_multilock);
1012 		return ENODEV;
1013 	}
1014 
1015 	if (oldp == NULL) {
1016 		*oldlenp = 0;
1017 		s = pserialize_read_enter();
1018 		IFADDR_READER_FOREACH(ifa, ifp) {
1019 			LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
1020 				*oldlenp += 2 * sizeof(struct in6_addr) +
1021 				    sizeof(uint32_t);
1022 			}
1023 		}
1024 		pserialize_read_exit(s);
1025 		if_put(ifp, &psref);
1026 		curlwp_bindx(bound);
1027 		rw_exit(&in6_multilock);
1028 		return 0;
1029 	}
1030 
1031 	error = 0;
1032 	written = 0;
1033 	s = pserialize_read_enter();
1034 	IFADDR_READER_FOREACH(ifa, ifp) {
1035 		if (ifa->ifa_addr->sa_family != AF_INET6)
1036 			continue;
1037 
1038 		ifa_acquire(ifa, &psref_ia);
1039 		pserialize_read_exit(s);
1040 
1041 		ia6 = ifatoia6(ifa);
1042 		LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
1043 			if (written + 2 * sizeof(struct in6_addr) +
1044 			    sizeof(uint32_t) > *oldlenp)
1045 				goto done;
1046 			/*
1047 			 * XXX return the first IPv6 address to keep backward
1048 			 * compatibility, however now multicast addresses
1049 			 * don't belong to any IPv6 addresses so it should be
1050 			 * unnecessary.
1051 			 */
1052 			error = sysctl_copyout(l, &ia6->ia_addr.sin6_addr,
1053 			    oldp, sizeof(struct in6_addr));
1054 			if (error)
1055 				goto done;
1056 			oldp = (char *)oldp + sizeof(struct in6_addr);
1057 			written += sizeof(struct in6_addr);
1058 			error = sysctl_copyout(l, &in6m->in6m_addr,
1059 			    oldp, sizeof(struct in6_addr));
1060 			if (error)
1061 				goto done;
1062 			oldp = (char *)oldp + sizeof(struct in6_addr);
1063 			written += sizeof(struct in6_addr);
1064 			tmp = in6m->in6m_refcount;
1065 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1066 			if (error)
1067 				goto done;
1068 			oldp = (char *)oldp + sizeof(tmp);
1069 			written += sizeof(tmp);
1070 		}
1071 
1072 		s = pserialize_read_enter();
1073 
1074 		break;
1075 	}
1076 	pserialize_read_exit(s);
1077 done:
1078 	ifa_release(ifa, &psref_ia);
1079 	if_put(ifp, &psref);
1080 	curlwp_bindx(bound);
1081 	rw_exit(&in6_multilock);
1082 	*oldlenp = written;
1083 	return error;
1084 }
1085 
1086 void
1087 in6_sysctl_multicast_setup(struct sysctllog **clog)
1088 {
1089 
1090 	sysctl_createv(clog, 0, NULL, NULL,
1091 		       CTLFLAG_PERMANENT,
1092 		       CTLTYPE_NODE, "inet6", NULL,
1093 		       NULL, 0, NULL, 0,
1094 		       CTL_NET, PF_INET6, CTL_EOL);
1095 
1096 	sysctl_createv(clog, 0, NULL, NULL,
1097 		       CTLFLAG_PERMANENT,
1098 		       CTLTYPE_NODE, "multicast",
1099 		       SYSCTL_DESCR("Multicast information"),
1100 		       in6_multicast_sysctl, 0, NULL, 0,
1101 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1102 
1103 	sysctl_createv(clog, 0, NULL, NULL,
1104 		       CTLFLAG_PERMANENT,
1105 		       CTLTYPE_NODE, "multicast_kludge",
1106 		       SYSCTL_DESCR("multicast kludge information"),
1107 		       in6_mkludge_sysctl, 0, NULL, 0,
1108 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1109 }
1110