xref: /netbsd-src/sys/netinet6/mld6.c (revision 16dce51364ebe8aeafbae46bc5aa167b8115bc45)
1 /*	$NetBSD: mld6.c,v 1.91 2018/02/01 07:49:19 maxv Exp $	*/
2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1992, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Stephen Deering of Stanford University.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
65  */
66 
67 /*
68  * Copyright (c) 1988 Stephen Deering.
69  *
70  * This code is derived from software contributed to Berkeley by
71  * Stephen Deering of Stanford University.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
102  */
103 
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.91 2018/02/01 07:49:19 maxv Exp $");
106 
107 #ifdef _KERNEL_OPT
108 #include "opt_inet.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/mbuf.h>
115 #include <sys/socket.h>
116 #include <sys/socketvar.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119 #include <sys/kernel.h>
120 #include <sys/callout.h>
121 #include <sys/cprng.h>
122 #include <sys/rwlock.h>
123 
124 #include <net/if.h>
125 
126 #include <netinet/in.h>
127 #include <netinet/in_var.h>
128 #include <netinet6/in6_var.h>
129 #include <netinet/ip6.h>
130 #include <netinet6/ip6_var.h>
131 #include <netinet6/scope6_var.h>
132 #include <netinet/icmp6.h>
133 #include <netinet6/icmp6_private.h>
134 #include <netinet6/mld6_var.h>
135 
136 #include <net/net_osdep.h>
137 
138 
/*
 * Reader/writer lock taken by the functions in this file around lookups in,
 * and modifications of, the per-interface in6_multi lists and the timer and
 * reference-count fields of the records on them.
 */
static krwlock_t	in6_multilock __cacheline_aligned;

/*
 * Protocol constants
 */

/*
 * Time between repetitions of a node's initial report of interest in a
 * multicast address (in seconds).
 */
#define MLD_UNSOLICITED_REPORT_INTERVAL	10

/*
 * Packet options handed to ip6_output() for every MLD message sent from
 * this file; filled in once by mld_init().
 */
static struct ip6_pktopts ip6_opts;

/* MLD state transitions on join/leave. */
static void mld_start_listening(struct in6_multi *);
static void mld_stop_listening(struct in6_multi *);

/* Packet construction/transmission and per-record timer helpers. */
static struct mld_hdr *mld_allocbuf(struct mbuf **, struct in6_multi *, int);
static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
static void mld_starttimer(struct in6_multi *);
static void mld_stoptimer(struct in6_multi *);
static u_long mld_timerresid(struct in6_multi *);

/* Reference counting of in6_multi records. */
static void in6m_ref(struct in6_multi *);
static void in6m_unref(struct in6_multi *);
static void in6m_destroy(struct in6_multi *);
165 
166 void
167 mld_init(void)
168 {
169 	static u_int8_t hbh_buf[8];
170 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
171 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
172 
173 	/* ip6h_nxt will be fill in later */
174 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
175 
176 	/* XXX: grotty hard coding... */
177 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
178 	hbh_buf[3] = 0;
179 	hbh_buf[4] = IP6OPT_RTALERT;
180 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
181 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
182 
183 	ip6_opts.ip6po_hbh = hbh;
184 	/* We will specify the hoplimit by a multicast option. */
185 	ip6_opts.ip6po_hlim = -1;
186 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
187 
188 	rw_init(&in6_multilock);
189 }
190 
191 static void
192 mld_starttimer(struct in6_multi *in6m)
193 {
194 	struct timeval now;
195 
196 	KASSERT(rw_write_held(&in6_multilock));
197 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
198 
199 	microtime(&now);
200 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
201 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
202 	    (in6m->in6m_timer % hz) * (1000000 / hz);
203 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
204 		in6m->in6m_timer_expire.tv_sec++;
205 		in6m->in6m_timer_expire.tv_usec -= 1000000;
206 	}
207 
208 	/* start or restart the timer */
209 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
210 }
211 
212 /*
213  * mld_stoptimer releases in6_multilock when calling callout_halt.
214  * The caller must ensure in6m won't be freed while releasing the lock.
215  */
216 static void
217 mld_stoptimer(struct in6_multi *in6m)
218 {
219 
220 	KASSERT(rw_write_held(&in6_multilock));
221 
222 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
223 		return;
224 
225 	rw_exit(&in6_multilock);
226 
227 	if (mutex_owned(softnet_lock))
228 		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
229 	else
230 		callout_halt(&in6m->in6m_timer_ch, NULL);
231 
232 	rw_enter(&in6_multilock, RW_WRITER);
233 
234 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
235 }
236 
237 static void
238 mld_timeo(void *arg)
239 {
240 	struct in6_multi *in6m = arg;
241 
242 	KASSERT(in6m->in6m_refcount > 0);
243 
244 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
245 	rw_enter(&in6_multilock, RW_WRITER);
246 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
247 		goto out;
248 
249 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
250 
251 	switch (in6m->in6m_state) {
252 	case MLD_REPORTPENDING:
253 		mld_start_listening(in6m);
254 		break;
255 	default:
256 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
257 		break;
258 	}
259 
260 out:
261 	rw_exit(&in6_multilock);
262 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
263 }
264 
265 static u_long
266 mld_timerresid(struct in6_multi *in6m)
267 {
268 	struct timeval now, diff;
269 
270 	microtime(&now);
271 
272 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
273 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
274 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
275 		return (0);
276 	}
277 	diff = in6m->in6m_timer_expire;
278 	diff.tv_sec -= now.tv_sec;
279 	diff.tv_usec -= now.tv_usec;
280 	if (diff.tv_usec < 0) {
281 		diff.tv_sec--;
282 		diff.tv_usec += 1000000;
283 	}
284 
285 	/* return the remaining time in milliseconds */
286 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
287 }
288 
289 static void
290 mld_start_listening(struct in6_multi *in6m)
291 {
292 	struct in6_addr all_in6;
293 
294 	KASSERT(rw_write_held(&in6_multilock));
295 
296 	/*
297 	 * RFC2710 page 10:
298 	 * The node never sends a Report or Done for the link-scope all-nodes
299 	 * address.
300 	 * MLD messages are never sent for multicast addresses whose scope is 0
301 	 * (reserved) or 1 (node-local).
302 	 */
303 	all_in6 = in6addr_linklocal_allnodes;
304 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
305 		/* XXX: this should not happen! */
306 		in6m->in6m_timer = 0;
307 		in6m->in6m_state = MLD_OTHERLISTENER;
308 	}
309 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
310 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
311 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
312 		in6m->in6m_state = MLD_OTHERLISTENER;
313 	} else {
314 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
315 		in6m->in6m_timer = cprng_fast32() %
316 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
317 		in6m->in6m_state = MLD_IREPORTEDLAST;
318 
319 		mld_starttimer(in6m);
320 	}
321 }
322 
323 static void
324 mld_stop_listening(struct in6_multi *in6m)
325 {
326 	struct in6_addr allnode, allrouter;
327 
328 	KASSERT(rw_lock_held(&in6_multilock));
329 
330 	allnode = in6addr_linklocal_allnodes;
331 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
332 		/* XXX: this should not happen! */
333 		return;
334 	}
335 	allrouter = in6addr_linklocal_allrouters;
336 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
337 		/* XXX impossible */
338 		return;
339 	}
340 
341 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
342 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
343 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
344 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
345 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
346 	}
347 }
348 
/*
 * Process a received MLD message.
 *
 * m:   the received packet.  It is freed here before returning, except
 *      when fetching the MLD header fails (see out_nodrop below).
 * off: offset of the MLD header within m.
 *
 * Queries (re)arm or fire the report timers of the matching membership
 * records on the receiving interface; reports from other nodes stop our
 * timer for the reported group.  Any other type is ignored.
 */
void
mld_input(struct mbuf *m, int off)
{
	struct ip6_hdr *ip6;
	struct mld_hdr *mldh;
	struct ifnet *ifp;
	struct in6_multi *in6m = NULL;
	struct in6_addr mld_addr, all_in6;
	u_long timer = 0;	/* timer value in the MLD query header */
	struct psref psref;

	/* Hold a passive reference to the receiving interface throughout. */
	ifp = m_get_rcvif_psref(m, &psref);
	if (__predict_false(ifp == NULL))
		goto out;
	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
	if (mldh == NULL) {
		/*
		 * m is not freed on this path (out_nodrop skips m_freem);
		 * presumably IP6_EXTHDR_GET has already disposed of it --
		 * confirm against the macro's definition.
		 */
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		goto out_nodrop;
	}

	ip6 = mtod(m, struct ip6_hdr *);

	/* source address validation */
	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
		/*
		 * RFC3590 allows the IPv6 unspecified address as the source
		 * address of MLD report and done messages.  However, as this
		 * same document says, this special rule is for snooping
		 * switches and the RFC requires routers to discard MLD packets
		 * with the unspecified source address.  The RFC only talks
		 * about hosts receiving an MLD query or report in Security
		 * Considerations, but this is probably the correct intention.
		 * RFC3590 does not talk about other cases than link-local and
		 * the unspecified source addresses, but we believe the same
		 * rule should be applied.
		 * As a result, we only allow link-local addresses as the
		 * source address; otherwise, simply discard the packet.
		 */
#if 0
		/*
		 * XXX: do not log in an input path to avoid log flooding,
		 * though RFC3590 says "SHOULD log" if the source of a query
		 * is the unspecified address.
		 */
		char ip6bufs[INET6_ADDRSTRLEN];
		char ip6bufm[INET6_ADDRSTRLEN];
		log(LOG_INFO,
		    "mld_input: src %s is not link-local (grp=%s)\n",
		    IN6_PRINT(ip6bufs,&ip6->ip6_src),
		    IN6_PRINT(ip6bufm, &mldh->mld_addr));
#endif
		goto out;
	}

	/*
	 * make a copy for local work (in6_setscope() may modify the 1st arg)
	 */
	mld_addr = mldh->mld_addr;
	if (in6_setscope(&mld_addr, ifp, NULL)) {
		/* XXX: this should not happen! */
		goto out;
	}

	/*
	 * In the MLD specification, there are 3 states and a flag.
	 *
	 * In Non-Listener state, we simply don't have a membership record.
	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
	 * In Idle Listener state, our timer is not running
	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
	 *
	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
	 * we have heard a report from another member, or MLD_IREPORTEDLAST
	 * if we sent the last report.
	 */
	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY: {
		struct in6_multi *next;

		/* Queries received on a loopback interface are ignored. */
		if (ifp->if_flags & IFF_LOOPBACK)
			break;

		/*
		 * The queried address must be either unspecified (a query
		 * for all groups) or a multicast address.
		 */
		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
			break;	/* print error or log stat? */

		all_in6 = in6addr_linklocal_allnodes;
		if (in6_setscope(&all_in6, ifp, NULL)) {
			/* XXX: this should not happen! */
			break;
		}

		/*
		 * - Start the timers in all of our membership records
		 *   that the query applies to for the interface on
		 *   which the query arrived excl. those that belong
		 *   to the "all-nodes" group (ff02::1).
		 * - Restart any timer that is already running but has
		 *   a value longer than the requested timeout.
		 * - Use the value specified in the query message as
		 *   the maximum timeout.
		 */
		timer = ntohs(mldh->mld_maxdelay);

		rw_enter(&in6_multilock, RW_WRITER);
		/*
		 * mld_stoptimer and mld_sendpkt release in6_multilock
		 * temporarily, so we have to prevent in6m from being freed
		 * while releasing the lock by having an extra reference to it.
		 *
		 * Also in6_purge_multi might remove items from the list of the
		 * ifp while releasing the lock. Fortunately in6_purge_multi is
		 * never executed as long as we have a psref of the ifp.
		 */
		LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
			/* Groups that are never reported are skipped. */
			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
			    IPV6_ADDR_SCOPE_LINKLOCAL)
				continue;

			if (in6m->in6m_state == MLD_REPORTPENDING)
				continue; /* we are not yet ready */

			/* A group-specific query only matches that group. */
			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
				continue;

			if (timer == 0) {
				in6m_ref(in6m);

				/* send a report immediately */
				mld_stoptimer(in6m);
				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
				in6m->in6m_state = MLD_IREPORTEDLAST;

				in6m_unref(in6m); /* May free in6m */
			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
			    mld_timerresid(in6m) > timer) {
				/*
				 * Pick a random delay below the queried
				 * maximum.  mld_timerresid() returns
				 * milliseconds and "* hz / 1000" converts
				 * `timer' units to ticks, i.e. `timer' is
				 * handled as milliseconds.  The leading 1
				 * keeps the delay non-zero.
				 */
				in6m->in6m_timer =
				   1 + (cprng_fast32() % timer) * hz / 1000;
				mld_starttimer(in6m);
			}
		}
		rw_exit(&in6_multilock);
		break;
	    }

	case MLD_LISTENER_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group.  Reports
		 * can potentially get looped back if we are a multicast
		 * router, so discard reports sourced by me.
		 * Note that it is impossible to check IFF_LOOPBACK flag of
		 * ifp for this purpose, since ip6_mloopback pass the physical
		 * interface to looutput.
		 */
		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
			break;

		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
			break;

		/*
		 * If we belong to the group being reported, stop
		 * our timer for that group.
		 */
		rw_enter(&in6_multilock, RW_WRITER);
		in6m = in6_lookup_multi(&mld_addr, ifp);
		if (in6m) {
			/*
			 * Hold a reference: mld_stoptimer() drops the lock
			 * while halting the callout.
			 */
			in6m_ref(in6m);
			mld_stoptimer(in6m); /* transit to idle state */
			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
			in6m_unref(in6m);
			in6m = NULL; /* in6m might be freed */
		}
		rw_exit(&in6_multilock);
		break;
	default:		/* this is impossible */
#if 0
		/*
		 * this case should be impossible because of filtering in
		 * icmp6_input().  But we explicitly disabled this part
		 * just in case.
		 */
		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
#endif
		break;
	}

out:
	m_freem(m);
out_nodrop:
	/*
	 * NOTE(review): ifp is NULL when we arrive here from the failed
	 * m_get_rcvif_psref() above; this relies on m_put_rcvif_psref()
	 * accepting a NULL ifp -- confirm.
	 */
	m_put_rcvif_psref(ifp, &psref);
}
544 
/*
 * Build and transmit one MLD message of the given type for in6m.
 *
 * in6m: membership record the message is about; the message is sent on
 *       in6m->in6m_ifp.
 * type: MLD message type, stored in the header and used for statistics.
 * dst:  destination address, or NULL to send to the group address itself.
 *
 * Nothing is sent if no link-local address is found on the interface or
 * if the mbufs cannot be allocated.
 *
 * XXX mld_sendpkt must be called with in6_multilock write-held.  The lock
 * is released around the call to ip6_output (to avoid locking against
 * myself in ip6_output) and is re-acquired before returning, so callers
 * must keep in6m alive across the call.
 */
static void
mld_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
{
	struct mbuf *mh;
	struct mld_hdr *mldh;
	struct ip6_hdr *ip6 = NULL;
	struct ip6_moptions im6o;
	struct in6_ifaddr *ia = NULL;
	struct ifnet *ifp = in6m->in6m_ifp;
	int ignflags;
	struct psref psref;
	int bound;

	KASSERT(rw_write_held(&in6_multilock));

	/*
	 * At first, find a link local address on the outgoing interface
	 * to use as the source address of the MLD packet.
	 * We do not reject tentative addresses for MLD report to deal with
	 * the case where we first join a link-local address.
	 */
	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
	bound = curlwp_bind();
	ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
	if (ia == NULL) {
		curlwp_bindx(bound);
		return;
	}
	/*
	 * A tentative address is not used as the source: drop it so that
	 * the unspecified address is used below.
	 */
	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
		ia6_release(ia, &psref);
		ia = NULL;
	}

	/* Allocate two mbufs to store IPv6 header and MLD header */
	mldh = mld_allocbuf(&mh, in6m, type);
	if (mldh == NULL) {
		/*
		 * NOTE(review): ia may be NULL here (tentative case above);
		 * this relies on ia6_release() tolerating NULL -- confirm.
		 */
		ia6_release(ia, &psref);
		curlwp_bindx(bound);
		return;
	}

	/* fill src/dst here */
	ip6 = mtod(mh, struct ip6_hdr *);
	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
	/* Same NULL caveat as above; ia is only tested, not used, below. */
	ia6_release(ia, &psref);
	curlwp_bindx(bound);

	mldh->mld_addr = in6m->in6m_addr;
	in6_clearscope(&mldh->mld_addr); /* XXX */
	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
	    sizeof(struct mld_hdr));

	/* construct multicast option */
	memset(&im6o, 0, sizeof(im6o));
	im6o.im6o_multicast_if_index = if_get_index(ifp);
	im6o.im6o_multicast_hlim = 1;

	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 */
	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);

	/* increment output statistics */
	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
	icmp6_ifstat_inc(ifp, ifs6_out_msg);
	switch (type) {
	case MLD_LISTENER_QUERY:
		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
		break;
	case MLD_LISTENER_REPORT:
		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
		break;
	case MLD_LISTENER_DONE:
		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
		break;
	}

	/* XXX we cannot call ip6_output with holding in6_multilock */
	rw_exit(&in6_multilock);

	/*
	 * The result of ip6_output is ignored.  IPV6_UNSPECSRC is passed
	 * when the source was left as the unspecified address.
	 */
	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
	    &im6o, NULL, NULL);

	rw_enter(&in6_multilock, RW_WRITER);
}
637 
638 static struct mld_hdr *
639 mld_allocbuf(struct mbuf **mh, struct in6_multi *in6m, int type)
640 {
641 	struct mbuf *md;
642 	struct mld_hdr *mldh;
643 	struct ip6_hdr *ip6;
644 
645 	/*
646 	 * Allocate mbufs to store ip6 header and MLD header.
647 	 * We allocate 2 mbufs and make chain in advance because
648 	 * it is more convenient when inserting the hop-by-hop option later.
649 	 */
650 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
651 	if (*mh == NULL)
652 		return NULL;
653 	MGET(md, M_DONTWAIT, MT_DATA);
654 	if (md == NULL) {
655 		m_free(*mh);
656 		*mh = NULL;
657 		return NULL;
658 	}
659 	(*mh)->m_next = md;
660 	md->m_next = NULL;
661 
662 	m_reset_rcvif((*mh));
663 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
664 	(*mh)->m_len = sizeof(struct ip6_hdr);
665 	MH_ALIGN(*mh, sizeof(struct ip6_hdr));
666 
667 	/* fill in the ip6 header */
668 	ip6 = mtod(*mh, struct ip6_hdr *);
669 	memset(ip6, 0, sizeof(*ip6));
670 	ip6->ip6_flow = 0;
671 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
672 	ip6->ip6_vfc |= IPV6_VERSION;
673 	/* ip6_plen will be set later */
674 	ip6->ip6_nxt = IPPROTO_ICMPV6;
675 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
676 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
677 
678 	/* fill in the MLD header as much as possible */
679 	md->m_len = sizeof(struct mld_hdr);
680 	mldh = mtod(md, struct mld_hdr *);
681 	memset(mldh, 0, sizeof(struct mld_hdr));
682 	mldh->mld_type = type;
683 	return mldh;
684 }
685 
686 static void
687 in6m_ref(struct in6_multi *in6m)
688 {
689 
690 	KASSERT(rw_write_held(&in6_multilock));
691 	in6m->in6m_refcount++;
692 }
693 
694 static void
695 in6m_unref(struct in6_multi *in6m)
696 {
697 
698 	KASSERT(rw_write_held(&in6_multilock));
699 	if (--in6m->in6m_refcount == 0)
700 		in6m_destroy(in6m);
701 }
702 
703 /*
704  * Add an address to the list of IP6 multicast addresses for a given interface.
705  */
706 struct	in6_multi *
707 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp, int *errorp,
708     int timer)
709 {
710 	struct	sockaddr_in6 sin6;
711 	struct	in6_multi *in6m;
712 
713 	*errorp = 0;
714 
715 	rw_enter(&in6_multilock, RW_WRITER);
716 	/*
717 	 * See if address already in list.
718 	 */
719 	in6m = in6_lookup_multi(maddr6, ifp);
720 	if (in6m != NULL) {
721 		/*
722 		 * Found it; just increment the reference count.
723 		 */
724 		in6m->in6m_refcount++;
725 	} else {
726 		/*
727 		 * New address; allocate a new multicast record
728 		 * and link it into the interface's multicast list.
729 		 */
730 		in6m = malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
731 		if (in6m == NULL) {
732 			*errorp = ENOBUFS;
733 			goto out;
734 		}
735 
736 		in6m->in6m_addr = *maddr6;
737 		in6m->in6m_ifp = ifp;
738 		in6m->in6m_refcount = 1;
739 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
740 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
741 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
742 
743 		LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
744 
745 		/*
746 		 * Ask the network driver to update its multicast reception
747 		 * filter appropriately for the new address.
748 		 */
749 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
750 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
751 		if (*errorp) {
752 			callout_destroy(&in6m->in6m_timer_ch);
753 			LIST_REMOVE(in6m, in6m_entry);
754 			free(in6m, M_IPMADDR);
755 			in6m = NULL;
756 			goto out;
757 		}
758 
759 		in6m->in6m_timer = timer;
760 		if (in6m->in6m_timer > 0) {
761 			in6m->in6m_state = MLD_REPORTPENDING;
762 			mld_starttimer(in6m);
763 			goto out;
764 		}
765 
766 		/*
767 		 * Let MLD6 know that we have joined a new IP6 multicast
768 		 * group.
769 		 */
770 		mld_start_listening(in6m);
771 	}
772 out:
773 	rw_exit(&in6_multilock);
774 	return in6m;
775 }
776 
/*
 * Tear down and free a multicast record whose reference count has dropped
 * to zero: announce the leave via MLD, unlink the record from its
 * interface's list, purge references to it, update the driver's filter,
 * stop and destroy the timer callout, and free the memory.
 *
 * The caller must hold in6_multilock as writer.  Note that
 * mld_stop_listening() may call mld_sendpkt(), which releases and
 * re-acquires in6_multilock while the record is still on the list.
 */
static void
in6m_destroy(struct in6_multi *in6m)
{
	struct sockaddr_in6 sin6;

	KASSERT(rw_write_held(&in6_multilock));
	KASSERT(in6m->in6m_refcount == 0);

	/*
	 * No remaining claims to this record; let MLD6 know
	 * that we are leaving the multicast group.
	 */
	mld_stop_listening(in6m);

	/*
	 * Unlink from list.
	 * NOTE(review): in6_purge_multi() also removes records from the
	 * list without freeing them; confirm that a record purged there
	 * cannot reach this unconditional LIST_REMOVE a second time.
	 */
	LIST_REMOVE(in6m, in6m_entry);

	/*
	 * Delete all references of this multicasting group from
	 * the membership arrays
	 */
	in6_purge_mcast_references(in6m);

	/*
	 * Notify the network driver to update its multicast
	 * reception filter.  The result of the operation is ignored.
	 */
	sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
	if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));

	/*
	 * Tell mld_timeo we're halting the timer
	 * NOTE(review): in6_multilock is still write-held across
	 * callout_halt() here, while mld_timeo() acquires that lock;
	 * confirm the callout cannot be running at this point.
	 */
	in6m->in6m_timer = IN6M_TIMER_UNDEF;
	if (mutex_owned(softnet_lock))
		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
	else
		callout_halt(&in6m->in6m_timer_ch, NULL);
	callout_destroy(&in6m->in6m_timer_ch);

	free(in6m, M_IPMADDR);
}
819 
820 /*
821  * Delete a multicast address record.
822  */
823 void
824 in6_delmulti(struct in6_multi *in6m)
825 {
826 
827 	KASSERT(in6m->in6m_refcount > 0);
828 
829 	rw_enter(&in6_multilock, RW_WRITER);
830 	/*
831 	 * The caller should have a reference to in6m. So we don't need to care
832 	 * of releasing the lock in mld_stoptimer.
833 	 */
834 	mld_stoptimer(in6m);
835 	if (--in6m->in6m_refcount == 0)
836 		in6m_destroy(in6m);
837 	rw_exit(&in6_multilock);
838 }
839 
840 /*
841  * Look up the in6_multi record for a given IP6 multicast address
842  * on a given interface. If no matching record is found, "in6m"
843  * returns NULL.
844  */
845 struct in6_multi *
846 in6_lookup_multi(const struct in6_addr *addr, const struct ifnet *ifp)
847 {
848 	struct in6_multi *in6m;
849 
850 	KASSERT(rw_lock_held(&in6_multilock));
851 
852 	LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
853 		if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
854 			break;
855 	}
856 	return in6m;
857 }
858 
859 bool
860 in6_multi_group(const struct in6_addr *addr, const struct ifnet *ifp)
861 {
862 	bool ingroup;
863 
864 	rw_enter(&in6_multilock, RW_READER);
865 	ingroup = in6_lookup_multi(addr, ifp) != NULL;
866 	rw_exit(&in6_multilock);
867 
868 	return ingroup;
869 }
870 
/*
 * Purge in6_multi records associated to the interface.
 *
 * Every record still on ifp's list has its timer stopped and is unlinked
 * from the list.  The records themselves are not freed here.
 */
void
in6_purge_multi(struct ifnet *ifp)
{
	struct in6_multi *in6m, *next;

	rw_enter(&in6_multilock, RW_WRITER);
	LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
		/*
		 * Normally multicast addresses are already purged at this
		 * point. Remaining references aren't accessible via ifp,
		 * so what we can do here is to prevent ifp from being
		 * accessed via in6m by removing it from the list of ifp.
		 *
		 * NOTE(review): mld_stoptimer() releases in6_multilock
		 * while halting the callout, and `next' was fetched before
		 * that; confirm nothing can remove `next' from the list in
		 * that window.  Also note that in6m->in6m_ifp is left
		 * pointing at ifp after the record is unlinked.
		 */
		mld_stoptimer(in6m);
		LIST_REMOVE(in6m, in6m_entry);
	}
	rw_exit(&in6_multilock);
}
892 
/*
 * Acquire in6_multilock on behalf of code outside this file.
 * op is passed straight to rw_enter() (e.g. RW_READER or RW_WRITER).
 */
void
in6_multi_lock(int op)
{

	rw_enter(&in6_multilock, op);
}
899 
/*
 * Release in6_multilock; counterpart of in6_multi_lock().
 */
void
in6_multi_unlock(void)
{

	rw_exit(&in6_multilock);
}
906 
907 bool
908 in6_multi_locked(int op)
909 {
910 
911 	switch (op) {
912 	case RW_READER:
913 		return rw_read_held(&in6_multilock);
914 	case RW_WRITER:
915 		return rw_write_held(&in6_multilock);
916 	default:
917 		return rw_lock_held(&in6_multilock);
918 	}
919 }
920 
921 struct in6_multi_mship *
922 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr, int *errorp, int timer)
923 {
924 	struct in6_multi_mship *imm;
925 
926 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
927 	if (imm == NULL) {
928 		*errorp = ENOBUFS;
929 		return NULL;
930 	}
931 
932 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
933 	if (!imm->i6mm_maddr) {
934 		/* *errorp is already set */
935 		free(imm, M_IPMADDR);
936 		return NULL;
937 	}
938 	return imm;
939 }
940 
941 int
942 in6_leavegroup(struct in6_multi_mship *imm)
943 {
944 	struct in6_multi *in6m;
945 
946 	rw_enter(&in6_multilock, RW_READER);
947 	in6m = imm->i6mm_maddr;
948 	rw_exit(&in6_multilock);
949 	if (in6m != NULL) {
950 		in6_delmulti(in6m);
951 	}
952 	free(imm, M_IPMADDR);
953 	return 0;
954 }
955 
/*
 * DEPRECATED: keep it just to avoid breaking old sysctl users.
 *
 * Handler for the "multicast_kludge" sysctl node.  It expects exactly one
 * trailing name component (EINVAL otherwise) and always reports an empty
 * result by setting *oldlenp to 0.
 */
static int
in6_mkludge_sysctl(SYSCTLFN_ARGS)
{

	if (namelen != 1)
		return EINVAL;
	*oldlenp = 0;
	return 0;
}
968 
/*
 * Handler for the "multicast" sysctl node: dump the multicast memberships
 * of one interface.
 *
 * name[0] is the interface index (exactly one name component is required,
 * EINVAL otherwise; ENODEV if no such interface exists).
 *
 * With oldp == NULL only a size estimate is stored in *oldlenp.
 * Otherwise each membership is written to oldp as one record consisting of
 *	struct in6_addr	  address of the interface's first AF_INET6 ifaddr
 *	struct in6_addr	  multicast group address
 *	uint32_t	  reference count of the membership record
 * and *oldlenp is set to the number of bytes written.
 */
static int
in6_multicast_sysctl(SYSCTLFN_ARGS)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct in6_ifaddr *ia6;
	struct in6_multi *in6m;
	uint32_t tmp;
	int error;
	size_t written;
	struct psref psref, psref_ia;
	int bound, s;

	if (namelen != 1)
		return EINVAL;

	/* The multicast lock is held (as reader) for the whole operation. */
	rw_enter(&in6_multilock, RW_READER);

	bound = curlwp_bind();
	ifp = if_get_byindex(name[0], &psref);
	if (ifp == NULL) {
		curlwp_bindx(bound);
		rw_exit(&in6_multilock);
		return ENODEV;
	}

	if (oldp == NULL) {
		/*
		 * Size probe.  One record is counted per (ifaddr,
		 * membership) pair without filtering on the address
		 * family, whereas the dump below emits records for the
		 * first AF_INET6 address only, so this can over-estimate.
		 */
		*oldlenp = 0;
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, ifp) {
			LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
				*oldlenp += 2 * sizeof(struct in6_addr) +
				    sizeof(uint32_t);
			}
		}
		pserialize_read_exit(s);
		if_put(ifp, &psref);
		curlwp_bindx(bound);
		rw_exit(&in6_multilock);
		return 0;
	}

	error = 0;
	written = 0;
	s = pserialize_read_enter();
	IFADDR_READER_FOREACH(ifa, ifp) {
		if (ifa->ifa_addr->sa_family != AF_INET6)
			continue;

		/*
		 * Pin the address with a psref and leave the pserialize
		 * read section before copying data out.
		 */
		ifa_acquire(ifa, &psref_ia);
		pserialize_read_exit(s);

		ia6 = ifatoia6(ifa);
		LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
			/*
			 * Stop when the next record would not fit; the
			 * output is then truncated and error stays 0.
			 */
			if (written + 2 * sizeof(struct in6_addr) +
			    sizeof(uint32_t) > *oldlenp)
				goto done;
			/*
			 * XXX return the first IPv6 address to keep backward
			 * compatibility, however now multicast addresses
			 * don't belong to any IPv6 addresses so it should be
			 * unnecessary.
			 */
			error = sysctl_copyout(l, &ia6->ia_addr.sin6_addr,
			    oldp, sizeof(struct in6_addr));
			if (error)
				goto done;
			oldp = (char *)oldp + sizeof(struct in6_addr);
			written += sizeof(struct in6_addr);
			error = sysctl_copyout(l, &in6m->in6m_addr,
			    oldp, sizeof(struct in6_addr));
			if (error)
				goto done;
			oldp = (char *)oldp + sizeof(struct in6_addr);
			written += sizeof(struct in6_addr);
			tmp = in6m->in6m_refcount;
			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
			if (error)
				goto done;
			oldp = (char *)oldp + sizeof(tmp);
			written += sizeof(tmp);
		}

		/*
		 * Re-enter the read section only to balance the exit after
		 * the loop; only the first AF_INET6 address is processed.
		 */
		s = pserialize_read_enter();

		break;
	}
	pserialize_read_exit(s);
done:
	/*
	 * NOTE(review): when the interface has no AF_INET6 address, the
	 * loop above ends without ifa_acquire() having been called
	 * (presumably with ifa == NULL); this relies on ifa_release()
	 * tolerating that -- confirm.
	 */
	ifa_release(ifa, &psref_ia);
	if_put(ifp, &psref);
	curlwp_bindx(bound);
	rw_exit(&in6_multilock);
	*oldlenp = written;
	return error;
}
1065 
/*
 * Register the multicast-related sysctl nodes, recording them in clog:
 * the "inet6" node under CTL_NET/PF_INET6, and below it the "multicast"
 * node (served by in6_multicast_sysctl) and the deprecated
 * "multicast_kludge" node (served by in6_mkludge_sysctl).
 *
 * The return values of sysctl_createv() are not checked.
 */
void
in6_sysctl_multicast_setup(struct sysctllog **clog)
{

	/* Parent node. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "inet6", NULL,
		       NULL, 0, NULL, 0,
		       CTL_NET, PF_INET6, CTL_EOL);

	/* Per-interface membership dump. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "multicast",
		       SYSCTL_DESCR("Multicast information"),
		       in6_multicast_sysctl, 0, NULL, 0,
		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);

	/* Kept only for compatibility with old users; reports nothing. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "multicast_kludge",
		       SYSCTL_DESCR("multicast kludge information"),
		       in6_mkludge_sysctl, 0, NULL, 0,
		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
}
1090