xref: /netbsd-src/sys/netinet6/mld6.c (revision 796c32c94f6e154afc9de0f63da35c91bb739b45)
1 /*	$NetBSD: mld6.c,v 1.90 2017/11/17 07:37:12 ozaki-r Exp $	*/
2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1992, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Stephen Deering of Stanford University.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
65  */
66 
67 /*
68  * Copyright (c) 1988 Stephen Deering.
69  *
70  * This code is derived from software contributed to Berkeley by
71  * Stephen Deering of Stanford University.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
102  */
103 
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.90 2017/11/17 07:37:12 ozaki-r Exp $");
106 
107 #ifdef _KERNEL_OPT
108 #include "opt_inet.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/mbuf.h>
115 #include <sys/socket.h>
116 #include <sys/socketvar.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119 #include <sys/kernel.h>
120 #include <sys/callout.h>
121 #include <sys/cprng.h>
122 #include <sys/rwlock.h>
123 
124 #include <net/if.h>
125 
126 #include <netinet/in.h>
127 #include <netinet/in_var.h>
128 #include <netinet6/in6_var.h>
129 #include <netinet/ip6.h>
130 #include <netinet6/ip6_var.h>
131 #include <netinet6/scope6_var.h>
132 #include <netinet/icmp6.h>
133 #include <netinet6/icmp6_private.h>
134 #include <netinet6/mld6_var.h>
135 
136 #include <net/net_osdep.h>
137 
138 
/*
 * Reader/writer lock used throughout this file around lookups and updates
 * of the per-interface multicast address lists and in6_multi records.
 * Initialised by mld_init().
 */
static krwlock_t	in6_multilock __cacheline_aligned;

/*
 * Protocol constants
 */

/*
 * Time between repetitions of a node's initial report of interest in a
 * multicast address (in seconds).
 */
#define MLD_UNSOLICITED_REPORT_INTERVAL	10

/*
 * Packet options handed to ip6_output() for every MLD message sent by
 * mld_sendpkt(); filled in once by mld_init().
 */
static struct ip6_pktopts ip6_opts;

/* MLD state transitions on joining / leaving a group. */
static void mld_start_listening(struct in6_multi *);
static void mld_stop_listening(struct in6_multi *);

/* Packet construction / transmission and per-record timer helpers. */
static struct mld_hdr * mld_allocbuf(struct mbuf **, int, struct in6_multi *,
	int);
static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
static void mld_starttimer(struct in6_multi *);
static void mld_stoptimer(struct in6_multi *);
static u_long mld_timerresid(struct in6_multi *);

/* Reference counting of in6_multi records. */
static void in6m_ref(struct in6_multi *);
static void in6m_unref(struct in6_multi *);
static void in6m_destroy(struct in6_multi *);
166 
167 void
168 mld_init(void)
169 {
170 	static u_int8_t hbh_buf[8];
171 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
172 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
173 
174 	/* ip6h_nxt will be fill in later */
175 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
176 
177 	/* XXX: grotty hard coding... */
178 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
179 	hbh_buf[3] = 0;
180 	hbh_buf[4] = IP6OPT_RTALERT;
181 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
182 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
183 
184 	ip6_opts.ip6po_hbh = hbh;
185 	/* We will specify the hoplimit by a multicast option. */
186 	ip6_opts.ip6po_hlim = -1;
187 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
188 
189 	rw_init(&in6_multilock);
190 }
191 
192 static void
193 mld_starttimer(struct in6_multi *in6m)
194 {
195 	struct timeval now;
196 
197 	KASSERT(rw_write_held(&in6_multilock));
198 	KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
199 
200 	microtime(&now);
201 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
202 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
203 	    (in6m->in6m_timer % hz) * (1000000 / hz);
204 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
205 		in6m->in6m_timer_expire.tv_sec++;
206 		in6m->in6m_timer_expire.tv_usec -= 1000000;
207 	}
208 
209 	/* start or restart the timer */
210 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
211 }
212 
213 /*
214  * mld_stoptimer releases in6_multilock when calling callout_halt.
215  * The caller must ensure in6m won't be freed while releasing the lock.
216  */
217 static void
218 mld_stoptimer(struct in6_multi *in6m)
219 {
220 
221 	KASSERT(rw_write_held(&in6_multilock));
222 
223 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
224 		return;
225 
226 	rw_exit(&in6_multilock);
227 
228 	if (mutex_owned(softnet_lock))
229 		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
230 	else
231 		callout_halt(&in6m->in6m_timer_ch, NULL);
232 
233 	rw_enter(&in6_multilock, RW_WRITER);
234 
235 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
236 }
237 
238 static void
239 mld_timeo(void *arg)
240 {
241 	struct in6_multi *in6m = arg;
242 
243 	KASSERT(in6m->in6m_refcount > 0);
244 
245 	SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
246 	rw_enter(&in6_multilock, RW_WRITER);
247 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
248 		goto out;
249 
250 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
251 
252 	switch (in6m->in6m_state) {
253 	case MLD_REPORTPENDING:
254 		mld_start_listening(in6m);
255 		break;
256 	default:
257 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
258 		break;
259 	}
260 
261 out:
262 	rw_exit(&in6_multilock);
263 	SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
264 }
265 
266 static u_long
267 mld_timerresid(struct in6_multi *in6m)
268 {
269 	struct timeval now, diff;
270 
271 	microtime(&now);
272 
273 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
274 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
275 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
276 		return (0);
277 	}
278 	diff = in6m->in6m_timer_expire;
279 	diff.tv_sec -= now.tv_sec;
280 	diff.tv_usec -= now.tv_usec;
281 	if (diff.tv_usec < 0) {
282 		diff.tv_sec--;
283 		diff.tv_usec += 1000000;
284 	}
285 
286 	/* return the remaining time in milliseconds */
287 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
288 }
289 
290 static void
291 mld_start_listening(struct in6_multi *in6m)
292 {
293 	struct in6_addr all_in6;
294 
295 	KASSERT(rw_write_held(&in6_multilock));
296 
297 	/*
298 	 * RFC2710 page 10:
299 	 * The node never sends a Report or Done for the link-scope all-nodes
300 	 * address.
301 	 * MLD messages are never sent for multicast addresses whose scope is 0
302 	 * (reserved) or 1 (node-local).
303 	 */
304 	all_in6 = in6addr_linklocal_allnodes;
305 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
306 		/* XXX: this should not happen! */
307 		in6m->in6m_timer = 0;
308 		in6m->in6m_state = MLD_OTHERLISTENER;
309 	}
310 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
311 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
312 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
313 		in6m->in6m_state = MLD_OTHERLISTENER;
314 	} else {
315 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
316 		in6m->in6m_timer = cprng_fast32() %
317 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
318 		in6m->in6m_state = MLD_IREPORTEDLAST;
319 
320 		mld_starttimer(in6m);
321 	}
322 }
323 
324 static void
325 mld_stop_listening(struct in6_multi *in6m)
326 {
327 	struct in6_addr allnode, allrouter;
328 
329 	KASSERT(rw_lock_held(&in6_multilock));
330 
331 	allnode = in6addr_linklocal_allnodes;
332 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
333 		/* XXX: this should not happen! */
334 		return;
335 	}
336 	allrouter = in6addr_linklocal_allrouters;
337 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
338 		/* XXX impossible */
339 		return;
340 	}
341 
342 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
343 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
344 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
345 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
346 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
347 	}
348 }
349 
/*
 * Process a received MLD message.
 *
 * m   - the received packet.
 * off - offset within m at which the MLD header is fetched.
 *
 * Only messages with a link-local IPv6 source are processed.
 * MLD_LISTENER_QUERY (re)starts or fires the report timers of the matching
 * membership records on the receiving interface; MLD_LISTENER_REPORT stops
 * our timer for the reported group.  All other types are ignored.
 *
 * The mbuf is freed before returning, except on the path where
 * IP6_EXTHDR_GET() yields NULL (presumably the macro has already disposed
 * of it in that case -- confirm against its definition).
 */
void
mld_input(struct mbuf *m, int off)
{
	struct ip6_hdr *ip6;
	struct mld_hdr *mldh;
	struct ifnet *ifp;
	struct in6_multi *in6m = NULL;
	struct in6_addr mld_addr, all_in6;
	u_long timer = 0;	/* timer value in the MLD query header */
	struct psref psref;

	/* Hold a psref on the receiving interface for the whole function. */
	ifp = m_get_rcvif_psref(m, &psref);
	if (__predict_false(ifp == NULL))
		goto out;
	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
	if (mldh == NULL) {
		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
		goto out_nodrop;
	}

	/* source address validation */
	ip6 = mtod(m, struct ip6_hdr *);/* in case mpullup */
	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
		/*
		 * RFC3590 allows the IPv6 unspecified address as the source
		 * address of MLD report and done messages.  However, as this
		 * same document says, this special rule is for snooping
		 * switches and the RFC requires routers to discard MLD packets
		 * with the unspecified source address.  The RFC only talks
		 * about hosts receiving an MLD query or report in Security
		 * Considerations, but this is probably the correct intention.
		 * RFC3590 does not talk about other cases than link-local and
		 * the unspecified source addresses, but we believe the same
		 * rule should be applied.
		 * As a result, we only allow link-local addresses as the
		 * source address; otherwise, simply discard the packet.
		 */
#if 0
		/*
		 * XXX: do not log in an input path to avoid log flooding,
		 * though RFC3590 says "SHOULD log" if the source of a query
		 * is the unspecified address.
		 */
		char ip6bufs[INET6_ADDRSTRLEN];
		char ip6bufm[INET6_ADDRSTRLEN];
		log(LOG_INFO,
		    "mld_input: src %s is not link-local (grp=%s)\n",
		    IN6_PRINT(ip6bufs,&ip6->ip6_src),
		    IN6_PRINT(ip6bufm, &mldh->mld_addr));
#endif
		goto out;
	}

	/*
	 * make a copy for local work (in6_setscope() may modify the 1st arg)
	 */
	mld_addr = mldh->mld_addr;
	if (in6_setscope(&mld_addr, ifp, NULL)) {
		/* XXX: this should not happen! */
		goto out;
	}

	/*
	 * In the MLD specification, there are 3 states and a flag.
	 *
	 * In Non-Listener state, we simply don't have a membership record.
	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
	 * In Idle Listener state, our timer is not running
	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
	 *
	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
	 * we have heard a report from another member, or MLD_IREPORTEDLAST
	 * if we sent the last report.
	 */
	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY: {
		struct in6_multi *next;

		/* Queries received on a loopback interface are ignored. */
		if (ifp->if_flags & IFF_LOOPBACK)
			break;

		/* The queried address must be :: (general) or multicast. */
		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
			break;	/* print error or log stat? */

		all_in6 = in6addr_linklocal_allnodes;
		if (in6_setscope(&all_in6, ifp, NULL)) {
			/* XXX: this should not happen! */
			break;
		}

		/*
		 * - Start the timers in all of our membership records
		 *   that the query applies to for the interface on
		 *   which the query arrived excl. those that belong
		 *   to the "all-nodes" group (ff02::1).
		 * - Restart any timer that is already running but has
		 *   a value longer than the requested timeout.
		 * - Use the value specified in the query message as
		 *   the maximum timeout.
		 */
		timer = ntohs(mldh->mld_maxdelay);

		rw_enter(&in6_multilock, RW_WRITER);
		/*
		 * mld_stoptimer and mld_sendpkt release in6_multilock
		 * temporarily, so we have to prevent in6m from being freed
		 * while releasing the lock by having an extra reference to it.
		 *
		 * Also in6_purge_multi might remove items from the list of the
		 * ifp while releasing the lock. Fortunately in6_purge_multi is
		 * never executed as long as we have a psref of the ifp.
		 */
		LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
			/* Groups that are never reported are skipped. */
			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
			    IPV6_ADDR_SCOPE_LINKLOCAL)
				continue;

			if (in6m->in6m_state == MLD_REPORTPENDING)
				continue; /* we are not yet ready */

			/* A group-specific query only matches that group. */
			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
				continue;

			if (timer == 0) {
				in6m_ref(in6m);

				/* send a report immediately */
				mld_stoptimer(in6m);
				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
				in6m->in6m_state = MLD_IREPORTEDLAST;

				in6m_unref(in6m); /* May free in6m */
			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
			    mld_timerresid(in6m) > timer) {
				/*
				 * Idle, or the running timer would fire later
				 * than the query allows: pick a new random
				 * delay.  timer and mld_timerresid() are both
				 * compared as raw numbers here; the delay is
				 * scaled by hz / 1000 into ticks.
				 */
				in6m->in6m_timer =
				   1 + (cprng_fast32() % timer) * hz / 1000;
				mld_starttimer(in6m);
			}
		}
		rw_exit(&in6_multilock);
		break;
	    }

	case MLD_LISTENER_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group.  Reports
		 * can potentially get looped back if we are a multicast
		 * router, so discard reports sourced by me.
		 * Note that it is impossible to check IFF_LOOPBACK flag of
		 * ifp for this purpose, since ip6_mloopback pass the physical
		 * interface to looutput.
		 */
		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
			break;

		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
			break;

		/*
		 * If we belong to the group being reported, stop
		 * our timer for that group.
		 */
		rw_enter(&in6_multilock, RW_WRITER);
		in6m = in6_lookup_multi(&mld_addr, ifp);
		if (in6m) {
			/* Extra reference: mld_stoptimer drops the lock. */
			in6m_ref(in6m);
			mld_stoptimer(in6m); /* transit to idle state */
			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
			in6m_unref(in6m);
			in6m = NULL; /* in6m might be freed */
		}
		rw_exit(&in6_multilock);
		break;
	default:		/* this is impossible */
#if 0
		/*
		 * this case should be impossible because of filtering in
		 * icmp6_input().  But we explicitly disabled this part
		 * just in case.
		 */
		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
#endif
		break;
	}

out:
	m_freem(m);
out_nodrop:
	m_put_rcvif_psref(ifp, &psref);
}
544 
545 /*
546  * XXX mld_sendpkt must be called with in6_multilock held and
547  * will release in6_multilock before calling ip6_output and
548  * returning to avoid locking against myself in ip6_output.
549  */
550 static void
551 mld_sendpkt(struct in6_multi *in6m, int type,
552 	const struct in6_addr *dst)
553 {
554 	struct mbuf *mh;
555 	struct mld_hdr *mldh;
556 	struct ip6_hdr *ip6 = NULL;
557 	struct ip6_moptions im6o;
558 	struct in6_ifaddr *ia = NULL;
559 	struct ifnet *ifp = in6m->in6m_ifp;
560 	int ignflags;
561 	struct psref psref;
562 	int bound;
563 
564 	KASSERT(rw_write_held(&in6_multilock));
565 
566 	/*
567 	 * At first, find a link local address on the outgoing interface
568 	 * to use as the source address of the MLD packet.
569 	 * We do not reject tentative addresses for MLD report to deal with
570 	 * the case where we first join a link-local address.
571 	 */
572 	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
573 	bound = curlwp_bind();
574 	ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
575 	if (ia == NULL) {
576 		curlwp_bindx(bound);
577 		return;
578 	}
579 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
580 		ia6_release(ia, &psref);
581 		ia = NULL;
582 	}
583 
584 	/* Allocate two mbufs to store IPv6 header and MLD header */
585 	mldh = mld_allocbuf(&mh, sizeof(struct mld_hdr), in6m, type);
586 	if (mldh == NULL) {
587 		ia6_release(ia, &psref);
588 		curlwp_bindx(bound);
589 		return;
590 	}
591 
592 	/* fill src/dst here */
593  	ip6 = mtod(mh, struct ip6_hdr *);
594  	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
595  	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
596 	ia6_release(ia, &psref);
597 	curlwp_bindx(bound);
598 
599 	mldh->mld_addr = in6m->in6m_addr;
600 	in6_clearscope(&mldh->mld_addr); /* XXX */
601 	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
602 	    sizeof(struct mld_hdr));
603 
604 	/* construct multicast option */
605 	memset(&im6o, 0, sizeof(im6o));
606 	im6o.im6o_multicast_if_index = if_get_index(ifp);
607 	im6o.im6o_multicast_hlim = 1;
608 
609 	/*
610 	 * Request loopback of the report if we are acting as a multicast
611 	 * router, so that the process-level routing daemon can hear it.
612 	 */
613 	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
614 
615 	/* increment output statictics */
616 	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
617 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
618 	switch (type) {
619 	case MLD_LISTENER_QUERY:
620 		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
621 		break;
622 	case MLD_LISTENER_REPORT:
623 		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
624 		break;
625 	case MLD_LISTENER_DONE:
626 		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
627 		break;
628 	}
629 
630 	/* XXX we cannot call ip6_output with holding in6_multilock */
631 	rw_exit(&in6_multilock);
632 
633 	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
634 	    &im6o, NULL, NULL);
635 
636 	rw_enter(&in6_multilock, RW_WRITER);
637 }
638 
639 static struct mld_hdr *
640 mld_allocbuf(struct mbuf **mh, int len, struct in6_multi *in6m,
641     int type)
642 {
643 	struct mbuf *md;
644 	struct mld_hdr *mldh;
645 	struct ip6_hdr *ip6;
646 
647 	/*
648 	 * Allocate mbufs to store ip6 header and MLD header.
649 	 * We allocate 2 mbufs and make chain in advance because
650 	 * it is more convenient when inserting the hop-by-hop option later.
651 	 */
652 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
653 	if (*mh == NULL)
654 		return NULL;
655 	MGET(md, M_DONTWAIT, MT_DATA);
656 	if (md == NULL) {
657 		m_free(*mh);
658 		*mh = NULL;
659 		return NULL;
660 	}
661 	(*mh)->m_next = md;
662 	md->m_next = NULL;
663 
664 	m_reset_rcvif((*mh));
665 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + len;
666 	(*mh)->m_len = sizeof(struct ip6_hdr);
667 	MH_ALIGN(*mh, sizeof(struct ip6_hdr));
668 
669 	/* fill in the ip6 header */
670 	ip6 = mtod(*mh, struct ip6_hdr *);
671 	memset(ip6, 0, sizeof(*ip6));
672 	ip6->ip6_flow = 0;
673 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
674 	ip6->ip6_vfc |= IPV6_VERSION;
675 	/* ip6_plen will be set later */
676 	ip6->ip6_nxt = IPPROTO_ICMPV6;
677 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
678 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
679 
680 	/* fill in the MLD header as much as possible */
681 	md->m_len = len;
682 	mldh = mtod(md, struct mld_hdr *);
683 	memset(mldh, 0, len);
684 	mldh->mld_type = type;
685 	return mldh;
686 }
687 
688 static void
689 in6m_ref(struct in6_multi *in6m)
690 {
691 
692 	KASSERT(rw_write_held(&in6_multilock));
693 	in6m->in6m_refcount++;
694 }
695 
696 static void
697 in6m_unref(struct in6_multi *in6m)
698 {
699 
700 	KASSERT(rw_write_held(&in6_multilock));
701 	if (--in6m->in6m_refcount == 0)
702 		in6m_destroy(in6m);
703 }
704 
705 /*
706  * Add an address to the list of IP6 multicast addresses for a given interface.
707  */
708 struct	in6_multi *
709 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp,
710 	int *errorp, int timer)
711 {
712 	struct	sockaddr_in6 sin6;
713 	struct	in6_multi *in6m;
714 
715 	*errorp = 0;
716 
717 	rw_enter(&in6_multilock, RW_WRITER);
718 	/*
719 	 * See if address already in list.
720 	 */
721 	in6m = in6_lookup_multi(maddr6, ifp);
722 	if (in6m != NULL) {
723 		/*
724 		 * Found it; just increment the refrence count.
725 		 */
726 		in6m->in6m_refcount++;
727 	} else {
728 		/*
729 		 * New address; allocate a new multicast record
730 		 * and link it into the interface's multicast list.
731 		 */
732 		in6m = (struct in6_multi *)
733 			malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
734 		if (in6m == NULL) {
735 			*errorp = ENOBUFS;
736 			goto out;
737 		}
738 
739 		in6m->in6m_addr = *maddr6;
740 		in6m->in6m_ifp = ifp;
741 		in6m->in6m_refcount = 1;
742 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
743 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
744 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
745 
746 		LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
747 
748 		/*
749 		 * Ask the network driver to update its multicast reception
750 		 * filter appropriately for the new address.
751 		 */
752 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
753 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
754 		if (*errorp) {
755 			callout_destroy(&in6m->in6m_timer_ch);
756 			LIST_REMOVE(in6m, in6m_entry);
757 			free(in6m, M_IPMADDR);
758 			in6m = NULL;
759 			goto out;
760 		}
761 
762 		in6m->in6m_timer = timer;
763 		if (in6m->in6m_timer > 0) {
764 			in6m->in6m_state = MLD_REPORTPENDING;
765 			mld_starttimer(in6m);
766 			goto out;
767 		}
768 
769 		/*
770 		 * Let MLD6 know that we have joined a new IP6 multicast
771 		 * group.
772 		 */
773 		mld_start_listening(in6m);
774 	}
775 out:
776 	rw_exit(&in6_multilock);
777 	return in6m;
778 }
779 
780 static void
781 in6m_destroy(struct in6_multi *in6m)
782 {
783 	struct sockaddr_in6 sin6;
784 
785 	KASSERT(rw_write_held(&in6_multilock));
786 	KASSERT(in6m->in6m_refcount == 0);
787 
788 	/*
789 	 * No remaining claims to this record; let MLD6 know
790 	 * that we are leaving the multicast group.
791 	 */
792 	mld_stop_listening(in6m);
793 
794 	/*
795 	 * Unlink from list.
796 	 */
797 	LIST_REMOVE(in6m, in6m_entry);
798 
799 	/*
800 	 * Delete all references of this multicasting group from
801 	 * the membership arrays
802 	 */
803 	in6_purge_mcast_references(in6m);
804 
805 	/*
806 	 * Notify the network driver to update its multicast
807 	 * reception filter.
808 	 */
809 	sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
810 	if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
811 
812 	/* Tell mld_timeo we're halting the timer */
813 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
814 	if (mutex_owned(softnet_lock))
815 		callout_halt(&in6m->in6m_timer_ch, softnet_lock);
816 	else
817 		callout_halt(&in6m->in6m_timer_ch, NULL);
818 	callout_destroy(&in6m->in6m_timer_ch);
819 
820 	free(in6m, M_IPMADDR);
821 }
822 
823 /*
824  * Delete a multicast address record.
825  */
826 void
827 in6_delmulti(struct in6_multi *in6m)
828 {
829 
830 	KASSERT(in6m->in6m_refcount > 0);
831 
832 	rw_enter(&in6_multilock, RW_WRITER);
833 	/*
834 	 * The caller should have a reference to in6m. So we don't need to care
835 	 * of releasing the lock in mld_stoptimer.
836 	 */
837 	mld_stoptimer(in6m);
838 	if (--in6m->in6m_refcount == 0)
839 		in6m_destroy(in6m);
840 	rw_exit(&in6_multilock);
841 }
842 
843 /*
844  * Look up the in6_multi record for a given IP6 multicast address
845  * on a given interface. If no matching record is found, "in6m"
846  * returns NULL.
847  */
848 struct in6_multi *
849 in6_lookup_multi(const struct in6_addr *addr, const struct ifnet *ifp)
850 {
851 	struct in6_multi *in6m;
852 
853 	KASSERT(rw_lock_held(&in6_multilock));
854 
855 	LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
856 		if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
857 			break;
858 	}
859 	return in6m;
860 }
861 
862 bool
863 in6_multi_group(const struct in6_addr *addr, const struct ifnet *ifp)
864 {
865 	bool ingroup;
866 
867 	rw_enter(&in6_multilock, RW_READER);
868 	ingroup = in6_lookup_multi(addr, ifp) != NULL;
869 	rw_exit(&in6_multilock);
870 
871 	return ingroup;
872 }
873 
874 /*
875  * Purge in6_multi records associated to the interface.
876  */
877 void
878 in6_purge_multi(struct ifnet *ifp)
879 {
880 	struct in6_multi *in6m, *next;
881 
882 	rw_enter(&in6_multilock, RW_WRITER);
883 	LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
884 		/*
885 		 * Normally multicast addresses are already purged at this
886 		 * point. Remaining references aren't accessible via ifp,
887 		 * so what we can do here is to prevent ifp from being
888 		 * accessed via in6m by removing it from the list of ifp.
889 		 */
890 		mld_stoptimer(in6m);
891 		LIST_REMOVE(in6m, in6m_entry);
892 	}
893 	rw_exit(&in6_multilock);
894 }
895 
896 void
897 in6_multi_lock(int op)
898 {
899 
900 	rw_enter(&in6_multilock, op);
901 }
902 
903 void
904 in6_multi_unlock(void)
905 {
906 
907 	rw_exit(&in6_multilock);
908 }
909 
910 bool
911 in6_multi_locked(int op)
912 {
913 
914 	switch (op) {
915 	case RW_READER:
916 		return rw_read_held(&in6_multilock);
917 	case RW_WRITER:
918 		return rw_write_held(&in6_multilock);
919 	default:
920 		return rw_lock_held(&in6_multilock);
921 	}
922 }
923 
924 struct in6_multi_mship *
925 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
926 	int *errorp, int timer)
927 {
928 	struct in6_multi_mship *imm;
929 
930 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
931 	if (imm == NULL) {
932 		*errorp = ENOBUFS;
933 		return NULL;
934 	}
935 
936 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
937 	if (!imm->i6mm_maddr) {
938 		/* *errorp is already set */
939 		free(imm, M_IPMADDR);
940 		return NULL;
941 	}
942 	return imm;
943 }
944 
945 int
946 in6_leavegroup(struct in6_multi_mship *imm)
947 {
948 	struct in6_multi *in6m;
949 
950 	rw_enter(&in6_multilock, RW_READER);
951 	in6m = imm->i6mm_maddr;
952 	rw_exit(&in6_multilock);
953 	if (in6m != NULL) {
954 		in6_delmulti(in6m);
955 	}
956 	free(imm, M_IPMADDR);
957 	return 0;
958 }
959 
960 /*
961  * DEPRECATED: keep it just to avoid breaking old sysctl users.
962  */
963 static int
964 in6_mkludge_sysctl(SYSCTLFN_ARGS)
965 {
966 
967 	if (namelen != 1)
968 		return EINVAL;
969 	*oldlenp = 0;
970 	return 0;
971 }
972 
/*
 * sysctl handler (registered as the "multicast" node by
 * in6_sysctl_multicast_setup()) that dumps the multicast memberships of
 * one interface.
 *
 * Exactly one name component is expected: the interface index.
 * Returns EINVAL for any other name length and ENODEV if no interface has
 * that index.
 *
 * With oldp == NULL only a size is stored in *oldlenp.
 * Otherwise one record per membership is copied out, consisting of
 *   struct in6_addr  - address of the interface's first AF_INET6 ifaddr
 *   struct in6_addr  - the multicast group address
 *   uint32_t         - the membership's reference count
 * Copying stops without an error as soon as the next record would not fit
 * in the *oldlenp bytes supplied; *oldlenp is set to the number of bytes
 * actually written.
 *
 * in6_multilock is held as reader for the whole operation.
 */
static int
in6_multicast_sysctl(SYSCTLFN_ARGS)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct in6_ifaddr *ia6;
	struct in6_multi *in6m;
	uint32_t tmp;
	int error;
	size_t written;
	struct psref psref, psref_ia;
	int bound, s;

	if (namelen != 1)
		return EINVAL;

	rw_enter(&in6_multilock, RW_READER);

	bound = curlwp_bind();
	ifp = if_get_byindex(name[0], &psref);
	if (ifp == NULL) {
		curlwp_bindx(bound);
		rw_exit(&in6_multilock);
		return ENODEV;
	}

	if (oldp == NULL) {
		/*
		 * Size probe.
		 * NOTE(review): the nested loops count every membership once
		 * per interface address (of any family), whereas the copyout
		 * path below emits each membership only once, so this can
		 * over-estimate the size needed.
		 */
		*oldlenp = 0;
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, ifp) {
			LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
				*oldlenp += 2 * sizeof(struct in6_addr) +
				    sizeof(uint32_t);
			}
		}
		pserialize_read_exit(s);
		if_put(ifp, &psref);
		curlwp_bindx(bound);
		rw_exit(&in6_multilock);
		return 0;
	}

	error = 0;
	written = 0;
	s = pserialize_read_enter();
	IFADDR_READER_FOREACH(ifa, ifp) {
		if (ifa->ifa_addr->sa_family != AF_INET6)
			continue;

		/*
		 * First AF_INET6 address found: pin it with a psref and
		 * leave the pserialize read section before copying out.
		 */
		ifa_acquire(ifa, &psref_ia);
		pserialize_read_exit(s);

		ia6 = ifatoia6(ifa);
		LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
			/* Stop (error stays 0) when the next record won't fit. */
			if (written + 2 * sizeof(struct in6_addr) +
			    sizeof(uint32_t) > *oldlenp)
				goto done;
			/*
			 * XXX return the first IPv6 address to keep backward
			 * compatibility, however now multicast addresses
			 * don't belong to any IPv6 addresses so it should be
			 * unnecessary.
			 */
			error = sysctl_copyout(l, &ia6->ia_addr.sin6_addr,
			    oldp, sizeof(struct in6_addr));
			if (error)
				goto done;
			oldp = (char *)oldp + sizeof(struct in6_addr);
			written += sizeof(struct in6_addr);
			error = sysctl_copyout(l, &in6m->in6m_addr,
			    oldp, sizeof(struct in6_addr));
			if (error)
				goto done;
			oldp = (char *)oldp + sizeof(struct in6_addr);
			written += sizeof(struct in6_addr);
			tmp = in6m->in6m_refcount;
			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
			if (error)
				goto done;
			oldp = (char *)oldp + sizeof(tmp);
			written += sizeof(tmp);
		}

		/*
		 * Re-enter the read section so that the exit after the loop
		 * is balanced, then stop: only the first AF_INET6 address is
		 * used.
		 */
		s = pserialize_read_enter();

		break;
	}
	pserialize_read_exit(s);
done:
	/*
	 * ifa is the pinned address here, or whatever the iterator left in
	 * it when no AF_INET6 address was found (presumably NULL, which
	 * ifa_release() would then have to tolerate -- confirm).
	 */
	ifa_release(ifa, &psref_ia);
	if_put(ifp, &psref);
	curlwp_bindx(bound);
	rw_exit(&in6_multilock);
	*oldlenp = written;
	return error;
}
1069 
1070 void
1071 in6_sysctl_multicast_setup(struct sysctllog **clog)
1072 {
1073 
1074 	sysctl_createv(clog, 0, NULL, NULL,
1075 		       CTLFLAG_PERMANENT,
1076 		       CTLTYPE_NODE, "inet6", NULL,
1077 		       NULL, 0, NULL, 0,
1078 		       CTL_NET, PF_INET6, CTL_EOL);
1079 
1080 	sysctl_createv(clog, 0, NULL, NULL,
1081 		       CTLFLAG_PERMANENT,
1082 		       CTLTYPE_NODE, "multicast",
1083 		       SYSCTL_DESCR("Multicast information"),
1084 		       in6_multicast_sysctl, 0, NULL, 0,
1085 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1086 
1087 	sysctl_createv(clog, 0, NULL, NULL,
1088 		       CTLFLAG_PERMANENT,
1089 		       CTLTYPE_NODE, "multicast_kludge",
1090 		       SYSCTL_DESCR("multicast kludge information"),
1091 		       in6_mkludge_sysctl, 0, NULL, 0,
1092 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1093 }
1094