xref: /netbsd-src/sys/netinet6/mld6.c (revision 760452d22f8440db3936c5c562afd9b32c35cc6d)
1 /*	$NetBSD: mld6.c,v 1.101 2019/09/25 09:53:38 ozaki-r Exp $	*/
2 /*	$KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1992, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Stephen Deering of Stanford University.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
65  */
66 
67 /*
68  * Copyright (c) 1988 Stephen Deering.
69  *
70  * This code is derived from software contributed to Berkeley by
71  * Stephen Deering of Stanford University.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *	This product includes software developed by the University of
84  *	California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
102  */
103 
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.101 2019/09/25 09:53:38 ozaki-r Exp $");
106 
107 #ifdef _KERNEL_OPT
108 #include "opt_inet.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/mbuf.h>
115 #include <sys/socket.h>
116 #include <sys/socketvar.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119 #include <sys/kernel.h>
120 #include <sys/callout.h>
121 #include <sys/cprng.h>
122 #include <sys/rwlock.h>
123 
124 #include <net/if.h>
125 
126 #include <netinet/in.h>
127 #include <netinet/in_var.h>
128 #include <netinet6/in6_var.h>
129 #include <netinet/ip6.h>
130 #include <netinet6/ip6_var.h>
131 #include <netinet6/scope6_var.h>
132 #include <netinet/icmp6.h>
133 #include <netinet6/icmp6_private.h>
134 #include <netinet6/mld6_var.h>
135 
136 static krwlock_t	in6_multilock __cacheline_aligned;
137 
138 /*
139  * Protocol constants
140  */
141 
142 /*
143  * time between repetitions of a node's initial report of interest in a
144  * multicast address(in seconds)
145  */
146 #define MLD_UNSOLICITED_REPORT_INTERVAL	10
147 
148 static struct ip6_pktopts ip6_opts;
149 
150 static void mld_start_listening(struct in6_multi *);
151 static void mld_stop_listening(struct in6_multi *);
152 
153 static struct mld_hdr *mld_allocbuf(struct mbuf **, struct in6_multi *, int);
154 static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
155 static void mld_starttimer(struct in6_multi *);
156 static void mld_stoptimer(struct in6_multi *);
157 static u_long mld_timerresid(struct in6_multi *);
158 
159 static void in6m_ref(struct in6_multi *);
160 static void in6m_unref(struct in6_multi *);
161 static void in6m_destroy(struct in6_multi *);
162 
163 void
mld_init(void)164 mld_init(void)
165 {
166 	static u_int8_t hbh_buf[8];
167 	struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
168 	u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
169 
170 	/* ip6h_nxt will be fill in later */
171 	hbh->ip6h_len = 0;	/* (8 >> 3) - 1 */
172 
173 	/* XXX: grotty hard coding... */
174 	hbh_buf[2] = IP6OPT_PADN;	/* 2 byte padding */
175 	hbh_buf[3] = 0;
176 	hbh_buf[4] = IP6OPT_RTALERT;
177 	hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
178 	memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
179 
180 	ip6_opts.ip6po_hbh = hbh;
181 	/* We will specify the hoplimit by a multicast option. */
182 	ip6_opts.ip6po_hlim = -1;
183 	ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
184 
185 	rw_init(&in6_multilock);
186 }
187 
188 static void
mld_starttimer(struct in6_multi * in6m)189 mld_starttimer(struct in6_multi *in6m)
190 {
191 	struct timeval now;
192 
193 	KASSERT(rw_write_held(&in6_multilock));
194 	KASSERTMSG(in6m->in6m_timer != IN6M_TIMER_UNDEF,
195 	    "in6m_timer=%d", in6m->in6m_timer);
196 
197 	microtime(&now);
198 	in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
199 	in6m->in6m_timer_expire.tv_usec = now.tv_usec +
200 	    (in6m->in6m_timer % hz) * (1000000 / hz);
201 	if (in6m->in6m_timer_expire.tv_usec > 1000000) {
202 		in6m->in6m_timer_expire.tv_sec++;
203 		in6m->in6m_timer_expire.tv_usec -= 1000000;
204 	}
205 
206 	/* start or restart the timer */
207 	callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
208 }
209 
210 /*
211  * mld_stoptimer releases in6_multilock when calling callout_halt.
212  * The caller must ensure in6m won't be freed while releasing the lock.
213  */
214 static void
mld_stoptimer(struct in6_multi * in6m)215 mld_stoptimer(struct in6_multi *in6m)
216 {
217 
218 	KASSERT(rw_write_held(&in6_multilock));
219 
220 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
221 		return;
222 
223 	rw_exit(&in6_multilock);
224 
225 	callout_halt(&in6m->in6m_timer_ch, NULL);
226 
227 	rw_enter(&in6_multilock, RW_WRITER);
228 
229 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
230 }
231 
232 static void
mld_timeo(void * arg)233 mld_timeo(void *arg)
234 {
235 	struct in6_multi *in6m = arg;
236 
237 	KASSERTMSG(in6m->in6m_refcount > 0, "in6m_refcount=%d",
238 	    in6m->in6m_refcount);
239 
240 	KERNEL_LOCK_UNLESS_NET_MPSAFE();
241 	rw_enter(&in6_multilock, RW_WRITER);
242 	if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
243 		goto out;
244 
245 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
246 
247 	switch (in6m->in6m_state) {
248 	case MLD_REPORTPENDING:
249 		mld_start_listening(in6m);
250 		break;
251 	default:
252 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
253 		break;
254 	}
255 
256 out:
257 	rw_exit(&in6_multilock);
258 	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
259 }
260 
261 static u_long
mld_timerresid(struct in6_multi * in6m)262 mld_timerresid(struct in6_multi *in6m)
263 {
264 	struct timeval now, diff;
265 
266 	microtime(&now);
267 
268 	if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
269 	    (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
270 	    now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
271 		return (0);
272 	}
273 	diff = in6m->in6m_timer_expire;
274 	diff.tv_sec -= now.tv_sec;
275 	diff.tv_usec -= now.tv_usec;
276 	if (diff.tv_usec < 0) {
277 		diff.tv_sec--;
278 		diff.tv_usec += 1000000;
279 	}
280 
281 	/* return the remaining time in milliseconds */
282 	return diff.tv_sec * 1000 + diff.tv_usec / 1000;
283 }
284 
285 static void
mld_start_listening(struct in6_multi * in6m)286 mld_start_listening(struct in6_multi *in6m)
287 {
288 	struct in6_addr all_in6;
289 
290 	KASSERT(rw_write_held(&in6_multilock));
291 
292 	/*
293 	 * RFC2710 page 10:
294 	 * The node never sends a Report or Done for the link-scope all-nodes
295 	 * address.
296 	 * MLD messages are never sent for multicast addresses whose scope is 0
297 	 * (reserved) or 1 (node-local).
298 	 */
299 	all_in6 = in6addr_linklocal_allnodes;
300 	if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
301 		/* XXX: this should not happen! */
302 		in6m->in6m_timer = 0;
303 		in6m->in6m_state = MLD_OTHERLISTENER;
304 	}
305 	if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
306 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
307 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
308 		in6m->in6m_state = MLD_OTHERLISTENER;
309 	} else {
310 		mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
311 		in6m->in6m_timer = cprng_fast32() %
312 		    (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
313 		in6m->in6m_state = MLD_IREPORTEDLAST;
314 
315 		mld_starttimer(in6m);
316 	}
317 }
318 
319 static void
mld_stop_listening(struct in6_multi * in6m)320 mld_stop_listening(struct in6_multi *in6m)
321 {
322 	struct in6_addr allnode, allrouter;
323 
324 	KASSERT(rw_lock_held(&in6_multilock));
325 
326 	allnode = in6addr_linklocal_allnodes;
327 	if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
328 		/* XXX: this should not happen! */
329 		return;
330 	}
331 	allrouter = in6addr_linklocal_allrouters;
332 	if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
333 		/* XXX impossible */
334 		return;
335 	}
336 
337 	if (in6m->in6m_state == MLD_IREPORTEDLAST &&
338 	    (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
339 	    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
340 	    IPV6_ADDR_SCOPE_INTFACELOCAL) {
341 		mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
342 	}
343 }
344 
345 void
mld_input(struct mbuf * m,int off)346 mld_input(struct mbuf *m, int off)
347 {
348 	struct ip6_hdr *ip6;
349 	struct mld_hdr *mldh;
350 	struct ifnet *ifp;
351 	struct in6_multi *in6m = NULL;
352 	struct in6_addr mld_addr, all_in6;
353 	u_long timer = 0;	/* timer value in the MLD query header */
354 	struct psref psref;
355 
356 	ifp = m_get_rcvif_psref(m, &psref);
357 	if (__predict_false(ifp == NULL))
358 		goto out;
359 	IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
360 	if (mldh == NULL) {
361 		ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
362 		goto out_nodrop;
363 	}
364 
365 	ip6 = mtod(m, struct ip6_hdr *);
366 
367 	/* source address validation */
368 	if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
369 		/*
370 		 * RFC3590 allows the IPv6 unspecified address as the source
371 		 * address of MLD report and done messages.  However, as this
372 		 * same document says, this special rule is for snooping
373 		 * switches and the RFC requires routers to discard MLD packets
374 		 * with the unspecified source address.  The RFC only talks
375 		 * about hosts receiving an MLD query or report in Security
376 		 * Considerations, but this is probably the correct intention.
377 		 * RFC3590 does not talk about other cases than link-local and
378 		 * the unspecified source addresses, but we believe the same
379 		 * rule should be applied.
380 		 * As a result, we only allow link-local addresses as the
381 		 * source address; otherwise, simply discard the packet.
382 		 */
383 #if 0
384 		/*
385 		 * XXX: do not log in an input path to avoid log flooding,
386 		 * though RFC3590 says "SHOULD log" if the source of a query
387 		 * is the unspecified address.
388 		 */
389 		char ip6bufs[INET6_ADDRSTRLEN];
390 		char ip6bufm[INET6_ADDRSTRLEN];
391 		log(LOG_INFO,
392 		    "mld_input: src %s is not link-local (grp=%s)\n",
393 		    IN6_PRINT(ip6bufs,&ip6->ip6_src),
394 		    IN6_PRINT(ip6bufm, &mldh->mld_addr));
395 #endif
396 		goto out;
397 	}
398 
399 	/*
400 	 * make a copy for local work (in6_setscope() may modify the 1st arg)
401 	 */
402 	mld_addr = mldh->mld_addr;
403 	if (in6_setscope(&mld_addr, ifp, NULL)) {
404 		/* XXX: this should not happen! */
405 		goto out;
406 	}
407 
408 	/*
409 	 * In the MLD specification, there are 3 states and a flag.
410 	 *
411 	 * In Non-Listener state, we simply don't have a membership record.
412 	 * In Delaying Listener state, our timer is running (in6m->in6m_timer)
413 	 * In Idle Listener state, our timer is not running
414 	 * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
415 	 *
416 	 * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
417 	 * we have heard a report from another member, or MLD_IREPORTEDLAST
418 	 * if we sent the last report.
419 	 */
420 	switch (mldh->mld_type) {
421 	case MLD_LISTENER_QUERY: {
422 		struct in6_multi *next;
423 
424 		if (ifp->if_flags & IFF_LOOPBACK)
425 			break;
426 
427 		if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
428 		    !IN6_IS_ADDR_MULTICAST(&mld_addr))
429 			break;	/* print error or log stat? */
430 
431 		all_in6 = in6addr_linklocal_allnodes;
432 		if (in6_setscope(&all_in6, ifp, NULL)) {
433 			/* XXX: this should not happen! */
434 			break;
435 		}
436 
437 		/*
438 		 * - Start the timers in all of our membership records
439 		 *   that the query applies to for the interface on
440 		 *   which the query arrived excl. those that belong
441 		 *   to the "all-nodes" group (ff02::1).
442 		 * - Restart any timer that is already running but has
443 		 *   a value longer than the requested timeout.
444 		 * - Use the value specified in the query message as
445 		 *   the maximum timeout.
446 		 */
447 		timer = ntohs(mldh->mld_maxdelay);
448 
449 		rw_enter(&in6_multilock, RW_WRITER);
450 		/*
451 		 * mld_stoptimer and mld_sendpkt release in6_multilock
452 		 * temporarily, so we have to prevent in6m from being freed
453 		 * while releasing the lock by having an extra reference to it.
454 		 *
455 		 * Also in6_purge_multi might remove items from the list of the
456 		 * ifp while releasing the lock. Fortunately in6_purge_multi is
457 		 * never executed as long as we have a psref of the ifp.
458 		 */
459 		LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
460 			if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
461 			    IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
462 			    IPV6_ADDR_SCOPE_LINKLOCAL)
463 				continue;
464 
465 			if (in6m->in6m_state == MLD_REPORTPENDING)
466 				continue; /* we are not yet ready */
467 
468 			if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
469 			    !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
470 				continue;
471 
472 			if (timer == 0) {
473 				in6m_ref(in6m);
474 
475 				/* send a report immediately */
476 				mld_stoptimer(in6m);
477 				mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
478 				in6m->in6m_state = MLD_IREPORTEDLAST;
479 
480 				in6m_unref(in6m); /* May free in6m */
481 			} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
482 			    mld_timerresid(in6m) > timer) {
483 				in6m->in6m_timer =
484 				   1 + (cprng_fast32() % timer) * hz / 1000;
485 				mld_starttimer(in6m);
486 			}
487 		}
488 		rw_exit(&in6_multilock);
489 		break;
490 	    }
491 
492 	case MLD_LISTENER_REPORT:
493 		/*
494 		 * For fast leave to work, we have to know that we are the
495 		 * last person to send a report for this group.  Reports
496 		 * can potentially get looped back if we are a multicast
497 		 * router, so discard reports sourced by me.
498 		 * Note that it is impossible to check IFF_LOOPBACK flag of
499 		 * ifp for this purpose, since ip6_mloopback pass the physical
500 		 * interface to looutput.
501 		 */
502 		if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
503 			break;
504 
505 		if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
506 			break;
507 
508 		/*
509 		 * If we belong to the group being reported, stop
510 		 * our timer for that group.
511 		 */
512 		rw_enter(&in6_multilock, RW_WRITER);
513 		in6m = in6_lookup_multi(&mld_addr, ifp);
514 		if (in6m) {
515 			in6m_ref(in6m);
516 			mld_stoptimer(in6m); /* transit to idle state */
517 			in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
518 			in6m_unref(in6m);
519 			in6m = NULL; /* in6m might be freed */
520 		}
521 		rw_exit(&in6_multilock);
522 		break;
523 	default:		/* this is impossible */
524 #if 0
525 		/*
526 		 * this case should be impossible because of filtering in
527 		 * icmp6_input().  But we explicitly disabled this part
528 		 * just in case.
529 		 */
530 		log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
531 #endif
532 		break;
533 	}
534 
535 out:
536 	m_freem(m);
537 out_nodrop:
538 	m_put_rcvif_psref(ifp, &psref);
539 }
540 
541 /*
542  * XXX mld_sendpkt must be called with in6_multilock held and
543  * will release in6_multilock before calling ip6_output and
544  * returning to avoid locking against myself in ip6_output.
545  */
546 static void
mld_sendpkt(struct in6_multi * in6m,int type,const struct in6_addr * dst)547 mld_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
548 {
549 	struct mbuf *mh;
550 	struct mld_hdr *mldh;
551 	struct ip6_hdr *ip6 = NULL;
552 	struct ip6_moptions im6o;
553 	struct in6_ifaddr *ia = NULL;
554 	struct ifnet *ifp = in6m->in6m_ifp;
555 	int ignflags;
556 	struct psref psref;
557 	int bound;
558 
559 	KASSERT(rw_write_held(&in6_multilock));
560 
561 	/*
562 	 * At first, find a link local address on the outgoing interface
563 	 * to use as the source address of the MLD packet.
564 	 * We do not reject tentative addresses for MLD report to deal with
565 	 * the case where we first join a link-local address.
566 	 */
567 	ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
568 	bound = curlwp_bind();
569 	ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
570 	if (ia == NULL) {
571 		curlwp_bindx(bound);
572 		return;
573 	}
574 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
575 		ia6_release(ia, &psref);
576 		ia = NULL;
577 	}
578 
579 	/* Allocate two mbufs to store IPv6 header and MLD header */
580 	mldh = mld_allocbuf(&mh, in6m, type);
581 	if (mldh == NULL) {
582 		ia6_release(ia, &psref);
583 		curlwp_bindx(bound);
584 		return;
585 	}
586 
587 	/* fill src/dst here */
588 	ip6 = mtod(mh, struct ip6_hdr *);
589 	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
590 	ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
591 	ia6_release(ia, &psref);
592 	curlwp_bindx(bound);
593 
594 	mldh->mld_addr = in6m->in6m_addr;
595 	in6_clearscope(&mldh->mld_addr); /* XXX */
596 	mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
597 	    sizeof(struct mld_hdr));
598 
599 	/* construct multicast option */
600 	memset(&im6o, 0, sizeof(im6o));
601 	im6o.im6o_multicast_if_index = if_get_index(ifp);
602 	im6o.im6o_multicast_hlim = 1;
603 
604 	/*
605 	 * Request loopback of the report if we are acting as a multicast
606 	 * router, so that the process-level routing daemon can hear it.
607 	 */
608 	im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
609 
610 	/* increment output statistics */
611 	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
612 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
613 	switch (type) {
614 	case MLD_LISTENER_QUERY:
615 		icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
616 		break;
617 	case MLD_LISTENER_REPORT:
618 		icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
619 		break;
620 	case MLD_LISTENER_DONE:
621 		icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
622 		break;
623 	}
624 
625 	/* XXX we cannot call ip6_output with holding in6_multilock */
626 	rw_exit(&in6_multilock);
627 
628 	ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
629 	    &im6o, NULL, NULL);
630 
631 	rw_enter(&in6_multilock, RW_WRITER);
632 }
633 
634 static struct mld_hdr *
mld_allocbuf(struct mbuf ** mh,struct in6_multi * in6m,int type)635 mld_allocbuf(struct mbuf **mh, struct in6_multi *in6m, int type)
636 {
637 	struct mbuf *md;
638 	struct mld_hdr *mldh;
639 	struct ip6_hdr *ip6;
640 
641 	/*
642 	 * Allocate mbufs to store ip6 header and MLD header.
643 	 * We allocate 2 mbufs and make chain in advance because
644 	 * it is more convenient when inserting the hop-by-hop option later.
645 	 */
646 	MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
647 	if (*mh == NULL)
648 		return NULL;
649 	MGET(md, M_DONTWAIT, MT_DATA);
650 	if (md == NULL) {
651 		m_free(*mh);
652 		*mh = NULL;
653 		return NULL;
654 	}
655 	(*mh)->m_next = md;
656 	md->m_next = NULL;
657 
658 	m_reset_rcvif((*mh));
659 	(*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
660 	(*mh)->m_len = sizeof(struct ip6_hdr);
661 	m_align(*mh, sizeof(struct ip6_hdr));
662 
663 	/* fill in the ip6 header */
664 	ip6 = mtod(*mh, struct ip6_hdr *);
665 	memset(ip6, 0, sizeof(*ip6));
666 	ip6->ip6_flow = 0;
667 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
668 	ip6->ip6_vfc |= IPV6_VERSION;
669 	/* ip6_plen will be set later */
670 	ip6->ip6_nxt = IPPROTO_ICMPV6;
671 	/* ip6_hlim will be set by im6o.im6o_multicast_hlim */
672 	/* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
673 
674 	/* fill in the MLD header as much as possible */
675 	md->m_len = sizeof(struct mld_hdr);
676 	mldh = mtod(md, struct mld_hdr *);
677 	memset(mldh, 0, sizeof(struct mld_hdr));
678 	mldh->mld_type = type;
679 	return mldh;
680 }
681 
682 static void
in6m_ref(struct in6_multi * in6m)683 in6m_ref(struct in6_multi *in6m)
684 {
685 
686 	KASSERT(rw_write_held(&in6_multilock));
687 	in6m->in6m_refcount++;
688 }
689 
690 static void
in6m_unref(struct in6_multi * in6m)691 in6m_unref(struct in6_multi *in6m)
692 {
693 
694 	KASSERT(rw_write_held(&in6_multilock));
695 	if (--in6m->in6m_refcount == 0)
696 		in6m_destroy(in6m);
697 }
698 
699 /*
700  * Add an address to the list of IP6 multicast addresses for a given interface.
701  */
702 struct	in6_multi *
in6_addmulti(struct in6_addr * maddr6,struct ifnet * ifp,int * errorp,int timer)703 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp, int *errorp,
704     int timer)
705 {
706 	struct	sockaddr_in6 sin6;
707 	struct	in6_multi *in6m;
708 
709 	*errorp = 0;
710 
711 	rw_enter(&in6_multilock, RW_WRITER);
712 	/*
713 	 * See if address already in list.
714 	 */
715 	in6m = in6_lookup_multi(maddr6, ifp);
716 	if (in6m != NULL) {
717 		/*
718 		 * Found it; just increment the reference count.
719 		 */
720 		in6m->in6m_refcount++;
721 	} else {
722 		/*
723 		 * New address; allocate a new multicast record
724 		 * and link it into the interface's multicast list.
725 		 */
726 		in6m = malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
727 		if (in6m == NULL) {
728 			*errorp = ENOBUFS;
729 			goto out;
730 		}
731 
732 		in6m->in6m_addr = *maddr6;
733 		in6m->in6m_ifp = ifp;
734 		in6m->in6m_refcount = 1;
735 		in6m->in6m_timer = IN6M_TIMER_UNDEF;
736 		callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
737 		callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
738 
739 		LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
740 
741 		/*
742 		 * Ask the network driver to update its multicast reception
743 		 * filter appropriately for the new address.
744 		 */
745 		sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
746 		*errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
747 		if (*errorp) {
748 			callout_destroy(&in6m->in6m_timer_ch);
749 			LIST_REMOVE(in6m, in6m_entry);
750 			free(in6m, M_IPMADDR);
751 			in6m = NULL;
752 			goto out;
753 		}
754 
755 		in6m->in6m_timer = timer;
756 		if (in6m->in6m_timer > 0) {
757 			in6m->in6m_state = MLD_REPORTPENDING;
758 			mld_starttimer(in6m);
759 			goto out;
760 		}
761 
762 		/*
763 		 * Let MLD6 know that we have joined a new IP6 multicast
764 		 * group.
765 		 */
766 		mld_start_listening(in6m);
767 	}
768 out:
769 	rw_exit(&in6_multilock);
770 	return in6m;
771 }
772 
773 static void
in6m_destroy(struct in6_multi * in6m)774 in6m_destroy(struct in6_multi *in6m)
775 {
776 	struct sockaddr_in6 sin6;
777 
778 	KASSERT(rw_write_held(&in6_multilock));
779 	KASSERTMSG(in6m->in6m_refcount == 0, "in6m_refcount=%d",
780 	    in6m->in6m_refcount);
781 
782 	/*
783 	 * Unlink from list if it's listed.  This must be done before
784 	 * mld_stop_listening because it releases in6_multilock and that allows
785 	 * someone to look up the removing in6m from the list and add a
786 	 * reference to the entry unexpectedly.
787 	 */
788 	if (in6_lookup_multi(&in6m->in6m_addr, in6m->in6m_ifp) != NULL)
789 		LIST_REMOVE(in6m, in6m_entry);
790 
791 	/*
792 	 * No remaining claims to this record; let MLD6 know
793 	 * that we are leaving the multicast group.
794 	 */
795 	mld_stop_listening(in6m);
796 
797 	/*
798 	 * Delete all references of this multicasting group from
799 	 * the membership arrays
800 	 */
801 	in6_purge_mcast_references(in6m);
802 
803 	/*
804 	 * Notify the network driver to update its multicast
805 	 * reception filter.
806 	 */
807 	sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
808 	if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
809 
810 	/* Tell mld_timeo we're halting the timer */
811 	in6m->in6m_timer = IN6M_TIMER_UNDEF;
812 
813 	rw_exit(&in6_multilock);
814 	callout_halt(&in6m->in6m_timer_ch, NULL);
815 	callout_destroy(&in6m->in6m_timer_ch);
816 
817 	free(in6m, M_IPMADDR);
818 	rw_enter(&in6_multilock, RW_WRITER);
819 }
820 
821 /*
822  * Delete a multicast address record.
823  */
824 void
in6_delmulti_locked(struct in6_multi * in6m)825 in6_delmulti_locked(struct in6_multi *in6m)
826 {
827 
828 	KASSERT(rw_write_held(&in6_multilock));
829 	KASSERTMSG(in6m->in6m_refcount > 0, "in6m_refcount=%d",
830 	    in6m->in6m_refcount);
831 
832 	/*
833 	 * The caller should have a reference to in6m. So we don't need to care
834 	 * of releasing the lock in mld_stoptimer.
835 	 */
836 	mld_stoptimer(in6m);
837 	if (--in6m->in6m_refcount == 0)
838 		in6m_destroy(in6m);
839 }
840 
841 void
in6_delmulti(struct in6_multi * in6m)842 in6_delmulti(struct in6_multi *in6m)
843 {
844 
845 	rw_enter(&in6_multilock, RW_WRITER);
846 	in6_delmulti_locked(in6m);
847 	rw_exit(&in6_multilock);
848 }
849 
850 /*
851  * Look up the in6_multi record for a given IP6 multicast address
852  * on a given interface. If no matching record is found, "in6m"
853  * returns NULL.
854  */
855 struct in6_multi *
in6_lookup_multi(const struct in6_addr * addr,const struct ifnet * ifp)856 in6_lookup_multi(const struct in6_addr *addr, const struct ifnet *ifp)
857 {
858 	struct in6_multi *in6m;
859 
860 	KASSERT(rw_lock_held(&in6_multilock));
861 
862 	LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
863 		if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
864 			break;
865 	}
866 	return in6m;
867 }
868 
869 void
in6_lookup_and_delete_multi(const struct in6_addr * addr,const struct ifnet * ifp)870 in6_lookup_and_delete_multi(const struct in6_addr *addr,
871     const struct ifnet *ifp)
872 {
873 	struct in6_multi *in6m;
874 
875 	rw_enter(&in6_multilock, RW_WRITER);
876 	in6m = in6_lookup_multi(addr, ifp);
877 	if (in6m != NULL)
878 		in6_delmulti_locked(in6m);
879 	rw_exit(&in6_multilock);
880 }
881 
882 bool
in6_multi_group(const struct in6_addr * addr,const struct ifnet * ifp)883 in6_multi_group(const struct in6_addr *addr, const struct ifnet *ifp)
884 {
885 	bool ingroup;
886 
887 	rw_enter(&in6_multilock, RW_READER);
888 	ingroup = in6_lookup_multi(addr, ifp) != NULL;
889 	rw_exit(&in6_multilock);
890 
891 	return ingroup;
892 }
893 
894 /*
895  * Purge in6_multi records associated to the interface.
896  */
897 void
in6_purge_multi(struct ifnet * ifp)898 in6_purge_multi(struct ifnet *ifp)
899 {
900 	struct in6_multi *in6m, *next;
901 
902 	rw_enter(&in6_multilock, RW_WRITER);
903 	LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
904 		LIST_REMOVE(in6m, in6m_entry);
905 		/*
906 		 * Normally multicast addresses are already purged at this
907 		 * point. Remaining references aren't accessible via ifp,
908 		 * so what we can do here is to prevent ifp from being
909 		 * accessed via in6m by removing it from the list of ifp.
910 		 */
911 		mld_stoptimer(in6m);
912 	}
913 	rw_exit(&in6_multilock);
914 }
915 
916 void
in6_multi_lock(int op)917 in6_multi_lock(int op)
918 {
919 
920 	rw_enter(&in6_multilock, op);
921 }
922 
923 void
in6_multi_unlock(void)924 in6_multi_unlock(void)
925 {
926 
927 	rw_exit(&in6_multilock);
928 }
929 
930 bool
in6_multi_locked(int op)931 in6_multi_locked(int op)
932 {
933 
934 	switch (op) {
935 	case RW_READER:
936 		return rw_read_held(&in6_multilock);
937 	case RW_WRITER:
938 		return rw_write_held(&in6_multilock);
939 	default:
940 		return rw_lock_held(&in6_multilock);
941 	}
942 }
943 
944 struct in6_multi_mship *
in6_joingroup(struct ifnet * ifp,struct in6_addr * addr,int * errorp,int timer)945 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr, int *errorp, int timer)
946 {
947 	struct in6_multi_mship *imm;
948 
949 	imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
950 	if (imm == NULL) {
951 		*errorp = ENOBUFS;
952 		return NULL;
953 	}
954 
955 	imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
956 	if (!imm->i6mm_maddr) {
957 		/* *errorp is already set */
958 		free(imm, M_IPMADDR);
959 		return NULL;
960 	}
961 	return imm;
962 }
963 
964 int
in6_leavegroup(struct in6_multi_mship * imm)965 in6_leavegroup(struct in6_multi_mship *imm)
966 {
967 	struct in6_multi *in6m;
968 
969 	rw_enter(&in6_multilock, RW_WRITER);
970 	in6m = imm->i6mm_maddr;
971 	imm->i6mm_maddr = NULL;
972 	if (in6m != NULL) {
973 		in6_delmulti_locked(in6m);
974 	}
975 	rw_exit(&in6_multilock);
976 	free(imm, M_IPMADDR);
977 	return 0;
978 }
979 
980 /*
981  * DEPRECATED: keep it just to avoid breaking old sysctl users.
982  */
983 static int
in6_mkludge_sysctl(SYSCTLFN_ARGS)984 in6_mkludge_sysctl(SYSCTLFN_ARGS)
985 {
986 
987 	if (namelen != 1)
988 		return EINVAL;
989 	*oldlenp = 0;
990 	return 0;
991 }
992 
993 static int
in6_multicast_sysctl(SYSCTLFN_ARGS)994 in6_multicast_sysctl(SYSCTLFN_ARGS)
995 {
996 	struct ifnet *ifp;
997 	struct ifaddr *ifa;
998 	struct in6_ifaddr *ia6;
999 	struct in6_multi *in6m;
1000 	uint32_t tmp;
1001 	int error;
1002 	size_t written;
1003 	struct psref psref, psref_ia;
1004 	int bound, s;
1005 
1006 	if (namelen != 1)
1007 		return EINVAL;
1008 
1009 	rw_enter(&in6_multilock, RW_READER);
1010 
1011 	bound = curlwp_bind();
1012 	ifp = if_get_byindex(name[0], &psref);
1013 	if (ifp == NULL) {
1014 		curlwp_bindx(bound);
1015 		rw_exit(&in6_multilock);
1016 		return ENODEV;
1017 	}
1018 
1019 	if (oldp == NULL) {
1020 		*oldlenp = 0;
1021 		s = pserialize_read_enter();
1022 		IFADDR_READER_FOREACH(ifa, ifp) {
1023 			LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
1024 				*oldlenp += 2 * sizeof(struct in6_addr) +
1025 				    sizeof(uint32_t);
1026 			}
1027 		}
1028 		pserialize_read_exit(s);
1029 		if_put(ifp, &psref);
1030 		curlwp_bindx(bound);
1031 		rw_exit(&in6_multilock);
1032 		return 0;
1033 	}
1034 
1035 	error = 0;
1036 	written = 0;
1037 	s = pserialize_read_enter();
1038 	IFADDR_READER_FOREACH(ifa, ifp) {
1039 		if (ifa->ifa_addr->sa_family != AF_INET6)
1040 			continue;
1041 
1042 		ifa_acquire(ifa, &psref_ia);
1043 		pserialize_read_exit(s);
1044 
1045 		ia6 = ifatoia6(ifa);
1046 		LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
1047 			if (written + 2 * sizeof(struct in6_addr) +
1048 			    sizeof(uint32_t) > *oldlenp)
1049 				goto done;
1050 			/*
1051 			 * XXX return the first IPv6 address to keep backward
1052 			 * compatibility, however now multicast addresses
1053 			 * don't belong to any IPv6 addresses so it should be
1054 			 * unnecessary.
1055 			 */
1056 			error = sysctl_copyout(l, &ia6->ia_addr.sin6_addr,
1057 			    oldp, sizeof(struct in6_addr));
1058 			if (error)
1059 				goto done;
1060 			oldp = (char *)oldp + sizeof(struct in6_addr);
1061 			written += sizeof(struct in6_addr);
1062 			error = sysctl_copyout(l, &in6m->in6m_addr,
1063 			    oldp, sizeof(struct in6_addr));
1064 			if (error)
1065 				goto done;
1066 			oldp = (char *)oldp + sizeof(struct in6_addr);
1067 			written += sizeof(struct in6_addr);
1068 			tmp = in6m->in6m_refcount;
1069 			error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1070 			if (error)
1071 				goto done;
1072 			oldp = (char *)oldp + sizeof(tmp);
1073 			written += sizeof(tmp);
1074 		}
1075 
1076 		s = pserialize_read_enter();
1077 
1078 		break;
1079 	}
1080 	pserialize_read_exit(s);
1081 done:
1082 	ifa_release(ifa, &psref_ia);
1083 	if_put(ifp, &psref);
1084 	curlwp_bindx(bound);
1085 	rw_exit(&in6_multilock);
1086 	*oldlenp = written;
1087 	return error;
1088 }
1089 
1090 void
in6_sysctl_multicast_setup(struct sysctllog ** clog)1091 in6_sysctl_multicast_setup(struct sysctllog **clog)
1092 {
1093 
1094 	sysctl_createv(clog, 0, NULL, NULL,
1095 		       CTLFLAG_PERMANENT,
1096 		       CTLTYPE_NODE, "inet6", NULL,
1097 		       NULL, 0, NULL, 0,
1098 		       CTL_NET, PF_INET6, CTL_EOL);
1099 
1100 	sysctl_createv(clog, 0, NULL, NULL,
1101 		       CTLFLAG_PERMANENT,
1102 		       CTLTYPE_NODE, "multicast",
1103 		       SYSCTL_DESCR("Multicast information"),
1104 		       in6_multicast_sysctl, 0, NULL, 0,
1105 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1106 
1107 	sysctl_createv(clog, 0, NULL, NULL,
1108 		       CTLFLAG_PERMANENT,
1109 		       CTLTYPE_NODE, "multicast_kludge",
1110 		       SYSCTL_DESCR("multicast kludge information"),
1111 		       in6_mkludge_sysctl, 0, NULL, 0,
1112 		       CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1113 }
1114