xref: /netbsd-src/sys/net/if.c (revision 627f7eb200a4419d89b531d55fccd2ee3ffdcde0)
1 /*	$NetBSD: if.c,v 1.484 2020/10/15 10:20:44 roy Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by William Studenmund and Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the project nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  */
60 
61 /*
62  * Copyright (c) 1980, 1986, 1993
63  *	The Regents of the University of California.  All rights reserved.
64  *
65  * Redistribution and use in source and binary forms, with or without
66  * modification, are permitted provided that the following conditions
67  * are met:
68  * 1. Redistributions of source code must retain the above copyright
69  *    notice, this list of conditions and the following disclaimer.
70  * 2. Redistributions in binary form must reproduce the above copyright
71  *    notice, this list of conditions and the following disclaimer in the
72  *    documentation and/or other materials provided with the distribution.
73  * 3. Neither the name of the University nor the names of its contributors
74  *    may be used to endorse or promote products derived from this software
75  *    without specific prior written permission.
76  *
77  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
78  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
79  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
80  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
81  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
82  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
83  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
84  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
85  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
86  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
87  * SUCH DAMAGE.
88  *
89  *	@(#)if.c	8.5 (Berkeley) 1/9/95
90  */
91 
92 #include <sys/cdefs.h>
93 __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.484 2020/10/15 10:20:44 roy Exp $");
94 
95 #if defined(_KERNEL_OPT)
96 #include "opt_inet.h"
97 #include "opt_ipsec.h"
98 #include "opt_atalk.h"
99 #include "opt_wlan.h"
100 #include "opt_net_mpsafe.h"
101 #include "opt_mrouting.h"
102 #endif
103 
104 #include <sys/param.h>
105 #include <sys/mbuf.h>
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/socket.h>
110 #include <sys/socketvar.h>
111 #include <sys/domain.h>
112 #include <sys/protosw.h>
113 #include <sys/kernel.h>
114 #include <sys/ioctl.h>
115 #include <sys/sysctl.h>
116 #include <sys/syslog.h>
117 #include <sys/kauth.h>
118 #include <sys/kmem.h>
119 #include <sys/xcall.h>
120 #include <sys/cpu.h>
121 #include <sys/intr.h>
122 #include <sys/module_hook.h>
123 #include <sys/compat_stub.h>
124 #include <sys/msan.h>
125 
126 #include <net/if.h>
127 #include <net/if_dl.h>
128 #include <net/if_ether.h>
129 #include <net/if_media.h>
130 #include <net80211/ieee80211.h>
131 #include <net80211/ieee80211_ioctl.h>
132 #include <net/if_types.h>
133 #include <net/route.h>
134 #include <net/netisr.h>
135 #include <sys/module.h>
136 #ifdef NETATALK
137 #include <netatalk/at_extern.h>
138 #include <netatalk/at.h>
139 #endif
140 #include <net/pfil.h>
141 #include <netinet/in.h>
142 #include <netinet/in_var.h>
143 #include <netinet/ip_encap.h>
144 #include <net/bpf.h>
145 
146 #ifdef INET6
147 #include <netinet6/in6_var.h>
148 #include <netinet6/nd6.h>
149 #endif
150 
151 #include "ether.h"
152 
153 #include "bridge.h"
154 #if NBRIDGE > 0
155 #include <net/if_bridgevar.h>
156 #endif
157 
158 #include "carp.h"
159 #if NCARP > 0
160 #include <netinet/ip_carp.h>
161 #endif
162 
163 #include <compat/sys/sockio.h>
164 
165 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
166 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
167 
168 /*
169  * XXX reusing (ifp)->if_snd->ifq_lock rather than having another spin mutex
170  * for each ifnet.  It doesn't matter because:
171  * - if IFEF_MPSAFE is enabled, if_snd isn't used and lock contentions on
172  *   ifq_lock don't happen
173  * - if IFEF_MPSAFE is disabled, there is no lock contention on ifq_lock
174  *   because if_snd, if_link_state_change and if_link_state_change_process
175  *   are all called with KERNEL_LOCK
176  */
177 #define IF_LINK_STATE_CHANGE_LOCK(ifp)		\
178 	mutex_enter((ifp)->if_snd.ifq_lock)
179 #define IF_LINK_STATE_CHANGE_UNLOCK(ifp)	\
180 	mutex_exit((ifp)->if_snd.ifq_lock)
181 
182 /*
183  * Global list of interfaces.
184  */
/* DEPRECATED.  Remove it once all kvm(3) users have disappeared. */
186 struct ifnet_head		ifnet_list;
187 
188 struct pslist_head		ifnet_pslist;
189 static ifnet_t **		ifindex2ifnet = NULL;
190 static u_int			if_index = 1;
191 static size_t			if_indexlim = 0;
192 static uint64_t			index_gen;
193 /* Mutex to protect the above objects. */
194 kmutex_t			ifnet_mtx __cacheline_aligned;
195 static struct psref_class	*ifnet_psref_class __read_mostly;
196 static pserialize_t		ifnet_psz;
197 static struct workqueue		*ifnet_link_state_wq __read_mostly;
198 
199 static kmutex_t			if_clone_mtx;
200 
201 struct ifnet *lo0ifp;
202 int	ifqmaxlen = IFQ_MAXLEN;
203 
204 struct psref_class		*ifa_psref_class __read_mostly;
205 
206 static int	if_delroute_matcher(struct rtentry *, void *);
207 
208 static bool if_is_unit(const char *);
209 static struct if_clone *if_clone_lookup(const char *, int *);
210 
211 static LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
212 static int if_cloners_count;
213 
214 /* Packet filtering hook for interfaces. */
215 pfil_head_t *			if_pfil __read_mostly;
216 
217 static kauth_listener_t if_listener;
218 
219 static int doifioctl(struct socket *, u_long, void *, struct lwp *);
220 static void if_detach_queues(struct ifnet *, struct ifqueue *);
221 static void sysctl_sndq_setup(struct sysctllog **, const char *,
222     struct ifaltq *);
223 static void if_slowtimo(void *);
224 static void if_attachdomain1(struct ifnet *);
225 static int ifconf(u_long, void *);
226 static int if_transmit(struct ifnet *, struct mbuf *);
227 static int if_clone_create(const char *);
228 static int if_clone_destroy(const char *);
229 static void if_link_state_change_work(struct work *, void *);
230 static void if_up_locked(struct ifnet *);
231 static void _if_down(struct ifnet *);
232 static void if_down_deactivated(struct ifnet *);
233 
/*
 * State of one softint-based per-CPU input queue set, as created by
 * if_percpuq_create().
 */
struct if_percpuq {
	struct ifnet	*ipq_ifp;	/* interface the queues belong to */
	void		*ipq_si;	/* softint handle running the queues */
	struct percpu	*ipq_ifqs;	/* struct ifqueue */
};
239 
240 static struct mbuf *if_percpuq_dequeue(struct if_percpuq *);
241 
242 static void if_percpuq_drops(void *, void *, struct cpu_info *);
243 static int sysctl_percpuq_drops_handler(SYSCTLFN_PROTO);
244 static void sysctl_percpuq_setup(struct sysctllog **, const char *,
245     struct if_percpuq *);
246 
/*
 * State of the deferred if_start machinery of one interface, as set up
 * by if_deferred_start_init().
 */
struct if_deferred_start {
	struct ifnet	*ids_ifp;	/* interface to start */
	void		(*ids_if_start)(struct ifnet *); /* run from softint */
	void		*ids_si;	/* softint handle */
};
252 
253 static void if_deferred_start_softint(void *);
254 static void if_deferred_start_common(struct ifnet *);
255 static void if_deferred_start_destroy(struct ifnet *);
256 
257 #if defined(INET) || defined(INET6)
258 static void sysctl_net_pktq_setup(struct sysctllog **, int);
259 #endif
260 
261 /*
262  * Hook for if_vlan - needed by if_agr
263  */
264 struct if_vlan_vlan_input_hook_t if_vlan_vlan_input_hook;
265 
266 static void if_sysctl_setup(struct sysctllog **);
267 
268 static int
269 if_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
270     void *arg0, void *arg1, void *arg2, void *arg3)
271 {
272 	int result;
273 	enum kauth_network_req req;
274 
275 	result = KAUTH_RESULT_DEFER;
276 	req = (enum kauth_network_req)(uintptr_t)arg1;
277 
278 	if (action != KAUTH_NETWORK_INTERFACE)
279 		return result;
280 
281 	if ((req == KAUTH_REQ_NETWORK_INTERFACE_GET) ||
282 	    (req == KAUTH_REQ_NETWORK_INTERFACE_SET))
283 		result = KAUTH_RESULT_ALLOW;
284 
285 	return result;
286 }
287 
288 /*
289  * Network interface utility routines.
290  *
291  * Routines with ifa_ifwith* names take sockaddr *'s as
292  * parameters.
293  */
294 void
295 ifinit(void)
296 {
297 
298 #if (defined(INET) || defined(INET6))
299 	encapinit();
300 #endif
301 
302 	if_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
303 	    if_listener_cb, NULL);
304 
305 	/* interfaces are available, inform socket code */
306 	ifioctl = doifioctl;
307 }
308 
309 /*
310  * XXX Initialization before configure().
311  * XXX hack to get pfil_add_hook working in autoconf.
312  */
313 void
314 ifinit1(void)
315 {
316 	int error __diagused;
317 
318 #ifdef NET_MPSAFE
319 	printf("NET_MPSAFE enabled\n");
320 #endif
321 
322 	mutex_init(&if_clone_mtx, MUTEX_DEFAULT, IPL_NONE);
323 
324 	TAILQ_INIT(&ifnet_list);
325 	mutex_init(&ifnet_mtx, MUTEX_DEFAULT, IPL_NONE);
326 	ifnet_psz = pserialize_create();
327 	ifnet_psref_class = psref_class_create("ifnet", IPL_SOFTNET);
328 	ifa_psref_class = psref_class_create("ifa", IPL_SOFTNET);
329 	error = workqueue_create(&ifnet_link_state_wq, "iflnkst",
330 	    if_link_state_change_work, NULL, PRI_SOFTNET, IPL_NET,
331 	    WQ_MPSAFE);
332 	KASSERT(error == 0);
333 	PSLIST_INIT(&ifnet_pslist);
334 
335 	if_indexlim = 8;
336 
337 	if_pfil = pfil_head_create(PFIL_TYPE_IFNET, NULL);
338 	KASSERT(if_pfil != NULL);
339 
340 #if NETHER > 0 || defined(NETATALK) || defined(WLAN)
341 	etherinit();
342 #endif
343 }
344 
345 /* XXX must be after domaininit() */
346 void
347 ifinit_post(void)
348 {
349 
350 	if_sysctl_setup(NULL);
351 }
352 
353 ifnet_t *
354 if_alloc(u_char type)
355 {
356 	return kmem_zalloc(sizeof(ifnet_t), KM_SLEEP);
357 }
358 
359 void
360 if_free(ifnet_t *ifp)
361 {
362 	kmem_free(ifp, sizeof(ifnet_t));
363 }
364 
365 void
366 if_initname(struct ifnet *ifp, const char *name, int unit)
367 {
368 	(void)snprintf(ifp->if_xname, sizeof(ifp->if_xname),
369 	    "%s%d", name, unit);
370 }
371 
372 /*
373  * Null routines used while an interface is going away.  These routines
374  * just return an error.
375  */
376 
377 int
378 if_nulloutput(struct ifnet *ifp, struct mbuf *m,
379     const struct sockaddr *so, const struct rtentry *rt)
380 {
381 
382 	return ENXIO;
383 }
384 
385 void
386 if_nullinput(struct ifnet *ifp, struct mbuf *m)
387 {
388 
389 	/* Nothing. */
390 }
391 
392 void
393 if_nullstart(struct ifnet *ifp)
394 {
395 
396 	/* Nothing. */
397 }
398 
399 int
400 if_nulltransmit(struct ifnet *ifp, struct mbuf *m)
401 {
402 
403 	m_freem(m);
404 	return ENXIO;
405 }
406 
407 int
408 if_nullioctl(struct ifnet *ifp, u_long cmd, void *data)
409 {
410 
411 	return ENXIO;
412 }
413 
414 int
415 if_nullinit(struct ifnet *ifp)
416 {
417 
418 	return ENXIO;
419 }
420 
421 void
422 if_nullstop(struct ifnet *ifp, int disable)
423 {
424 
425 	/* Nothing. */
426 }
427 
428 void
429 if_nullslowtimo(struct ifnet *ifp)
430 {
431 
432 	/* Nothing. */
433 }
434 
435 void
436 if_nulldrain(struct ifnet *ifp)
437 {
438 
439 	/* Nothing. */
440 }
441 
442 void
443 if_set_sadl(struct ifnet *ifp, const void *lla, u_char addrlen, bool factory)
444 {
445 	struct ifaddr *ifa;
446 	struct sockaddr_dl *sdl;
447 
448 	ifp->if_addrlen = addrlen;
449 	if_alloc_sadl(ifp);
450 	ifa = ifp->if_dl;
451 	sdl = satosdl(ifa->ifa_addr);
452 
453 	(void)sockaddr_dl_setaddr(sdl, sdl->sdl_len, lla, ifp->if_addrlen);
454 	if (factory) {
455 		KASSERT(ifp->if_hwdl == NULL);
456 		ifp->if_hwdl = ifp->if_dl;
457 		ifaref(ifp->if_hwdl);
458 	}
459 	/* TBD routing socket */
460 }
461 
462 struct ifaddr *
463 if_dl_create(const struct ifnet *ifp, const struct sockaddr_dl **sdlp)
464 {
465 	unsigned socksize, ifasize;
466 	int addrlen, namelen;
467 	struct sockaddr_dl *mask, *sdl;
468 	struct ifaddr *ifa;
469 
470 	namelen = strlen(ifp->if_xname);
471 	addrlen = ifp->if_addrlen;
472 	socksize = roundup(sockaddr_dl_measure(namelen, addrlen), sizeof(long));
473 	ifasize = sizeof(*ifa) + 2 * socksize;
474 	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
475 
476 	sdl = (struct sockaddr_dl *)(ifa + 1);
477 	mask = (struct sockaddr_dl *)(socksize + (char *)sdl);
478 
479 	sockaddr_dl_init(sdl, socksize, ifp->if_index, ifp->if_type,
480 	    ifp->if_xname, namelen, NULL, addrlen);
481 	mask->sdl_family = AF_LINK;
482 	mask->sdl_len = sockaddr_dl_measure(namelen, 0);
483 	memset(&mask->sdl_data[0], 0xff, namelen);
484 	ifa->ifa_rtrequest = link_rtrequest;
485 	ifa->ifa_addr = (struct sockaddr *)sdl;
486 	ifa->ifa_netmask = (struct sockaddr *)mask;
487 	ifa_psref_init(ifa);
488 
489 	*sdlp = sdl;
490 
491 	return ifa;
492 }
493 
494 static void
495 if_sadl_setrefs(struct ifnet *ifp, struct ifaddr *ifa)
496 {
497 	const struct sockaddr_dl *sdl;
498 
499 	ifp->if_dl = ifa;
500 	ifaref(ifa);
501 	sdl = satosdl(ifa->ifa_addr);
502 	ifp->if_sadl = sdl;
503 }
504 
505 /*
506  * Allocate the link level name for the specified interface.  This
507  * is an attachment helper.  It must be called after ifp->if_addrlen
508  * is initialized, which may not be the case when if_attach() is
509  * called.
510  */
511 void
512 if_alloc_sadl(struct ifnet *ifp)
513 {
514 	struct ifaddr *ifa;
515 	const struct sockaddr_dl *sdl;
516 
517 	/*
518 	 * If the interface already has a link name, release it
519 	 * now.  This is useful for interfaces that can change
520 	 * link types, and thus switch link names often.
521 	 */
522 	if (ifp->if_sadl != NULL)
523 		if_free_sadl(ifp, 0);
524 
525 	ifa = if_dl_create(ifp, &sdl);
526 
527 	ifa_insert(ifp, ifa);
528 	if_sadl_setrefs(ifp, ifa);
529 }
530 
531 static void
532 if_deactivate_sadl(struct ifnet *ifp)
533 {
534 	struct ifaddr *ifa;
535 
536 	KASSERT(ifp->if_dl != NULL);
537 
538 	ifa = ifp->if_dl;
539 
540 	ifp->if_sadl = NULL;
541 
542 	ifp->if_dl = NULL;
543 	ifafree(ifa);
544 }
545 
546 static void
547 if_replace_sadl(struct ifnet *ifp, struct ifaddr *ifa)
548 {
549 	struct ifaddr *old;
550 
551 	KASSERT(ifp->if_dl != NULL);
552 
553 	old = ifp->if_dl;
554 
555 	ifaref(ifa);
556 	/* XXX Update if_dl and if_sadl atomically */
557 	ifp->if_dl = ifa;
558 	ifp->if_sadl = satosdl(ifa->ifa_addr);
559 
560 	ifafree(old);
561 }
562 
563 void
564 if_activate_sadl(struct ifnet *ifp, struct ifaddr *ifa0,
565     const struct sockaddr_dl *sdl)
566 {
567 	int s, ss;
568 	struct ifaddr *ifa;
569 	int bound = curlwp_bind();
570 
571 	KASSERT(ifa_held(ifa0));
572 
573 	s = splsoftnet();
574 
575 	if_replace_sadl(ifp, ifa0);
576 
577 	ss = pserialize_read_enter();
578 	IFADDR_READER_FOREACH(ifa, ifp) {
579 		struct psref psref;
580 		ifa_acquire(ifa, &psref);
581 		pserialize_read_exit(ss);
582 
583 		rtinit(ifa, RTM_LLINFO_UPD, 0);
584 
585 		ss = pserialize_read_enter();
586 		ifa_release(ifa, &psref);
587 	}
588 	pserialize_read_exit(ss);
589 
590 	splx(s);
591 	curlwp_bindx(bound);
592 }
593 
594 /*
595  * Free the link level name for the specified interface.  This is
596  * a detach helper.  This is called from if_detach().
597  */
598 void
599 if_free_sadl(struct ifnet *ifp, int factory)
600 {
601 	struct ifaddr *ifa;
602 	int s;
603 
604 	if (factory && ifp->if_hwdl != NULL) {
605 		ifa = ifp->if_hwdl;
606 		ifp->if_hwdl = NULL;
607 		ifafree(ifa);
608 	}
609 
610 	ifa = ifp->if_dl;
611 	if (ifa == NULL) {
612 		KASSERT(ifp->if_sadl == NULL);
613 		return;
614 	}
615 
616 	KASSERT(ifp->if_sadl != NULL);
617 
618 	s = splsoftnet();
619 	KASSERT(ifa->ifa_addr->sa_family == AF_LINK);
620 	ifa_remove(ifp, ifa);
621 	if_deactivate_sadl(ifp);
622 	splx(s);
623 }
624 
625 static void
626 if_getindex(ifnet_t *ifp)
627 {
628 	bool hitlimit = false;
629 
630 	ifp->if_index_gen = index_gen++;
631 
632 	ifp->if_index = if_index;
633 	if (ifindex2ifnet == NULL) {
634 		if_index++;
635 		goto skip;
636 	}
637 	while (if_byindex(ifp->if_index)) {
638 		/*
639 		 * If we hit USHRT_MAX, we skip back to 0 since
640 		 * there are a number of places where the value
641 		 * of if_index or if_index itself is compared
642 		 * to or stored in an unsigned short.  By
643 		 * jumping back, we won't botch those assignments
644 		 * or comparisons.
645 		 */
646 		if (++if_index == 0) {
647 			if_index = 1;
648 		} else if (if_index == USHRT_MAX) {
649 			/*
650 			 * However, if we have to jump back to
651 			 * zero *twice* without finding an empty
652 			 * slot in ifindex2ifnet[], then there
653 			 * there are too many (>65535) interfaces.
654 			 */
655 			if (hitlimit) {
656 				panic("too many interfaces");
657 			}
658 			hitlimit = true;
659 			if_index = 1;
660 		}
661 		ifp->if_index = if_index;
662 	}
663 skip:
664 	/*
665 	 * ifindex2ifnet is indexed by if_index. Since if_index will
666 	 * grow dynamically, it should grow too.
667 	 */
668 	if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
669 		size_t m, n, oldlim;
670 		void *q;
671 
672 		oldlim = if_indexlim;
673 		while (ifp->if_index >= if_indexlim)
674 			if_indexlim <<= 1;
675 
676 		/* grow ifindex2ifnet */
677 		m = oldlim * sizeof(struct ifnet *);
678 		n = if_indexlim * sizeof(struct ifnet *);
679 		q = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
680 		if (ifindex2ifnet != NULL) {
681 			memcpy(q, ifindex2ifnet, m);
682 			free(ifindex2ifnet, M_IFADDR);
683 		}
684 		ifindex2ifnet = (struct ifnet **)q;
685 	}
686 	ifindex2ifnet[ifp->if_index] = ifp;
687 }
688 
689 /*
690  * Initialize an interface and assign an index for it.
691  *
692  * It must be called prior to a device specific attach routine
693  * (e.g., ether_ifattach and ieee80211_ifattach) or if_alloc_sadl,
694  * and be followed by if_register:
695  *
696  *     if_initialize(ifp);
697  *     ether_ifattach(ifp, enaddr);
698  *     if_register(ifp);
699  */
700 int
701 if_initialize(ifnet_t *ifp)
702 {
703 	int rv = 0;
704 
705 	KASSERT(if_indexlim > 0);
706 	TAILQ_INIT(&ifp->if_addrlist);
707 
708 	/*
709 	 * Link level name is allocated later by a separate call to
710 	 * if_alloc_sadl().
711 	 */
712 
713 	if (ifp->if_snd.ifq_maxlen == 0)
714 		ifp->if_snd.ifq_maxlen = ifqmaxlen;
715 
716 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
717 
718 	ifp->if_link_state = LINK_STATE_UNKNOWN;
719 	ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
720 	ifp->if_link_scheduled = false;
721 
722 	ifp->if_capenable = 0;
723 	ifp->if_csum_flags_tx = 0;
724 	ifp->if_csum_flags_rx = 0;
725 
726 #ifdef ALTQ
727 	ifp->if_snd.altq_type = 0;
728 	ifp->if_snd.altq_disc = NULL;
729 	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
730 	ifp->if_snd.altq_tbr  = NULL;
731 	ifp->if_snd.altq_ifp  = ifp;
732 #endif
733 
734 	IFQ_LOCK_INIT(&ifp->if_snd);
735 
736 	ifp->if_pfil = pfil_head_create(PFIL_TYPE_IFNET, ifp);
737 	pfil_run_ifhooks(if_pfil, PFIL_IFNET_ATTACH, ifp);
738 
739 	IF_AFDATA_LOCK_INIT(ifp);
740 
741 	PSLIST_ENTRY_INIT(ifp, if_pslist_entry);
742 	PSLIST_INIT(&ifp->if_addr_pslist);
743 	psref_target_init(&ifp->if_psref, ifnet_psref_class);
744 	ifp->if_ioctl_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
745 	LIST_INIT(&ifp->if_multiaddrs);
746 	if ((rv = if_stats_init(ifp)) != 0) {
747 		goto fail;
748 	}
749 
750 	IFNET_GLOBAL_LOCK();
751 	if_getindex(ifp);
752 	IFNET_GLOBAL_UNLOCK();
753 
754 	return 0;
755 
756 fail:
757 	IF_AFDATA_LOCK_DESTROY(ifp);
758 
759 	pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp);
760 	(void)pfil_head_destroy(ifp->if_pfil);
761 
762 	IFQ_LOCK_DESTROY(&ifp->if_snd);
763 
764 	return rv;
765 }
766 
767 /*
768  * Register an interface to the list of "active" interfaces.
769  */
770 void
771 if_register(ifnet_t *ifp)
772 {
773 	/*
774 	 * If the driver has not supplied its own if_ioctl, then
775 	 * supply the default.
776 	 */
777 	if (ifp->if_ioctl == NULL)
778 		ifp->if_ioctl = ifioctl_common;
779 
780 	sysctl_sndq_setup(&ifp->if_sysctl_log, ifp->if_xname, &ifp->if_snd);
781 
782 	if (!STAILQ_EMPTY(&domains))
783 		if_attachdomain1(ifp);
784 
785 	/* Announce the interface. */
786 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
787 
788 	if (ifp->if_slowtimo != NULL) {
789 		ifp->if_slowtimo_ch =
790 		    kmem_zalloc(sizeof(*ifp->if_slowtimo_ch), KM_SLEEP);
791 		callout_init(ifp->if_slowtimo_ch, 0);
792 		callout_setfunc(ifp->if_slowtimo_ch, if_slowtimo, ifp);
793 		if_slowtimo(ifp);
794 	}
795 
796 	if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit)
797 		ifp->if_transmit = if_transmit;
798 
799 	IFNET_GLOBAL_LOCK();
800 	TAILQ_INSERT_TAIL(&ifnet_list, ifp, if_list);
801 	IFNET_WRITER_INSERT_TAIL(ifp);
802 	IFNET_GLOBAL_UNLOCK();
803 }
804 
805 /*
806  * The if_percpuq framework
807  *
808  * It allows network device drivers to execute the network stack
809  * in softint (so called softint-based if_input). It utilizes
810  * softint and percpu ifqueue. It doesn't distribute any packets
811  * between CPUs, unlike pktqueue(9).
812  *
813  * Currently we support two options for device drivers to apply the framework:
 * - Use it implicitly with fewer changes
815  *   - If you use if_attach in driver's _attach function and if_input in
816  *     driver's Rx interrupt handler, a packet is queued and a softint handles
817  *     the packet implicitly
818  * - Use it explicitly in each driver (recommended)
819  *   - You can use if_percpuq_* directly in your driver
820  *   - In this case, you need to allocate struct if_percpuq in driver's softc
821  *   - See wm(4) as a reference implementation
822  */
823 
824 static void
825 if_percpuq_softint(void *arg)
826 {
827 	struct if_percpuq *ipq = arg;
828 	struct ifnet *ifp = ipq->ipq_ifp;
829 	struct mbuf *m;
830 
831 	while ((m = if_percpuq_dequeue(ipq)) != NULL) {
832 		if_statinc(ifp, if_ipackets);
833 		bpf_mtap(ifp, m, BPF_D_IN);
834 
835 		ifp->_if_input(ifp, m);
836 	}
837 }
838 
839 static void
840 if_percpuq_init_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused)
841 {
842 	struct ifqueue *const ifq = p;
843 
844 	memset(ifq, 0, sizeof(*ifq));
845 	ifq->ifq_maxlen = IFQ_MAXLEN;
846 }
847 
848 struct if_percpuq *
849 if_percpuq_create(struct ifnet *ifp)
850 {
851 	struct if_percpuq *ipq;
852 	u_int flags = SOFTINT_NET;
853 
854 	flags |= if_is_mpsafe(ifp) ? SOFTINT_MPSAFE : 0;
855 
856 	ipq = kmem_zalloc(sizeof(*ipq), KM_SLEEP);
857 	ipq->ipq_ifp = ifp;
858 	ipq->ipq_si = softint_establish(flags, if_percpuq_softint, ipq);
859 	ipq->ipq_ifqs = percpu_alloc(sizeof(struct ifqueue));
860 	percpu_foreach(ipq->ipq_ifqs, &if_percpuq_init_ifq, NULL);
861 
862 	sysctl_percpuq_setup(&ifp->if_sysctl_log, ifp->if_xname, ipq);
863 
864 	return ipq;
865 }
866 
867 static struct mbuf *
868 if_percpuq_dequeue(struct if_percpuq *ipq)
869 {
870 	struct mbuf *m;
871 	struct ifqueue *ifq;
872 	int s;
873 
874 	s = splnet();
875 	ifq = percpu_getref(ipq->ipq_ifqs);
876 	IF_DEQUEUE(ifq, m);
877 	percpu_putref(ipq->ipq_ifqs);
878 	splx(s);
879 
880 	return m;
881 }
882 
883 static void
884 if_percpuq_purge_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused)
885 {
886 	struct ifqueue *const ifq = p;
887 
888 	IF_PURGE(ifq);
889 }
890 
891 void
892 if_percpuq_destroy(struct if_percpuq *ipq)
893 {
894 
895 	/* if_detach may already destroy it */
896 	if (ipq == NULL)
897 		return;
898 
899 	softint_disestablish(ipq->ipq_si);
900 	percpu_foreach(ipq->ipq_ifqs, &if_percpuq_purge_ifq, NULL);
901 	percpu_free(ipq->ipq_ifqs, sizeof(struct ifqueue));
902 	kmem_free(ipq, sizeof(*ipq));
903 }
904 
905 void
906 if_percpuq_enqueue(struct if_percpuq *ipq, struct mbuf *m)
907 {
908 	struct ifqueue *ifq;
909 	int s;
910 
911 	KASSERT(ipq != NULL);
912 
913 	s = splnet();
914 	ifq = percpu_getref(ipq->ipq_ifqs);
915 	if (IF_QFULL(ifq)) {
916 		IF_DROP(ifq);
917 		percpu_putref(ipq->ipq_ifqs);
918 		m_freem(m);
919 		goto out;
920 	}
921 	IF_ENQUEUE(ifq, m);
922 	percpu_putref(ipq->ipq_ifqs);
923 
924 	softint_schedule(ipq->ipq_si);
925 out:
926 	splx(s);
927 }
928 
929 static void
930 if_percpuq_drops(void *p, void *arg, struct cpu_info *ci __unused)
931 {
932 	struct ifqueue *const ifq = p;
933 	int *sum = arg;
934 
935 	*sum += ifq->ifq_drops;
936 }
937 
938 static int
939 sysctl_percpuq_drops_handler(SYSCTLFN_ARGS)
940 {
941 	struct sysctlnode node;
942 	struct if_percpuq *ipq;
943 	int sum = 0;
944 	int error;
945 
946 	node = *rnode;
947 	ipq = node.sysctl_data;
948 
949 	percpu_foreach(ipq->ipq_ifqs, if_percpuq_drops, &sum);
950 
951 	node.sysctl_data = &sum;
952 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
953 	if (error != 0 || newp == NULL)
954 		return error;
955 
956 	return 0;
957 }
958 
959 static void
960 sysctl_percpuq_setup(struct sysctllog **clog, const char* ifname,
961     struct if_percpuq *ipq)
962 {
963 	const struct sysctlnode *cnode, *rnode;
964 
965 	if (sysctl_createv(clog, 0, NULL, &rnode,
966 		       CTLFLAG_PERMANENT,
967 		       CTLTYPE_NODE, "interfaces",
968 		       SYSCTL_DESCR("Per-interface controls"),
969 		       NULL, 0, NULL, 0,
970 		       CTL_NET, CTL_CREATE, CTL_EOL) != 0)
971 		goto bad;
972 
973 	if (sysctl_createv(clog, 0, &rnode, &rnode,
974 		       CTLFLAG_PERMANENT,
975 		       CTLTYPE_NODE, ifname,
976 		       SYSCTL_DESCR("Interface controls"),
977 		       NULL, 0, NULL, 0,
978 		       CTL_CREATE, CTL_EOL) != 0)
979 		goto bad;
980 
981 	if (sysctl_createv(clog, 0, &rnode, &rnode,
982 		       CTLFLAG_PERMANENT,
983 		       CTLTYPE_NODE, "rcvq",
984 		       SYSCTL_DESCR("Interface input queue controls"),
985 		       NULL, 0, NULL, 0,
986 		       CTL_CREATE, CTL_EOL) != 0)
987 		goto bad;
988 
989 #ifdef NOTYET
990 	/* XXX Should show each per-CPU queue length? */
991 	if (sysctl_createv(clog, 0, &rnode, &rnode,
992 		       CTLFLAG_PERMANENT,
993 		       CTLTYPE_INT, "len",
994 		       SYSCTL_DESCR("Current input queue length"),
995 		       sysctl_percpuq_len, 0, NULL, 0,
996 		       CTL_CREATE, CTL_EOL) != 0)
997 		goto bad;
998 
999 	if (sysctl_createv(clog, 0, &rnode, &cnode,
1000 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1001 		       CTLTYPE_INT, "maxlen",
1002 		       SYSCTL_DESCR("Maximum allowed input queue length"),
1003 		       sysctl_percpuq_maxlen_handler, 0, (void *)ipq, 0,
1004 		       CTL_CREATE, CTL_EOL) != 0)
1005 		goto bad;
1006 #endif
1007 
1008 	if (sysctl_createv(clog, 0, &rnode, &cnode,
1009 		       CTLFLAG_PERMANENT,
1010 		       CTLTYPE_INT, "drops",
1011 		       SYSCTL_DESCR("Total packets dropped due to full input queue"),
1012 		       sysctl_percpuq_drops_handler, 0, (void *)ipq, 0,
1013 		       CTL_CREATE, CTL_EOL) != 0)
1014 		goto bad;
1015 
1016 	return;
1017 bad:
1018 	printf("%s: could not attach sysctl nodes\n", ifname);
1019 	return;
1020 }
1021 
1022 /*
1023  * The deferred if_start framework
1024  *
1025  * The common APIs to defer if_start to softint when if_start is requested
1026  * from a device driver running in hardware interrupt context.
1027  */
1028 /*
1029  * Call ifp->if_start (or equivalent) in a dedicated softint for
1030  * deferred if_start.
1031  */
1032 static void
1033 if_deferred_start_softint(void *arg)
1034 {
1035 	struct if_deferred_start *ids = arg;
1036 	struct ifnet *ifp = ids->ids_ifp;
1037 
1038 	ids->ids_if_start(ifp);
1039 }
1040 
1041 /*
1042  * The default callback function for deferred if_start.
1043  */
static void
if_deferred_start_common(struct ifnet *ifp)
{
	int spl;

	/* Call if_start_lock() with the IPL raised to splnet. */
	spl = splnet();
	if_start_lock(ifp);
	splx(spl);
}
1053 
1054 static inline bool
1055 if_snd_is_used(struct ifnet *ifp)
1056 {
1057 
1058 	return ALTQ_IS_ENABLED(&ifp->if_snd) ||
1059 		ifp->if_transmit == if_transmit ||
1060 		ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit;
1061 }
1062 
1063 /*
1064  * Schedule deferred if_start.
1065  */
1066 void
1067 if_schedule_deferred_start(struct ifnet *ifp)
1068 {
1069 
1070 	KASSERT(ifp->if_deferred_start != NULL);
1071 
1072 	if (if_snd_is_used(ifp) && IFQ_IS_EMPTY(&ifp->if_snd))
1073 		return;
1074 
1075 	softint_schedule(ifp->if_deferred_start->ids_si);
1076 }
1077 
1078 /*
1079  * Create an instance of deferred if_start. A driver should call the function
1080  * only if the driver needs deferred if_start. Drivers can setup their own
1081  * deferred if_start function via 2nd argument.
1082  */
1083 void
1084 if_deferred_start_init(struct ifnet *ifp, void (*func)(struct ifnet *))
1085 {
1086 	struct if_deferred_start *ids;
1087 	u_int flags = SOFTINT_NET;
1088 
1089 	flags |= if_is_mpsafe(ifp) ? SOFTINT_MPSAFE : 0;
1090 
1091 	ids = kmem_zalloc(sizeof(*ids), KM_SLEEP);
1092 	ids->ids_ifp = ifp;
1093 	ids->ids_si = softint_establish(flags, if_deferred_start_softint, ids);
1094 	if (func != NULL)
1095 		ids->ids_if_start = func;
1096 	else
1097 		ids->ids_if_start = if_deferred_start_common;
1098 
1099 	ifp->if_deferred_start = ids;
1100 }
1101 
1102 static void
1103 if_deferred_start_destroy(struct ifnet *ifp)
1104 {
1105 
1106 	if (ifp->if_deferred_start == NULL)
1107 		return;
1108 
1109 	softint_disestablish(ifp->if_deferred_start->ids_si);
1110 	kmem_free(ifp->if_deferred_start, sizeof(*ifp->if_deferred_start));
1111 	ifp->if_deferred_start = NULL;
1112 }
1113 
1114 /*
1115  * The common interface input routine that is called by device drivers,
1116  * which should be used only when the driver's rx handler already runs
1117  * in softint.
1118  */
1119 void
1120 if_input(struct ifnet *ifp, struct mbuf *m)
1121 {
1122 
1123 	KASSERT(ifp->if_percpuq == NULL);
1124 	KASSERT(!cpu_intr_p());
1125 
1126 	if_statinc(ifp, if_ipackets);
1127 	bpf_mtap(ifp, m, BPF_D_IN);
1128 
1129 	ifp->_if_input(ifp, m);
1130 }
1131 
1132 /*
1133  * DEPRECATED. Use if_initialize and if_register instead.
1134  * See the above comment of if_initialize.
1135  *
1136  * Note that it implicitly enables if_percpuq to make drivers easy to
1137  * migrate softint-based if_input without much changes. If you don't
1138  * want to enable it, use if_initialize instead.
1139  */
1140 int
1141 if_attach(ifnet_t *ifp)
1142 {
1143 	int rv;
1144 
1145 	rv = if_initialize(ifp);
1146 	if (rv != 0)
1147 		return rv;
1148 
1149 	ifp->if_percpuq = if_percpuq_create(ifp);
1150 	if_register(ifp);
1151 
1152 	return 0;
1153 }
1154 
1155 void
1156 if_attachdomain(void)
1157 {
1158 	struct ifnet *ifp;
1159 	int s;
1160 	int bound = curlwp_bind();
1161 
1162 	s = pserialize_read_enter();
1163 	IFNET_READER_FOREACH(ifp) {
1164 		struct psref psref;
1165 		psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
1166 		pserialize_read_exit(s);
1167 		if_attachdomain1(ifp);
1168 		s = pserialize_read_enter();
1169 		psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
1170 	}
1171 	pserialize_read_exit(s);
1172 	curlwp_bindx(bound);
1173 }
1174 
1175 static void
1176 if_attachdomain1(struct ifnet *ifp)
1177 {
1178 	struct domain *dp;
1179 	int s;
1180 
1181 	s = splsoftnet();
1182 
1183 	/* address family dependent data region */
1184 	memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata));
1185 	DOMAIN_FOREACH(dp) {
1186 		if (dp->dom_ifattach != NULL)
1187 			ifp->if_afdata[dp->dom_family] =
1188 			    (*dp->dom_ifattach)(ifp);
1189 	}
1190 
1191 	splx(s);
1192 }
1193 
1194 /*
1195  * Deactivate an interface.  This points all of the procedure
1196  * handles at error stubs.  May be called from interrupt context.
1197  */
1198 void
1199 if_deactivate(struct ifnet *ifp)
1200 {
1201 	int s;
1202 
1203 	s = splsoftnet();
1204 
1205 	ifp->if_output	 = if_nulloutput;
1206 	ifp->_if_input	 = if_nullinput;
1207 	ifp->if_start	 = if_nullstart;
1208 	ifp->if_transmit = if_nulltransmit;
1209 	ifp->if_ioctl	 = if_nullioctl;
1210 	ifp->if_init	 = if_nullinit;
1211 	ifp->if_stop	 = if_nullstop;
1212 	ifp->if_slowtimo = if_nullslowtimo;
1213 	ifp->if_drain	 = if_nulldrain;
1214 
1215 	ifp->if_link_state_changed = NULL;
1216 
1217 	/* No more packets may be enqueued. */
1218 	ifp->if_snd.ifq_maxlen = 0;
1219 
1220 	splx(s);
1221 }
1222 
1223 bool
1224 if_is_deactivated(const struct ifnet *ifp)
1225 {
1226 
1227 	return ifp->if_output == if_nulloutput;
1228 }
1229 
1230 void
1231 if_purgeaddrs(struct ifnet *ifp, int family, void (*purgeaddr)(struct ifaddr *))
1232 {
1233 	struct ifaddr *ifa, *nifa;
1234 	int s;
1235 
1236 	s = pserialize_read_enter();
1237 	for (ifa = IFADDR_READER_FIRST(ifp); ifa; ifa = nifa) {
1238 		nifa = IFADDR_READER_NEXT(ifa);
1239 		if (ifa->ifa_addr->sa_family != family)
1240 			continue;
1241 		pserialize_read_exit(s);
1242 
1243 		(*purgeaddr)(ifa);
1244 
1245 		s = pserialize_read_enter();
1246 	}
1247 	pserialize_read_exit(s);
1248 }
1249 
#ifdef IFAREF_DEBUG
/*
 * Debug-only snapshot of an interface's addresses.  if_detach() builds it
 * on entry and checks it on exit to report addresses that are still
 * referenced after the detach.
 */
static struct ifaddr **ifa_list;
static int ifa_list_size;

/*
 * Record every address of ifp in ifa_list, taking one extra reference on
 * each so the entries stay valid until they are checked.
 *
 * The snapshot lives in file-scope variables, so this relies on only one
 * caller running at a time (asserted through the empty-state checks).
 */
static void
if_build_ifa_list(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	int i;

	KASSERT(ifa_list == NULL);
	KASSERT(ifa_list_size == 0);

	IFADDR_READER_FOREACH(ifa, ifp)
		ifa_list_size++;

	/*
	 * Nothing to record.  kmem_alloc() must not be asked for zero
	 * bytes; leaving ifa_list NULL makes the check routine a no-op.
	 */
	if (ifa_list_size == 0)
		return;

	/* The array holds pointers, so size it by the pointer type. */
	ifa_list = kmem_alloc(sizeof(*ifa_list) * ifa_list_size, KM_SLEEP);
	i = 0;
	IFADDR_READER_FOREACH(ifa, ifp) {
		ifa_list[i++] = ifa;
		ifaref(ifa);
	}
}

/*
 * Log, for every address recorded by if_build_ifa_list(), whether it is
 * still referenced by anyone other than the snapshot itself, then drop
 * the snapshot's reference and release the snapshot.
 */
static void
if_check_and_free_ifa_list(struct ifnet *ifp)
{
	int i;
	struct ifaddr *ifa;

	if (ifa_list == NULL)
		return;

	for (i = 0; i < ifa_list_size; i++) {
		char buf[64];

		ifa = ifa_list[i];
		sockaddr_format(ifa->ifa_addr, buf, sizeof(buf));
		/* The count reported excludes the snapshot's own reference. */
		if (ifa->ifa_refcnt > 1) {
			log(LOG_WARNING,
			    "ifa(%s) still referenced (refcnt=%d)\n",
			    buf, ifa->ifa_refcnt - 1);
		} else
			log(LOG_DEBUG,
			    "ifa(%s) not referenced (refcnt=%d)\n",
			    buf, ifa->ifa_refcnt - 1);
		ifafree(ifa);
	}

	/* Must match the size used by if_build_ifa_list(). */
	kmem_free(ifa_list, sizeof(*ifa_list) * ifa_list_size);
	ifa_list = NULL;
	ifa_list_size = 0;
}
#endif
1305 
1306 /*
1307  * Detach an interface from the list of "active" interfaces,
1308  * freeing any resources as we go along.
1309  *
1310  * NOTE: This routine must be called with a valid thread context,
1311  * as it may block.
 *
 * Outline, in the order the code performs it:
 *  - stop further link-state work and deactivate the interface while
 *    holding IFNET_LOCK;
 *  - unlink the interface from ifindex2ifnet and the interface lists and
 *    wait for pserialize readers;
 *  - halt the slow-timeout callout, destroy deferred start, and call
 *    if_down_deactivated();
 *  - detach ALTQ and CARP state when those options are compiled in;
 *  - purge all non-AF_LINK addresses through the protocols' pr_purgeif,
 *    then free the link-level address and the remaining AF_LINK entries;
 *  - delete routes that still point at the interface and run the
 *    per-domain dom_ifdetach / PR_PURGEIF hooks;
 *  - destroy the psref target and pfil head and announce the departure;
 *  - drop queued packets that were received on the interface, run the
 *    packet-queue and cross-call barriers, and free the remaining
 *    per-interface resources.
 *
 * Everything from the sysctl teardown onwards runs between splnet() and
 * the final splx().
1312  */
1313 void
1314 if_detach(struct ifnet *ifp)
1315 {
1316 	struct socket so;
1317 	struct ifaddr *ifa;
1318 #ifdef IFAREF_DEBUG
1319 	struct ifaddr *last_ifa = NULL;
1320 #endif
1321 	struct domain *dp;
1322 	const struct protosw *pr;
1323 	int s, i, family, purged;
1324 
1325 #ifdef IFAREF_DEBUG
1326 	if_build_ifa_list(ifp);
1327 #endif
1328 	/*
1329 	 * XXX It's kind of lame that we have to have the
1330 	 * XXX socket structure...
	 *
	 * The zeroed socket only carries so_proto into the pr_purgeif
	 * calls below.
1331 	 */
1332 	memset(&so, 0, sizeof(so));
1333 
1334 	s = splnet();
1335 
1336 	sysctl_teardown(&ifp->if_sysctl_log);
1337 
1338 	IFNET_LOCK(ifp);
1339 
1340 	/*
1341 	 * Unset all queued link states and pretend a
1342 	 * link state change is scheduled.
1343 	 * This stops any more link state changes occurring for this
1344 	 * interface while it's being detached so it's safe
1345 	 * to drain the workqueue.
1346 	 */
1347 	IF_LINK_STATE_CHANGE_LOCK(ifp);
1348 	ifp->if_link_queue = -1; /* all bits set, see link_state_change() */
1349 	ifp->if_link_scheduled = true;
1350 	IF_LINK_STATE_CHANGE_UNLOCK(ifp);
1351 	workqueue_wait(ifnet_link_state_wq, &ifp->if_link_work);
1352 
1353 	if_deactivate(ifp);
1354 	IFNET_UNLOCK(ifp);
1355 
1356 	/*
1357 	 * Unlink from the list and wait for all readers to leave
1358 	 * from pserialize read sections.  Note that we can't do
1359 	 * psref_target_destroy here.  See below.
1360 	 */
1361 	IFNET_GLOBAL_LOCK();
1362 	ifindex2ifnet[ifp->if_index] = NULL;
1363 	TAILQ_REMOVE(&ifnet_list, ifp, if_list);
1364 	IFNET_WRITER_REMOVE(ifp);
1365 	pserialize_perform(ifnet_psz);
1366 	IFNET_GLOBAL_UNLOCK();
1367 
	/* Halt, destroy and free the slow-timeout callout, if one exists. */
1368 	if (ifp->if_slowtimo != NULL && ifp->if_slowtimo_ch != NULL) {
1369 		ifp->if_slowtimo = NULL;
1370 		callout_halt(ifp->if_slowtimo_ch, NULL);
1371 		callout_destroy(ifp->if_slowtimo_ch);
1372 		kmem_free(ifp->if_slowtimo_ch, sizeof(*ifp->if_slowtimo_ch));
1373 	}
1374 	if_deferred_start_destroy(ifp);
1375 
1376 	/*
1377 	 * Do an if_down() to give protocols a chance to do something.
1378 	 */
1379 	if_down_deactivated(ifp);
1380 
1381 #ifdef ALTQ
	/* Disable, then detach, any ALTQ state on the send queue. */
1382 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
1383 		altq_disable(&ifp->if_snd);
1384 	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
1385 		altq_detach(&ifp->if_snd);
1386 #endif
1387 
1388 #if NCARP > 0
1389 	/* Remove the interface from any carp group it is a part of.  */
1390 	if (ifp->if_carp != NULL && ifp->if_type != IFT_CARP)
1391 		carp_ifdetach(ifp);
1392 #endif
1393 
1394 	/*
1395 	 * Rip all the addresses off the interface.  This should make
1396 	 * all of the routes go away.
1397 	 *
1398 	 * pr_usrreq calls can remove an arbitrary number of ifaddrs
1399 	 * from the list, including our "cursor", ifa.  For safety,
1400 	 * and to honor the TAILQ abstraction, I just restart the
1401 	 * loop after each removal.  Note that the loop will exit
1402 	 * when all of the remaining ifaddrs belong to the AF_LINK
1403 	 * family.  I am counting on the historical fact that at
1404 	 * least one pr_usrreq in each address domain removes at
1405 	 * least one ifaddr.
1406 	 */
1407 again:
1408 	/*
1409 	 * At this point, no other one tries to remove ifa in the list,
1410 	 * so we don't need to take a lock or psref.  Avoid using
1411 	 * IFADDR_READER_FOREACH to pass over an inspection of contract
1412 	 * violations of pserialize.
1413 	 */
1414 	IFADDR_WRITER_FOREACH(ifa, ifp) {
1415 		family = ifa->ifa_addr->sa_family;
1416 #ifdef IFAREF_DEBUG
1417 		printf("if_detach: ifaddr %p, family %d, refcnt %d\n",
1418 		    ifa, family, ifa->ifa_refcnt);
1419 		if (last_ifa != NULL && ifa == last_ifa)
1420 			panic("if_detach: loop detected");
1421 		last_ifa = ifa;
1422 #endif
1423 		if (family == AF_LINK)
1424 			continue;
1425 		dp = pffinddomain(family);
1426 		KASSERTMSG(dp != NULL, "no domain for AF %d", family);
1427 		/*
1428 		 * XXX These PURGEIF calls are redundant with the
1429 		 * purge-all-families calls below, but are left in for
1430 		 * now both to make a smaller change, and to avoid
1431 		 * unplanned interactions with clearing of
1432 		 * ifp->if_addrlist.
1433 		 */
1434 		purged = 0;
1435 		for (pr = dp->dom_protosw;
1436 		     pr < dp->dom_protoswNPROTOSW; pr++) {
1437 			so.so_proto = pr;
1438 			if (pr->pr_usrreqs) {
1439 				(void) (*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
1440 				purged = 1;
1441 			}
1442 		}
1443 		if (purged == 0) {
1444 			/*
1445 			 * XXX What's really the best thing to do
1446 			 * XXX here?  --thorpej@NetBSD.org
			 *
			 * No protocol of this domain had pr_usrreqs, so
			 * the address is removed directly to let the
			 * outer loop make progress.
1447 			 */
1448 			printf("if_detach: WARNING: AF %d not purged\n",
1449 			    family);
1450 			ifa_remove(ifp, ifa);
1451 		}
1452 		goto again;
1453 	}
1454 
1455 	if_free_sadl(ifp, 1);
1456 
	/*
	 * Only AF_LINK addresses may remain here (asserted below).  Remove
	 * them one at a time, restarting the walk after every removal.
	 */
1457 restart:
1458 	IFADDR_WRITER_FOREACH(ifa, ifp) {
1459 		family = ifa->ifa_addr->sa_family;
1460 		KASSERT(family == AF_LINK);
1461 		ifa_remove(ifp, ifa);
1462 		goto restart;
1463 	}
1464 
1465 	/* Delete stray routes from the routing table. */
1466 	for (i = 0; i <= AF_MAX; i++)
1467 		rt_delete_matched_entries(i, if_delroute_matcher, ifp);
1468 
1469 	DOMAIN_FOREACH(dp) {
		/*
		 * Hand the domain's per-interface data to its detach hook,
		 * clearing the slot first.
		 */
1470 		if (dp->dom_ifdetach != NULL && ifp->if_afdata[dp->dom_family])
1471 		{
1472 			void *p = ifp->if_afdata[dp->dom_family];
1473 			if (p) {
1474 				ifp->if_afdata[dp->dom_family] = NULL;
1475 				(*dp->dom_ifdetach)(ifp, p);
1476 			}
1477 		}
1478 
1479 		/*
1480 		 * One would expect multicast memberships (INET and
1481 		 * INET6) on UDP sockets to be purged by the PURGEIF
1482 		 * calls above, but if all addresses were removed from
1483 		 * the interface prior to destruction, the calls will
1484 		 * not be made (e.g. ppp, for which pppd(8) generally
1485 		 * removes addresses before destroying the interface).
1486 		 * Because there is no invariant that multicast
1487 		 * memberships only exist for interfaces with IPv4
1488 		 * addresses, we must call PURGEIF regardless of
1489 		 * addresses.  (Protocols which might store ifnet
1490 		 * pointers are marked with PR_PURGEIF.)
1491 		 */
1492 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
1493 			so.so_proto = pr;
1494 			if (pr->pr_usrreqs && pr->pr_flags & PR_PURGEIF)
1495 				(void)(*pr->pr_usrreqs->pr_purgeif)(&so, ifp);
1496 		}
1497 	}
1498 
1499 	/*
1500 	 * Must be done after the above pr_purgeif because if_psref may be
1501 	 * still used in pr_purgeif.
1502 	 */
1503 	psref_target_destroy(&ifp->if_psref, ifnet_psref_class);
1504 	PSLIST_ENTRY_DESTROY(ifp, if_pslist_entry);
1505 
1506 	pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp);
1507 	(void)pfil_head_destroy(ifp->if_pfil);
1508 
1509 	/* Announce that the interface is gone. */
1510 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1511 
1512 	IF_AFDATA_LOCK_DESTROY(ifp);
1513 
1514 	/*
1515 	 * remove packets that came from ifp, from software interrupt queues.
	 *
	 * NOTE(review): each dom_ifqueues slot is set to NULL before its
	 * queue is scanned, which modifies the domain rather than the
	 * interface; confirm this is intended.
1516 	 */
1517 	DOMAIN_FOREACH(dp) {
1518 		for (i = 0; i < __arraycount(dp->dom_ifqueues); i++) {
1519 			struct ifqueue *iq = dp->dom_ifqueues[i];
1520 			if (iq == NULL)
1521 				break;
1522 			dp->dom_ifqueues[i] = NULL;
1523 			if_detach_queues(ifp, iq);
1524 		}
1525 	}
1526 
1527 	/*
1528 	 * IP queues have to be processed separately: net-queue barrier
1529 	 * ensures that the packets are dequeued while a cross-call will
1530 	 * ensure that the interrupts have completed. FIXME: not quite..
1531 	 */
1532 #ifdef INET
1533 	pktq_barrier(ip_pktq);
1534 #endif
1535 #ifdef INET6
1536 	if (in6_present)
1537 		pktq_barrier(ip6_pktq);
1538 #endif
1539 	xc_barrier(0);
1540 
	/* Destroy the interface's if_percpuq, if it has one. */
1541 	if (ifp->if_percpuq != NULL) {
1542 		if_percpuq_destroy(ifp->if_percpuq);
1543 		ifp->if_percpuq = NULL;
1544 	}
1545 
	/* Release the ioctl lock, the send-queue lock and the statistics. */
1546 	mutex_obj_free(ifp->if_ioctl_lock);
1547 	ifp->if_ioctl_lock = NULL;
1548 	mutex_obj_free(ifp->if_snd.ifq_lock);
1549 	if_stats_fini(ifp);
1550 
1551 	splx(s);
1552 
1553 #ifdef IFAREF_DEBUG
1554 	if_check_and_free_ifa_list(ifp);
1555 #endif
1556 }
1557 
1558 static void
1559 if_detach_queues(struct ifnet *ifp, struct ifqueue *q)
1560 {
1561 	struct mbuf *m, *prev, *next;
1562 
1563 	prev = NULL;
1564 	for (m = q->ifq_head; m != NULL; m = next) {
1565 		KASSERT((m->m_flags & M_PKTHDR) != 0);
1566 
1567 		next = m->m_nextpkt;
1568 		if (m->m_pkthdr.rcvif_index != ifp->if_index) {
1569 			prev = m;
1570 			continue;
1571 		}
1572 
1573 		if (prev != NULL)
1574 			prev->m_nextpkt = m->m_nextpkt;
1575 		else
1576 			q->ifq_head = m->m_nextpkt;
1577 		if (q->ifq_tail == m)
1578 			q->ifq_tail = prev;
1579 		q->ifq_len--;
1580 
1581 		m->m_nextpkt = NULL;
1582 		m_freem(m);
1583 		IF_DROP(q);
1584 	}
1585 }
1586 
1587 /*
1588  * Callback for a radix tree walk to delete all references to an
1589  * ifnet.
1590  */
1591 static int
1592 if_delroute_matcher(struct rtentry *rt, void *v)
1593 {
1594 	struct ifnet *ifp = (struct ifnet *)v;
1595 
1596 	if (rt->rt_ifp == ifp)
1597 		return 1;
1598 	else
1599 		return 0;
1600 }
1601 
1602 /*
1603  * Create a clone network interface.
1604  */
1605 static int
1606 if_clone_create(const char *name)
1607 {
1608 	struct if_clone *ifc;
1609 	int unit;
1610 	struct ifnet *ifp;
1611 	struct psref psref;
1612 
1613 	KASSERT(mutex_owned(&if_clone_mtx));
1614 
1615 	ifc = if_clone_lookup(name, &unit);
1616 	if (ifc == NULL)
1617 		return EINVAL;
1618 
1619 	ifp = if_get(name, &psref);
1620 	if (ifp != NULL) {
1621 		if_put(ifp, &psref);
1622 		return EEXIST;
1623 	}
1624 
1625 	return (*ifc->ifc_create)(ifc, unit);
1626 }
1627 
1628 /*
1629  * Destroy a clone network interface.
1630  */
1631 static int
1632 if_clone_destroy(const char *name)
1633 {
1634 	struct if_clone *ifc;
1635 	struct ifnet *ifp;
1636 	struct psref psref;
1637 	int error;
1638 	int (*if_ioctl)(struct ifnet *, u_long, void *);
1639 
1640 	KASSERT(mutex_owned(&if_clone_mtx));
1641 
1642 	ifc = if_clone_lookup(name, NULL);
1643 	if (ifc == NULL)
1644 		return EINVAL;
1645 
1646 	if (ifc->ifc_destroy == NULL)
1647 		return EOPNOTSUPP;
1648 
1649 	ifp = if_get(name, &psref);
1650 	if (ifp == NULL)
1651 		return ENXIO;
1652 
1653 	/* We have to disable ioctls here */
1654 	IFNET_LOCK(ifp);
1655 	if_ioctl = ifp->if_ioctl;
1656 	ifp->if_ioctl = if_nullioctl;
1657 	IFNET_UNLOCK(ifp);
1658 
1659 	/*
1660 	 * We cannot call ifc_destroy with holding ifp.
1661 	 * Releasing ifp here is safe thanks to if_clone_mtx.
1662 	 */
1663 	if_put(ifp, &psref);
1664 
1665 	error = (*ifc->ifc_destroy)(ifp);
1666 
1667 	if (error != 0) {
1668 		/* We have to restore if_ioctl on error */
1669 		IFNET_LOCK(ifp);
1670 		ifp->if_ioctl = if_ioctl;
1671 		IFNET_UNLOCK(ifp);
1672 	}
1673 
1674 	return error;
1675 }
1676 
/*
 * Returns true when name consists solely of the characters '0'..'9'.
 * The empty string also yields true.
 */
static bool
if_is_unit(const char *name)
{
	const char *p;

	for (p = name; *p != '\0'; p++) {
		if (*p < '0' || *p > '9')
			return false;
	}

	return true;
}
1689 
1690 /*
1691  * Look up a network interface cloner.
1692  */
1693 static struct if_clone *
1694 if_clone_lookup(const char *name, int *unitp)
1695 {
1696 	struct if_clone *ifc;
1697 	const char *cp;
1698 	char *dp, ifname[IFNAMSIZ + 3];
1699 	int unit;
1700 
1701 	KASSERT(mutex_owned(&if_clone_mtx));
1702 
1703 	strcpy(ifname, "if_");
1704 	/* separate interface name from unit */
1705 	/* TODO: search unit number from backward */
1706 	for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ &&
1707 	    *cp && !if_is_unit(cp);)
1708 		*dp++ = *cp++;
1709 
1710 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1711 		return NULL;	/* No name or unit number */
1712 	*dp++ = '\0';
1713 
1714 again:
1715 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1716 		if (strcmp(ifname + 3, ifc->ifc_name) == 0)
1717 			break;
1718 	}
1719 
1720 	if (ifc == NULL) {
1721 		int error;
1722 		if (*ifname == '\0')
1723 			return NULL;
1724 		mutex_exit(&if_clone_mtx);
1725 		error = module_autoload(ifname, MODULE_CLASS_DRIVER);
1726 		mutex_enter(&if_clone_mtx);
1727 		if (error)
1728 			return NULL;
1729 		*ifname = '\0';
1730 		goto again;
1731 	}
1732 
1733 	unit = 0;
1734 	while (cp - name < IFNAMSIZ && *cp) {
1735 		if (*cp < '0' || *cp > '9' || unit >= INT_MAX / 10) {
1736 			/* Bogus unit number. */
1737 			return NULL;
1738 		}
1739 		unit = (unit * 10) + (*cp++ - '0');
1740 	}
1741 
1742 	if (unitp != NULL)
1743 		*unitp = unit;
1744 	return ifc;
1745 }
1746 
1747 /*
1748  * Register a network interface cloner.
1749  */
1750 void
1751 if_clone_attach(struct if_clone *ifc)
1752 {
1753 
1754 	mutex_enter(&if_clone_mtx);
1755 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1756 	if_cloners_count++;
1757 	mutex_exit(&if_clone_mtx);
1758 }
1759 
1760 /*
1761  * Unregister a network interface cloner.
1762  */
1763 void
1764 if_clone_detach(struct if_clone *ifc)
1765 {
1766 
1767 	mutex_enter(&if_clone_mtx);
1768 	LIST_REMOVE(ifc, ifc_list);
1769 	if_cloners_count--;
1770 	mutex_exit(&if_clone_mtx);
1771 }
1772 
1773 /*
1774  * Provide list of interface cloners to userspace.
1775  */
1776 int
1777 if_clone_list(int buf_count, char *buffer, int *total)
1778 {
1779 	char outbuf[IFNAMSIZ], *dst;
1780 	struct if_clone *ifc;
1781 	int count, error = 0;
1782 
1783 	mutex_enter(&if_clone_mtx);
1784 	*total = if_cloners_count;
1785 	if ((dst = buffer) == NULL) {
1786 		/* Just asking how many there are. */
1787 		goto out;
1788 	}
1789 
1790 	if (buf_count < 0) {
1791 		error = EINVAL;
1792 		goto out;
1793 	}
1794 
1795 	count = (if_cloners_count < buf_count) ?
1796 	    if_cloners_count : buf_count;
1797 
1798 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
1799 	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
1800 		(void)strncpy(outbuf, ifc->ifc_name, sizeof(outbuf));
1801 		if (outbuf[sizeof(outbuf) - 1] != '\0') {
1802 			error = ENAMETOOLONG;
1803 			goto out;
1804 		}
1805 		error = copyout(outbuf, dst, sizeof(outbuf));
1806 		if (error != 0)
1807 			break;
1808 	}
1809 
1810 out:
1811 	mutex_exit(&if_clone_mtx);
1812 	return error;
1813 }
1814 
1815 void
1816 ifa_psref_init(struct ifaddr *ifa)
1817 {
1818 
1819 	psref_target_init(&ifa->ifa_psref, ifa_psref_class);
1820 }
1821 
1822 void
1823 ifaref(struct ifaddr *ifa)
1824 {
1825 
1826 	atomic_inc_uint(&ifa->ifa_refcnt);
1827 }
1828 
1829 void
1830 ifafree(struct ifaddr *ifa)
1831 {
1832 	KASSERT(ifa != NULL);
1833 	KASSERTMSG(ifa->ifa_refcnt > 0, "ifa_refcnt=%d", ifa->ifa_refcnt);
1834 
1835 	if (atomic_dec_uint_nv(&ifa->ifa_refcnt) == 0) {
1836 		free(ifa, M_IFADDR);
1837 	}
1838 }
1839 
1840 bool
1841 ifa_is_destroying(struct ifaddr *ifa)
1842 {
1843 
1844 	return ISSET(ifa->ifa_flags, IFA_DESTROYING);
1845 }
1846 
1847 void
1848 ifa_insert(struct ifnet *ifp, struct ifaddr *ifa)
1849 {
1850 
1851 	ifa->ifa_ifp = ifp;
1852 
1853 	/*
1854 	 * Check MP-safety for IFEF_MPSAFE drivers.
1855 	 * Check !IFF_RUNNING for initialization routines that normally don't
1856 	 * take IFNET_LOCK but it's safe because there is no competitor.
1857 	 * XXX there are false positive cases because IFF_RUNNING can be off on
1858 	 * if_stop.
1859 	 */
1860 	KASSERT(!if_is_mpsafe(ifp) || !ISSET(ifp->if_flags, IFF_RUNNING) ||
1861 	    IFNET_LOCKED(ifp));
1862 
1863 	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
1864 	IFADDR_ENTRY_INIT(ifa);
1865 	IFADDR_WRITER_INSERT_TAIL(ifp, ifa);
1866 
1867 	ifaref(ifa);
1868 }
1869 
1870 void
1871 ifa_remove(struct ifnet *ifp, struct ifaddr *ifa)
1872 {
1873 
1874 	KASSERT(ifa->ifa_ifp == ifp);
1875 	/*
1876 	 * Check MP-safety for IFEF_MPSAFE drivers.
1877 	 * if_is_deactivated indicates ifa_remove is called form if_detach
1878 	 * where is safe even if IFNET_LOCK isn't held.
1879 	 */
1880 	KASSERT(!if_is_mpsafe(ifp) || if_is_deactivated(ifp) || IFNET_LOCKED(ifp));
1881 
1882 	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
1883 	IFADDR_WRITER_REMOVE(ifa);
1884 #ifdef NET_MPSAFE
1885 	IFNET_GLOBAL_LOCK();
1886 	pserialize_perform(ifnet_psz);
1887 	IFNET_GLOBAL_UNLOCK();
1888 #endif
1889 
1890 #ifdef NET_MPSAFE
1891 	psref_target_destroy(&ifa->ifa_psref, ifa_psref_class);
1892 #endif
1893 	IFADDR_ENTRY_DESTROY(ifa);
1894 	ifafree(ifa);
1895 }
1896 
1897 void
1898 ifa_acquire(struct ifaddr *ifa, struct psref *psref)
1899 {
1900 
1901 	PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
1902 	psref_acquire(psref, &ifa->ifa_psref, ifa_psref_class);
1903 }
1904 
1905 void
1906 ifa_release(struct ifaddr *ifa, struct psref *psref)
1907 {
1908 
1909 	if (ifa == NULL)
1910 		return;
1911 
1912 	psref_release(psref, &ifa->ifa_psref, ifa_psref_class);
1913 }
1914 
1915 bool
1916 ifa_held(struct ifaddr *ifa)
1917 {
1918 
1919 	return psref_held(&ifa->ifa_psref, ifa_psref_class);
1920 }
1921 
/*
 * Returns 1 when sockaddr_cmp() reports the two socket addresses as
 * equal, 0 otherwise.
 */
static inline int
equal(const struct sockaddr *sa1, const struct sockaddr *sa2)
{
	return !sockaddr_cmp(sa1, sa2);
}
1927 
1928 /*
1929  * Locate an interface based on a complete address.
1930  */
1931 /*ARGSUSED*/
1932 struct ifaddr *
1933 ifa_ifwithaddr(const struct sockaddr *addr)
1934 {
1935 	struct ifnet *ifp;
1936 	struct ifaddr *ifa;
1937 
1938 	IFNET_READER_FOREACH(ifp) {
1939 		if (if_is_deactivated(ifp))
1940 			continue;
1941 		IFADDR_READER_FOREACH(ifa, ifp) {
1942 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1943 				continue;
1944 			if (equal(addr, ifa->ifa_addr))
1945 				return ifa;
1946 			if ((ifp->if_flags & IFF_BROADCAST) &&
1947 			    ifa->ifa_broadaddr &&
1948 			    /* IP6 doesn't have broadcast */
1949 			    ifa->ifa_broadaddr->sa_len != 0 &&
1950 			    equal(ifa->ifa_broadaddr, addr))
1951 				return ifa;
1952 		}
1953 	}
1954 	return NULL;
1955 }
1956 
1957 struct ifaddr *
1958 ifa_ifwithaddr_psref(const struct sockaddr *addr, struct psref *psref)
1959 {
1960 	struct ifaddr *ifa;
1961 	int s = pserialize_read_enter();
1962 
1963 	ifa = ifa_ifwithaddr(addr);
1964 	if (ifa != NULL)
1965 		ifa_acquire(ifa, psref);
1966 	pserialize_read_exit(s);
1967 
1968 	return ifa;
1969 }
1970 
1971 /*
1972  * Locate the point to point interface with a given destination address.
1973  */
1974 /*ARGSUSED*/
1975 struct ifaddr *
1976 ifa_ifwithdstaddr(const struct sockaddr *addr)
1977 {
1978 	struct ifnet *ifp;
1979 	struct ifaddr *ifa;
1980 
1981 	IFNET_READER_FOREACH(ifp) {
1982 		if (if_is_deactivated(ifp))
1983 			continue;
1984 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
1985 			continue;
1986 		IFADDR_READER_FOREACH(ifa, ifp) {
1987 			if (ifa->ifa_addr->sa_family != addr->sa_family ||
1988 			    ifa->ifa_dstaddr == NULL)
1989 				continue;
1990 			if (equal(addr, ifa->ifa_dstaddr))
1991 				return ifa;
1992 		}
1993 	}
1994 
1995 	return NULL;
1996 }
1997 
1998 struct ifaddr *
1999 ifa_ifwithdstaddr_psref(const struct sockaddr *addr, struct psref *psref)
2000 {
2001 	struct ifaddr *ifa;
2002 	int s;
2003 
2004 	s = pserialize_read_enter();
2005 	ifa = ifa_ifwithdstaddr(addr);
2006 	if (ifa != NULL)
2007 		ifa_acquire(ifa, psref);
2008 	pserialize_read_exit(s);
2009 
2010 	return ifa;
2011 }
2012 
2013 /*
2014  * Find an interface on a specific network.  If many, choice
2015  * is most specific found.
2016  */
2017 struct ifaddr *
2018 ifa_ifwithnet(const struct sockaddr *addr)
2019 {
2020 	struct ifnet *ifp;
2021 	struct ifaddr *ifa, *ifa_maybe = NULL;
2022 	const struct sockaddr_dl *sdl;
2023 	u_int af = addr->sa_family;
2024 	const char *addr_data = addr->sa_data, *cplim;
2025 
2026 	if (af == AF_LINK) {
2027 		sdl = satocsdl(addr);
2028 		if (sdl->sdl_index && sdl->sdl_index < if_indexlim &&
2029 		    ifindex2ifnet[sdl->sdl_index] &&
2030 		    !if_is_deactivated(ifindex2ifnet[sdl->sdl_index])) {
2031 			return ifindex2ifnet[sdl->sdl_index]->if_dl;
2032 		}
2033 	}
2034 #ifdef NETATALK
2035 	if (af == AF_APPLETALK) {
2036 		const struct sockaddr_at *sat, *sat2;
2037 		sat = (const struct sockaddr_at *)addr;
2038 		IFNET_READER_FOREACH(ifp) {
2039 			if (if_is_deactivated(ifp))
2040 				continue;
2041 			ifa = at_ifawithnet((const struct sockaddr_at *)addr, ifp);
2042 			if (ifa == NULL)
2043 				continue;
2044 			sat2 = (struct sockaddr_at *)ifa->ifa_addr;
2045 			if (sat2->sat_addr.s_net == sat->sat_addr.s_net)
2046 				return ifa; /* exact match */
2047 			if (ifa_maybe == NULL) {
2048 				/* else keep the if with the right range */
2049 				ifa_maybe = ifa;
2050 			}
2051 		}
2052 		return ifa_maybe;
2053 	}
2054 #endif
2055 	IFNET_READER_FOREACH(ifp) {
2056 		if (if_is_deactivated(ifp))
2057 			continue;
2058 		IFADDR_READER_FOREACH(ifa, ifp) {
2059 			const char *cp, *cp2, *cp3;
2060 
2061 			if (ifa->ifa_addr->sa_family != af ||
2062 			    ifa->ifa_netmask == NULL)
2063  next:				continue;
2064 			cp = addr_data;
2065 			cp2 = ifa->ifa_addr->sa_data;
2066 			cp3 = ifa->ifa_netmask->sa_data;
2067 			cplim = (const char *)ifa->ifa_netmask +
2068 			    ifa->ifa_netmask->sa_len;
2069 			while (cp3 < cplim) {
2070 				if ((*cp++ ^ *cp2++) & *cp3++) {
2071 					/* want to continue for() loop */
2072 					goto next;
2073 				}
2074 			}
2075 			if (ifa_maybe == NULL ||
2076 			    rt_refines(ifa->ifa_netmask,
2077 			               ifa_maybe->ifa_netmask))
2078 				ifa_maybe = ifa;
2079 		}
2080 	}
2081 	return ifa_maybe;
2082 }
2083 
2084 struct ifaddr *
2085 ifa_ifwithnet_psref(const struct sockaddr *addr, struct psref *psref)
2086 {
2087 	struct ifaddr *ifa;
2088 	int s;
2089 
2090 	s = pserialize_read_enter();
2091 	ifa = ifa_ifwithnet(addr);
2092 	if (ifa != NULL)
2093 		ifa_acquire(ifa, psref);
2094 	pserialize_read_exit(s);
2095 
2096 	return ifa;
2097 }
2098 
2099 /*
2100  * Find the interface of the addresss.
2101  */
2102 struct ifaddr *
2103 ifa_ifwithladdr(const struct sockaddr *addr)
2104 {
2105 	struct ifaddr *ia;
2106 
2107 	if ((ia = ifa_ifwithaddr(addr)) || (ia = ifa_ifwithdstaddr(addr)) ||
2108 	    (ia = ifa_ifwithnet(addr)))
2109 		return ia;
2110 	return NULL;
2111 }
2112 
2113 struct ifaddr *
2114 ifa_ifwithladdr_psref(const struct sockaddr *addr, struct psref *psref)
2115 {
2116 	struct ifaddr *ifa;
2117 	int s;
2118 
2119 	s = pserialize_read_enter();
2120 	ifa = ifa_ifwithladdr(addr);
2121 	if (ifa != NULL)
2122 		ifa_acquire(ifa, psref);
2123 	pserialize_read_exit(s);
2124 
2125 	return ifa;
2126 }
2127 
2128 /*
2129  * Find an interface using a specific address family
2130  */
2131 struct ifaddr *
2132 ifa_ifwithaf(int af)
2133 {
2134 	struct ifnet *ifp;
2135 	struct ifaddr *ifa = NULL;
2136 	int s;
2137 
2138 	s = pserialize_read_enter();
2139 	IFNET_READER_FOREACH(ifp) {
2140 		if (if_is_deactivated(ifp))
2141 			continue;
2142 		IFADDR_READER_FOREACH(ifa, ifp) {
2143 			if (ifa->ifa_addr->sa_family == af)
2144 				goto out;
2145 		}
2146 	}
2147 out:
2148 	pserialize_read_exit(s);
2149 	return ifa;
2150 }
2151 
2152 /*
2153  * Find an interface address specific to an interface best matching
2154  * a given address.
2155  */
2156 struct ifaddr *
2157 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
2158 {
2159 	struct ifaddr *ifa;
2160 	const char *cp, *cp2, *cp3;
2161 	const char *cplim;
2162 	struct ifaddr *ifa_maybe = 0;
2163 	u_int af = addr->sa_family;
2164 
2165 	if (if_is_deactivated(ifp))
2166 		return NULL;
2167 
2168 	if (af >= AF_MAX)
2169 		return NULL;
2170 
2171 	IFADDR_READER_FOREACH(ifa, ifp) {
2172 		if (ifa->ifa_addr->sa_family != af)
2173 			continue;
2174 		ifa_maybe = ifa;
2175 		if (ifa->ifa_netmask == NULL) {
2176 			if (equal(addr, ifa->ifa_addr) ||
2177 			    (ifa->ifa_dstaddr &&
2178 			     equal(addr, ifa->ifa_dstaddr)))
2179 				return ifa;
2180 			continue;
2181 		}
2182 		cp = addr->sa_data;
2183 		cp2 = ifa->ifa_addr->sa_data;
2184 		cp3 = ifa->ifa_netmask->sa_data;
2185 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
2186 		for (; cp3 < cplim; cp3++) {
2187 			if ((*cp++ ^ *cp2++) & *cp3)
2188 				break;
2189 		}
2190 		if (cp3 == cplim)
2191 			return ifa;
2192 	}
2193 	return ifa_maybe;
2194 }
2195 
2196 struct ifaddr *
2197 ifaof_ifpforaddr_psref(const struct sockaddr *addr, struct ifnet *ifp,
2198     struct psref *psref)
2199 {
2200 	struct ifaddr *ifa;
2201 	int s;
2202 
2203 	s = pserialize_read_enter();
2204 	ifa = ifaof_ifpforaddr(addr, ifp);
2205 	if (ifa != NULL)
2206 		ifa_acquire(ifa, psref);
2207 	pserialize_read_exit(s);
2208 
2209 	return ifa;
2210 }
2211 
2212 /*
2213  * Default action when installing a route with a Link Level gateway.
2214  * Lookup an appropriate real ifa to point to.
2215  * This should be moved to /sys/net/link.c eventually.
2216  */
2217 void
2218 link_rtrequest(int cmd, struct rtentry *rt, const struct rt_addrinfo *info)
2219 {
2220 	struct ifaddr *ifa;
2221 	const struct sockaddr *dst;
2222 	struct ifnet *ifp;
2223 	struct psref psref;
2224 
2225 	if (cmd != RTM_ADD || ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
2226 		return;
2227 	ifp = rt->rt_ifa->ifa_ifp;
2228 	dst = rt_getkey(rt);
2229 	if ((ifa = ifaof_ifpforaddr_psref(dst, ifp, &psref)) != NULL) {
2230 		rt_replace_ifa(rt, ifa);
2231 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
2232 			ifa->ifa_rtrequest(cmd, rt, info);
2233 		ifa_release(ifa, &psref);
2234 	}
2235 }
2236 
/*
 * bitmask macros to manage a densely packed link_state change queue.
 * Because we need to store LINK_STATE_UNKNOWN(0), LINK_STATE_DOWN(1) and
 * LINK_STATE_UP(2) we need 2 bits for each state change.
 * As a state change to store is 0, treat all bits set as an unset item.
 *
 * q is the integer holding the queue; item 0 occupies its lowest bits
 * and is the oldest entry.
 *
 *   LQ_MASK(i)          bit mask selecting item i
 *   LQ_ITEM(q, i)       value of item i
 *   LQ_STORE(q, i, v)   overwrite item i with v
 *   LQ_MAX(q)           number of items q can hold
 *   LQ_POP(q, v)        move item 0 into v, shift the rest down and mark
 *                       the top item unset
 *   LQ_PUSH(q, v)       shift everything down, discarding item 0, and
 *                       store v in the top item
 *   LQ_FIND_UNSET(q, i) set i to the index of the first unset item, or
 *                       to LQ_MAX(q) when every item is set
 */
#define LQ_ITEM_BITS		2
#define LQ_ITEM_MASK		((1 << LQ_ITEM_BITS) - 1)
#define LQ_MASK(i)		(LQ_ITEM_MASK << (i) * LQ_ITEM_BITS)
#define LINK_STATE_UNSET	LQ_ITEM_MASK
#define LQ_ITEM(q, i)		(((q) & LQ_MASK((i))) >> (i) * LQ_ITEM_BITS)
#define LQ_STORE(q, i, v)						      \
	do {								      \
		(q) &= ~LQ_MASK((i));					      \
		(q) |= (v) << (i) * LQ_ITEM_BITS;			      \
	} while (0 /* CONSTCOND */)
#define LQ_MAX(q)		((sizeof((q)) * NBBY) / LQ_ITEM_BITS)
#define LQ_POP(q, v)							      \
	do {								      \
		(v) = LQ_ITEM((q), 0);					      \
		(q) >>= LQ_ITEM_BITS;					      \
		(q) |= LINK_STATE_UNSET << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS;  \
	} while (0 /* CONSTCOND */)
#define LQ_PUSH(q, v)							      \
	do {								      \
		(q) >>= LQ_ITEM_BITS;					      \
		(q) |= (v) << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS;		      \
	} while (0 /* CONSTCOND */)
/* Every use of the index argument is parenthesized, as for the others. */
#define LQ_FIND_UNSET(q, i)						      \
	for ((i) = 0; (i) < LQ_MAX((q)); (i)++) {			      \
		if (LQ_ITEM((q), (i)) == LINK_STATE_UNSET)		      \
			break;						      \
	}
2270 
2271 /*
2272  * Handle a change in the interface link state and
2273  * queue notifications.
2274  */
2275 void
2276 if_link_state_change(struct ifnet *ifp, int link_state)
2277 {
2278 	int idx;
2279 
2280 	/* Ensure change is to a valid state */
2281 	switch (link_state) {
2282 	case LINK_STATE_UNKNOWN:	/* FALLTHROUGH */
2283 	case LINK_STATE_DOWN:		/* FALLTHROUGH */
2284 	case LINK_STATE_UP:
2285 		break;
2286 	default:
2287 #ifdef DEBUG
2288 		printf("%s: invalid link state %d\n",
2289 		    ifp->if_xname, link_state);
2290 #endif
2291 		return;
2292 	}
2293 
2294 	IF_LINK_STATE_CHANGE_LOCK(ifp);
2295 
2296 	/* Find the last unset event in the queue. */
2297 	LQ_FIND_UNSET(ifp->if_link_queue, idx);
2298 
2299 	if (idx == 0) {
2300 		/*
2301 		 * There is no queue of link state changes.
2302 		 * As we have the lock we can safely compare against the
2303 		 * current link state and return if the same.
2304 		 * Otherwise, if scheduled is true then the interface is being
2305 		 * detached and the queue is being drained so we need
2306 		 * to avoid queuing more work.
2307 		 */
2308 		 if (ifp->if_link_state == link_state || ifp->if_link_scheduled)
2309 			goto out;
2310 	} else {
2311 		/* Ensure link_state doesn't match the last queued state. */
2312 		if (LQ_ITEM(ifp->if_link_queue, idx - 1) == (uint8_t)link_state)
2313 			goto out;
2314 	}
2315 
2316 	/* Handle queue overflow. */
2317 	if (idx == LQ_MAX(ifp->if_link_queue)) {
2318 		uint8_t lost;
2319 
2320 		/*
2321 		 * The DOWN state must be protected from being pushed off
2322 		 * the queue to ensure that userland will always be
2323 		 * in a sane state.
2324 		 * Because DOWN is protected, there is no need to protect
2325 		 * UNKNOWN.
2326 		 * It should be invalid to change from any other state to
2327 		 * UNKNOWN anyway ...
2328 		 */
2329 		lost = LQ_ITEM(ifp->if_link_queue, 0);
2330 		LQ_PUSH(ifp->if_link_queue, (uint8_t)link_state);
2331 		if (lost == LINK_STATE_DOWN) {
2332 			lost = LQ_ITEM(ifp->if_link_queue, 0);
2333 			LQ_STORE(ifp->if_link_queue, 0, LINK_STATE_DOWN);
2334 		}
2335 		printf("%s: lost link state change %s\n",
2336 		    ifp->if_xname,
2337 		    lost == LINK_STATE_UP ? "UP" :
2338 		    lost == LINK_STATE_DOWN ? "DOWN" :
2339 		    "UNKNOWN");
2340 	} else
2341 		LQ_STORE(ifp->if_link_queue, idx, (uint8_t)link_state);
2342 
2343 	if (ifp->if_link_scheduled)
2344 		goto out;
2345 
2346 	ifp->if_link_scheduled = true;
2347 	workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work, NULL);
2348 
2349 out:
2350 	IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2351 }
2352 
2353 /*
2354  * Handle interface link state change notifications.
2355  */
2356 static void
2357 if_link_state_change_process(struct ifnet *ifp, int link_state)
2358 {
2359 	struct domain *dp;
2360 	int s = splnet();
2361 	bool notify;
2362 
2363 	KASSERT(!cpu_intr_p());
2364 
2365 	IF_LINK_STATE_CHANGE_LOCK(ifp);
2366 
2367 	/* Ensure the change is still valid. */
2368 	if (ifp->if_link_state == link_state) {
2369 		IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2370 		splx(s);
2371 		return;
2372 	}
2373 
2374 #ifdef DEBUG
2375 	log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname,
2376 		link_state == LINK_STATE_UP ? "UP" :
2377 		link_state == LINK_STATE_DOWN ? "DOWN" :
2378 		"UNKNOWN",
2379 		ifp->if_link_state == LINK_STATE_UP ? "UP" :
2380 		ifp->if_link_state == LINK_STATE_DOWN ? "DOWN" :
2381 		"UNKNOWN");
2382 #endif
2383 
2384 	/*
2385 	 * When going from UNKNOWN to UP, we need to mark existing
2386 	 * addresses as tentative and restart DAD as we may have
2387 	 * erroneously not found a duplicate.
2388 	 *
2389 	 * This needs to happen before rt_ifmsg to avoid a race where
2390 	 * listeners would have an address and expect it to work right
2391 	 * away.
2392 	 */
2393 	notify = (link_state == LINK_STATE_UP &&
2394 	    ifp->if_link_state == LINK_STATE_UNKNOWN);
2395 	ifp->if_link_state = link_state;
2396 	/* The following routines may sleep so release the spin mutex */
2397 	IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2398 
2399 	KERNEL_LOCK_UNLESS_NET_MPSAFE();
2400 	if (notify) {
2401 		DOMAIN_FOREACH(dp) {
2402 			if (dp->dom_if_link_state_change != NULL)
2403 				dp->dom_if_link_state_change(ifp,
2404 				    LINK_STATE_DOWN);
2405 		}
2406 	}
2407 
2408 	/* Notify that the link state has changed. */
2409 	rt_ifmsg(ifp);
2410 
2411 #if NCARP > 0
2412 	if (ifp->if_carp)
2413 		carp_carpdev_state(ifp);
2414 #endif
2415 
2416 	if (ifp->if_link_state_changed != NULL)
2417 		ifp->if_link_state_changed(ifp, link_state);
2418 
2419 #if NBRIDGE > 0
2420 	if (ifp->if_bridge != NULL)
2421 		bridge_calc_link_state(ifp->if_bridge);
2422 #endif
2423 
2424 	DOMAIN_FOREACH(dp) {
2425 		if (dp->dom_if_link_state_change != NULL)
2426 			dp->dom_if_link_state_change(ifp, link_state);
2427 	}
2428 	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2429 	splx(s);
2430 }
2431 
2432 /*
2433  * Process the interface link state change queue.
2434  */
2435 static void
2436 if_link_state_change_work(struct work *work, void *arg)
2437 {
2438 	struct ifnet *ifp = container_of(work, struct ifnet, if_link_work);
2439 	int s;
2440 	uint8_t state;
2441 
2442 	KERNEL_LOCK_UNLESS_NET_MPSAFE();
2443 	s = splnet();
2444 
2445 	/* Pop a link state change from the queue and process it.
2446 	 * If there is nothing to process then if_detach() has been called.
2447 	 * We keep if_link_scheduled = true so the queue can safely drain
2448 	 * without more work being queued. */
2449 	IF_LINK_STATE_CHANGE_LOCK(ifp);
2450 	LQ_POP(ifp->if_link_queue, state);
2451 	IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2452 	if (state == LINK_STATE_UNSET)
2453 		goto out;
2454 
2455 	if_link_state_change_process(ifp, state);
2456 
2457 	/* If there is a link state change to come, schedule it. */
2458 	IF_LINK_STATE_CHANGE_LOCK(ifp);
2459 	if (LQ_ITEM(ifp->if_link_queue, 0) != LINK_STATE_UNSET) {
2460 		ifp->if_link_scheduled = true;
2461 		workqueue_enqueue(ifnet_link_state_wq, &ifp->if_link_work, NULL);
2462 	} else
2463 		ifp->if_link_scheduled = false;
2464 	IF_LINK_STATE_CHANGE_UNLOCK(ifp);
2465 
2466 out:
2467 	splx(s);
2468 	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2469 }
2470 
2471 /*
2472  * Used to mark addresses on an interface as DETATCHED or TENTATIVE
2473  * and thus start Duplicate Address Detection without changing the
2474  * real link state.
2475  */
2476 void
2477 if_domain_link_state_change(struct ifnet *ifp, int link_state)
2478 {
2479 	struct domain *dp;
2480 	int s = splnet();
2481 
2482 	KERNEL_LOCK_UNLESS_NET_MPSAFE();
2483 
2484 	DOMAIN_FOREACH(dp) {
2485 		if (dp->dom_if_link_state_change != NULL)
2486 			dp->dom_if_link_state_change(ifp, link_state);
2487 	}
2488 
2489 	splx(s);
2490 	KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
2491 }
2492 
2493 /*
2494  * Default action when installing a local route on a point-to-point
2495  * interface.
2496  */
2497 void
2498 p2p_rtrequest(int req, struct rtentry *rt,
2499     __unused const struct rt_addrinfo *info)
2500 {
2501 	struct ifnet *ifp = rt->rt_ifp;
2502 	struct ifaddr *ifa, *lo0ifa;
2503 	int s = pserialize_read_enter();
2504 
2505 	switch (req) {
2506 	case RTM_ADD:
2507 		if ((rt->rt_flags & RTF_LOCAL) == 0)
2508 			break;
2509 
2510 		rt->rt_ifp = lo0ifp;
2511 
2512 		if (ISSET(info->rti_flags, RTF_DONTCHANGEIFA))
2513 			break;
2514 
2515 		IFADDR_READER_FOREACH(ifa, ifp) {
2516 			if (equal(rt_getkey(rt), ifa->ifa_addr))
2517 				break;
2518 		}
2519 		if (ifa == NULL)
2520 			break;
2521 
2522 		/*
2523 		 * Ensure lo0 has an address of the same family.
2524 		 */
2525 		IFADDR_READER_FOREACH(lo0ifa, lo0ifp) {
2526 			if (lo0ifa->ifa_addr->sa_family ==
2527 			    ifa->ifa_addr->sa_family)
2528 				break;
2529 		}
2530 		if (lo0ifa == NULL)
2531 			break;
2532 
2533 		/*
2534 		 * Make sure to set rt->rt_ifa to the interface
2535 		 * address we are using, otherwise we will have trouble
2536 		 * with source address selection.
2537 		 */
2538 		if (ifa != rt->rt_ifa)
2539 			rt_replace_ifa(rt, ifa);
2540 		break;
2541 	case RTM_DELETE:
2542 	default:
2543 		break;
2544 	}
2545 	pserialize_read_exit(s);
2546 }
2547 
2548 static void
2549 _if_down(struct ifnet *ifp)
2550 {
2551 	struct ifaddr *ifa;
2552 	struct domain *dp;
2553 	int s, bound;
2554 	struct psref psref;
2555 
2556 	ifp->if_flags &= ~IFF_UP;
2557 	nanotime(&ifp->if_lastchange);
2558 
2559 	bound = curlwp_bind();
2560 	s = pserialize_read_enter();
2561 	IFADDR_READER_FOREACH(ifa, ifp) {
2562 		ifa_acquire(ifa, &psref);
2563 		pserialize_read_exit(s);
2564 
2565 		pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
2566 
2567 		s = pserialize_read_enter();
2568 		ifa_release(ifa, &psref);
2569 	}
2570 	pserialize_read_exit(s);
2571 	curlwp_bindx(bound);
2572 
2573 	IFQ_PURGE(&ifp->if_snd);
2574 #if NCARP > 0
2575 	if (ifp->if_carp)
2576 		carp_carpdev_state(ifp);
2577 #endif
2578 	rt_ifmsg(ifp);
2579 	DOMAIN_FOREACH(dp) {
2580 		if (dp->dom_if_down)
2581 			dp->dom_if_down(ifp);
2582 	}
2583 }
2584 
/*
 * Mark an interface down that has already been deactivated.
 * The deactivated state is asserted before the common down processing.
 */
static void
if_down_deactivated(struct ifnet *ifp)
{
	KASSERT(if_is_deactivated(ifp));

	_if_down(ifp);
}
2592 
/*
 * Mark an interface down; the caller must already hold the ifnet lock
 * (asserted via IFNET_LOCKED).
 */
void
if_down_locked(struct ifnet *ifp)
{
	KASSERT(IFNET_LOCKED(ifp));

	_if_down(ifp);
}
2600 
/*
 * Mark an interface down and notify protocols of the transition.
 * Takes the ifnet lock around if_down_locked().
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{
	IFNET_LOCK(ifp);

	if_down_locked(ifp);

	IFNET_UNLOCK(ifp);
}
2614 
2615 /*
2616  * Must be called with holding if_ioctl_lock.
2617  */
2618 static void
2619 if_up_locked(struct ifnet *ifp)
2620 {
2621 #ifdef notyet
2622 	struct ifaddr *ifa;
2623 #endif
2624 	struct domain *dp;
2625 
2626 	KASSERT(IFNET_LOCKED(ifp));
2627 
2628 	KASSERT(!if_is_deactivated(ifp));
2629 	ifp->if_flags |= IFF_UP;
2630 	nanotime(&ifp->if_lastchange);
2631 #ifdef notyet
2632 	/* this has no effect on IP, and will kill all ISO connections XXX */
2633 	IFADDR_READER_FOREACH(ifa, ifp)
2634 		pfctlinput(PRC_IFUP, ifa->ifa_addr);
2635 #endif
2636 #if NCARP > 0
2637 	if (ifp->if_carp)
2638 		carp_carpdev_state(ifp);
2639 #endif
2640 	rt_ifmsg(ifp);
2641 	DOMAIN_FOREACH(dp) {
2642 		if (dp->dom_if_up)
2643 			dp->dom_if_up(ifp);
2644 	}
2645 }
2646 
2647 /*
2648  * Handle interface slowtimo timer routine.  Called
2649  * from softclock, we decrement timer (if set) and
2650  * call the appropriate interface routine on expiration.
2651  */
2652 static void
2653 if_slowtimo(void *arg)
2654 {
2655 	void (*slowtimo)(struct ifnet *);
2656 	struct ifnet *ifp = arg;
2657 	int s;
2658 
2659 	slowtimo = ifp->if_slowtimo;
2660 	if (__predict_false(slowtimo == NULL))
2661 		return;
2662 
2663 	s = splnet();
2664 	if (ifp->if_timer != 0 && --ifp->if_timer == 0)
2665 		(*slowtimo)(ifp);
2666 
2667 	splx(s);
2668 
2669 	if (__predict_true(ifp->if_slowtimo != NULL))
2670 		callout_schedule(ifp->if_slowtimo_ch, hz / IFNET_SLOWHZ);
2671 }
2672 
/*
 * Mark an interface up and notify protocols of the transition.
 * Takes the ifnet lock around if_up_locked().
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{
	IFNET_LOCK(ifp);

	if_up_locked(ifp);

	IFNET_UNLOCK(ifp);
}
2686 
2687 /*
2688  * Set/clear promiscuous mode on interface ifp based on the truth value
2689  * of pswitch.  The calls are reference counted so that only the first
2690  * "on" request actually has an effect, as does the final "off" request.
2691  * Results are undefined if the "off" and "on" requests are not matched.
2692  */
2693 int
2694 ifpromisc_locked(struct ifnet *ifp, int pswitch)
2695 {
2696 	int pcount, ret = 0;
2697 	u_short nflags;
2698 
2699 	KASSERT(IFNET_LOCKED(ifp));
2700 
2701 	pcount = ifp->if_pcount;
2702 	if (pswitch) {
2703 		/*
2704 		 * Allow the device to be "placed" into promiscuous
2705 		 * mode even if it is not configured up.  It will
2706 		 * consult IFF_PROMISC when it is brought up.
2707 		 */
2708 		if (ifp->if_pcount++ != 0)
2709 			goto out;
2710 		nflags = ifp->if_flags | IFF_PROMISC;
2711 	} else {
2712 		if (--ifp->if_pcount > 0)
2713 			goto out;
2714 		nflags = ifp->if_flags & ~IFF_PROMISC;
2715 	}
2716 	ret = if_flags_set(ifp, nflags);
2717 	/* Restore interface state if not successful. */
2718 	if (ret != 0) {
2719 		ifp->if_pcount = pcount;
2720 	}
2721 out:
2722 	return ret;
2723 }
2724 
/*
 * Locking wrapper around ifpromisc_locked(): takes the ifnet lock,
 * applies the promiscuous mode request and returns its result.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	int error;

	IFNET_LOCK(ifp);
	error = ifpromisc_locked(ifp, pswitch);
	IFNET_UNLOCK(ifp);

	return error;
}
2736 
2737 /*
2738  * Map interface name to
2739  * interface structure pointer.
2740  */
2741 struct ifnet *
2742 ifunit(const char *name)
2743 {
2744 	struct ifnet *ifp;
2745 	const char *cp = name;
2746 	u_int unit = 0;
2747 	u_int i;
2748 	int s;
2749 
2750 	/*
2751 	 * If the entire name is a number, treat it as an ifindex.
2752 	 */
2753 	for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++) {
2754 		unit = unit * 10 + (*cp - '0');
2755 	}
2756 
2757 	/*
2758 	 * If the number took all of the name, then it's a valid ifindex.
2759 	 */
2760 	if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
2761 		return if_byindex(unit);
2762 
2763 	ifp = NULL;
2764 	s = pserialize_read_enter();
2765 	IFNET_READER_FOREACH(ifp) {
2766 		if (if_is_deactivated(ifp))
2767 			continue;
2768 	 	if (strcmp(ifp->if_xname, name) == 0)
2769 			goto out;
2770 	}
2771 out:
2772 	pserialize_read_exit(s);
2773 	return ifp;
2774 }
2775 
2776 /*
2777  * Get a reference of an ifnet object by an interface name.
2778  * The returned reference is protected by psref(9). The caller
2779  * must release a returned reference by if_put after use.
2780  */
2781 struct ifnet *
2782 if_get(const char *name, struct psref *psref)
2783 {
2784 	struct ifnet *ifp;
2785 	const char *cp = name;
2786 	u_int unit = 0;
2787 	u_int i;
2788 	int s;
2789 
2790 	/*
2791 	 * If the entire name is a number, treat it as an ifindex.
2792 	 */
2793 	for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++) {
2794 		unit = unit * 10 + (*cp - '0');
2795 	}
2796 
2797 	/*
2798 	 * If the number took all of the name, then it's a valid ifindex.
2799 	 */
2800 	if (i == IFNAMSIZ || (cp != name && *cp == '\0'))
2801 		return if_get_byindex(unit, psref);
2802 
2803 	ifp = NULL;
2804 	s = pserialize_read_enter();
2805 	IFNET_READER_FOREACH(ifp) {
2806 		if (if_is_deactivated(ifp))
2807 			continue;
2808 		if (strcmp(ifp->if_xname, name) == 0) {
2809 			PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
2810 			psref_acquire(psref, &ifp->if_psref,
2811 			    ifnet_psref_class);
2812 			goto out;
2813 		}
2814 	}
2815 out:
2816 	pserialize_read_exit(s);
2817 	return ifp;
2818 }
2819 
2820 /*
2821  * Release a reference of an ifnet object given by if_get, if_get_byindex
2822  * or if_get_bylla.
2823  */
2824 void
2825 if_put(const struct ifnet *ifp, struct psref *psref)
2826 {
2827 
2828 	if (ifp == NULL)
2829 		return;
2830 
2831 	psref_release(psref, &ifp->if_psref, ifnet_psref_class);
2832 }
2833 
2834 /*
2835  * Return ifp having idx. Return NULL if not found.  Normally if_byindex
2836  * should be used.
2837  */
2838 ifnet_t *
2839 _if_byindex(u_int idx)
2840 {
2841 
2842 	return (__predict_true(idx < if_indexlim)) ? ifindex2ifnet[idx] : NULL;
2843 }
2844 
2845 /*
2846  * Return ifp having idx. Return NULL if not found or the found ifp is
2847  * already deactivated.
2848  */
2849 ifnet_t *
2850 if_byindex(u_int idx)
2851 {
2852 	ifnet_t *ifp;
2853 
2854 	ifp = _if_byindex(idx);
2855 	if (ifp != NULL && if_is_deactivated(ifp))
2856 		ifp = NULL;
2857 	return ifp;
2858 }
2859 
2860 /*
2861  * Get a reference of an ifnet object by an interface index.
2862  * The returned reference is protected by psref(9). The caller
2863  * must release a returned reference by if_put after use.
2864  */
2865 ifnet_t *
2866 if_get_byindex(u_int idx, struct psref *psref)
2867 {
2868 	ifnet_t *ifp;
2869 	int s;
2870 
2871 	s = pserialize_read_enter();
2872 	ifp = if_byindex(idx);
2873 	if (__predict_true(ifp != NULL)) {
2874 		PSREF_DEBUG_FILL_RETURN_ADDRESS(psref);
2875 		psref_acquire(psref, &ifp->if_psref, ifnet_psref_class);
2876 	}
2877 	pserialize_read_exit(s);
2878 
2879 	return ifp;
2880 }
2881 
2882 ifnet_t *
2883 if_get_bylla(const void *lla, unsigned char lla_len, struct psref *psref)
2884 {
2885 	ifnet_t *ifp;
2886 	int s;
2887 
2888 	s = pserialize_read_enter();
2889 	IFNET_READER_FOREACH(ifp) {
2890 		if (if_is_deactivated(ifp))
2891 			continue;
2892 		if (ifp->if_addrlen != lla_len)
2893 			continue;
2894 		if (memcmp(lla, CLLADDR(ifp->if_sadl), lla_len) == 0) {
2895 			psref_acquire(psref, &ifp->if_psref,
2896 			    ifnet_psref_class);
2897 			break;
2898 		}
2899 	}
2900 	pserialize_read_exit(s);
2901 
2902 	return ifp;
2903 }
2904 
2905 /*
2906  * Note that it's safe only if the passed ifp is guaranteed to not be freed,
2907  * for example using pserialize or the ifp is already held or some other
2908  * object is held which guarantes the ifp to not be freed indirectly.
2909  */
2910 void
2911 if_acquire(struct ifnet *ifp, struct psref *psref)
2912 {
2913 
2914 	KASSERT(ifp->if_index != 0);
2915 	psref_acquire(psref, &ifp->if_psref, ifnet_psref_class);
2916 }
2917 
2918 bool
2919 if_held(struct ifnet *ifp)
2920 {
2921 
2922 	return psref_held(&ifp->if_psref, ifnet_psref_class);
2923 }
2924 
2925 /*
2926  * Some tunnel interfaces can nest, e.g. IPv4 over IPv4 gif(4) tunnel over IPv4.
2927  * Check the tunnel nesting count.
2928  * Return > 0, if tunnel nesting count is more than limit.
2929  * Return 0, if tunnel nesting count is equal or less than limit.
2930  */
2931 int
2932 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, int limit)
2933 {
2934 	struct m_tag *mtag;
2935 	int *count;
2936 
2937 	mtag = m_tag_find(m, PACKET_TAG_TUNNEL_INFO);
2938 	if (mtag != NULL) {
2939 		count = (int *)(mtag + 1);
2940 		if (++(*count) > limit) {
2941 			log(LOG_NOTICE,
2942 			    "%s: recursively called too many times(%d)\n",
2943 			    ifp->if_xname, *count);
2944 			return EIO;
2945 		}
2946 	} else {
2947 		mtag = m_tag_get(PACKET_TAG_TUNNEL_INFO, sizeof(*count),
2948 		    M_NOWAIT);
2949 		if (mtag != NULL) {
2950 			m_tag_prepend(m, mtag);
2951 			count = (int *)(mtag + 1);
2952 			*count = 0;
2953 		} else {
2954 			log(LOG_DEBUG,
2955 			    "%s: m_tag_get() failed, recursion calls are not prevented.\n",
2956 			    ifp->if_xname);
2957 		}
2958 	}
2959 
2960 	return 0;
2961 }
2962 
2963 static void
2964 if_tunnel_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
2965 {
2966 	struct tunnel_ro *tro = p;
2967 
2968 	tro->tr_ro = kmem_zalloc(sizeof(*tro->tr_ro), KM_SLEEP);
2969 	tro->tr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
2970 }
2971 
2972 static void
2973 if_tunnel_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
2974 {
2975 	struct tunnel_ro *tro = p;
2976 
2977 	rtcache_free(tro->tr_ro);
2978 	kmem_free(tro->tr_ro, sizeof(*tro->tr_ro));
2979 
2980 	mutex_obj_free(tro->tr_lock);
2981 }
2982 
2983 percpu_t *
2984 if_tunnel_alloc_ro_percpu(void)
2985 {
2986 
2987 	return percpu_create(sizeof(struct tunnel_ro),
2988 	    if_tunnel_ro_init_pc, if_tunnel_ro_fini_pc, NULL);
2989 }
2990 
2991 void
2992 if_tunnel_free_ro_percpu(percpu_t *ro_percpu)
2993 {
2994 
2995 	percpu_free(ro_percpu, sizeof(struct tunnel_ro));
2996 }
2997 
2998 
2999 static void
3000 if_tunnel_rtcache_free_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
3001 {
3002 	struct tunnel_ro *tro = p;
3003 
3004 	mutex_enter(tro->tr_lock);
3005 	rtcache_free(tro->tr_ro);
3006 	mutex_exit(tro->tr_lock);
3007 }
3008 
3009 void if_tunnel_ro_percpu_rtcache_free(percpu_t *ro_percpu)
3010 {
3011 
3012 	percpu_foreach(ro_percpu, if_tunnel_rtcache_free_pc, NULL);
3013 }
3014 
3015 void
3016 if_export_if_data(ifnet_t * const ifp, struct if_data *ifi, bool zero_stats)
3017 {
3018 
3019 	/* Collet the volatile stats first; this zeros *ifi. */
3020 	if_stats_to_if_data(ifp, ifi, zero_stats);
3021 
3022 	ifi->ifi_type = ifp->if_type;
3023 	ifi->ifi_addrlen = ifp->if_addrlen;
3024 	ifi->ifi_hdrlen = ifp->if_hdrlen;
3025 	ifi->ifi_link_state = ifp->if_link_state;
3026 	ifi->ifi_mtu = ifp->if_mtu;
3027 	ifi->ifi_metric = ifp->if_metric;
3028 	ifi->ifi_baudrate = ifp->if_baudrate;
3029 	ifi->ifi_lastchange = ifp->if_lastchange;
3030 }
3031 
3032 /* common */
3033 int
3034 ifioctl_common(struct ifnet *ifp, u_long cmd, void *data)
3035 {
3036 	int s;
3037 	struct ifreq *ifr;
3038 	struct ifcapreq *ifcr;
3039 	struct ifdatareq *ifdr;
3040 	unsigned short flags;
3041 	char *descr;
3042 	int error;
3043 
3044 	switch (cmd) {
3045 	case SIOCSIFCAP:
3046 		ifcr = data;
3047 		if ((ifcr->ifcr_capenable & ~ifp->if_capabilities) != 0)
3048 			return EINVAL;
3049 
3050 		if (ifcr->ifcr_capenable == ifp->if_capenable)
3051 			return 0;
3052 
3053 		ifp->if_capenable = ifcr->ifcr_capenable;
3054 
3055 		/* Pre-compute the checksum flags mask. */
3056 		ifp->if_csum_flags_tx = 0;
3057 		ifp->if_csum_flags_rx = 0;
3058 		if (ifp->if_capenable & IFCAP_CSUM_IPv4_Tx)
3059 			ifp->if_csum_flags_tx |= M_CSUM_IPv4;
3060 		if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx)
3061 			ifp->if_csum_flags_rx |= M_CSUM_IPv4;
3062 
3063 		if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Tx)
3064 			ifp->if_csum_flags_tx |= M_CSUM_TCPv4;
3065 		if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Rx)
3066 			ifp->if_csum_flags_rx |= M_CSUM_TCPv4;
3067 
3068 		if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Tx)
3069 			ifp->if_csum_flags_tx |= M_CSUM_UDPv4;
3070 		if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Rx)
3071 			ifp->if_csum_flags_rx |= M_CSUM_UDPv4;
3072 
3073 		if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Tx)
3074 			ifp->if_csum_flags_tx |= M_CSUM_TCPv6;
3075 		if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Rx)
3076 			ifp->if_csum_flags_rx |= M_CSUM_TCPv6;
3077 
3078 		if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Tx)
3079 			ifp->if_csum_flags_tx |= M_CSUM_UDPv6;
3080 		if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Rx)
3081 			ifp->if_csum_flags_rx |= M_CSUM_UDPv6;
3082 
3083 		if (ifp->if_capenable & IFCAP_TSOv4)
3084 			ifp->if_csum_flags_tx |= M_CSUM_TSOv4;
3085 		if (ifp->if_capenable & IFCAP_TSOv6)
3086 			ifp->if_csum_flags_tx |= M_CSUM_TSOv6;
3087 
3088 #if NBRIDGE > 0
3089 		if (ifp->if_bridge != NULL)
3090 			bridge_calc_csum_flags(ifp->if_bridge);
3091 #endif
3092 
3093 		if (ifp->if_flags & IFF_UP)
3094 			return ENETRESET;
3095 		return 0;
3096 	case SIOCSIFFLAGS:
3097 		ifr = data;
3098 		/*
3099 		 * If if_is_mpsafe(ifp), KERNEL_LOCK isn't held here, but if_up
3100 		 * and if_down aren't MP-safe yet, so we must hold the lock.
3101 		 */
3102 		KERNEL_LOCK_IF_IFP_MPSAFE(ifp);
3103 		if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) {
3104 			s = splsoftnet();
3105 			if_down_locked(ifp);
3106 			splx(s);
3107 		}
3108 		if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) {
3109 			s = splsoftnet();
3110 			if_up_locked(ifp);
3111 			splx(s);
3112 		}
3113 		KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp);
3114 		flags = (ifp->if_flags & IFF_CANTCHANGE) |
3115 		    (ifr->ifr_flags &~ IFF_CANTCHANGE);
3116 		if (ifp->if_flags != flags) {
3117 			ifp->if_flags = flags;
3118 			/* Notify that the flags have changed. */
3119 			rt_ifmsg(ifp);
3120 		}
3121 		break;
3122 	case SIOCGIFFLAGS:
3123 		ifr = data;
3124 		ifr->ifr_flags = ifp->if_flags;
3125 		break;
3126 
3127 	case SIOCGIFMETRIC:
3128 		ifr = data;
3129 		ifr->ifr_metric = ifp->if_metric;
3130 		break;
3131 
3132 	case SIOCGIFMTU:
3133 		ifr = data;
3134 		ifr->ifr_mtu = ifp->if_mtu;
3135 		break;
3136 
3137 	case SIOCGIFDLT:
3138 		ifr = data;
3139 		ifr->ifr_dlt = ifp->if_dlt;
3140 		break;
3141 
3142 	case SIOCGIFCAP:
3143 		ifcr = data;
3144 		ifcr->ifcr_capabilities = ifp->if_capabilities;
3145 		ifcr->ifcr_capenable = ifp->if_capenable;
3146 		break;
3147 
3148 	case SIOCSIFMETRIC:
3149 		ifr = data;
3150 		ifp->if_metric = ifr->ifr_metric;
3151 		break;
3152 
3153 	case SIOCGIFDATA:
3154 		ifdr = data;
3155 		if_export_if_data(ifp, &ifdr->ifdr_data, false);
3156 		break;
3157 
3158 	case SIOCGIFINDEX:
3159 		ifr = data;
3160 		ifr->ifr_index = ifp->if_index;
3161 		break;
3162 
3163 	case SIOCZIFDATA:
3164 		ifdr = data;
3165 		if_export_if_data(ifp, &ifdr->ifdr_data, true);
3166 		getnanotime(&ifp->if_lastchange);
3167 		break;
3168 	case SIOCSIFMTU:
3169 		ifr = data;
3170 		if (ifp->if_mtu == ifr->ifr_mtu)
3171 			break;
3172 		ifp->if_mtu = ifr->ifr_mtu;
3173 		return ENETRESET;
3174 	case SIOCSIFDESCR:
3175 		error = kauth_authorize_network(curlwp->l_cred,
3176 		    KAUTH_NETWORK_INTERFACE,
3177 		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd),
3178 		    NULL);
3179 		if (error)
3180 			return error;
3181 
3182 		ifr = data;
3183 
3184 		if (ifr->ifr_buflen > IFDESCRSIZE)
3185 			return ENAMETOOLONG;
3186 
3187 		if (ifr->ifr_buf == NULL || ifr->ifr_buflen == 0) {
3188 			/* unset description */
3189 			descr = NULL;
3190 		} else {
3191 			descr = kmem_zalloc(IFDESCRSIZE, KM_SLEEP);
3192 			/*
3193 			 * copy (IFDESCRSIZE - 1) bytes to ensure
3194 			 * terminating nul
3195 			 */
3196 			error = copyin(ifr->ifr_buf, descr, IFDESCRSIZE - 1);
3197 			if (error) {
3198 				kmem_free(descr, IFDESCRSIZE);
3199 				return error;
3200 			}
3201 		}
3202 
3203 		if (ifp->if_description != NULL)
3204 			kmem_free(ifp->if_description, IFDESCRSIZE);
3205 
3206 		ifp->if_description = descr;
3207 		break;
3208 
3209  	case SIOCGIFDESCR:
3210 		ifr = data;
3211 		descr = ifp->if_description;
3212 
3213 		if (descr == NULL)
3214 			return ENOMSG;
3215 
3216 		if (ifr->ifr_buflen < IFDESCRSIZE)
3217 			return EINVAL;
3218 
3219 		error = copyout(descr, ifr->ifr_buf, IFDESCRSIZE);
3220 		if (error)
3221 			return error;
3222  		break;
3223 
3224 	default:
3225 		return ENOTTY;
3226 	}
3227 	return 0;
3228 }
3229 
3230 int
3231 ifaddrpref_ioctl(struct socket *so, u_long cmd, void *data, struct ifnet *ifp)
3232 {
3233 	struct if_addrprefreq *ifap = (struct if_addrprefreq *)data;
3234 	struct ifaddr *ifa;
3235 	const struct sockaddr *any, *sa;
3236 	union {
3237 		struct sockaddr sa;
3238 		struct sockaddr_storage ss;
3239 	} u, v;
3240 	int s, error = 0;
3241 
3242 	switch (cmd) {
3243 	case SIOCSIFADDRPREF:
3244 		error = kauth_authorize_network(curlwp->l_cred,
3245 		    KAUTH_NETWORK_INTERFACE,
3246 		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, KAUTH_ARG(cmd),
3247 		    NULL);
3248 		if (error)
3249 			return error;
3250 		break;
3251 	case SIOCGIFADDRPREF:
3252 		break;
3253 	default:
3254 		return EOPNOTSUPP;
3255 	}
3256 
3257 	/* sanity checks */
3258 	if (data == NULL || ifp == NULL) {
3259 		panic("invalid argument to %s", __func__);
3260 		/*NOTREACHED*/
3261 	}
3262 
3263 	/* address must be specified on ADD and DELETE */
3264 	sa = sstocsa(&ifap->ifap_addr);
3265 	if (sa->sa_family != sofamily(so))
3266 		return EINVAL;
3267 	if ((any = sockaddr_any(sa)) == NULL || sa->sa_len != any->sa_len)
3268 		return EINVAL;
3269 
3270 	sockaddr_externalize(&v.sa, sizeof(v.ss), sa);
3271 
3272 	s = pserialize_read_enter();
3273 	IFADDR_READER_FOREACH(ifa, ifp) {
3274 		if (ifa->ifa_addr->sa_family != sa->sa_family)
3275 			continue;
3276 		sockaddr_externalize(&u.sa, sizeof(u.ss), ifa->ifa_addr);
3277 		if (sockaddr_cmp(&u.sa, &v.sa) == 0)
3278 			break;
3279 	}
3280 	if (ifa == NULL) {
3281 		error = EADDRNOTAVAIL;
3282 		goto out;
3283 	}
3284 
3285 	switch (cmd) {
3286 	case SIOCSIFADDRPREF:
3287 		ifa->ifa_preference = ifap->ifap_preference;
3288 		goto out;
3289 	case SIOCGIFADDRPREF:
3290 		/* fill in the if_laddrreq structure */
3291 		(void)sockaddr_copy(sstosa(&ifap->ifap_addr),
3292 		    sizeof(ifap->ifap_addr), ifa->ifa_addr);
3293 		ifap->ifap_preference = ifa->ifa_preference;
3294 		goto out;
3295 	default:
3296 		error = EOPNOTSUPP;
3297 	}
3298 out:
3299 	pserialize_read_exit(s);
3300 	return error;
3301 }
3302 
/*
 * Interface ioctls.
 *
 * Entry point for ioctls issued on a socket that refer to a network
 * interface.  so is the socket, cmd the ioctl command, data its
 * argument and l the calling lwp (authorization checks are skipped
 * when l is NULL).
 *
 * SIOCGIFCONF is handed to ifconf() and SIOCINITIFADDR is refused with
 * EPERM.  Any other command is first offered to the compat hooks, then
 * the interface clone commands are handled, and finally the command is
 * passed to the driver's if_ioctl routine of the interface named in
 * the request, falling back to the socket's protocol when the driver
 * answers ENOTTY.
 *
 * Returns 0 or an errno; ENXIO if the named interface does not exist.
 */
static int
doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l)
{
	struct ifnet *ifp;
	struct ifreq *ifr;
	int error = 0;
	u_long ocmd = cmd;	/* command as issued, before compat conversion */
	u_short oif_flags;
	struct ifreq ifrb;	/* converted request for old-style commands */
	struct oifreq *oifr = NULL;
	int r;
	struct psref psref;
	int bound;
	bool do_if43_post = false;
	bool do_ifm80_post = false;

	switch (cmd) {
	case SIOCGIFCONF:
		return ifconf(cmd, data);
	case SIOCINITIFADDR:
		return EPERM;
	default:
		/*
		 * Give the compat hooks a chance; any result other than
		 * ENOSYS is taken as the final answer.
		 */
		MODULE_HOOK_CALL(uipc_syscalls_40_hook, (cmd, data), enosys(),
		    error);
		if (error != ENOSYS)
			return error;
		MODULE_HOOK_CALL(uipc_syscalls_50_hook, (l, cmd, data),
		    enosys(), error);
		if (error != ENOSYS)
			return error;
		error = 0;
		break;
	}

	ifr = data;
	/* Pre-conversion */
	MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), error);
	if (cmd != ocmd) {
		/*
		 * The hook rewrote the command: work on a converted copy
		 * of the old-style request and convert back afterwards.
		 */
		oifr = data;
		data = ifr = &ifrb;
		IFREQO2N_43(oifr, ifr);
		do_if43_post = true;
	}
	MODULE_HOOK_CALL(ifmedia_80_pre_hook, (ifr, &cmd, &do_ifm80_post),
	    enosys(), error);

	switch (cmd) {
	case SIOCIFCREATE:
	case SIOCIFDESTROY:
		bound = curlwp_bind();
		if (l != NULL) {
			/*
			 * The interface may not exist (yet); ifp can be NULL
			 * when handed to the authorization check.
			 */
			ifp = if_get(ifr->ifr_name, &psref);
			error = kauth_authorize_network(l->l_cred,
			    KAUTH_NETWORK_INTERFACE,
			    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
			    KAUTH_ARG(cmd), NULL);
			if (ifp != NULL)
				if_put(ifp, &psref);
			if (error != 0) {
				curlwp_bindx(bound);
				return error;
			}
		}
		/* Clone creation and destruction run under if_clone_mtx. */
		KERNEL_LOCK_UNLESS_NET_MPSAFE();
		mutex_enter(&if_clone_mtx);
		r = (cmd == SIOCIFCREATE) ?
			if_clone_create(ifr->ifr_name) :
			if_clone_destroy(ifr->ifr_name);
		mutex_exit(&if_clone_mtx);
		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
		curlwp_bindx(bound);
		return r;

	case SIOCIFGCLONERS:
		{
			struct if_clonereq *req = (struct if_clonereq *)data;
			return if_clone_list(req->ifcr_count, req->ifcr_buffer,
			    &req->ifcr_total);
		}
	}

	/* Everything below operates on an existing, referenced interface. */
	bound = curlwp_bind();
	ifp = if_get(ifr->ifr_name, &psref);
	if (ifp == NULL) {
		curlwp_bindx(bound);
		return ENXIO;
	}

	/* Commands listed here need SETPRIV authorization for the caller. */
	switch (cmd) {
	case SIOCALIFADDR:
	case SIOCDLIFADDR:
	case SIOCSIFADDRPREF:
	case SIOCSIFFLAGS:
	case SIOCSIFCAP:
	case SIOCSIFMETRIC:
	case SIOCZIFDATA:
	case SIOCSIFMTU:
	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSLIFPHYADDR:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSETHERCAP:
	case SIOCSIFMEDIA:
	case SIOCSDRVSPEC:
	case SIOCG80211:
	case SIOCS80211:
	case SIOCS80211NWID:
	case SIOCS80211NWKEY:
	case SIOCS80211POWER:
	case SIOCS80211BSSID:
	case SIOCS80211CHANNEL:
	case SIOCSLINKSTR:
		if (l != NULL) {
			error = kauth_authorize_network(l->l_cred,
			    KAUTH_NETWORK_INTERFACE,
			    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
			    KAUTH_ARG(cmd), NULL);
			if (error != 0)
				goto out;
		}
	}

	/* Remember the flags to detect an IFF_UP change by the ioctl. */
	oif_flags = ifp->if_flags;

	KERNEL_LOCK_UNLESS_IFP_MPSAFE(ifp);
	IFNET_LOCK(ifp);

	/*
	 * Ask the driver first.  Only when it answers ENOTTY is the
	 * request passed on to the 4.3 compat hook and then to the
	 * protocol's pr_ioctl; without a protocol it is EOPNOTSUPP.
	 */
	error = (*ifp->if_ioctl)(ifp, cmd, data);
	if (error != ENOTTY)
		;
	else if (so->so_proto == NULL)
		error = EOPNOTSUPP;
	else {
		KERNEL_LOCK_IF_IFP_MPSAFE(ifp);
		MODULE_HOOK_CALL(if_ifioctl_43_hook,
			     (so, ocmd, cmd, data, l), enosys(), error);
		if (error == ENOSYS)
			error = (*so->so_proto->pr_usrreqs->pr_ioctl)(so,
			    cmd, data, ifp);
		KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp);
	}

	/* If the ioctl turned IFF_UP on, run the interface-up processing. */
	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) {
		if ((ifp->if_flags & IFF_UP) != 0) {
			int s = splsoftnet();
			if_up_locked(ifp);
			splx(s);
		}
	}

	/* Post-conversion */
	if (do_ifm80_post && (error == 0))
		MODULE_HOOK_CALL(ifmedia_80_post_hook, (ifr, cmd),
		    enosys(), error);
	if (do_if43_post)
		IFREQN2O_43(oifr, ifr);

	IFNET_UNLOCK(ifp);
	KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(ifp);
out:
	/* Drop the interface reference and the lwp binding. */
	if_put(ifp, &psref);
	curlwp_bindx(bound);
	return error;
}
3474 
3475 /*
3476  * Return interface configuration
3477  * of system.  List may be used
3478  * in later ioctl's (above) to get
3479  * other information.
3480  *
3481  * Each record is a struct ifreq.  Before the addition of
3482  * sockaddr_storage, the API rule was that sockaddr flavors that did
3483  * not fit would extend beyond the struct ifreq, with the next struct
3484  * ifreq starting sa_len beyond the struct sockaddr.  Because the
3485  * union in struct ifreq includes struct sockaddr_storage, every kind
3486  * of sockaddr must fit.  Thus, there are no longer any overlength
3487  * records.
3488  *
3489  * Records are added to the user buffer if they fit, and ifc_len is
3490  * adjusted to the length that was written.  Thus, the user is only
3491  * assured of getting the complete list if ifc_len on return is at
3492  * least sizeof(struct ifreq) less than it was on entry.
3493  *
3494  * If the user buffer pointer is NULL, this routine copies no data and
3495  * returns the amount of space that would be needed.
3496  *
3497  * Invariants:
3498  * ifrp points to the next part of the user's buffer to be used.  If
3499  * ifrp != NULL, space holds the number of bytes remaining that we may
3500  * write at ifrp.  Otherwise, space holds the number of bytes that
3501  * would have been written had there been adequate space.
3502  */
3503 /*ARGSUSED*/
static int
ifconf(u_long cmd, void *data)
{
	/*
	 * Walk every interface and produce one struct ifreq per address
	 * (or a single zero-address record for an interface that has no
	 * addresses).
	 *
	 * cmd is not referenced in this function.  data points to the
	 * caller's struct ifconf, whose ifc_len is updated in place.
	 *
	 * Returns 0 on success, EINVAL for a negative ifc_len when a buffer
	 * was supplied, ENAMETOOLONG if an interface name fills ifr_name
	 * without a terminating NUL, or the error returned by copyout().
	 */
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp = NULL;
	int space = 0, error = 0;
	const int sz = (int)sizeof(struct ifreq);
	/* A NULL ifc_req selects "sizing only" mode: nothing is copied out. */
	const bool docopy = ifc->ifc_req != NULL;
	int s;
	int bound;
	struct psref psref;

	if (docopy) {
		if (ifc->ifc_len < 0)
			return EINVAL;

		/* In copy mode, space counts the bytes still available. */
		space = ifc->ifc_len;
		ifrp = ifc->ifc_req;
	}
	/*
	 * ifr is zeroed only once, here.  Each address below overwrites just
	 * sa_len bytes of it, so a shorter address that follows a longer one
	 * leaves the tail of the earlier address in the record.
	 */
	memset(&ifr, 0, sizeof(ifr));

	bound = curlwp_bind();
	s = pserialize_read_enter();
	IFNET_READER_FOREACH(ifp) {
		/*
		 * Take a psref on the interface before leaving the pserialize
		 * read section; the section is left before any copyout().
		 */
		psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
		pserialize_read_exit(s);

		(void)strncpy(ifr.ifr_name, ifp->if_xname,
		    sizeof(ifr.ifr_name));
		/* strncpy() leaves no NUL if the name fills the buffer. */
		if (ifr.ifr_name[sizeof(ifr.ifr_name) - 1] != '\0') {
			error = ENAMETOOLONG;
			goto release_exit;
		}
		if (IFADDR_READER_EMPTY(ifp)) {
			/* Interface with no addresses - send zero sockaddr. */
			memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr));
			if (!docopy) {
				/* Sizing mode: just account for the record. */
				space += sz;
				goto next;
			}
			/* A record that does not fit is silently skipped. */
			if (space >= sz) {
				error = copyout(&ifr, ifrp, sz);
				if (error != 0)
					goto release_exit;
				ifrp++;
				space -= sz;
			}
		}

		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, ifp) {
			struct sockaddr *sa = ifa->ifa_addr;
			/* all sockaddrs must fit in sockaddr_storage */
			KASSERT(sa->sa_len <= sizeof(ifr.ifr_ifru));

			if (!docopy) {
				/* Sizing mode stays inside the read section. */
				space += sz;
				continue;
			}
			/*
			 * Snapshot the address into ifr while still in the
			 * read section, then leave the section around the
			 * copyout() and re-enter it before advancing.
			 */
			memcpy(&ifr.ifr_space, sa, sa->sa_len);
			pserialize_read_exit(s);

			if (space >= sz) {
				error = copyout(&ifr, ifrp, sz);
				if (error != 0)
					goto release_exit;
				ifrp++; space -= sz;
			}
			s = pserialize_read_enter();
		}
		pserialize_read_exit(s);

        next:
		/*
		 * Re-enter the read section before dropping the psref so the
		 * outer list walk can advance to the next interface.
		 */
		s = pserialize_read_enter();
		psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
	}
	pserialize_read_exit(s);
	curlwp_bindx(bound);

	if (docopy) {
		/* space is what is left over; report the bytes written. */
		KASSERT(0 <= space && space <= ifc->ifc_len);
		ifc->ifc_len -= space;
	} else {
		/* space is the total that a full listing would need. */
		KASSERT(space >= 0);
		ifc->ifc_len = space;
	}
	return (0);

release_exit:
	/*
	 * Error exit.  Every jump here happens after the pserialize read
	 * section has been left, so only the psref and the LWP binding
	 * remain to be undone.  ifc_len is left unmodified on this path.
	 */
	psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
	curlwp_bindx(bound);
	return error;
}
3599 
3600 int
3601 ifreq_setaddr(u_long cmd, struct ifreq *ifr, const struct sockaddr *sa)
3602 {
3603 	uint8_t len = sizeof(ifr->ifr_ifru.ifru_space);
3604 	struct ifreq ifrb;
3605 	struct oifreq *oifr = NULL;
3606 	u_long ocmd = cmd;
3607 	int hook;
3608 
3609 	MODULE_HOOK_CALL(if_cvtcmd_43_hook, (&cmd, ocmd), enosys(), hook);
3610 	if (hook != ENOSYS) {
3611 		if (cmd != ocmd) {
3612 			oifr = (struct oifreq *)(void *)ifr;
3613 			ifr = &ifrb;
3614 			IFREQO2N_43(oifr, ifr);
3615 				len = sizeof(oifr->ifr_addr);
3616 		}
3617 	}
3618 
3619 	if (len < sa->sa_len)
3620 		return EFBIG;
3621 
3622 	memset(&ifr->ifr_addr, 0, len);
3623 	sockaddr_copy(&ifr->ifr_addr, len, sa);
3624 
3625 	if (cmd != ocmd)
3626 		IFREQN2O_43(oifr, ifr);
3627 	return 0;
3628 }
3629 
3630 /*
3631  * wrapper function for the drivers which doesn't have if_transmit().
3632  */
3633 static int
3634 if_transmit(struct ifnet *ifp, struct mbuf *m)
3635 {
3636 	int s, error;
3637 	size_t pktlen = m->m_pkthdr.len;
3638 	bool mcast = (m->m_flags & M_MCAST) != 0;
3639 
3640 	s = splnet();
3641 
3642 	IFQ_ENQUEUE(&ifp->if_snd, m, error);
3643 	if (error != 0) {
3644 		/* mbuf is already freed */
3645 		goto out;
3646 	}
3647 
3648 	net_stat_ref_t nsr = IF_STAT_GETREF(ifp);
3649 	if_statadd_ref(nsr, if_obytes, pktlen);
3650 	if (mcast)
3651 		if_statinc_ref(nsr, if_omcasts);
3652 	IF_STAT_PUTREF(ifp);
3653 
3654 	if ((ifp->if_flags & IFF_OACTIVE) == 0)
3655 		if_start_lock(ifp);
3656 out:
3657 	splx(s);
3658 
3659 	return error;
3660 }
3661 
3662 int
3663 if_transmit_lock(struct ifnet *ifp, struct mbuf *m)
3664 {
3665 	int error;
3666 
3667 	kmsan_check_mbuf(m);
3668 
3669 #ifdef ALTQ
3670 	KERNEL_LOCK(1, NULL);
3671 	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
3672 		error = if_transmit(ifp, m);
3673 		KERNEL_UNLOCK_ONE(NULL);
3674 	} else {
3675 		KERNEL_UNLOCK_ONE(NULL);
3676 		error = (*ifp->if_transmit)(ifp, m);
3677 		/* mbuf is alredy freed */
3678 	}
3679 #else /* !ALTQ */
3680 	error = (*ifp->if_transmit)(ifp, m);
3681 	/* mbuf is alredy freed */
3682 #endif /* !ALTQ */
3683 
3684 	return error;
3685 }
3686 
/*
 * Queue a message on the interface and start output if the interface
 * is not yet active.  This is a thin front end to if_transmit_lock().
 */
int
ifq_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	const int error = if_transmit_lock(ifp, m);

	return error;
}
3697 
3698 /*
3699  * Queue message on interface, possibly using a second fast queue
3700  */
3701 int
3702 ifq_enqueue2(struct ifnet *ifp, struct ifqueue *ifq, struct mbuf *m)
3703 {
3704 	int error = 0;
3705 
3706 	if (ifq != NULL
3707 #ifdef ALTQ
3708 	    && ALTQ_IS_ENABLED(&ifp->if_snd) == 0
3709 #endif
3710 	    ) {
3711 		if (IF_QFULL(ifq)) {
3712 			IF_DROP(&ifp->if_snd);
3713 			m_freem(m);
3714 			if (error == 0)
3715 				error = ENOBUFS;
3716 		} else
3717 			IF_ENQUEUE(ifq, m);
3718 	} else
3719 		IFQ_ENQUEUE(&ifp->if_snd, m, error);
3720 	if (error != 0) {
3721 		if_statinc(ifp, if_oerrors);
3722 		return error;
3723 	}
3724 	return 0;
3725 }
3726 
3727 int
3728 if_addr_init(ifnet_t *ifp, struct ifaddr *ifa, const bool src)
3729 {
3730 	int rc;
3731 
3732 	KASSERT(IFNET_LOCKED(ifp));
3733 	if (ifp->if_initaddr != NULL)
3734 		rc = (*ifp->if_initaddr)(ifp, ifa, src);
3735 	else if (src ||
3736 	         (rc = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR, ifa)) == ENOTTY)
3737 		rc = (*ifp->if_ioctl)(ifp, SIOCINITIFADDR, ifa);
3738 
3739 	return rc;
3740 }
3741 
3742 int
3743 if_do_dad(struct ifnet *ifp)
3744 {
3745 	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
3746 		return 0;
3747 
3748 	switch (ifp->if_type) {
3749 	case IFT_FAITH:
3750 		/*
3751 		 * These interfaces do not have the IFF_LOOPBACK flag,
3752 		 * but loop packets back.  We do not have to do DAD on such
3753 		 * interfaces.  We should even omit it, because loop-backed
3754 		 * responses would confuse the DAD procedure.
3755 		 */
3756 		return 0;
3757 	default:
3758 		/*
3759 		 * Our DAD routine requires the interface up and running.
3760 		 * However, some interfaces can be up before the RUNNING
3761 		 * status.  Additionaly, users may try to assign addresses
3762 		 * before the interface becomes up (or running).
3763 		 * We simply skip DAD in such a case as a work around.
3764 		 * XXX: we should rather mark "tentative" on such addresses,
3765 		 * and do DAD after the interface becomes ready.
3766 		 */
3767 		if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
3768 		    (IFF_UP | IFF_RUNNING))
3769 			return 0;
3770 
3771 		return 1;
3772 	}
3773 }
3774 
3775 int
3776 if_flags_set(ifnet_t *ifp, const u_short flags)
3777 {
3778 	int rc;
3779 
3780 	KASSERT(IFNET_LOCKED(ifp));
3781 
3782 	if (ifp->if_setflags != NULL)
3783 		rc = (*ifp->if_setflags)(ifp, flags);
3784 	else {
3785 		u_short cantflags, chgdflags;
3786 		struct ifreq ifr;
3787 
3788 		chgdflags = ifp->if_flags ^ flags;
3789 		cantflags = chgdflags & IFF_CANTCHANGE;
3790 
3791 		if (cantflags != 0)
3792 			ifp->if_flags ^= cantflags;
3793 
3794                 /* Traditionally, we do not call if_ioctl after
3795                  * setting/clearing only IFF_PROMISC if the interface
3796                  * isn't IFF_UP.  Uphold that tradition.
3797 		 */
3798 		if (chgdflags == IFF_PROMISC && (ifp->if_flags & IFF_UP) == 0)
3799 			return 0;
3800 
3801 		memset(&ifr, 0, sizeof(ifr));
3802 
3803 		ifr.ifr_flags = flags & ~IFF_CANTCHANGE;
3804 		rc = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, &ifr);
3805 
3806 		if (rc != 0 && cantflags != 0)
3807 			ifp->if_flags ^= cantflags;
3808 	}
3809 
3810 	return rc;
3811 }
3812 
3813 int
3814 if_mcast_op(ifnet_t *ifp, const unsigned long cmd, const struct sockaddr *sa)
3815 {
3816 	int rc;
3817 	struct ifreq ifr;
3818 
3819 	/*
3820 	 * XXX NOMPSAFE - this calls if_ioctl without holding IFNET_LOCK()
3821 	 * in some cases - e.g. when called from vlan/netinet/netinet6 code
3822 	 * directly rather than via doifoictl()
3823 	 */
3824 	ifreq_setaddr(cmd, &ifr, sa);
3825 	rc = (*ifp->if_ioctl)(ifp, cmd, &ifr);
3826 
3827 	return rc;
3828 }
3829 
3830 static void
3831 sysctl_sndq_setup(struct sysctllog **clog, const char *ifname,
3832     struct ifaltq *ifq)
3833 {
3834 	const struct sysctlnode *cnode, *rnode;
3835 
3836 	if (sysctl_createv(clog, 0, NULL, &rnode,
3837 		       CTLFLAG_PERMANENT,
3838 		       CTLTYPE_NODE, "interfaces",
3839 		       SYSCTL_DESCR("Per-interface controls"),
3840 		       NULL, 0, NULL, 0,
3841 		       CTL_NET, CTL_CREATE, CTL_EOL) != 0)
3842 		goto bad;
3843 
3844 	if (sysctl_createv(clog, 0, &rnode, &rnode,
3845 		       CTLFLAG_PERMANENT,
3846 		       CTLTYPE_NODE, ifname,
3847 		       SYSCTL_DESCR("Interface controls"),
3848 		       NULL, 0, NULL, 0,
3849 		       CTL_CREATE, CTL_EOL) != 0)
3850 		goto bad;
3851 
3852 	if (sysctl_createv(clog, 0, &rnode, &rnode,
3853 		       CTLFLAG_PERMANENT,
3854 		       CTLTYPE_NODE, "sndq",
3855 		       SYSCTL_DESCR("Interface output queue controls"),
3856 		       NULL, 0, NULL, 0,
3857 		       CTL_CREATE, CTL_EOL) != 0)
3858 		goto bad;
3859 
3860 	if (sysctl_createv(clog, 0, &rnode, &cnode,
3861 		       CTLFLAG_PERMANENT,
3862 		       CTLTYPE_INT, "len",
3863 		       SYSCTL_DESCR("Current output queue length"),
3864 		       NULL, 0, &ifq->ifq_len, 0,
3865 		       CTL_CREATE, CTL_EOL) != 0)
3866 		goto bad;
3867 
3868 	if (sysctl_createv(clog, 0, &rnode, &cnode,
3869 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
3870 		       CTLTYPE_INT, "maxlen",
3871 		       SYSCTL_DESCR("Maximum allowed output queue length"),
3872 		       NULL, 0, &ifq->ifq_maxlen, 0,
3873 		       CTL_CREATE, CTL_EOL) != 0)
3874 		goto bad;
3875 
3876 	if (sysctl_createv(clog, 0, &rnode, &cnode,
3877 		       CTLFLAG_PERMANENT,
3878 		       CTLTYPE_INT, "drops",
3879 		       SYSCTL_DESCR("Packets dropped due to full output queue"),
3880 		       NULL, 0, &ifq->ifq_drops, 0,
3881 		       CTL_CREATE, CTL_EOL) != 0)
3882 		goto bad;
3883 
3884 	return;
3885 bad:
3886 	printf("%s: could not attach sysctl nodes\n", ifname);
3887 	return;
3888 }
3889 
3890 #if defined(INET) || defined(INET6)
3891 
/*
 * Define a static sysctl handler named sysctl_net_<q>_<cn> that forwards
 * the request to sysctl_pktq_count() for packet queue q, with c passed as
 * the counter selector.
 */
#define	SYSCTL_NET_PKTQ(q, cn, c)					\
	static int							\
	sysctl_net_##q##_##cn(SYSCTLFN_ARGS)				\
	{								\
		return sysctl_pktq_count(SYSCTLFN_CALL(rnode), q, c);	\
	}
3898 
#if defined(INET)
/* sysctl handler: forwards the request to sysctl_pktq_maxlen() for ip_pktq. */
static int
sysctl_net_ip_pktq_maxlen(SYSCTLFN_ARGS)
{
	return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), ip_pktq);
}
/* Generate sysctl_net_ip_pktq_items() and sysctl_net_ip_pktq_drops(). */
SYSCTL_NET_PKTQ(ip_pktq, items, PKTQ_NITEMS)
SYSCTL_NET_PKTQ(ip_pktq, drops, PKTQ_DROPS)
#endif
3908 
#if defined(INET6)
/* sysctl handler: forwards the request to sysctl_pktq_maxlen() for ip6_pktq. */
static int
sysctl_net_ip6_pktq_maxlen(SYSCTLFN_ARGS)
{
	return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), ip6_pktq);
}
/* Generate sysctl_net_ip6_pktq_items() and sysctl_net_ip6_pktq_drops(). */
SYSCTL_NET_PKTQ(ip6_pktq, items, PKTQ_NITEMS)
SYSCTL_NET_PKTQ(ip6_pktq, drops, PKTQ_DROPS)
#endif
3918 
3919 static void
3920 sysctl_net_pktq_setup(struct sysctllog **clog, int pf)
3921 {
3922 	sysctlfn len_func = NULL, maxlen_func = NULL, drops_func = NULL;
3923 	const char *pfname = NULL, *ipname = NULL;
3924 	int ipn = 0, qid = 0;
3925 
3926 	switch (pf) {
3927 #if defined(INET)
3928 	case PF_INET:
3929 		len_func = sysctl_net_ip_pktq_items;
3930 		maxlen_func = sysctl_net_ip_pktq_maxlen;
3931 		drops_func = sysctl_net_ip_pktq_drops;
3932 		pfname = "inet", ipn = IPPROTO_IP;
3933 		ipname = "ip", qid = IPCTL_IFQ;
3934 		break;
3935 #endif
3936 #if defined(INET6)
3937 	case PF_INET6:
3938 		len_func = sysctl_net_ip6_pktq_items;
3939 		maxlen_func = sysctl_net_ip6_pktq_maxlen;
3940 		drops_func = sysctl_net_ip6_pktq_drops;
3941 		pfname = "inet6", ipn = IPPROTO_IPV6;
3942 		ipname = "ip6", qid = IPV6CTL_IFQ;
3943 		break;
3944 #endif
3945 	default:
3946 		KASSERT(false);
3947 	}
3948 
3949 	sysctl_createv(clog, 0, NULL, NULL,
3950 		       CTLFLAG_PERMANENT,
3951 		       CTLTYPE_NODE, pfname, NULL,
3952 		       NULL, 0, NULL, 0,
3953 		       CTL_NET, pf, CTL_EOL);
3954 	sysctl_createv(clog, 0, NULL, NULL,
3955 		       CTLFLAG_PERMANENT,
3956 		       CTLTYPE_NODE, ipname, NULL,
3957 		       NULL, 0, NULL, 0,
3958 		       CTL_NET, pf, ipn, CTL_EOL);
3959 	sysctl_createv(clog, 0, NULL, NULL,
3960 		       CTLFLAG_PERMANENT,
3961 		       CTLTYPE_NODE, "ifq",
3962 		       SYSCTL_DESCR("Protocol input queue controls"),
3963 		       NULL, 0, NULL, 0,
3964 		       CTL_NET, pf, ipn, qid, CTL_EOL);
3965 
3966 	sysctl_createv(clog, 0, NULL, NULL,
3967 		       CTLFLAG_PERMANENT,
3968 		       CTLTYPE_QUAD, "len",
3969 		       SYSCTL_DESCR("Current input queue length"),
3970 		       len_func, 0, NULL, 0,
3971 		       CTL_NET, pf, ipn, qid, IFQCTL_LEN, CTL_EOL);
3972 	sysctl_createv(clog, 0, NULL, NULL,
3973 		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
3974 		       CTLTYPE_INT, "maxlen",
3975 		       SYSCTL_DESCR("Maximum allowed input queue length"),
3976 		       maxlen_func, 0, NULL, 0,
3977 		       CTL_NET, pf, ipn, qid, IFQCTL_MAXLEN, CTL_EOL);
3978 	sysctl_createv(clog, 0, NULL, NULL,
3979 		       CTLFLAG_PERMANENT,
3980 		       CTLTYPE_QUAD, "drops",
3981 		       SYSCTL_DESCR("Packets dropped due to full input queue"),
3982 		       drops_func, 0, NULL, 0,
3983 		       CTL_NET, pf, ipn, qid, IFQCTL_DROPS, CTL_EOL);
3984 }
3985 #endif /* INET || INET6 */
3986 
3987 static int
3988 if_sdl_sysctl(SYSCTLFN_ARGS)
3989 {
3990 	struct ifnet *ifp;
3991 	const struct sockaddr_dl *sdl;
3992 	struct psref psref;
3993 	int error = 0;
3994 	int bound;
3995 
3996 	if (namelen != 1)
3997 		return EINVAL;
3998 
3999 	bound = curlwp_bind();
4000 	ifp = if_get_byindex(name[0], &psref);
4001 	if (ifp == NULL) {
4002 		error = ENODEV;
4003 		goto out0;
4004 	}
4005 
4006 	sdl = ifp->if_sadl;
4007 	if (sdl == NULL) {
4008 		*oldlenp = 0;
4009 		goto out1;
4010 	}
4011 
4012 	if (oldp == NULL) {
4013 		*oldlenp = sdl->sdl_alen;
4014 		goto out1;
4015 	}
4016 
4017 	if (*oldlenp >= sdl->sdl_alen)
4018 		*oldlenp = sdl->sdl_alen;
4019 	error = sysctl_copyout(l, &sdl->sdl_data[sdl->sdl_nlen], oldp, *oldlenp);
4020 out1:
4021 	if_put(ifp, &psref);
4022 out0:
4023 	curlwp_bindx(bound);
4024 	return error;
4025 }
4026 
/*
 * Create the "sdl" sysctl node under CTL_NET, served by if_sdl_sysctl(),
 * and set up the protocol input queue sysctl nodes for each address family
 * compiled into the kernel (for INET6 only when in6_present is true).
 */
static void
if_sysctl_setup(struct sysctllog **clog)
{
	const struct sysctlnode *rnode = NULL;

	/* The result is not checked and rnode is not used afterwards. */
	sysctl_createv(clog, 0, NULL, &rnode,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "sdl",
		       SYSCTL_DESCR("Get active link-layer address"),
		       if_sdl_sysctl, 0, NULL, 0,
		       CTL_NET, CTL_CREATE, CTL_EOL);

	/*
	 * NOTE(review): the calls below pass NULL instead of clog, so the
	 * packet queue nodes are presumably not recorded in the caller's
	 * sysctl log -- confirm that this is intended.
	 */
#if defined(INET)
	sysctl_net_pktq_setup(NULL, PF_INET);
#endif
#ifdef INET6
	if (in6_present)
		sysctl_net_pktq_setup(NULL, PF_INET6);
#endif
}
4047