xref: /netbsd-src/sys/net/if.c (revision 0df165c04d0a9ca1adde9ed2b890344c937954a6)
1 /*	$NetBSD: if.c,v 1.203 2007/11/01 20:37:48 dyoung Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by William Studenmund and Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
41  * All rights reserved.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. Neither the name of the project nor the names of its contributors
52  *    may be used to endorse or promote products derived from this software
53  *    without specific prior written permission.
54  *
55  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65  * SUCH DAMAGE.
66  */
67 
68 /*
69  * Copyright (c) 1980, 1986, 1993
70  *	The Regents of the University of California.  All rights reserved.
71  *
72  * Redistribution and use in source and binary forms, with or without
73  * modification, are permitted provided that the following conditions
74  * are met:
75  * 1. Redistributions of source code must retain the above copyright
76  *    notice, this list of conditions and the following disclaimer.
77  * 2. Redistributions in binary form must reproduce the above copyright
78  *    notice, this list of conditions and the following disclaimer in the
79  *    documentation and/or other materials provided with the distribution.
80  * 3. Neither the name of the University nor the names of its contributors
81  *    may be used to endorse or promote products derived from this software
82  *    without specific prior written permission.
83  *
84  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
85  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
86  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
87  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
88  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
89  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
90  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
91  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
92  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
93  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
94  * SUCH DAMAGE.
95  *
96  *	@(#)if.c	8.5 (Berkeley) 1/9/95
97  */
98 
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.203 2007/11/01 20:37:48 dyoung Exp $");
101 
102 #include "opt_inet.h"
103 
104 #include "opt_atalk.h"
105 #include "opt_natm.h"
106 #include "opt_pfil_hooks.h"
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/systm.h>
111 #include <sys/callout.h>
112 #include <sys/proc.h>
113 #include <sys/socket.h>
114 #include <sys/socketvar.h>
115 #include <sys/domain.h>
116 #include <sys/protosw.h>
117 #include <sys/kernel.h>
118 #include <sys/ioctl.h>
119 #include <sys/sysctl.h>
120 #include <sys/syslog.h>
121 #include <sys/kauth.h>
122 
123 #include <net/if.h>
124 #include <net/if_dl.h>
125 #include <net/if_ether.h>
126 #include <net/if_media.h>
127 #include <net80211/ieee80211.h>
128 #include <net80211/ieee80211_ioctl.h>
129 #include <net/if_types.h>
130 #include <net/radix.h>
131 #include <net/route.h>
132 #include <net/netisr.h>
133 #ifdef NETATALK
134 #include <netatalk/at_extern.h>
135 #include <netatalk/at.h>
136 #endif
137 #include <net/pfil.h>
138 
139 #ifdef INET6
140 #include <netinet/in.h>
141 #include <netinet6/in6_var.h>
142 #include <netinet6/nd6.h>
143 #endif
144 
145 #include "carp.h"
146 #if NCARP > 0
147 #include <netinet/ip_carp.h>
148 #endif
149 
150 #include <compat/sys/sockio.h>
151 #include <compat/sys/socket.h>
152 
/* Malloc types for interface addresses and link-level multicast addresses. */
153 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
154 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
155 
/*
 * Send queue length limit that if_attach() applies to any interface
 * whose if_snd.ifq_maxlen is still 0.
 */
156 int	ifqmaxlen = IFQ_MAXLEN;
/* Callout handle initialized by ifinit(). */
157 callout_t if_slowtimo_ch;
158 
159 int netisr;			/* scheduling bits for network */
160 
/* Forward declarations of file-local helpers defined further down. */
161 static int	if_rt_walktree(struct rtentry *, void *);
162 
163 static struct if_clone *if_clone_lookup(const char *, int *);
164 static int	if_clone_list(struct if_clonereq *);
165 
/*
 * Registered interface cloners, and the length of that list.  Both are
 * maintained by if_clone_attach() and if_clone_detach().
 */
166 static LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
167 static int if_cloners_count;
168 
169 #ifdef PFIL_HOOKS
170 struct pfil_head if_pfil;	/* packet filtering hook for interfaces */
171 #endif
172 
173 static void if_detach_queues(struct ifnet *, struct ifqueue *);
174 
175 /*
176  * Network interface utility routines.
177  *
178  * Routines with ifa_ifwith* names take sockaddr *'s as
179  * parameters.
180  */
181 void
182 ifinit(void)
183 {
184 
185 	callout_init(&if_slowtimo_ch, 0);
186 	if_slowtimo(NULL);
187 #ifdef PFIL_HOOKS
188 	if_pfil.ph_type = PFIL_TYPE_IFNET;
189 	if_pfil.ph_ifnet = NULL;
190 	if (pfil_head_register(&if_pfil) != 0)
191 		printf("WARNING: unable to register pfil hook\n");
192 #endif
193 }
194 
195 /*
196  * Null routines used while an interface is going away.  These routines
197  * just return an error.
198  */
199 
/*
 * Output stub installed by if_deactivate().  The mbuf is left untouched
 * and the caller is told the device is not available.
 */
int
if_nulloutput(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *so, struct rtentry *rt)
{
	(void)ifp;
	(void)m;
	(void)so;
	(void)rt;

	return (ENXIO);
}
207 
/*
 * Input stub installed by if_deactivate().  Does nothing at all; in
 * particular the mbuf is not examined.
 */
void
if_nullinput(struct ifnet *ifp, struct mbuf *m)
{
	(void)ifp;
	(void)m;
}
214 
/*
 * Start stub installed by if_deactivate().  Does nothing.
 */
void
if_nullstart(struct ifnet *ifp)
{
	(void)ifp;
}
221 
222 int
223 if_nullioctl(struct ifnet *ifp, u_long cmd, void *data)
224 {
225 
226 	return ENXIO;
227 }
228 
/*
 * Init stub installed by if_deactivate().  Always fails with ENXIO.
 */
int
if_nullinit(struct ifnet *ifp)
{
	(void)ifp;

	return (ENXIO);
}
235 
/*
 * Stop stub installed by if_deactivate().  Does nothing; the "disable"
 * argument is ignored.
 */
void
if_nullstop(struct ifnet *ifp, int disable)
{
	(void)ifp;
	(void)disable;
}
242 
/*
 * Watchdog stub installed by if_deactivate().  Does nothing.
 */
void
if_nullwatchdog(struct ifnet *ifp)
{
	(void)ifp;
}
249 
/*
 * Drain stub installed by if_deactivate().  Does nothing.
 */
void
if_nulldrain(struct ifnet *ifp)
{
	(void)ifp;
}
256 
/* Next candidate interface index; advanced by if_attach(). Starts at 1. */
257 static u_int if_index = 1;
/* List of attached interfaces; if_attach() appends, if_detach() removes. */
258 struct ifnet_head ifnet;
/*
 * Number of slots in ifnet_addrs[] and ifindex2ifnet[].  Zero until the
 * first if_attach(), which sets it to 8 and doubles it as needed.
 */
259 size_t if_indexlim = 0;
/* Per-index link-level ifaddr; filled in by if_alloc_sadl(). */
260 struct ifaddr **ifnet_addrs = NULL;
/* Per-index ifnet pointer; a NULL slot means the index is unused. */
261 struct ifnet **ifindex2ifnet = NULL;
/* NOTE(review): presumably the loopback interface; not assigned in the code visible here. */
262 struct ifnet *lo0ifp;
263 
264 /*
265  * Allocate the link level name for the specified interface.  This
266  * is an attachment helper.  It must be called after ifp->if_addrlen
267  * is initialized, which may not be the case when if_attach() is
268  * called.
269  */
270 void
271 if_alloc_sadl(struct ifnet *ifp)
272 {
273 	unsigned socksize, ifasize;
274 	int addrlen, namelen;
275 	struct sockaddr_dl *mask, *sdl;
276 	struct ifaddr *ifa;
277 
278 	/*
279 	 * If the interface already has a link name, release it
280 	 * now.  This is useful for interfaces that can change
281 	 * link types, and thus switch link names often.
282 	 */
283 	if (ifp->if_sadl != NULL)
284 		if_free_sadl(ifp);
285 
286 	namelen = strlen(ifp->if_xname);
287 	addrlen = ifp->if_addrlen;
288 	socksize = roundup(sockaddr_dl_measure(namelen, addrlen), sizeof(long));
289 	ifasize = sizeof(*ifa) + 2 * socksize;
290 	ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK|M_ZERO);
291 
292 	sdl = (struct sockaddr_dl *)(ifa + 1);
293 	mask = (struct sockaddr_dl *)(socksize + (char *)sdl);
294 
295 	sockaddr_dl_init(sdl, socksize, ifp->if_index, ifp->if_type,
296 	    ifp->if_xname, namelen, NULL, addrlen);
297 	mask->sdl_len = sockaddr_dl_measure(namelen, 0);
298 	memset(&mask->sdl_data[0], 0xff, namelen);
299 
300 	ifnet_addrs[ifp->if_index] = ifa;
301 	IFAREF(ifa);
302 	ifa->ifa_ifp = ifp;
303 	ifa->ifa_rtrequest = link_rtrequest;
304 	TAILQ_INSERT_HEAD(&ifp->if_addrlist, ifa, ifa_list);
305 	IFAREF(ifa);
306 	ifa->ifa_addr = (struct sockaddr *)sdl;
307 	ifp->if_sadl = sdl;
308 	ifa->ifa_netmask = (struct sockaddr *)mask;
309 }
310 
311 /*
312  * Free the link level name for the specified interface.  This is
313  * a detach helper.  This is called from if_detach() or from
314  * link layer type specific detach functions.
315  */
316 void
317 if_free_sadl(struct ifnet *ifp)
318 {
319 	struct ifaddr *ifa;
320 	int s;
321 
322 	ifa = ifnet_addrs[ifp->if_index];
323 	if (ifa == NULL) {
324 		KASSERT(ifp->if_sadl == NULL);
325 		return;
326 	}
327 
328 	KASSERT(ifp->if_sadl != NULL);
329 
330 	s = splnet();
331 	rtinit(ifa, RTM_DELETE, 0);
332 	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
333 	IFAFREE(ifa);
334 
335 	ifp->if_sadl = NULL;
336 
337 	ifnet_addrs[ifp->if_index] = NULL;
338 	IFAFREE(ifa);
339 	splx(s);
340 }
341 
342 /*
343  * Attach an interface to the
344  * list of "active" interfaces.
 *
 * In order, this routine:
 *  - on the very first call, initializes the global ifnet list and
 *    sets the initial table size (if_indexlim) to 8;
 *  - appends ifp to the ifnet list and picks an interface index;
 *  - grows ifnet_addrs[] and ifindex2ifnet[] if the index does not fit;
 *  - records ifp in ifindex2ifnet[];
 *  - resets a number of per-interface fields to their defaults;
 *  - registers the per-interface pfil head (PFIL_HOOKS), attaches the
 *    domains if any are registered, and announces the arrival.
 *
 * The link level name is NOT allocated here; see if_alloc_sadl().
345  */
346 void
347 if_attach(struct ifnet *ifp)
348 {
	/* Counts how often the index search below wrapped at USHRT_MAX. */
349 	int indexlim = 0;
350 
	/* First attach ever: set up the global list and initial table size. */
351 	if (if_indexlim == 0) {
352 		TAILQ_INIT(&ifnet);
353 		if_indexlim = 8;
354 	}
355 	TAILQ_INIT(&ifp->if_addrlist);
356 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	/*
	 * Choose the index.  Before the lookup table exists the candidate
	 * is taken as is and the counter is bumped.  Afterwards the counter
	 * is advanced for as long as the candidate lies inside the table
	 * and its slot is occupied.
	 */
357 	ifp->if_index = if_index;
358 	if (ifindex2ifnet == NULL)
359 		if_index++;
360 	else
361 		while (ifp->if_index < if_indexlim &&
362 		    ifindex2ifnet[ifp->if_index] != NULL) {
363 			++if_index;
			/* Index 0 is never handed out. */
364 			if (if_index == 0)
365 				if_index = 1;
366 			/*
367 			 * If we hit USHRT_MAX, we skip back to 1 since
368 			 * there are a number of places where the value
369 			 * of if_index or if_index itself is compared
370 			 * to or stored in an unsigned short.  By
371 			 * jumping back, we won't botch those assignments
372 			 * or comparisons.
373 			 */
374 			else if (if_index == USHRT_MAX) {
375 				/*
376 				 * However, if we have to jump back
377 				 * *twice* without finding an empty
378 				 * slot in ifindex2ifnet[], then
379 				 * there are too many (>65535) interfaces.
380 				 */
381 				if (indexlim++)
382 					panic("too many interfaces");
383 				else
384 					if_index = 1;
385 			}
386 			ifp->if_index = if_index;
387 		}
388 
389 	/*
390 	 * We have some arrays that should be indexed by if_index.
391 	 * since if_index will grow dynamically, they should grow too.
392 	 *	struct ifaddr **ifnet_addrs
393 	 *	struct ifnet **ifindex2ifnet
394 	 */
395 	if (ifnet_addrs == NULL || ifindex2ifnet == NULL ||
396 	    ifp->if_index >= if_indexlim) {
397 		size_t m, n, oldlim;
398 		void *q;
399 
		/* Double the limit until the chosen index fits. */
400 		oldlim = if_indexlim;
401 		while (ifp->if_index >= if_indexlim)
402 			if_indexlim <<= 1;
403 
		/*
		 * m is the byte size of the old table, n of the new one.
		 * The new table is zero-filled, so slots beyond the copied
		 * part start out NULL.
		 */
404 		/* grow ifnet_addrs */
405 		m = oldlim * sizeof(struct ifaddr *);
406 		n = if_indexlim * sizeof(struct ifaddr *);
407 		q = (void *)malloc(n, M_IFADDR, M_WAITOK|M_ZERO);
408 		if (ifnet_addrs != NULL) {
409 			memcpy(q, ifnet_addrs, m);
410 			free((void *)ifnet_addrs, M_IFADDR);
411 		}
412 		ifnet_addrs = (struct ifaddr **)q;
413 
414 		/* grow ifindex2ifnet */
415 		m = oldlim * sizeof(struct ifnet *);
416 		n = if_indexlim * sizeof(struct ifnet *);
417 		q = (void *)malloc(n, M_IFADDR, M_WAITOK|M_ZERO);
418 		if (ifindex2ifnet != NULL) {
419 			memcpy(q, (void *)ifindex2ifnet, m);
420 			free((void *)ifindex2ifnet, M_IFADDR);
421 		}
422 		ifindex2ifnet = (struct ifnet **)q;
423 	}
424 
	/* Claim the slot; this is what marks the index as in use. */
425 	ifindex2ifnet[ifp->if_index] = ifp;
426 
427 	/*
428 	 * Link level name is allocated later by a separate call to
429 	 * if_alloc_sadl().
430 	 */
431 
	/* A driver that left the send queue limit at 0 gets the default. */
432 	if (ifp->if_snd.ifq_maxlen == 0)
433 		ifp->if_snd.ifq_maxlen = ifqmaxlen;
434 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
435 
436 	ifp->if_link_state = LINK_STATE_UNKNOWN;
437 
	/* No capabilities or checksum offload flags are enabled at attach. */
438 	ifp->if_capenable = 0;
439 	ifp->if_csum_flags_tx = 0;
440 	ifp->if_csum_flags_rx = 0;
441 
442 #ifdef ALTQ
	/* Reset ALTQ state; only the ALTQF_CANTCHANGE bits survive. */
443 	ifp->if_snd.altq_type = 0;
444 	ifp->if_snd.altq_disc = NULL;
445 	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
446 	ifp->if_snd.altq_tbr  = NULL;
447 	ifp->if_snd.altq_ifp  = ifp;
448 #endif
449 
450 #ifdef PFIL_HOOKS
	/*
	 * Register this interface's own hook head, then run the hooks on
	 * the file-scope head (if_pfil) with PFIL_IFNET_ATTACH passed in
	 * the mbuf argument position.
	 */
451 	ifp->if_pfil.ph_type = PFIL_TYPE_IFNET;
452 	ifp->if_pfil.ph_ifnet = ifp;
453 	if (pfil_head_register(&ifp->if_pfil) != 0)
454 		printf("%s: WARNING: unable to register pfil hook\n",
455 		    ifp->if_xname);
456 	(void)pfil_run_hooks(&if_pfil,
457 	    (struct mbuf **)PFIL_IFNET_ATTACH, ifp, PFIL_IFNET);
458 #endif
459 
	/* Only attach per-domain data once at least one domain exists. */
460 	if (!STAILQ_EMPTY(&domains))
461 		if_attachdomain1(ifp);
462 
463 	/* Announce the interface. */
464 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
465 }
466 
467 void
468 if_attachdomain(void)
469 {
470 	struct ifnet *ifp;
471 	int s;
472 
473 	s = splnet();
474 	IFNET_FOREACH(ifp)
475 		if_attachdomain1(ifp);
476 	splx(s);
477 }
478 
479 void
480 if_attachdomain1(struct ifnet *ifp)
481 {
482 	struct domain *dp;
483 	int s;
484 
485 	s = splnet();
486 
487 	/* address family dependent data region */
488 	memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata));
489 	DOMAIN_FOREACH(dp) {
490 		if (dp->dom_ifattach != NULL)
491 			ifp->if_afdata[dp->dom_family] =
492 			    (*dp->dom_ifattach)(ifp);
493 	}
494 
495 	splx(s);
496 }
497 
498 /*
499  * Deactivate an interface.  This points all of the procedure
500  * handles at error stubs.  May be called from interrupt context.
501  */
502 void
503 if_deactivate(struct ifnet *ifp)
504 {
505 	int s;
506 
507 	s = splnet();
508 
509 	ifp->if_output	 = if_nulloutput;
510 	ifp->if_input	 = if_nullinput;
511 	ifp->if_start	 = if_nullstart;
512 	ifp->if_ioctl	 = if_nullioctl;
513 	ifp->if_init	 = if_nullinit;
514 	ifp->if_stop	 = if_nullstop;
515 	ifp->if_watchdog = if_nullwatchdog;
516 	ifp->if_drain	 = if_nulldrain;
517 
518 	/* No more packets may be enqueued. */
519 	ifp->if_snd.ifq_maxlen = 0;
520 
521 	splx(s);
522 }
523 
524 /*
525  * Detach an interface from the list of "active" interfaces,
526  * freeing any resources as we go along.
527  *
 * Everything between splnet() and splx() below runs as one sequence:
 * mark the interface down, tear down ALTQ and CARP state, purge all
 * non-link addresses, free the link level name, delete leftover
 * routes, run the per-domain detach hooks, run the pfil detach hooks,
 * announce the departure, release the interface index, unlink from
 * the ifnet list and finally drop queued packets received on ifp.
 *
528  * NOTE: This routine must be called with a valid thread context,
529  * as it may block.
530  */
531 void
532 if_detach(struct ifnet *ifp)
533 {
	/* Dummy socket: only so_proto is set, to satisfy pr_usrreq(). */
534 	struct socket so;
535 	struct ifaddr *ifa;
536 #ifdef IFAREF_DEBUG
537 	struct ifaddr *last_ifa = NULL;
538 #endif
539 	struct domain *dp;
540 	const struct protosw *pr;
541 	int s, i, family, purged;
542 
543 	/*
544 	 * XXX It's kind of lame that we have to have the
545 	 * XXX socket structure...
546 	 */
547 	memset(&so, 0, sizeof(so));
548 
549 	s = splnet();
550 
551 	/*
552 	 * Do an if_down() to give protocols a chance to do something.
553 	 */
554 	if_down(ifp);
555 
556 #ifdef ALTQ
557 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
558 		altq_disable(&ifp->if_snd);
559 	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
560 		altq_detach(&ifp->if_snd);
561 #endif
562 
563 
564 #if NCARP > 0
565 	/* Remove the interface from any carp group it is a part of.  */
566 	if (ifp->if_carp != NULL && ifp->if_type != IFT_CARP)
567 		carp_ifdetach(ifp);
568 #endif
569 
570 	/*
571 	 * Rip all the addresses off the interface.  This should make
572 	 * all of the routes go away.
573 	 *
574 	 * pr_usrreq calls can remove an arbitrary number of ifaddrs
575 	 * from the list, including our "cursor", ifa.  For safety,
576 	 * and to honor the TAILQ abstraction, I just restart the
577 	 * loop after each removal.  Note that the loop will exit
578 	 * when all of the remaining ifaddrs belong to the AF_LINK
579 	 * family.  I am counting on the historical fact that at
580 	 * least one pr_usrreq in each address domain removes at
581 	 * least one ifaddr.
582 	 */
583 again:
584 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
585 		family = ifa->ifa_addr->sa_family;
586 #ifdef IFAREF_DEBUG
		/*
		 * Seeing the same ifaddr on two consecutive passes means
		 * the purge made no progress; panic rather than spin.
		 */
587 		printf("if_detach: ifaddr %p, family %d, refcnt %d\n",
588 		    ifa, family, ifa->ifa_refcnt);
589 		if (last_ifa != NULL && ifa == last_ifa)
590 			panic("if_detach: loop detected");
591 		last_ifa = ifa;
592 #endif
		/* Link level addresses are left for if_free_sadl() below. */
593 		if (family == AF_LINK)
594 			continue;
595 		dp = pffinddomain(family);
596 #ifdef DIAGNOSTIC
597 		if (dp == NULL)
598 			panic("if_detach: no domain for AF %d",
599 			    family);
600 #endif
601 		/*
602 		 * XXX These PURGEIF calls are redundant with the
603 		 * purge-all-families calls below, but are left in for
604 		 * now both to make a smaller change, and to avoid
605 		 * unplanned interactions with clearing of
606 		 * ifp->if_addrlist.
607 		 */
608 		purged = 0;
609 		for (pr = dp->dom_protosw;
610 		     pr < dp->dom_protoswNPROTOSW; pr++) {
611 			so.so_proto = pr;
612 			if (pr->pr_usrreq != NULL) {
				/* ifp travels in the mbuf argument slot. */
613 				(void) (*pr->pr_usrreq)(&so,
614 				    PRU_PURGEIF, NULL, NULL,
615 				    (struct mbuf *) ifp, curlwp);
616 				purged = 1;
617 			}
618 		}
619 		if (purged == 0) {
620 			/*
621 			 * XXX What's really the best thing to do
622 			 * XXX here?  --thorpej@NetBSD.org
623 			 */
			/*
			 * No protocol in this domain has a pr_usrreq, so
			 * unlink the address ourselves to guarantee the
			 * restart loop terminates.  NOTE(review): no
			 * IFAFREE() is done for it here.
			 */
624 			printf("if_detach: WARNING: AF %d not purged\n",
625 			    family);
626 			TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
627 		}
		/* The list may have changed under us: rescan from the head. */
628 		goto again;
629 	}
630 
631 	if_free_sadl(ifp);
632 
633 	/* Walk the routing table looking for stragglers. */
634 	for (i = 0; i <= AF_MAX; i++)
635 		(void)rt_walktree(i, if_rt_walktree, ifp);
636 
637 	DOMAIN_FOREACH(dp) {
		/* Hand back whatever dom_ifattach stored in if_afdata[]. */
638 		if (dp->dom_ifdetach != NULL && ifp->if_afdata[dp->dom_family])
639 			(*dp->dom_ifdetach)(ifp,
640 			    ifp->if_afdata[dp->dom_family]);
641 
642 		/*
643 		 * One would expect multicast memberships (INET and
644 		 * INET6) on UDP sockets to be purged by the PURGEIF
645 		 * calls above, but if all addresses were removed from
646 		 * the interface prior to destruction, the calls will
647 		 * not be made (e.g. ppp, for which pppd(8) generally
648 		 * removes addresses before destroying the interface).
649 		 * Because there is no invariant that multicast
650 		 * memberships only exist for interfaces with IPv4
651 		 * addresses, we must call PURGEIF regardless of
652 		 * addresses.  (Protocols which might store ifnet
653 		 * pointers are marked with PR_PURGEIF.)
654 		 */
655 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) {
656 			so.so_proto = pr;
657 			if (pr->pr_usrreq != NULL && pr->pr_flags & PR_PURGEIF)
658 				(void)(*pr->pr_usrreq)(&so, PRU_PURGEIF, NULL,
659 				    NULL, (struct mbuf *)ifp, curlwp);
660 		}
661 	}
662 
663 #ifdef PFIL_HOOKS
	/* Mirror of if_attach(): run the detach hooks, then unregister. */
664 	(void)pfil_run_hooks(&if_pfil,
665 	    (struct mbuf **)PFIL_IFNET_DETACH, ifp, PFIL_IFNET);
666 	(void)pfil_head_unregister(&ifp->if_pfil);
667 #endif
668 
669 	/* Announce that the interface is gone. */
670 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
671 
	/* Release the index; if_attach() treats a NULL slot as free. */
672 	ifindex2ifnet[ifp->if_index] = NULL;
673 
674 	TAILQ_REMOVE(&ifnet, ifp, if_list);
675 
676 	/*
677 	 * remove packets that came from ifp, from software interrupt queues.
678 	 */
679 	DOMAIN_FOREACH(dp) {
680 		for (i = 0; i < __arraycount(dp->dom_ifqueues); i++) {
			/* The first NULL entry ends this domain's queue list. */
681 			if (dp->dom_ifqueues[i] == NULL)
682 				break;
683 			if_detach_queues(ifp, dp->dom_ifqueues[i]);
684 		}
685 	}
686 
687 	splx(s);
688 }
689 
690 static void
691 if_detach_queues(struct ifnet *ifp, struct ifqueue *q)
692 {
693 	struct mbuf *m, *prev, *next;
694 
695 	prev = NULL;
696 	for (m = q->ifq_head; m != NULL; m = next) {
697 		next = m->m_nextpkt;
698 #ifdef DIAGNOSTIC
699 		if ((m->m_flags & M_PKTHDR) == 0) {
700 			prev = m;
701 			continue;
702 		}
703 #endif
704 		if (m->m_pkthdr.rcvif != ifp) {
705 			prev = m;
706 			continue;
707 		}
708 
709 		if (prev != NULL)
710 			prev->m_nextpkt = m->m_nextpkt;
711 		else
712 			q->ifq_head = m->m_nextpkt;
713 		if (q->ifq_tail == m)
714 			q->ifq_tail = prev;
715 		q->ifq_len--;
716 
717 		m->m_nextpkt = NULL;
718 		m_freem(m);
719 		IF_DROP(q);
720 	}
721 }
722 
723 /*
724  * Callback for a radix tree walk to delete all references to an
725  * ifnet.
726  */
727 static int
728 if_rt_walktree(struct rtentry *rt, void *v)
729 {
730 	struct ifnet *ifp = (struct ifnet *)v;
731 	int error;
732 
733 	if (rt->rt_ifp != ifp)
734 		return 0;
735 
736 	/* Delete the entry. */
737 	++rt->rt_refcnt;
738 	error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
739 	    rt_mask(rt), rt->rt_flags, NULL);
740 	KASSERT((rt->rt_flags & RTF_UP) == 0);
741 	rt->rt_ifp = NULL;
742 	RTFREE(rt);
743 	if (error != 0)
744 		printf("%s: warning: unable to delete rtentry @ %p, "
745 		    "error = %d\n", ifp->if_xname, rt, error);
746 	return 0;
747 }
748 
749 /*
750  * Create a clone network interface.
751  */
752 int
753 if_clone_create(const char *name)
754 {
755 	struct if_clone *ifc;
756 	int unit;
757 
758 	ifc = if_clone_lookup(name, &unit);
759 	if (ifc == NULL)
760 		return EINVAL;
761 
762 	if (ifunit(name) != NULL)
763 		return EEXIST;
764 
765 	return (*ifc->ifc_create)(ifc, unit);
766 }
767 
768 /*
769  * Destroy a clone network interface.
770  */
771 int
772 if_clone_destroy(const char *name)
773 {
774 	struct if_clone *ifc;
775 	struct ifnet *ifp;
776 
777 	ifc = if_clone_lookup(name, NULL);
778 	if (ifc == NULL)
779 		return EINVAL;
780 
781 	ifp = ifunit(name);
782 	if (ifp == NULL)
783 		return ENXIO;
784 
785 	if (ifc->ifc_destroy == NULL)
786 		return EOPNOTSUPP;
787 
788 	return (*ifc->ifc_destroy)(ifp);
789 }
790 
791 /*
792  * Look up a network interface cloner.
793  */
794 static struct if_clone *
795 if_clone_lookup(const char *name, int *unitp)
796 {
797 	struct if_clone *ifc;
798 	const char *cp;
799 	int unit;
800 
801 	/* separate interface name from unit */
802 	for (cp = name;
803 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
804 	    cp++)
805 		continue;
806 
807 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
808 		return NULL;	/* No name or unit number */
809 
810 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
811 		if (strlen(ifc->ifc_name) == cp - name &&
812 		    strncmp(name, ifc->ifc_name, cp - name) == 0)
813 			break;
814 	}
815 
816 	if (ifc == NULL)
817 		return NULL;
818 
819 	unit = 0;
820 	while (cp - name < IFNAMSIZ && *cp) {
821 		if (*cp < '0' || *cp > '9' || unit > INT_MAX / 10) {
822 			/* Bogus unit number. */
823 			return NULL;
824 		}
825 		unit = (unit * 10) + (*cp++ - '0');
826 	}
827 
828 	if (unitp != NULL)
829 		*unitp = unit;
830 	return ifc;
831 }
832 
833 /*
834  * Register a network interface cloner.
835  */
836 void
837 if_clone_attach(struct if_clone *ifc)
838 {
839 
840 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
841 	if_cloners_count++;
842 }
843 
844 /*
845  * Unregister a network interface cloner.
846  */
847 void
848 if_clone_detach(struct if_clone *ifc)
849 {
850 
851 	LIST_REMOVE(ifc, ifc_list);
852 	if_cloners_count--;
853 }
854 
855 /*
856  * Provide list of interface cloners to userspace.
857  */
858 static int
859 if_clone_list(struct if_clonereq *ifcr)
860 {
861 	char outbuf[IFNAMSIZ], *dst;
862 	struct if_clone *ifc;
863 	int count, error = 0;
864 
865 	ifcr->ifcr_total = if_cloners_count;
866 	if ((dst = ifcr->ifcr_buffer) == NULL) {
867 		/* Just asking how many there are. */
868 		return 0;
869 	}
870 
871 	if (ifcr->ifcr_count < 0)
872 		return EINVAL;
873 
874 	count = (if_cloners_count < ifcr->ifcr_count) ?
875 	    if_cloners_count : ifcr->ifcr_count;
876 
877 	for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0;
878 	     ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) {
879 		(void)strncpy(outbuf, ifc->ifc_name, sizeof(outbuf));
880 		if (outbuf[sizeof(outbuf) - 1] != '\0')
881 			return ENAMETOOLONG;
882 		error = copyout(outbuf, dst, sizeof(outbuf));
883 		if (error != 0)
884 			break;
885 	}
886 
887 	return error;
888 }
889 
/*
 * Return 1 if sockaddr_cmp() reports the two socket addresses as
 * equal (result 0), and 0 otherwise.
 */
static inline int
equal(const struct sockaddr *sa1, const struct sockaddr *sa2)
{
	int diff = sockaddr_cmp(sa1, sa2);

	return !diff;
}
895 
896 /*
897  * Locate an interface based on a complete address.
898  */
899 /*ARGSUSED*/
900 struct ifaddr *
901 ifa_ifwithaddr(const struct sockaddr *addr)
902 {
903 	struct ifnet *ifp;
904 	struct ifaddr *ifa;
905 
906 	IFNET_FOREACH(ifp) {
907 		if (ifp->if_output == if_nulloutput)
908 			continue;
909 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
910 			if (ifa->ifa_addr->sa_family != addr->sa_family)
911 				continue;
912 			if (equal(addr, ifa->ifa_addr))
913 				return ifa;
914 			if ((ifp->if_flags & IFF_BROADCAST) &&
915 			    ifa->ifa_broadaddr &&
916 			    /* IP6 doesn't have broadcast */
917 			    ifa->ifa_broadaddr->sa_len != 0 &&
918 			    equal(ifa->ifa_broadaddr, addr))
919 				return ifa;
920 		}
921 	}
922 	return NULL;
923 }
924 
925 /*
926  * Locate the point to point interface with a given destination address.
927  */
928 /*ARGSUSED*/
929 struct ifaddr *
930 ifa_ifwithdstaddr(const struct sockaddr *addr)
931 {
932 	struct ifnet *ifp;
933 	struct ifaddr *ifa;
934 
935 	IFNET_FOREACH(ifp) {
936 		if (ifp->if_output == if_nulloutput)
937 			continue;
938 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
939 			continue;
940 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
941 			if (ifa->ifa_addr->sa_family != addr->sa_family ||
942 			    ifa->ifa_dstaddr == NULL)
943 				continue;
944 			if (equal(addr, ifa->ifa_dstaddr))
945 				return ifa;
946 		}
947 	}
948 	return NULL;
949 }
950 
951 /*
952  * Find an interface on a specific network.  If many, choice
953  * is most specific found.
954  */
955 struct ifaddr *
956 ifa_ifwithnet(const struct sockaddr *addr)
957 {
958 	struct ifnet *ifp;
959 	struct ifaddr *ifa;
960 	const struct sockaddr_dl *sdl;
961 	struct ifaddr *ifa_maybe = 0;
962 	u_int af = addr->sa_family;
963 	const char *addr_data = addr->sa_data, *cplim;
964 
965 	if (af == AF_LINK) {
966 		sdl = satocsdl(addr);
967 		if (sdl->sdl_index && sdl->sdl_index < if_indexlim &&
968 		    ifindex2ifnet[sdl->sdl_index] &&
969 		    ifindex2ifnet[sdl->sdl_index]->if_output != if_nulloutput)
970 			return ifnet_addrs[sdl->sdl_index];
971 	}
972 #ifdef NETATALK
973 	if (af == AF_APPLETALK) {
974 		const struct sockaddr_at *sat, *sat2;
975 		sat = (const struct sockaddr_at *)addr;
976 		IFNET_FOREACH(ifp) {
977 			if (ifp->if_output == if_nulloutput)
978 				continue;
979 			ifa = at_ifawithnet((const struct sockaddr_at *)addr, ifp);
980 			if (ifa == NULL)
981 				continue;
982 			sat2 = (struct sockaddr_at *)ifa->ifa_addr;
983 			if (sat2->sat_addr.s_net == sat->sat_addr.s_net)
984 				return ifa; /* exact match */
985 			if (ifa_maybe == NULL) {
986 				/* else keep the if with the right range */
987 				ifa_maybe = ifa;
988 			}
989 		}
990 		return ifa_maybe;
991 	}
992 #endif
993 	IFNET_FOREACH(ifp) {
994 		if (ifp->if_output == if_nulloutput)
995 			continue;
996 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
997 			const char *cp, *cp2, *cp3;
998 
999 			if (ifa->ifa_addr->sa_family != af ||
1000 			    ifa->ifa_netmask == NULL)
1001  next:				continue;
1002 			cp = addr_data;
1003 			cp2 = ifa->ifa_addr->sa_data;
1004 			cp3 = ifa->ifa_netmask->sa_data;
1005 			cplim = (const char *)ifa->ifa_netmask +
1006 			    ifa->ifa_netmask->sa_len;
1007 			while (cp3 < cplim) {
1008 				if ((*cp++ ^ *cp2++) & *cp3++) {
1009 					/* want to continue for() loop */
1010 					goto next;
1011 				}
1012 			}
1013 			if (ifa_maybe == NULL ||
1014 			    rn_refines((void *)ifa->ifa_netmask,
1015 			    (void *)ifa_maybe->ifa_netmask))
1016 				ifa_maybe = ifa;
1017 		}
1018 	}
1019 	return ifa_maybe;
1020 }
1021 
/*
 * Find the interface address for an address, trying three lookups in
 * order: exact address match, point to point destination match, and
 * finally network match.  Returns the first hit, or NULL.
 */
struct ifaddr *
ifa_ifwithladdr(const struct sockaddr *addr)
{
	struct ifaddr *ia;

	ia = ifa_ifwithaddr(addr);
	if (ia == NULL)
		ia = ifa_ifwithdstaddr(addr);
	if (ia == NULL)
		ia = ifa_ifwithnet(addr);

	return ia;
}
1035 
1036 /*
1037  * Find an interface using a specific address family
1038  */
1039 struct ifaddr *
1040 ifa_ifwithaf(int af)
1041 {
1042 	struct ifnet *ifp;
1043 	struct ifaddr *ifa;
1044 
1045 	IFNET_FOREACH(ifp) {
1046 		if (ifp->if_output == if_nulloutput)
1047 			continue;
1048 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1049 			if (ifa->ifa_addr->sa_family == af)
1050 				return ifa;
1051 		}
1052 	}
1053 	return NULL;
1054 }
1055 
1056 /*
1057  * Find an interface address specific to an interface best matching
1058  * a given address.
1059  */
1060 struct ifaddr *
1061 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp)
1062 {
1063 	struct ifaddr *ifa;
1064 	const char *cp, *cp2, *cp3;
1065 	const char *cplim;
1066 	struct ifaddr *ifa_maybe = 0;
1067 	u_int af = addr->sa_family;
1068 
1069 	if (ifp->if_output == if_nulloutput)
1070 		return NULL;
1071 
1072 	if (af >= AF_MAX)
1073 		return NULL;
1074 
1075 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1076 		if (ifa->ifa_addr->sa_family != af)
1077 			continue;
1078 		ifa_maybe = ifa;
1079 		if (ifa->ifa_netmask == NULL) {
1080 			if (equal(addr, ifa->ifa_addr) ||
1081 			    (ifa->ifa_dstaddr &&
1082 			     equal(addr, ifa->ifa_dstaddr)))
1083 				return ifa;
1084 			continue;
1085 		}
1086 		cp = addr->sa_data;
1087 		cp2 = ifa->ifa_addr->sa_data;
1088 		cp3 = ifa->ifa_netmask->sa_data;
1089 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1090 		for (; cp3 < cplim; cp3++) {
1091 			if ((*cp++ ^ *cp2++) & *cp3)
1092 				break;
1093 		}
1094 		if (cp3 == cplim)
1095 			return ifa;
1096 	}
1097 	return ifa_maybe;
1098 }
1099 
1100 /*
1101  * Default action when installing a route with a Link Level gateway.
1102  * Lookup an appropriate real ifa to point to.
1103  * This should be moved to /sys/net/link.c eventually.
1104  */
1105 void
1106 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1107 {
1108 	struct ifaddr *ifa;
1109 	const struct sockaddr *dst;
1110 	struct ifnet *ifp;
1111 
1112 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) ||
1113 	    ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_getkey(rt)) == NULL))
1114 		return;
1115 	if ((ifa = ifaof_ifpforaddr(dst, ifp)) != NULL) {
1116 		rt_replace_ifa(rt, ifa);
1117 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1118 			ifa->ifa_rtrequest(cmd, rt, info);
1119 	}
1120 }
1121 
1122 /*
1123  * Handle a change in the interface link state.
1124  */
1125 void
1126 if_link_state_change(struct ifnet *ifp, int link_state)
1127 {
1128 	if (ifp->if_link_state == link_state)
1129 		return;
1130 	ifp->if_link_state = link_state;
1131 	/* Notify that the link state has changed. */
1132 	rt_ifmsg(ifp);
1133 #if NCARP > 0
1134 	if (ifp->if_carp)
1135 		carp_carpdev_state(ifp);
1136 #endif
1137 }
1138 
1139 /*
1140  * Mark an interface down and notify protocols of
1141  * the transition.
1142  * NOTE: must be called at splsoftnet or equivalent.
1143  */
1144 void
1145 if_down(struct ifnet *ifp)
1146 {
1147 	struct ifaddr *ifa;
1148 
1149 	ifp->if_flags &= ~IFF_UP;
1150 	microtime(&ifp->if_lastchange);
1151 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
1152 		pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1153 	IFQ_PURGE(&ifp->if_snd);
1154 #if NCARP > 0
1155 	if (ifp->if_carp)
1156 		carp_carpdev_state(ifp);
1157 #endif
1158 	rt_ifmsg(ifp);
1159 }
1160 
1161 /*
1162  * Mark an interface up and notify protocols of
1163  * the transition.
1164  * NOTE: must be called at splsoftnet or equivalent.
1165  */
1166 void
1167 if_up(struct ifnet *ifp)
1168 {
1169 #ifdef notyet
1170 	struct ifaddr *ifa;
1171 #endif
1172 
1173 	ifp->if_flags |= IFF_UP;
1174 	microtime(&ifp->if_lastchange);
1175 #ifdef notyet
1176 	/* this has no effect on IP, and will kill all ISO connections XXX */
1177 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
1178 		pfctlinput(PRC_IFUP, ifa->ifa_addr);
1179 #endif
1180 #if NCARP > 0
1181 	if (ifp->if_carp)
1182 		carp_carpdev_state(ifp);
1183 #endif
1184 	rt_ifmsg(ifp);
1185 #ifdef INET6
1186 	in6_if_up(ifp);
1187 #endif
1188 }
1189 
1190 /*
1191  * Handle interface watchdog timer routines.  Called
1192  * from softclock, we decrement timers (if set) and
1193  * call the appropriate interface routine on expiration.
1194  */
1195 void
1196 if_slowtimo(void *arg)
1197 {
1198 	struct ifnet *ifp;
1199 	int s = splnet();
1200 
1201 	IFNET_FOREACH(ifp) {
1202 		if (ifp->if_timer == 0 || --ifp->if_timer)
1203 			continue;
1204 		if (ifp->if_watchdog != NULL)
1205 			(*ifp->if_watchdog)(ifp);
1206 	}
1207 	splx(s);
1208 	callout_reset(&if_slowtimo_ch, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1209 }
1210 
1211 /*
1212  * Set/clear promiscuous mode on interface ifp based on the truth value
1213  * of pswitch.  The calls are reference counted so that only the first
1214  * "on" request actually has an effect, as does the final "off" request.
1215  * Results are undefined if the "off" and "on" requests are not matched.
1216  */
1217 int
1218 ifpromisc(struct ifnet *ifp, int pswitch)
1219 {
1220 	int pcount, ret;
1221 	short flags;
1222 	struct ifreq ifr;
1223 
1224 	pcount = ifp->if_pcount;
1225 	flags = ifp->if_flags;
1226 	if (pswitch) {
1227 		/*
1228 		 * Allow the device to be "placed" into promiscuous
1229 		 * mode even if it is not configured up.  It will
1230 		 * consult IFF_PROMISC when it is is brought up.
1231 		 */
1232 		if (ifp->if_pcount++ != 0)
1233 			return 0;
1234 		ifp->if_flags |= IFF_PROMISC;
1235 		if ((ifp->if_flags & IFF_UP) == 0)
1236 			return 0;
1237 	} else {
1238 		if (--ifp->if_pcount > 0)
1239 			return 0;
1240 		ifp->if_flags &= ~IFF_PROMISC;
1241 		/*
1242 		 * If the device is not configured up, we should not need to
1243 		 * turn off promiscuous mode (device should have turned it
1244 		 * off when interface went down; and will look at IFF_PROMISC
1245 		 * again next time interface comes up).
1246 		 */
1247 		if ((ifp->if_flags & IFF_UP) == 0)
1248 			return 0;
1249 	}
1250 	memset(&ifr, 0, sizeof(ifr));
1251 	ifr.ifr_flags = ifp->if_flags;
1252 	ret = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (void *) &ifr);
1253 	/* Restore interface state if not successful. */
1254 	if (ret != 0) {
1255 		ifp->if_pcount = pcount;
1256 		ifp->if_flags = flags;
1257 	}
1258 	return ret;
1259 }
1260 
1261 /*
1262  * Map interface name to
1263  * interface structure pointer.
1264  */
1265 struct ifnet *
1266 ifunit(const char *name)
1267 {
1268 	struct ifnet *ifp;
1269 	const char *cp = name;
1270 	u_int unit = 0;
1271 	u_int i;
1272 
1273 	/*
1274 	 * If the entire name is a number, treat it as an ifindex.
1275 	 */
1276 	for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++) {
1277 		unit = unit * 10 + (*cp - '0');
1278 	}
1279 
1280 	/*
1281 	 * If the number took all of the name, then it's a valid ifindex.
1282 	 */
1283 	if (i == IFNAMSIZ || (cp != name && *cp == '\0')) {
1284 		if (unit >= if_indexlim)
1285 			return NULL;
1286 		ifp = ifindex2ifnet[unit];
1287 		if (ifp == NULL || ifp->if_output == if_nulloutput)
1288 			return NULL;
1289 		return ifp;
1290 	}
1291 
1292 	IFNET_FOREACH(ifp) {
1293 		if (ifp->if_output == if_nulloutput)
1294 			continue;
1295 	 	if (strcmp(ifp->if_xname, name) == 0)
1296 			return ifp;
1297 	}
1298 	return NULL;
1299 }
1300 
1301 /*
1302  * Interface ioctls.
1303  */
1304 int
1305 ifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l)
1306 {
1307 	struct ifnet *ifp;
1308 	struct ifreq *ifr;
1309 	struct ifcapreq *ifcr;
1310 	struct ifdatareq *ifdr;
1311 	int s, error = 0;
1312 #if defined(COMPAT_OSOCK) || defined(COMPAT_OIFREQ)
1313 	u_long ocmd = cmd;
1314 #endif
1315 	short oif_flags;
1316 #ifdef COMPAT_OIFREQ
1317 	struct ifreq ifrb;
1318 	struct oifreq *oifr = NULL;
1319 #endif
1320 
1321 	switch (cmd) {
1322 #ifdef COMPAT_OIFREQ
1323 	case OSIOCGIFCONF:
1324 	case OOSIOCGIFCONF:
1325 		return compat_ifconf(cmd, data);
1326 #endif
1327 	case SIOCGIFCONF:
1328 		return ifconf(cmd, data);
1329 	}
1330 
1331 #ifdef COMPAT_OIFREQ
1332 	cmd = compat_cvtcmd(cmd);
1333 	if (cmd != ocmd) {
1334 		oifr = data;
1335 		data = ifr = &ifrb;
1336 		ifreqo2n(oifr, ifr);
1337 	} else
1338 #endif
1339 		ifr = data;
1340 	ifcr = data;
1341 	ifdr = data;
1342 
1343 	ifp = ifunit(ifr->ifr_name);
1344 
1345 	switch (cmd) {
1346 	case SIOCIFCREATE:
1347 	case SIOCIFDESTROY:
1348 		if (l != NULL) {
1349 			error = kauth_authorize_network(l->l_cred,
1350 			    KAUTH_NETWORK_INTERFACE,
1351 			    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
1352 			    (void *)cmd, NULL);
1353 			if (error != 0)
1354 				return error;
1355 		}
1356 		return (cmd == SIOCIFCREATE) ?
1357 			if_clone_create(ifr->ifr_name) :
1358 			if_clone_destroy(ifr->ifr_name);
1359 
1360 	case SIOCIFGCLONERS:
1361 		return if_clone_list((struct if_clonereq *)data);
1362 	}
1363 
1364 	if (ifp == NULL)
1365 		return ENXIO;
1366 
1367 	switch (cmd) {
1368 	case SIOCSIFFLAGS:
1369 	case SIOCSIFCAP:
1370 	case SIOCSIFMETRIC:
1371 	case SIOCZIFDATA:
1372 	case SIOCSIFMTU:
1373 	case SIOCSIFPHYADDR:
1374 	case SIOCDIFPHYADDR:
1375 #ifdef INET6
1376 	case SIOCSIFPHYADDR_IN6:
1377 #endif
1378 	case SIOCSLIFPHYADDR:
1379 	case SIOCADDMULTI:
1380 	case SIOCDELMULTI:
1381 	case SIOCSIFMEDIA:
1382 	case SIOCSDRVSPEC:
1383 	case SIOCG80211:
1384 	case SIOCS80211:
1385 	case SIOCS80211NWID:
1386 	case SIOCS80211NWKEY:
1387 	case SIOCS80211POWER:
1388 	case SIOCS80211BSSID:
1389 	case SIOCS80211CHANNEL:
1390 		if (l != NULL) {
1391 			error = kauth_authorize_network(l->l_cred,
1392 			    KAUTH_NETWORK_INTERFACE,
1393 			    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp,
1394 			    (void *)cmd, NULL);
1395 			if (error != 0)
1396 				return error;
1397 		}
1398 	}
1399 
1400 	oif_flags = ifp->if_flags;
1401 	switch (cmd) {
1402 
1403 	case SIOCGIFFLAGS:
1404 		ifr->ifr_flags = ifp->if_flags;
1405 		break;
1406 
1407 	case SIOCGIFMETRIC:
1408 		ifr->ifr_metric = ifp->if_metric;
1409 		break;
1410 
1411 	case SIOCGIFMTU:
1412 		ifr->ifr_mtu = ifp->if_mtu;
1413 		break;
1414 
1415 	case SIOCGIFDLT:
1416 		ifr->ifr_dlt = ifp->if_dlt;
1417 		break;
1418 
1419 	case SIOCSIFFLAGS:
1420 		if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) {
1421 			s = splnet();
1422 			if_down(ifp);
1423 			splx(s);
1424 		}
1425 		if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) {
1426 			s = splnet();
1427 			if_up(ifp);
1428 			splx(s);
1429 		}
1430 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1431 			(ifr->ifr_flags &~ IFF_CANTCHANGE);
1432 		if (ifp->if_ioctl)
1433 			(void)(*ifp->if_ioctl)(ifp, cmd, data);
1434 		break;
1435 
1436 	case SIOCGIFCAP:
1437 		ifcr->ifcr_capabilities = ifp->if_capabilities;
1438 		ifcr->ifcr_capenable = ifp->if_capenable;
1439 		break;
1440 
1441 	case SIOCSIFCAP:
1442 		if ((ifcr->ifcr_capenable & ~ifp->if_capabilities) != 0)
1443 			return EINVAL;
1444 		if (ifp->if_ioctl == NULL)
1445 			return EOPNOTSUPP;
1446 
1447 		/* Must prevent race with packet reception here. */
1448 		s = splnet();
1449 		if (ifcr->ifcr_capenable != ifp->if_capenable) {
1450 			struct ifreq ifrq;
1451 
1452 			ifrq.ifr_flags = ifp->if_flags;
1453 			ifp->if_capenable = ifcr->ifcr_capenable;
1454 
1455 			/* Pre-compute the checksum flags mask. */
1456 			ifp->if_csum_flags_tx = 0;
1457 			ifp->if_csum_flags_rx = 0;
1458 			if (ifp->if_capenable & IFCAP_CSUM_IPv4_Tx) {
1459 				ifp->if_csum_flags_tx |= M_CSUM_IPv4;
1460 			}
1461 			if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx) {
1462 				ifp->if_csum_flags_rx |= M_CSUM_IPv4;
1463 			}
1464 
1465 			if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Tx) {
1466 				ifp->if_csum_flags_tx |= M_CSUM_TCPv4;
1467 			}
1468 			if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Rx) {
1469 				ifp->if_csum_flags_rx |= M_CSUM_TCPv4;
1470 			}
1471 
1472 			if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Tx) {
1473 				ifp->if_csum_flags_tx |= M_CSUM_UDPv4;
1474 			}
1475 			if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Rx) {
1476 				ifp->if_csum_flags_rx |= M_CSUM_UDPv4;
1477 			}
1478 
1479 			if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Tx) {
1480 				ifp->if_csum_flags_tx |= M_CSUM_TCPv6;
1481 			}
1482 			if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Rx) {
1483 				ifp->if_csum_flags_rx |= M_CSUM_TCPv6;
1484 			}
1485 
1486 			if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Tx) {
1487 				ifp->if_csum_flags_tx |= M_CSUM_UDPv6;
1488 			}
1489 			if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Rx) {
1490 				ifp->if_csum_flags_rx |= M_CSUM_UDPv6;
1491 			}
1492 
1493 			/*
1494 			 * Only kick the interface if it's up.  If it's
1495 			 * not up now, it will notice the cap enables
1496 			 * when it is brought up later.
1497 			 */
1498 			if (ifp->if_flags & IFF_UP)
1499 				(void)(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS,
1500 				    (void *)&ifrq);
1501 		}
1502 		splx(s);
1503 		break;
1504 
1505 	case SIOCSIFMETRIC:
1506 		ifp->if_metric = ifr->ifr_metric;
1507 		break;
1508 
1509 	case SIOCGIFDATA:
1510 		ifdr->ifdr_data = ifp->if_data;
1511 		break;
1512 
1513 	case SIOCZIFDATA:
1514 		ifdr->ifdr_data = ifp->if_data;
1515 		/*
1516 		 * Assumes that the volatile counters that can be
1517 		 * zero'ed are at the end of if_data.
1518 		 */
1519 		memset(&ifp->if_data.ifi_ipackets, 0, sizeof(ifp->if_data) -
1520 		    offsetof(struct if_data, ifi_ipackets));
1521 		break;
1522 
1523 	case SIOCSIFMTU:
1524 	{
1525 		u_long oldmtu = ifp->if_mtu;
1526 
1527 		if (ifp->if_ioctl == NULL)
1528 			return EOPNOTSUPP;
1529 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1530 
1531 		/*
1532 		 * If the link MTU changed, do network layer specific procedure.
1533 		 */
1534 		if (ifp->if_mtu != oldmtu) {
1535 #ifdef INET6
1536 			nd6_setmtu(ifp);
1537 #endif
1538 		}
1539 		break;
1540 	}
1541 	case SIOCSIFPHYADDR:
1542 	case SIOCDIFPHYADDR:
1543 #ifdef INET6
1544 	case SIOCSIFPHYADDR_IN6:
1545 #endif
1546 	case SIOCSLIFPHYADDR:
1547 	case SIOCADDMULTI:
1548 	case SIOCDELMULTI:
1549 	case SIOCSIFMEDIA:
1550 	case SIOCGIFPSRCADDR:
1551 	case SIOCGIFPDSTADDR:
1552 	case SIOCGLIFPHYADDR:
1553 	case SIOCGIFMEDIA:
1554 	case SIOCG80211:
1555 	case SIOCS80211:
1556 	case SIOCS80211NWID:
1557 	case SIOCS80211NWKEY:
1558 	case SIOCS80211POWER:
1559 	case SIOCS80211BSSID:
1560 	case SIOCS80211CHANNEL:
1561 		if (ifp->if_ioctl == NULL)
1562 			return EOPNOTSUPP;
1563 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1564 		break;
1565 
1566 	case SIOCSDRVSPEC:
1567 	default:
1568 		if (so->so_proto == NULL)
1569 			return EOPNOTSUPP;
1570 #ifdef COMPAT_OSOCK
1571 		error = compat_ifioctl(so, ocmd, cmd, data, l);
1572 #else
1573 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
1574 		    (struct mbuf *)cmd, (struct mbuf *)data,
1575 		    (struct mbuf *)ifp, l));
1576 #endif
1577 		break;
1578 	}
1579 
1580 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) {
1581 #ifdef INET6
1582 		if ((ifp->if_flags & IFF_UP) != 0) {
1583 			s = splnet();
1584 			in6_if_up(ifp);
1585 			splx(s);
1586 		}
1587 #endif
1588 	}
1589 #ifdef COMPAT_OIFREQ
1590 	if (cmd != ocmd)
1591 		ifreqn2o(oifr, ifr);
1592 #endif
1593 
1594 	return error;
1595 }
1596 
1597 /*
1598  * Return interface configuration
1599  * of system.  List may be used
1600  * in later ioctl's (above) to get
1601  * other information.
1602  *
1603  * Each record is a struct ifreq.  Before the addition of
1604  * sockaddr_storage, the API rule was that sockaddr flavors that did
1605  * not fit would extend beyond the struct ifreq, with the next struct
1606  * ifreq starting sa_len beyond the struct sockaddr.  Because the
1607  * union in struct ifreq includes struct sockaddr_storage, every kind
1608  * of sockaddr must fit.  Thus, there are no longer any overlength
1609  * records.
1610  *
1611  * Records are added to the user buffer if they fit, and ifc_len is
1612  * adjusted to the length that was written.  Thus, the user is only
1613  * assured of getting the complete list if ifc_len on return is at
1614  * least sizeof(struct ifreq) less than it was on entry.
1615  *
1616  * If the user buffer pointer is NULL, this routine copies no data and
1617  * returns the amount of space that would be needed.
1618  *
1619  * Invariants:
1620  * ifrp points to the next part of the user's buffer to be used.  If
1621  * ifrp != NULL, space holds the number of bytes remaining that we may
1622  * write at ifrp.  Otherwise, space holds the number of bytes that
1623  * would have been written had there been adequate space.
1624  */
1625 /*ARGSUSED*/
1626 int
1627 ifconf(u_long cmd, void *data)
1628 {
1629 	struct ifconf *ifc = (struct ifconf *)data;
1630 	struct ifnet *ifp;
1631 	struct ifaddr *ifa;
1632 	struct ifreq ifr, *ifrp;
1633 	int space, error = 0;
1634 	const int sz = (int)sizeof(struct ifreq);
1635 
1636 	if ((ifrp = ifc->ifc_req) == NULL)
1637 		space = 0;
1638 	else
1639 		space = ifc->ifc_len;
1640 	IFNET_FOREACH(ifp) {
1641 		(void)strncpy(ifr.ifr_name, ifp->if_xname,
1642 		    sizeof(ifr.ifr_name));
1643 		if (ifr.ifr_name[sizeof(ifr.ifr_name) - 1] != '\0')
1644 			return ENAMETOOLONG;
1645 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
1646 			/* Interface with no addresses - send zero sockaddr. */
1647 			memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr));
1648 			if (ifrp != NULL)
1649 			{
1650 				if (space >= sz) {
1651 					error = copyout(&ifr, ifrp, sz);
1652 					if (error != 0)
1653 						return (error);
1654 					ifrp++; space -= sz;
1655 				}
1656 			}
1657 			else
1658 				space += sz;
1659 			continue;
1660 		}
1661 
1662 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1663 			struct sockaddr *sa = ifa->ifa_addr;
1664 			/* all sockaddrs must fit in sockaddr_storage */
1665 			KASSERT(sa->sa_len <= sizeof(ifr.ifr_ifru));
1666 
1667 			if (ifrp != NULL)
1668 			{
1669 				memcpy(&ifr.ifr_space, sa, sa->sa_len);
1670 				if (space >= sz) {
1671 					error = copyout(&ifr, ifrp, sz);
1672 					if (error != 0)
1673 						return (error);
1674 					ifrp++; space -= sz;
1675 				}
1676 			}
1677 			else
1678 				space += sz;
1679 		}
1680 	}
1681 	if (ifrp != NULL)
1682 	{
1683 		KASSERT(0 <= space && space <= ifc->ifc_len);
1684 		ifc->ifc_len -= space;
1685 	}
1686 	else
1687 	{
1688 		KASSERT(space >= 0);
1689 		ifc->ifc_len = space;
1690 	}
1691 	return (0);
1692 }
1693 
1694 int
1695 ifreq_setaddr(const u_long cmd, struct ifreq *ifr, const struct sockaddr *sa)
1696 {
1697 	uint8_t len;
1698 	u_long ncmd;
1699 	const uint8_t osockspace = sizeof(ifr->ifr_addr);
1700 	const uint8_t sockspace = sizeof(ifr->ifr_ifru.ifru_space);
1701 
1702 #ifdef INET6
1703 	if (cmd == SIOCGIFPSRCADDR_IN6 || cmd == SIOCGIFPDSTADDR_IN6)
1704 		len = MIN(sizeof(struct sockaddr_in6), sa->sa_len);
1705 	else
1706 #endif /* INET6 */
1707 	if ((ncmd = compat_cvtcmd(cmd)) != cmd)
1708 		len = MIN(osockspace, sa->sa_len);
1709 	else
1710 		len = MIN(sockspace, sa->sa_len);
1711 	if (len < sa->sa_len)
1712 		return EFBIG;
1713 	sockaddr_copy(&ifr->ifr_addr, len, sa);
1714 	return 0;
1715 }
1716 
1717 /*
1718  * Queue message on interface, and start output if interface
1719  * not yet active.
1720  */
1721 int
1722 ifq_enqueue(struct ifnet *ifp, struct mbuf *m
1723     ALTQ_COMMA ALTQ_DECL(struct altq_pktattr *pktattr))
1724 {
1725 	int len = m->m_pkthdr.len;
1726 	int mflags = m->m_flags;
1727 	int s = splnet();
1728 	int error;
1729 
1730 	IFQ_ENQUEUE(&ifp->if_snd, m, pktattr, error);
1731 	if (error != 0)
1732 		goto out;
1733 	ifp->if_obytes += len;
1734 	if (mflags & M_MCAST)
1735 		ifp->if_omcasts++;
1736 	if ((ifp->if_flags & IFF_OACTIVE) == 0)
1737 		(*ifp->if_start)(ifp);
1738 out:
1739 	splx(s);
1740 	return error;
1741 }
1742 
1743 /*
1744  * Queue message on interface, possibly using a second fast queue
1745  */
1746 int
1747 ifq_enqueue2(struct ifnet *ifp, struct ifqueue *ifq, struct mbuf *m
1748     ALTQ_COMMA ALTQ_DECL(struct altq_pktattr *pktattr))
1749 {
1750 	int error = 0;
1751 
1752 	if (ifq != NULL
1753 #ifdef ALTQ
1754 	    && ALTQ_IS_ENABLED(&ifp->if_snd) == 0
1755 #endif
1756 	    ) {
1757 		if (IF_QFULL(ifq)) {
1758 			IF_DROP(&ifp->if_snd);
1759 			m_freem(m);
1760 			if (error == 0)
1761 				error = ENOBUFS;
1762 		} else
1763 			IF_ENQUEUE(ifq, m);
1764 	} else
1765 		IFQ_ENQUEUE(&ifp->if_snd, m, pktattr, error);
1766 	if (error != 0) {
1767 		++ifp->if_oerrors;
1768 		return error;
1769 	}
1770 	return 0;
1771 }
1772 
1773 
1774 #if defined(INET) || defined(INET6)
1775 static void
1776 sysctl_net_ifq_setup(struct sysctllog **clog,
1777 		     int pf, const char *pfname,
1778 		     int ipn, const char *ipname,
1779 		     int qid, struct ifqueue *ifq)
1780 {
1781 
1782 	sysctl_createv(clog, 0, NULL, NULL,
1783 		       CTLFLAG_PERMANENT,
1784 		       CTLTYPE_NODE, "net", NULL,
1785 		       NULL, 0, NULL, 0,
1786 		       CTL_NET, CTL_EOL);
1787 	sysctl_createv(clog, 0, NULL, NULL,
1788 		       CTLFLAG_PERMANENT,
1789 		       CTLTYPE_NODE, pfname, NULL,
1790 		       NULL, 0, NULL, 0,
1791 		       CTL_NET, pf, CTL_EOL);
1792 	sysctl_createv(clog, 0, NULL, NULL,
1793 		       CTLFLAG_PERMANENT,
1794 		       CTLTYPE_NODE, ipname, NULL,
1795 		       NULL, 0, NULL, 0,
1796 		       CTL_NET, pf, ipn, CTL_EOL);
1797 	sysctl_createv(clog, 0, NULL, NULL,
1798 		       CTLFLAG_PERMANENT,
1799 		       CTLTYPE_NODE, "ifq",
1800 		       SYSCTL_DESCR("Protocol input queue controls"),
1801 		       NULL, 0, NULL, 0,
1802 		       CTL_NET, pf, ipn, qid, CTL_EOL);
1803 
1804 	sysctl_createv(clog, 0, NULL, NULL,
1805 		       CTLFLAG_PERMANENT,
1806 		       CTLTYPE_INT, "len",
1807 		       SYSCTL_DESCR("Current input queue length"),
1808 		       NULL, 0, &ifq->ifq_len, 0,
1809 		       CTL_NET, pf, ipn, qid, IFQCTL_LEN, CTL_EOL);
1810 	sysctl_createv(clog, 0, NULL, NULL,
1811 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1812 		       CTLTYPE_INT, "maxlen",
1813 		       SYSCTL_DESCR("Maximum allowed input queue length"),
1814 		       NULL, 0, &ifq->ifq_maxlen, 0,
1815 		       CTL_NET, pf, ipn, qid, IFQCTL_MAXLEN, CTL_EOL);
1816 #ifdef notyet
1817 	sysctl_createv(clog, 0, NULL, NULL,
1818 		       CTLFLAG_PERMANENT,
1819 		       CTLTYPE_INT, "peak",
1820 		       SYSCTL_DESCR("Highest input queue length"),
1821 		       NULL, 0, &ifq->ifq_peak, 0,
1822 		       CTL_NET, pf, ipn, qid, IFQCTL_PEAK, CTL_EOL);
1823 #endif
1824 	sysctl_createv(clog, 0, NULL, NULL,
1825 		       CTLFLAG_PERMANENT,
1826 		       CTLTYPE_INT, "drops",
1827 		       SYSCTL_DESCR("Packets dropped due to full input queue"),
1828 		       NULL, 0, &ifq->ifq_drops, 0,
1829 		       CTL_NET, pf, ipn, qid, IFQCTL_DROPS, CTL_EOL);
1830 }
1831 
1832 #ifdef INET
1833 SYSCTL_SETUP(sysctl_net_inet_ip_ifq_setup,
1834 	     "sysctl net.inet.ip.ifq subtree setup")
1835 {
1836 	extern struct ifqueue ipintrq;
1837 
1838 	sysctl_net_ifq_setup(clog, PF_INET, "inet", IPPROTO_IP, "ip",
1839 			     IPCTL_IFQ, &ipintrq);
1840 }
1841 #endif /* INET */
1842 
1843 #ifdef INET6
1844 SYSCTL_SETUP(sysctl_net_inet6_ip6_ifq_setup,
1845 	     "sysctl net.inet6.ip6.ifq subtree setup")
1846 {
1847 	extern struct ifqueue ip6intrq;
1848 
1849 	sysctl_net_ifq_setup(clog, PF_INET6, "inet6", IPPROTO_IPV6, "ip6",
1850 			     IPV6CTL_IFQ, &ip6intrq);
1851 }
1852 #endif /* INET6 */
1853 #endif /* INET || INET6 */
1854