xref: /openbsd-src/sys/net/if.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: if.c,v 1.297 2014/07/12 18:44:22 tedu Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "pf.h"
68 #include "trunk.h"
69 #include "ether.h"
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/pool.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/timeout.h>
78 #include <sys/tree.h>
79 #include <sys/protosw.h>
80 #include <sys/kernel.h>
81 #include <sys/ioctl.h>
82 #include <sys/domain.h>
83 #include <sys/sysctl.h>
84 #include <sys/workq.h>
85 
86 #include <net/if.h>
87 #include <net/if_dl.h>
88 #include <net/if_media.h>
89 #include <net/if_types.h>
90 #include <net/route.h>
91 #include <net/netisr.h>
92 
93 #ifdef INET
94 #include <netinet/in.h>
95 #include <netinet/if_ether.h>
96 #include <netinet/igmp.h>
97 #ifdef MROUTING
98 #include <netinet/ip_mroute.h>
99 #endif
100 #endif
101 
102 #ifdef INET6
103 #ifndef INET
104 #include <netinet/in.h>
105 #endif
106 #include <netinet6/in6_var.h>
107 #include <netinet6/in6_ifattach.h>
108 #include <netinet6/nd6.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #endif
112 
113 #ifdef MPLS
114 #include <netmpls/mpls.h>
115 #endif
116 
117 #if NBPFILTER > 0
118 #include <net/bpf.h>
119 #endif
120 
121 #if NBRIDGE > 0
122 #include <net/if_bridge.h>
123 #endif
124 
125 #if NCARP > 0
126 #include <netinet/ip_carp.h>
127 #endif
128 
129 #if NPF > 0
130 #include <net/pfvar.h>
131 #endif
132 
133 void	if_attachsetup(struct ifnet *);
134 void	if_attachdomain1(struct ifnet *);
135 void	if_attach_common(struct ifnet *);
136 
137 void	if_detach_queues(struct ifnet *, struct ifqueue *);
138 void	if_detached_start(struct ifnet *);
139 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
140 void	if_detached_watchdog(struct ifnet *);
141 
142 int	if_getgroup(caddr_t, struct ifnet *);
143 int	if_getgroupmembers(caddr_t);
144 int	if_getgroupattribs(caddr_t);
145 int	if_setgroupattribs(caddr_t);
146 
147 int	if_clone_list(struct if_clonereq *);
148 struct if_clone	*if_clone_lookup(const char *, int *);
149 
150 void	if_congestion_clear(void *);
151 int	if_group_egress_build(void);
152 
153 void	if_link_state_change_task(void *, void *);
154 
/*
 * Node of the ifaddr_items red-black tree.  ifa_ifwithaddr() builds a
 * key from ifai_addr and ifai_rdomain and returns ifai_ifa of the node
 * it finds.
 */
struct ifaddr_item {
	/* red-black tree linkage */
	RB_ENTRY(ifaddr_item)	 ifai_entry;
	/* socket address this node is looked up by */
	struct sockaddr		*ifai_addr;
	/* interface address handed back on a successful lookup */
	struct ifaddr		*ifai_ifa;
	/* NOTE(review): presumably chains items with an equal key; confirm */
	struct ifaddr_item	*ifai_next;
	/* routing domain, filled in from rtable_l2() for lookups */
	u_int			 ifai_rdomain;
};
162 
163 int	ifai_cmp(struct ifaddr_item *,  struct ifaddr_item *);
164 void	ifa_item_insert(struct sockaddr *, struct ifaddr *, struct ifnet *);
165 void	ifa_item_remove(struct sockaddr *, struct ifaddr *, struct ifnet *);
166 #ifndef SMALL_KERNEL
167 void	ifa_print_rb(void);
168 #endif
169 
/* Red-black tree of struct ifaddr_item, ordered by ifai_cmp(). */
RB_HEAD(ifaddr_items, ifaddr_item) ifaddr_items = RB_INITIALIZER(&ifaddr_items);
RB_PROTOTYPE(ifaddr_items, ifaddr_item, ifai_entry, ifai_cmp);
RB_GENERATE(ifaddr_items, ifaddr_item, ifai_entry, ifai_cmp);

/* Queue of interface groups. */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
/* Registered cloners; if_cloners_count tracks the length of the list. */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;

/* Pool of struct ifaddr_item, set up by ifinit(). */
struct pool ifaddr_item_pl;

/* Timeout driving net_tick(); first started from ifinit(). */
struct timeout net_tick_to;
void	net_tick(void *);
int	net_livelocked(void);
183 
184 /*
185  * Network interface utility routines.
186  *
187  * Routines with ifa_ifwith* names take sockaddr *'s as
188  * parameters.
189  */
190 void
191 ifinit()
192 {
193 	static struct timeout if_slowtim;
194 
195 	pool_init(&ifaddr_item_pl, sizeof(struct ifaddr_item), 0, 0, 0,
196 	    "ifaddritem", NULL);
197 
198 	timeout_set(&if_slowtim, if_slowtimo, &if_slowtim);
199 	timeout_set(&net_tick_to, net_tick, &net_tick_to);
200 
201 	if_slowtimo(&if_slowtim);
202 	net_tick(&net_tick_to);
203 }
204 
/* Interface index most recently handed out by if_attachsetup(). */
static unsigned int if_index = 0;
/* Number of slots currently allocated in ifindex2ifnet[]. */
static unsigned int if_indexlim = 0;
/* Table mapping an interface index to its ifnet; NULL marks a free slot. */
struct ifnet **ifindex2ifnet = NULL;
/* List of all attached interfaces. */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
/* Interfaces flagged IFXF_TXREADY, waiting for nettxintr(). */
struct ifnet_head iftxlist = TAILQ_HEAD_INITIALIZER(iftxlist);
/* NOTE(review): presumably the lo0 interface, assigned elsewhere; confirm. */
struct ifnet *lo0ifp;
211 
212 /*
213  * Attach an interface to the
214  * list of "active" interfaces.
215  */
216 void
217 if_attachsetup(struct ifnet *ifp)
218 {
219 	int wrapped = 0;
220 
221 	/*
222 	 * Always increment the index to avoid races.
223 	 */
224 	if_index++;
225 
226 	/*
227 	 * If we hit USHRT_MAX, we skip back to 1 since there are a
228 	 * number of places where the value of ifp->if_index or
229 	 * if_index itself is compared to or stored in an unsigned
230 	 * short.  By jumping back, we won't botch those assignments
231 	 * or comparisons.
232 	 */
233 	if (if_index == USHRT_MAX) {
234 		if_index = 1;
235 		wrapped++;
236 	}
237 
238 	while (if_index < if_indexlim && ifindex2ifnet[if_index] != NULL) {
239 		if_index++;
240 
241 		if (if_index == USHRT_MAX) {
242 			/*
243 			 * If we have to jump back to 1 twice without
244 			 * finding an empty slot then there are too many
245 			 * interfaces.
246 			 */
247 			if (wrapped)
248 				panic("too many interfaces");
249 
250 			if_index = 1;
251 			wrapped++;
252 		}
253 	}
254 	ifp->if_index = if_index;
255 
256 	/*
257 	 * We have some arrays that should be indexed by if_index.
258 	 * since if_index will grow dynamically, they should grow too.
259 	 *	struct ifnet **ifindex2ifnet
260 	 */
261 	if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
262 		size_t m, n, oldlim;
263 		caddr_t q;
264 
265 		oldlim = if_indexlim;
266 		if (if_indexlim == 0)
267 			if_indexlim = 8;
268 		while (if_index >= if_indexlim)
269 			if_indexlim <<= 1;
270 
271 		/* grow ifindex2ifnet */
272 		m = oldlim * sizeof(struct ifnet *);
273 		n = if_indexlim * sizeof(struct ifnet *);
274 		q = (caddr_t)malloc(n, M_IFADDR, M_WAITOK|M_ZERO);
275 		if (ifindex2ifnet) {
276 			bcopy((caddr_t)ifindex2ifnet, q, m);
277 			free((caddr_t)ifindex2ifnet, M_IFADDR, 0);
278 		}
279 		ifindex2ifnet = (struct ifnet **)q;
280 	}
281 
282 	TAILQ_INIT(&ifp->if_groups);
283 
284 	if_addgroup(ifp, IFG_ALL);
285 
286 	ifindex2ifnet[if_index] = ifp;
287 
288 	if (ifp->if_snd.ifq_maxlen == 0)
289 		IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
290 
291 	if (domains)
292 		if_attachdomain1(ifp);
293 #if NPF > 0
294 	pfi_attach_ifnet(ifp);
295 #endif
296 
297 	/* Announce the interface. */
298 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
299 }
300 
301 /*
302  * Allocate the link level name for the specified interface.  This
303  * is an attachment helper.  It must be called after ifp->if_addrlen
304  * is initialized, which may not be the case when if_attach() is
305  * called.
306  */
307 void
308 if_alloc_sadl(struct ifnet *ifp)
309 {
310 	unsigned int socksize, ifasize;
311 	int namelen, masklen;
312 	struct sockaddr_dl *sdl;
313 	struct ifaddr *ifa;
314 
315 	/*
316 	 * If the interface already has a link name, release it
317 	 * now.  This is useful for interfaces that can change
318 	 * link types, and thus switch link names often.
319 	 */
320 	if (ifp->if_sadl != NULL)
321 		if_free_sadl(ifp);
322 
323 	namelen = strlen(ifp->if_xname);
324 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
325 	socksize = masklen + ifp->if_addrlen;
326 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
327 	if (socksize < sizeof(*sdl))
328 		socksize = sizeof(*sdl);
329 	socksize = ROUNDUP(socksize);
330 	ifasize = sizeof(*ifa) + socksize;
331 	ifa = malloc(ifasize, M_IFADDR, M_WAITOK|M_ZERO);
332 	sdl = (struct sockaddr_dl *)(ifa + 1);
333 	sdl->sdl_len = socksize;
334 	sdl->sdl_family = AF_LINK;
335 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
336 	sdl->sdl_nlen = namelen;
337 	sdl->sdl_alen = ifp->if_addrlen;
338 	sdl->sdl_index = ifp->if_index;
339 	sdl->sdl_type = ifp->if_type;
340 	ifp->if_lladdr = ifa;
341 	ifa->ifa_ifp = ifp;
342 	ifa->ifa_rtrequest = link_rtrequest;
343 	ifa->ifa_addr = (struct sockaddr *)sdl;
344 	ifp->if_sadl = sdl;
345 	ifa->ifa_netmask = NULL;
346 }
347 
348 /*
349  * Free the link level name for the specified interface.  This is
350  * a detach helper.  This is called from if_detach() or from
351  * link layer type specific detach functions.
352  */
353 void
354 if_free_sadl(struct ifnet *ifp)
355 {
356 	struct ifaddr *ifa;
357 	int s;
358 
359 	ifa = ifp->if_lladdr;
360 	if (ifa == NULL)
361 		return;
362 
363 	s = splnet();
364 	rt_ifa_del(ifa, 0, ifa->ifa_addr);
365 	ifafree(ifp->if_lladdr);
366 	ifp->if_lladdr = NULL;
367 	ifp->if_sadl = NULL;
368 	splx(s);
369 }
370 
371 void
372 if_attachdomain()
373 {
374 	struct ifnet *ifp;
375 	int s;
376 
377 	s = splnet();
378 	TAILQ_FOREACH(ifp, &ifnet, if_list)
379 		if_attachdomain1(ifp);
380 	splx(s);
381 }
382 
383 void
384 if_attachdomain1(struct ifnet *ifp)
385 {
386 	struct domain *dp;
387 	int s;
388 
389 	s = splnet();
390 
391 	/* address family dependent data region */
392 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
393 	for (dp = domains; dp; dp = dp->dom_next) {
394 		if (dp->dom_ifattach)
395 			ifp->if_afdata[dp->dom_family] =
396 			    (*dp->dom_ifattach)(ifp);
397 	}
398 
399 	splx(s);
400 }
401 
402 void
403 if_attachhead(struct ifnet *ifp)
404 {
405 	if_attach_common(ifp);
406 	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
407 	if_attachsetup(ifp);
408 }
409 
410 void
411 if_attach(struct ifnet *ifp)
412 {
413 #if NCARP > 0
414 	struct ifnet *before = NULL;
415 #endif
416 
417 	if_attach_common(ifp);
418 
419 #if NCARP > 0
420 	if (ifp->if_type != IFT_CARP)
421 		TAILQ_FOREACH(before, &ifnet, if_list)
422 			if (before->if_type == IFT_CARP)
423 				break;
424 	if (before == NULL)
425 		TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
426 	else
427 		TAILQ_INSERT_BEFORE(before, ifp, if_list);
428 #else
429 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
430 #endif
431 #ifdef INET6
432 	ifp->if_xflags |= IFXF_NOINET6;
433 #endif
434 
435 	if_attachsetup(ifp);
436 }
437 
438 void
439 if_attach_common(struct ifnet *ifp)
440 {
441 	TAILQ_INIT(&ifp->if_addrlist);
442 	TAILQ_INIT(&ifp->if_maddrlist);
443 
444 	ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
445 	    M_TEMP, M_WAITOK);
446 	TAILQ_INIT(ifp->if_addrhooks);
447 	ifp->if_linkstatehooks = malloc(sizeof(*ifp->if_linkstatehooks),
448 	    M_TEMP, M_WAITOK);
449 	TAILQ_INIT(ifp->if_linkstatehooks);
450 	ifp->if_detachhooks = malloc(sizeof(*ifp->if_detachhooks),
451 	    M_TEMP, M_WAITOK);
452 	TAILQ_INIT(ifp->if_detachhooks);
453 }
454 
455 void
456 if_start(struct ifnet *ifp)
457 {
458 
459 	splassert(IPL_NET);
460 
461 	if (ifp->if_snd.ifq_len >= min(8, ifp->if_snd.ifq_maxlen) &&
462 	    !ISSET(ifp->if_flags, IFF_OACTIVE)) {
463 		if (ISSET(ifp->if_xflags, IFXF_TXREADY)) {
464 			TAILQ_REMOVE(&iftxlist, ifp, if_txlist);
465 			CLR(ifp->if_xflags, IFXF_TXREADY);
466 		}
467 		ifp->if_start(ifp);
468 		return;
469 	}
470 
471 	if (!ISSET(ifp->if_xflags, IFXF_TXREADY)) {
472 		SET(ifp->if_xflags, IFXF_TXREADY);
473 		TAILQ_INSERT_TAIL(&iftxlist, ifp, if_txlist);
474 		schednetisr(NETISR_TX);
475 	}
476 }
477 
478 void
479 nettxintr(void)
480 {
481 	struct ifnet *ifp;
482 	int s;
483 
484 	s = splnet();
485 	while ((ifp = TAILQ_FIRST(&iftxlist)) != NULL) {
486 		TAILQ_REMOVE(&iftxlist, ifp, if_txlist);
487 		CLR(ifp->if_xflags, IFXF_TXREADY);
488 		ifp->if_start(ifp);
489 	}
490 	splx(s);
491 }
492 
493 /*
494  * Detach an interface from everything in the kernel.  Also deallocate
495  * private resources.
496  */
497 void
498 if_detach(struct ifnet *ifp)
499 {
500 	struct ifaddr *ifa;
501 	struct ifg_list *ifg;
502 	int s = splnet();
503 	struct domain *dp;
504 
505 	ifp->if_flags &= ~IFF_OACTIVE;
506 	ifp->if_start = if_detached_start;
507 	ifp->if_ioctl = if_detached_ioctl;
508 	ifp->if_watchdog = if_detached_watchdog;
509 
510 	/*
511 	 * Call detach hooks from head to tail.  To make sure detach
512 	 * hooks are executed in the reverse order they were added, all
513 	 * the hooks have to be added to the head!
514 	 */
515 	dohooks(ifp->if_detachhooks, HOOK_REMOVE | HOOK_FREE);
516 
517 #if NBRIDGE > 0
518 	/* Remove the interface from any bridge it is part of.  */
519 	if (ifp->if_bridgeport)
520 		bridge_ifdetach(ifp);
521 #endif
522 
523 #if NCARP > 0
524 	/* Remove the interface from any carp group it is a part of.  */
525 	if (ifp->if_carp && ifp->if_type != IFT_CARP)
526 		carp_ifdetach(ifp);
527 #endif
528 
529 #if NBPFILTER > 0
530 	bpfdetach(ifp);
531 #endif
532 	rt_if_remove(ifp);
533 #ifdef INET
534 	rti_delete(ifp);
535 #if NETHER > 0 && defined(NFSCLIENT)
536 	if (ifp == revarp_ifp)
537 		revarp_ifp = NULL;
538 #endif
539 #ifdef MROUTING
540 	vif_delete(ifp);
541 #endif
542 #endif
543 #ifdef INET
544 	in_ifdetach(ifp);
545 #endif
546 #ifdef INET6
547 	in6_ifdetach(ifp);
548 #endif
549 
550 #if NPF > 0
551 	pfi_detach_ifnet(ifp);
552 #endif
553 
554 	/*
555 	 * remove packets came from ifp, from software interrupt queues.
556 	 * net/netisr_dispatch.h is not usable, as some of them use
557 	 * strange queue names.
558 	 */
559 #define IF_DETACH_QUEUES(x) \
560 do { \
561 	extern struct ifqueue x; \
562 	if_detach_queues(ifp, & x); \
563 } while (0)
564 #ifdef INET
565 	IF_DETACH_QUEUES(arpintrq);
566 	IF_DETACH_QUEUES(ipintrq);
567 #endif
568 #ifdef INET6
569 	IF_DETACH_QUEUES(ip6intrq);
570 #endif
571 #undef IF_DETACH_QUEUES
572 
573 	/*
574 	 * XXX transient ifp refs?  inpcb.ip_moptions.imo_multicast_ifp?
575 	 * Other network stacks than INET?
576 	 */
577 
578 	/* Remove the interface from the list of all interfaces.  */
579 	TAILQ_REMOVE(&ifnet, ifp, if_list);
580 	if (ISSET(ifp->if_xflags, IFXF_TXREADY))
581 		TAILQ_REMOVE(&iftxlist, ifp, if_txlist);
582 
583 	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
584 		if_delgroup(ifp, ifg->ifgl_group->ifg_group);
585 
586 	if_free_sadl(ifp);
587 
588 	/* We should not have any address left at this point. */
589 	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
590 #ifdef DIAGNOSTIC
591 		printf("%s: address list non empty\n", ifp->if_xname);
592 #endif
593 		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
594 			ifa_del(ifp, ifa);
595 			ifa->ifa_ifp = NULL;
596 			ifafree(ifa);
597 		}
598 	}
599 
600 	free(ifp->if_addrhooks, M_TEMP, 0);
601 	free(ifp->if_linkstatehooks, M_TEMP, 0);
602 	free(ifp->if_detachhooks, M_TEMP, 0);
603 
604 	for (dp = domains; dp; dp = dp->dom_next) {
605 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
606 			(*dp->dom_ifdetach)(ifp,
607 			    ifp->if_afdata[dp->dom_family]);
608 	}
609 
610 	/* Announce that the interface is gone. */
611 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
612 
613 	ifindex2ifnet[ifp->if_index] = NULL;
614 	splx(s);
615 }
616 
617 void
618 if_detach_queues(struct ifnet *ifp, struct ifqueue *q)
619 {
620 	struct mbuf *m, *prev = NULL, *next;
621 	int prio;
622 
623 	for (prio = 0; prio <= IFQ_MAXPRIO; prio++) {
624 		for (m = q->ifq_q[prio].head; m; m = next) {
625 			next = m->m_nextpkt;
626 #ifdef DIAGNOSTIC
627 			if ((m->m_flags & M_PKTHDR) == 0) {
628 				prev = m;
629 				continue;
630 			}
631 #endif
632 			if (m->m_pkthdr.rcvif != ifp) {
633 				prev = m;
634 				continue;
635 			}
636 
637 			if (prev)
638 				prev->m_nextpkt = m->m_nextpkt;
639 			else
640 				q->ifq_q[prio].head = m->m_nextpkt;
641 			if (q->ifq_q[prio].tail == m)
642 				q->ifq_q[prio].tail = prev;
643 			q->ifq_len--;
644 
645 			m->m_nextpkt = NULL;
646 			m_freem(m);
647 			IF_DROP(q);
648 		}
649 	}
650 }
651 
652 /*
653  * Create a clone network interface.
654  */
655 int
656 if_clone_create(const char *name)
657 {
658 	struct if_clone *ifc;
659 	struct ifnet *ifp;
660 	int unit, ret;
661 
662 	ifc = if_clone_lookup(name, &unit);
663 	if (ifc == NULL)
664 		return (EINVAL);
665 
666 	if (ifunit(name) != NULL)
667 		return (EEXIST);
668 
669 	if ((ret = (*ifc->ifc_create)(ifc, unit)) == 0 &&
670 	    (ifp = ifunit(name)) != NULL)
671 		if_addgroup(ifp, ifc->ifc_name);
672 
673 	return (ret);
674 }
675 
676 /*
677  * Destroy a clone network interface.
678  */
679 int
680 if_clone_destroy(const char *name)
681 {
682 	struct if_clone *ifc;
683 	struct ifnet *ifp;
684 	int s;
685 
686 	ifc = if_clone_lookup(name, NULL);
687 	if (ifc == NULL)
688 		return (EINVAL);
689 
690 	ifp = ifunit(name);
691 	if (ifp == NULL)
692 		return (ENXIO);
693 
694 	if (ifc->ifc_destroy == NULL)
695 		return (EOPNOTSUPP);
696 
697 	if (ifp->if_flags & IFF_UP) {
698 		s = splnet();
699 		if_down(ifp);
700 		splx(s);
701 	}
702 
703 	return ((*ifc->ifc_destroy)(ifp));
704 }
705 
706 /*
707  * Look up a network interface cloner.
708  */
709 struct if_clone *
710 if_clone_lookup(const char *name, int *unitp)
711 {
712 	struct if_clone *ifc;
713 	const char *cp;
714 	int unit;
715 
716 	/* separate interface name from unit */
717 	for (cp = name;
718 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
719 	    cp++)
720 		continue;
721 
722 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
723 		return (NULL);	/* No name or unit number */
724 
725 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
726 		return (NULL);	/* unit number 0 padded */
727 
728 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
729 		if (strlen(ifc->ifc_name) == cp - name &&
730 		    !strncmp(name, ifc->ifc_name, cp - name))
731 			break;
732 	}
733 
734 	if (ifc == NULL)
735 		return (NULL);
736 
737 	unit = 0;
738 	while (cp - name < IFNAMSIZ && *cp) {
739 		if (*cp < '0' || *cp > '9' ||
740 		    unit > (INT_MAX - (*cp - '0')) / 10) {
741 			/* Bogus unit number. */
742 			return (NULL);
743 		}
744 		unit = (unit * 10) + (*cp++ - '0');
745 	}
746 
747 	if (unitp != NULL)
748 		*unitp = unit;
749 	return (ifc);
750 }
751 
752 /*
753  * Register a network interface cloner.
754  */
755 void
756 if_clone_attach(struct if_clone *ifc)
757 {
758 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
759 	if_cloners_count++;
760 }
761 
762 /*
763  * Unregister a network interface cloner.
764  */
765 void
766 if_clone_detach(struct if_clone *ifc)
767 {
768 
769 	LIST_REMOVE(ifc, ifc_list);
770 	if_cloners_count--;
771 }
772 
773 /*
774  * Provide list of interface cloners to userspace.
775  */
776 int
777 if_clone_list(struct if_clonereq *ifcr)
778 {
779 	char outbuf[IFNAMSIZ], *dst;
780 	struct if_clone *ifc;
781 	int count, error = 0;
782 
783 	ifcr->ifcr_total = if_cloners_count;
784 	if ((dst = ifcr->ifcr_buffer) == NULL) {
785 		/* Just asking how many there are. */
786 		return (0);
787 	}
788 
789 	if (ifcr->ifcr_count < 0)
790 		return (EINVAL);
791 
792 	count = (if_cloners_count < ifcr->ifcr_count) ?
793 	    if_cloners_count : ifcr->ifcr_count;
794 
795 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
796 		if (count == 0)
797 			break;
798 		bzero(outbuf, sizeof outbuf);
799 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
800 		error = copyout(outbuf, dst, IFNAMSIZ);
801 		if (error)
802 			break;
803 		count--;
804 		dst += IFNAMSIZ;
805 	}
806 
807 	return (error);
808 }
809 
810 /*
811  * set queue congestion marker and register timeout to clear it
812  */
813 void
814 if_congestion(struct ifqueue *ifq)
815 {
816 	/* Not currently needed, all callers check this */
817 	if (ifq->ifq_congestion)
818 		return;
819 
820 	ifq->ifq_congestion = malloc(sizeof(struct timeout), M_TEMP, M_NOWAIT);
821 	if (ifq->ifq_congestion == NULL)
822 		return;
823 	timeout_set(ifq->ifq_congestion, if_congestion_clear, ifq);
824 	timeout_add(ifq->ifq_congestion, hz / 100);
825 }
826 
827 /*
828  * clear the congestion flag
829  */
830 void
831 if_congestion_clear(void *arg)
832 {
833 	struct ifqueue *ifq = arg;
834 	struct timeout *to = ifq->ifq_congestion;
835 
836 	ifq->ifq_congestion = NULL;
837 	free(to, M_TEMP, 0);
838 }
839 
840 /*
841  * Locate an interface based on a complete address.
842  */
843 /*ARGSUSED*/
844 struct ifaddr *
845 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
846 {
847 	struct ifaddr_item *ifai, key;
848 
849 	bzero(&key, sizeof(key));
850 	key.ifai_addr = addr;
851 	key.ifai_rdomain = rtable_l2(rtableid);
852 
853 	ifai = RB_FIND(ifaddr_items, &ifaddr_items, &key);
854 	if (ifai)
855 		return (ifai->ifai_ifa);
856 	return (NULL);
857 }
858 
/* True when the first a1->sa_len bytes of sockaddrs a1 and a2 match. */
#define	equal(a1, a2)	\
	(memcmp((a1), (a2), ((struct sockaddr *)(a1))->sa_len) == 0)
862 
863 /*
864  * Locate the point to point interface with a given destination address.
865  */
866 /*ARGSUSED*/
867 struct ifaddr *
868 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
869 {
870 	struct ifnet *ifp;
871 	struct ifaddr *ifa;
872 
873 	rdomain = rtable_l2(rdomain);
874 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
875 		if (ifp->if_rdomain != rdomain)
876 			continue;
877 		if (ifp->if_flags & IFF_POINTOPOINT)
878 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
879 				if (ifa->ifa_addr->sa_family !=
880 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
881 					continue;
882 				if (equal(addr, ifa->ifa_dstaddr))
883 					return (ifa);
884 			}
885 	}
886 	return (NULL);
887 }
888 
889 /*
890  * Find an interface on a specific network.  If many, choice
891  * is most specific found.
892  */
893 struct ifaddr *
894 ifa_ifwithnet(struct sockaddr *sa, u_int rtableid)
895 {
896 	struct ifnet *ifp;
897 	struct ifaddr *ifa, *ifa_maybe = NULL;
898 	char *cplim, *addr_data = sa->sa_data;
899 	u_int rdomain;
900 
901 	rdomain = rtable_l2(rtableid);
902 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
903 		if (ifp->if_rdomain != rdomain)
904 			continue;
905 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
906 			char *cp, *cp2, *cp3;
907 
908 			if (ifa->ifa_addr->sa_family != sa->sa_family ||
909 			    ifa->ifa_netmask == 0)
910 				next: continue;
911 			cp = addr_data;
912 			cp2 = ifa->ifa_addr->sa_data;
913 			cp3 = ifa->ifa_netmask->sa_data;
914 			cplim = (char *)ifa->ifa_netmask +
915 				ifa->ifa_netmask->sa_len;
916 			while (cp3 < cplim)
917 				if ((*cp++ ^ *cp2++) & *cp3++)
918 				    /* want to continue for() loop */
919 					goto next;
920 			if (ifa_maybe == 0 ||
921 			    rn_refines((caddr_t)ifa->ifa_netmask,
922 			    (caddr_t)ifa_maybe->ifa_netmask))
923 				ifa_maybe = ifa;
924 		}
925 	}
926 	return (ifa_maybe);
927 }
928 
929 /*
930  * Find an interface address specific to an interface best matching
931  * a given address.
932  */
933 struct ifaddr *
934 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
935 {
936 	struct ifaddr *ifa;
937 	char *cp, *cp2, *cp3;
938 	char *cplim;
939 	struct ifaddr *ifa_maybe = NULL;
940 	u_int af = addr->sa_family;
941 
942 	if (af >= AF_MAX)
943 		return (NULL);
944 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
945 		if (ifa->ifa_addr->sa_family != af)
946 			continue;
947 		if (ifa_maybe == NULL)
948 			ifa_maybe = ifa;
949 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
950 			if (equal(addr, ifa->ifa_addr) ||
951 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
952 				return (ifa);
953 			continue;
954 		}
955 		cp = addr->sa_data;
956 		cp2 = ifa->ifa_addr->sa_data;
957 		cp3 = ifa->ifa_netmask->sa_data;
958 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
959 		for (; cp3 < cplim; cp3++)
960 			if ((*cp++ ^ *cp2++) & *cp3)
961 				break;
962 		if (cp3 == cplim)
963 			return (ifa);
964 	}
965 	return (ifa_maybe);
966 }
967 
968 /*
969  * Default action when installing a route with a Link Level gateway.
970  * Lookup an appropriate real ifa to point to.
971  * This should be moved to /sys/net/link.c eventually.
972  */
973 void
974 link_rtrequest(int cmd, struct rtentry *rt)
975 {
976 	struct ifaddr *ifa;
977 	struct sockaddr *dst;
978 	struct ifnet *ifp;
979 
980 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
981 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
982 		return;
983 	if ((ifa = ifaof_ifpforaddr(dst, ifp)) != NULL) {
984 		ifa->ifa_refcnt++;
985 		ifafree(rt->rt_ifa);
986 		rt->rt_ifa = ifa;
987 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
988 			ifa->ifa_rtrequest(cmd, rt);
989 	}
990 }
991 
992 /*
993  * Default action when installing a local route on a point-to-point
994  * interface.
995  */
996 void
997 p2p_rtrequest(int req, struct rtentry *rt)
998 {
999 	struct ifnet *ifp = rt->rt_ifp;
1000 	struct ifaddr *ifa, *lo0ifa;
1001 
1002 	switch (req) {
1003 	case RTM_ADD:
1004 		/*
1005 		 * XXX Here we abuse RTF_LLINFO to add a route to
1006 		 * loopback.  We do that to always have a route
1007 		 * pointing to our address.
1008 		 */
1009 		if ((rt->rt_flags & RTF_LLINFO) == 0)
1010 			break;
1011 
1012 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1013 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1014 			    rt_key(rt)->sa_len) == 0)
1015 				break;
1016 		}
1017 
1018 		if (ifa == NULL)
1019 			break;
1020 
1021 		/*
1022 		 * XXX Since lo0 is in the default rdomain we should not
1023 		 * (ab)use it for any route related to an interface of a
1024 		 * different rdomain.
1025 		 */
1026 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list)
1027 			if (lo0ifa->ifa_addr->sa_family ==
1028 			    ifa->ifa_addr->sa_family)
1029 				break;
1030 
1031 		if (lo0ifa == NULL)
1032 			break;
1033 
1034 		rt_setgate(rt, rt_key(rt), lo0ifa->ifa_addr, ifp->if_rdomain);
1035 		rt->rt_ifp = lo0ifp;
1036 		rt->rt_flags &= ~RTF_LLINFO;
1037 
1038 		/*
1039 		 * make sure to set rt->rt_ifa to the interface
1040 		 * address we are using, otherwise we will have trouble
1041 		 * with source address selection.
1042 		 */
1043 		if (ifa != rt->rt_ifa) {
1044 			ifafree(rt->rt_ifa);
1045 			ifa->ifa_refcnt++;
1046 			rt->rt_ifa = ifa;
1047 		}
1048 		break;
1049 	case RTM_DELETE:
1050 	case RTM_RESOLVE:
1051 	default:
1052 		break;
1053 	}
1054 }
1055 
1056 
1057 /*
1058  * Bring down all interfaces
1059  */
1060 void
1061 if_downall(void)
1062 {
1063 	struct ifreq ifrq;	/* XXX only partly built */
1064 	struct ifnet *ifp;
1065 	int s;
1066 
1067 	s = splnet();
1068 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1069 		if ((ifp->if_flags & IFF_UP) == 0)
1070 			continue;
1071 		if_down(ifp);
1072 		ifp->if_flags &= ~IFF_UP;
1073 
1074 		if (ifp->if_ioctl) {
1075 			ifrq.ifr_flags = ifp->if_flags;
1076 			(void) (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS,
1077 			    (caddr_t)&ifrq);
1078 		}
1079 	}
1080 	splx(s);
1081 }
1082 
1083 /*
1084  * Mark an interface down and notify protocols of
1085  * the transition.
1086  * NOTE: must be called at splsoftnet or equivalent.
1087  */
1088 void
1089 if_down(struct ifnet *ifp)
1090 {
1091 	struct ifaddr *ifa;
1092 
1093 	splsoftassert(IPL_SOFTNET);
1094 
1095 	ifp->if_flags &= ~IFF_UP;
1096 	microtime(&ifp->if_lastchange);
1097 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1098 		pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1099 	}
1100 	IFQ_PURGE(&ifp->if_snd);
1101 #if NCARP > 0
1102 	if (ifp->if_carp)
1103 		carp_carpdev_state(ifp);
1104 #endif
1105 #if NBRIDGE > 0
1106 	if (ifp->if_bridgeport)
1107 		bstp_ifstate(ifp);
1108 #endif
1109 	rt_ifmsg(ifp);
1110 #ifndef SMALL_KERNEL
1111 	rt_if_track(ifp);
1112 #endif
1113 }
1114 
1115 /*
1116  * Mark an interface up and notify protocols of
1117  * the transition.
1118  * NOTE: must be called at splsoftnet or equivalent.
1119  */
1120 void
1121 if_up(struct ifnet *ifp)
1122 {
1123 	splsoftassert(IPL_SOFTNET);
1124 
1125 	ifp->if_flags |= IFF_UP;
1126 	microtime(&ifp->if_lastchange);
1127 #if NCARP > 0
1128 	if (ifp->if_carp)
1129 		carp_carpdev_state(ifp);
1130 #endif
1131 #if NBRIDGE > 0
1132 	if (ifp->if_bridgeport)
1133 		bstp_ifstate(ifp);
1134 #endif
1135 	rt_ifmsg(ifp);
1136 #ifdef INET6
1137 	if (!(ifp->if_xflags & IFXF_NOINET6))
1138 		in6_if_up(ifp);
1139 #endif
1140 
1141 #ifndef SMALL_KERNEL
1142 	rt_if_track(ifp);
1143 #endif
1144 }
1145 
1146 /*
1147  * Schedule a link state change task.
1148  */
1149 void
1150 if_link_state_change(struct ifnet *ifp)
1151 {
1152 	/* try to put the routing table update task on syswq */
1153 	workq_add_task(NULL, 0, if_link_state_change_task,
1154 	    (void *)((unsigned long)ifp->if_index), NULL);
1155 }
1156 
1157 /*
1158  * Process a link state change.
1159  */
1160 void
1161 if_link_state_change_task(void *arg, void *unused)
1162 {
1163 	unsigned int index = (unsigned long)arg;
1164 	struct ifnet *ifp;
1165 	int s;
1166 
1167 	s = splsoftnet();
1168 	if ((ifp = if_get(index)) != NULL) {
1169 		rt_ifmsg(ifp);
1170 #ifndef SMALL_KERNEL
1171 		rt_if_track(ifp);
1172 #endif
1173 		dohooks(ifp->if_linkstatehooks, 0);
1174 	}
1175 	splx(s);
1176 }
1177 
1178 /*
1179  * Handle interface watchdog timer routines.  Called
1180  * from softclock, we decrement timers (if set) and
1181  * call the appropriate interface routine on expiration.
1182  */
1183 void
1184 if_slowtimo(void *arg)
1185 {
1186 	struct timeout *to = (struct timeout *)arg;
1187 	struct ifnet *ifp;
1188 	int s = splnet();
1189 
1190 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1191 		if (ifp->if_timer == 0 || --ifp->if_timer)
1192 			continue;
1193 		if (ifp->if_watchdog)
1194 			(*ifp->if_watchdog)(ifp);
1195 	}
1196 	splx(s);
1197 	timeout_add(to, hz / IFNET_SLOWHZ);
1198 }
1199 
1200 /*
1201  * Map interface name to interface structure pointer.
1202  */
1203 struct ifnet *
1204 ifunit(const char *name)
1205 {
1206 	struct ifnet *ifp;
1207 
1208 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1209 		if (strcmp(ifp->if_xname, name) == 0)
1210 			return (ifp);
1211 	}
1212 	return (NULL);
1213 }
1214 
1215 /*
1216  * Map interface index to interface structure pointer.
1217  */
1218 struct ifnet *
1219 if_get(unsigned int index)
1220 {
1221 	struct ifnet *ifp = NULL;
1222 
1223 	if (index < if_indexlim)
1224 		ifp = ifindex2ifnet[index];
1225 
1226 	return (ifp);
1227 }
1228 
1229 /*
1230  * Interface ioctls.
1231  */
1232 int
1233 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1234 {
1235 	struct ifnet *ifp;
1236 	struct ifreq *ifr;
1237 	struct ifaddr *ifa;
1238 	struct sockaddr_dl *sdl;
1239 	struct ifgroupreq *ifgr;
1240 	char ifdescrbuf[IFDESCRSIZE];
1241 	char ifrtlabelbuf[RTLABEL_LEN];
1242 	int s, error = 0;
1243 	size_t bytesdone;
1244 	short oif_flags;
1245 	const char *label;
1246 	short up = 0;
1247 
1248 	switch (cmd) {
1249 
1250 	case SIOCGIFCONF:
1251 	case OSIOCGIFCONF:
1252 		return (ifconf(cmd, data));
1253 	}
1254 	ifr = (struct ifreq *)data;
1255 
1256 	switch (cmd) {
1257 	case SIOCIFCREATE:
1258 	case SIOCIFDESTROY:
1259 		if ((error = suser(p, 0)) != 0)
1260 			return (error);
1261 		return ((cmd == SIOCIFCREATE) ?
1262 		    if_clone_create(ifr->ifr_name) :
1263 		    if_clone_destroy(ifr->ifr_name));
1264 	case SIOCIFGCLONERS:
1265 		return (if_clone_list((struct if_clonereq *)data));
1266 	case SIOCGIFGMEMB:
1267 		return (if_getgroupmembers(data));
1268 	case SIOCGIFGATTR:
1269 		return (if_getgroupattribs(data));
1270 	case SIOCSIFGATTR:
1271 		if ((error = suser(p, 0)) != 0)
1272 			return (error);
1273 		return (if_setgroupattribs(data));
1274 	}
1275 
1276 	ifp = ifunit(ifr->ifr_name);
1277 	if (ifp == 0)
1278 		return (ENXIO);
1279 	oif_flags = ifp->if_flags;
1280 	switch (cmd) {
1281 
1282 	case SIOCGIFFLAGS:
1283 		ifr->ifr_flags = ifp->if_flags;
1284 		break;
1285 
1286 	case SIOCGIFXFLAGS:
1287 		ifr->ifr_flags = ifp->if_xflags;
1288 		break;
1289 
1290 	case SIOCGIFMETRIC:
1291 		ifr->ifr_metric = ifp->if_metric;
1292 		break;
1293 
1294 	case SIOCGIFMTU:
1295 		ifr->ifr_mtu = ifp->if_mtu;
1296 		break;
1297 
1298 	case SIOCGIFHARDMTU:
1299 		ifr->ifr_hardmtu = ifp->if_hardmtu;
1300 		break;
1301 
1302 	case SIOCGIFDATA:
1303 		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
1304 		    sizeof(ifp->if_data));
1305 		break;
1306 
1307 	case SIOCSIFFLAGS:
1308 		if ((error = suser(p, 0)) != 0)
1309 			return (error);
1310 		if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) {
1311 			s = splnet();
1312 			if_down(ifp);
1313 			splx(s);
1314 		}
1315 		if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) {
1316 			s = splnet();
1317 			if_up(ifp);
1318 			splx(s);
1319 		}
1320 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1321 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1322 		if (ifp->if_ioctl)
1323 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1324 		break;
1325 
1326 	case SIOCSIFXFLAGS:
1327 		if ((error = suser(p, 0)) != 0)
1328 			return (error);
1329 
1330 #ifdef INET6
1331 		if (ifr->ifr_flags & IFXF_NOINET6 &&
1332 		    !(ifp->if_xflags & IFXF_NOINET6)) {
1333 			s = splnet();
1334 			in6_ifdetach(ifp);
1335 			splx(s);
1336 		}
1337 		if (ifp->if_xflags & IFXF_NOINET6 &&
1338 		    !(ifr->ifr_flags & IFXF_NOINET6)) {
1339 			ifp->if_xflags &= ~IFXF_NOINET6;
1340 			if (ifp->if_flags & IFF_UP) {
1341 				/* configure link-local address */
1342 				s = splnet();
1343 				in6_if_up(ifp);
1344 				splx(s);
1345 			}
1346 		}
1347 #endif
1348 
1349 #ifdef MPLS
1350 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
1351 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
1352 			s = splnet();
1353 			ifp->if_xflags |= IFXF_MPLS;
1354 			ifp->if_ll_output = ifp->if_output;
1355 			ifp->if_output = mpls_output;
1356 			splx(s);
1357 		}
1358 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
1359 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
1360 			s = splnet();
1361 			ifp->if_xflags &= ~IFXF_MPLS;
1362 			ifp->if_output = ifp->if_ll_output;
1363 			ifp->if_ll_output = NULL;
1364 			splx(s);
1365 		}
1366 #endif
1367 
1368 #ifndef SMALL_KERNEL
1369 		if (ifp->if_capabilities & IFCAP_WOL) {
1370 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
1371 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
1372 				s = splnet();
1373 				ifp->if_xflags |= IFXF_WOL;
1374 				error = ifp->if_wol(ifp, 1);
1375 				splx(s);
1376 				if (error)
1377 					return (error);
1378 			}
1379 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
1380 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
1381 				s = splnet();
1382 				ifp->if_xflags &= ~IFXF_WOL;
1383 				error = ifp->if_wol(ifp, 0);
1384 				splx(s);
1385 				if (error)
1386 					return (error);
1387 			}
1388 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
1389 			ifr->ifr_flags &= ~IFXF_WOL;
1390 			error = ENOTSUP;
1391 		}
1392 #endif
1393 
1394 		ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
1395 			(ifr->ifr_flags & ~IFXF_CANTCHANGE);
1396 		rt_ifmsg(ifp);
1397 		break;
1398 
1399 	case SIOCSIFMETRIC:
1400 		if ((error = suser(p, 0)) != 0)
1401 			return (error);
1402 		ifp->if_metric = ifr->ifr_metric;
1403 		break;
1404 
1405 	case SIOCSIFMTU:
1406 	{
1407 #ifdef INET6
1408 		int oldmtu = ifp->if_mtu;
1409 #endif
1410 
1411 		if ((error = suser(p, 0)) != 0)
1412 			return (error);
1413 		if (ifp->if_ioctl == NULL)
1414 			return (EOPNOTSUPP);
1415 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1416 
1417 		/*
1418 		 * If the link MTU changed, do network layer specific procedure.
1419 		 */
1420 #ifdef INET6
1421 		if (ifp->if_mtu != oldmtu)
1422 			nd6_setmtu(ifp);
1423 #endif
1424 		break;
1425 	}
1426 
1427 	case SIOCSIFPHYADDR:
1428 	case SIOCDIFPHYADDR:
1429 #ifdef INET6
1430 	case SIOCSIFPHYADDR_IN6:
1431 #endif
1432 	case SIOCSLIFPHYADDR:
1433 	case SIOCSLIFPHYRTABLE:
1434 	case SIOCADDMULTI:
1435 	case SIOCDELMULTI:
1436 	case SIOCSIFMEDIA:
1437 		if ((error = suser(p, 0)) != 0)
1438 			return (error);
1439 		/* FALLTHROUGH */
1440 	case SIOCGIFPSRCADDR:
1441 	case SIOCGIFPDSTADDR:
1442 	case SIOCGLIFPHYADDR:
1443 	case SIOCGLIFPHYRTABLE:
1444 	case SIOCGIFMEDIA:
1445 		if (ifp->if_ioctl == 0)
1446 			return (EOPNOTSUPP);
1447 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1448 		break;
1449 
1450 	case SIOCGIFDESCR:
1451 		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
1452 		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
1453 		    &bytesdone);
1454 		break;
1455 
1456 	case SIOCSIFDESCR:
1457 		if ((error = suser(p, 0)) != 0)
1458 			return (error);
1459 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
1460 		    IFDESCRSIZE, &bytesdone);
1461 		if (error == 0) {
1462 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
1463 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
1464 		}
1465 		break;
1466 
1467 	case SIOCGIFRTLABEL:
1468 		if (ifp->if_rtlabelid &&
1469 		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
1470 			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
1471 			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
1472 			    RTLABEL_LEN, &bytesdone);
1473 		} else
1474 			error = ENOENT;
1475 		break;
1476 
1477 	case SIOCSIFRTLABEL:
1478 		if ((error = suser(p, 0)) != 0)
1479 			return (error);
1480 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
1481 		    RTLABEL_LEN, &bytesdone);
1482 		if (error == 0) {
1483 			rtlabel_unref(ifp->if_rtlabelid);
1484 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
1485 		}
1486 		break;
1487 
1488 	case SIOCGIFPRIORITY:
1489 		ifr->ifr_metric = ifp->if_priority;
1490 		break;
1491 
1492 	case SIOCSIFPRIORITY:
1493 		if ((error = suser(p, 0)) != 0)
1494 			return (error);
1495 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15)
1496 			return (EINVAL);
1497 		ifp->if_priority = ifr->ifr_metric;
1498 		break;
1499 
1500 	case SIOCGIFRDOMAIN:
1501 		ifr->ifr_rdomainid = ifp->if_rdomain;
1502 		break;
1503 
1504 	case SIOCSIFRDOMAIN:
1505 		if ((error = suser(p, 0)) != 0)
1506 			return (error);
1507 		if (ifr->ifr_rdomainid < 0 ||
1508 		    ifr->ifr_rdomainid > RT_TABLEID_MAX)
1509 			return (EINVAL);
1510 
1511 		/* make sure that the routing table exists */
1512 		if (!rtable_exists(ifr->ifr_rdomainid)) {
1513 			s = splsoftnet();
1514 			if ((error = rtable_add(ifr->ifr_rdomainid)) == 0)
1515 				rtable_l2set(ifr->ifr_rdomainid, ifr->ifr_rdomainid);
1516 			splx(s);
1517 			if (error)
1518 				return (error);
1519 		}
1520 
1521 		/* make sure that the routing table is a real rdomain */
1522 		if (ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid))
1523 			return (EINVAL);
1524 
1525 		/* remove all routing entries when switching domains */
1526 		/* XXX hell this is ugly */
1527 		if (ifr->ifr_rdomainid != ifp->if_rdomain) {
1528 			s = splnet();
1529 			if (ifp->if_flags & IFF_UP)
1530 				up = 1;
1531 			/*
1532 			 * We are tearing down the world.
1533 			 * Take down the IF so:
1534 			 * 1. everything that cares gets a message
1535 			 * 2. the automagic IPv6 bits are recreated
1536 			 */
1537 			if (up)
1538 				if_down(ifp);
1539 			rt_if_remove(ifp);
1540 #ifdef INET
1541 			rti_delete(ifp);
1542 #ifdef MROUTING
1543 			vif_delete(ifp);
1544 #endif
1545 #endif
1546 #ifdef INET6
1547 			in6_ifdetach(ifp);
1548 #endif
1549 #ifdef INET
1550 			in_ifdetach(ifp);
1551 #endif
1552 			splx(s);
1553 		}
1554 
1555 		/* Let devices like enc(4) or mpe(4) know about the change */
1556 		if ((error = (*ifp->if_ioctl)(ifp, cmd, data)) != ENOTTY)
1557 			return (error);
1558 		error = 0;
1559 
1560 		/* Add interface to the specified rdomain */
1561 		ifp->if_rdomain = ifr->ifr_rdomainid;
1562 		break;
1563 
1564 	case SIOCAIFGROUP:
1565 		if ((error = suser(p, 0)))
1566 			return (error);
1567 		ifgr = (struct ifgroupreq *)data;
1568 		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
1569 			return (error);
1570 		(*ifp->if_ioctl)(ifp, cmd, data); /* XXX error check */
1571 		break;
1572 
1573 	case SIOCGIFGROUP:
1574 		if ((error = if_getgroup(data, ifp)))
1575 			return (error);
1576 		break;
1577 
1578 	case SIOCDIFGROUP:
1579 		if ((error = suser(p, 0)))
1580 			return (error);
1581 		(*ifp->if_ioctl)(ifp, cmd, data); /* XXX error check */
1582 		ifgr = (struct ifgroupreq *)data;
1583 		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
1584 			return (error);
1585 		break;
1586 
1587 	case SIOCSIFLLADDR:
1588 		if ((error = suser(p, 0)))
1589 			return (error);
1590 		ifa = ifp->if_lladdr;
1591 		if (ifa == NULL)
1592 			return (EINVAL);
1593 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1594 		if (sdl == NULL)
1595 			return (EINVAL);
1596 		if (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN)
1597 			return (EINVAL);
1598 		if (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))
1599 			return (EINVAL);
1600 		switch (ifp->if_type) {
1601 		case IFT_ETHER:
1602 		case IFT_CARP:
1603 		case IFT_XETHER:
1604 		case IFT_ISO88025:
1605 			bcopy((caddr_t)ifr->ifr_addr.sa_data,
1606 			    (caddr_t)((struct arpcom *)ifp)->ac_enaddr,
1607 			    ETHER_ADDR_LEN);
1608 			bcopy((caddr_t)ifr->ifr_addr.sa_data,
1609 			    LLADDR(sdl), ETHER_ADDR_LEN);
1610 			error = (*ifp->if_ioctl)(ifp, cmd, data);
1611 			if (error == ENOTTY)
1612 				error = 0;
1613 			break;
1614 		default:
1615 			return (ENODEV);
1616 		}
1617 
1618 		ifnewlladdr(ifp);
1619 		break;
1620 
1621 	default:
1622 		if (so->so_proto == 0)
1623 			return (EOPNOTSUPP);
1624 #if !defined(COMPAT_43) && !defined(COMPAT_LINUX)
1625 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
1626 			(struct mbuf *) cmd, (struct mbuf *) data,
1627 			(struct mbuf *) ifp, p));
1628 #else
1629 	    {
1630 		u_long ocmd = cmd;
1631 
1632 		switch (cmd) {
1633 
1634 		case SIOCSIFADDR:
1635 		case SIOCSIFDSTADDR:
1636 		case SIOCSIFBRDADDR:
1637 		case SIOCSIFNETMASK:
1638 #if BYTE_ORDER != BIG_ENDIAN
1639 			if (ifr->ifr_addr.sa_family == 0 &&
1640 			    ifr->ifr_addr.sa_len < 16) {
1641 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1642 				ifr->ifr_addr.sa_len = 16;
1643 			}
1644 #else
1645 			if (ifr->ifr_addr.sa_len == 0)
1646 				ifr->ifr_addr.sa_len = 16;
1647 #endif
1648 			break;
1649 
1650 		case OSIOCGIFADDR:
1651 			cmd = SIOCGIFADDR;
1652 			break;
1653 
1654 		case OSIOCGIFDSTADDR:
1655 			cmd = SIOCGIFDSTADDR;
1656 			break;
1657 
1658 		case OSIOCGIFBRDADDR:
1659 			cmd = SIOCGIFBRDADDR;
1660 			break;
1661 
1662 		case OSIOCGIFNETMASK:
1663 			cmd = SIOCGIFNETMASK;
1664 		}
1665 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
1666 		    (struct mbuf *) cmd, (struct mbuf *) data,
1667 		    (struct mbuf *) ifp, p));
1668 		switch (ocmd) {
1669 
1670 		case OSIOCGIFADDR:
1671 		case OSIOCGIFDSTADDR:
1672 		case OSIOCGIFBRDADDR:
1673 		case OSIOCGIFNETMASK:
1674 			*(u_int16_t *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1675 		}
1676 
1677 	    }
1678 #endif
1679 		break;
1680 	}
1681 
1682 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) {
1683 		microtime(&ifp->if_lastchange);
1684 #ifdef INET6
1685 		if (!(ifp->if_xflags & IFXF_NOINET6) &&
1686 		    (ifp->if_flags & IFF_UP) != 0) {
1687 			s = splnet();
1688 			in6_if_up(ifp);
1689 			splx(s);
1690 		}
1691 #endif
1692 	}
1693 	/* If we took down the IF, bring it back */
1694 	if (up) {
1695 		s = splnet();
1696 		if_up(ifp);
1697 		splx(s);
1698 	}
1699 	return (error);
1700 }
1701 
1702 /*
1703  * Return interface configuration
1704  * of system.  List may be used
1705  * in later ioctl's (above) to get
1706  * other information.
1707  */
1708 /*ARGSUSED*/
1709 int
1710 ifconf(u_long cmd, caddr_t data)
1711 {
1712 	struct ifconf *ifc = (struct ifconf *)data;
1713 	struct ifnet *ifp;
1714 	struct ifaddr *ifa;
1715 	struct ifreq ifr, *ifrp;
1716 	int space = ifc->ifc_len, error = 0;
1717 
1718 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
1719 	if (space == 0) {
1720 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
1721 			struct sockaddr *sa;
1722 
1723 			if (TAILQ_EMPTY(&ifp->if_addrlist))
1724 				space += sizeof (ifr);
1725 			else
1726 				TAILQ_FOREACH(ifa,
1727 				    &ifp->if_addrlist, ifa_list) {
1728 					sa = ifa->ifa_addr;
1729 #if defined(COMPAT_43) || defined(COMPAT_LINUX)
1730 					if (cmd != OSIOCGIFCONF)
1731 #endif
1732 					if (sa->sa_len > sizeof(*sa))
1733 						space += sa->sa_len -
1734 						    sizeof(*sa);
1735 					space += sizeof(ifr);
1736 				}
1737 		}
1738 		ifc->ifc_len = space;
1739 		return (0);
1740 	}
1741 
1742 	ifrp = ifc->ifc_req;
1743 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1744 		if (space < sizeof(ifr))
1745 			break;
1746 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
1747 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
1748 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1749 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1750 			    sizeof(ifr));
1751 			if (error)
1752 				break;
1753 			space -= sizeof (ifr), ifrp++;
1754 		} else
1755 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1756 				struct sockaddr *sa = ifa->ifa_addr;
1757 
1758 				if (space < sizeof(ifr))
1759 					break;
1760 #if defined(COMPAT_43) || defined(COMPAT_LINUX)
1761 				if (cmd == OSIOCGIFCONF) {
1762 					struct osockaddr *osa =
1763 					    (struct osockaddr *)&ifr.ifr_addr;
1764 					ifr.ifr_addr = *sa;
1765 					osa->sa_family = sa->sa_family;
1766 					error = copyout((caddr_t)&ifr,
1767 					    (caddr_t)ifrp, sizeof (ifr));
1768 					ifrp++;
1769 				} else
1770 #endif
1771 				if (sa->sa_len <= sizeof(*sa)) {
1772 					ifr.ifr_addr = *sa;
1773 					error = copyout((caddr_t)&ifr,
1774 					    (caddr_t)ifrp, sizeof (ifr));
1775 					ifrp++;
1776 				} else {
1777 					space -= sa->sa_len - sizeof(*sa);
1778 					if (space < sizeof (ifr))
1779 						break;
1780 					error = copyout((caddr_t)&ifr,
1781 					    (caddr_t)ifrp,
1782 					    sizeof(ifr.ifr_name));
1783 					if (error == 0)
1784 						error = copyout((caddr_t)sa,
1785 						    (caddr_t)&ifrp->ifr_addr,
1786 						    sa->sa_len);
1787 					ifrp = (struct ifreq *)(sa->sa_len +
1788 					    (caddr_t)&ifrp->ifr_addr);
1789 				}
1790 				if (error)
1791 					break;
1792 				space -= sizeof (ifr);
1793 			}
1794 	}
1795 	ifc->ifc_len -= space;
1796 	return (error);
1797 }
1798 
1799 /*
1800  * Dummy functions replaced in ifnet during detach (if protocols decide to
1801  * fiddle with the if during detach.
1802  */
1803 void
1804 if_detached_start(struct ifnet *ifp)
1805 {
1806 	struct mbuf *m;
1807 
1808 	while (1) {
1809 		IF_DEQUEUE(&ifp->if_snd, m);
1810 
1811 		if (m == NULL)
1812 			return;
1813 		m_freem(m);
1814 	}
1815 }
1816 
1817 int
1818 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
1819 {
1820 	return ENODEV;
1821 }
1822 
/* Dummy watchdog routine: intentionally does nothing. */
void
if_detached_watchdog(struct ifnet *ifp)
{
}
1828 
1829 /*
1830  * Create interface group without members
1831  */
1832 struct ifg_group *
1833 if_creategroup(const char *groupname)
1834 {
1835 	struct ifg_group	*ifg;
1836 
1837 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
1838 		return (NULL);
1839 
1840 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
1841 	ifg->ifg_refcnt = 0;
1842 	ifg->ifg_carp_demoted = 0;
1843 	TAILQ_INIT(&ifg->ifg_members);
1844 #if NPF > 0
1845 	pfi_attach_ifgroup(ifg);
1846 #endif
1847 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
1848 
1849 	return (ifg);
1850 }
1851 
1852 /*
1853  * Add a group to an interface
1854  */
1855 int
1856 if_addgroup(struct ifnet *ifp, const char *groupname)
1857 {
1858 	struct ifg_list		*ifgl;
1859 	struct ifg_group	*ifg = NULL;
1860 	struct ifg_member	*ifgm;
1861 
1862 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
1863 	    groupname[strlen(groupname) - 1] <= '9')
1864 		return (EINVAL);
1865 
1866 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1867 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1868 			return (EEXIST);
1869 
1870 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
1871 		return (ENOMEM);
1872 
1873 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
1874 		free(ifgl, M_TEMP, 0);
1875 		return (ENOMEM);
1876 	}
1877 
1878 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
1879 		if (!strcmp(ifg->ifg_group, groupname))
1880 			break;
1881 
1882 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
1883 		free(ifgl, M_TEMP, 0);
1884 		free(ifgm, M_TEMP, 0);
1885 		return (ENOMEM);
1886 	}
1887 
1888 	ifg->ifg_refcnt++;
1889 	ifgl->ifgl_group = ifg;
1890 	ifgm->ifgm_ifp = ifp;
1891 
1892 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
1893 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
1894 
1895 #if NPF > 0
1896 	pfi_group_change(groupname);
1897 #endif
1898 
1899 	return (0);
1900 }
1901 
1902 /*
1903  * Remove a group from an interface
1904  */
1905 int
1906 if_delgroup(struct ifnet *ifp, const char *groupname)
1907 {
1908 	struct ifg_list		*ifgl;
1909 	struct ifg_member	*ifgm;
1910 
1911 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1912 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
1913 			break;
1914 	if (ifgl == NULL)
1915 		return (ENOENT);
1916 
1917 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
1918 
1919 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
1920 		if (ifgm->ifgm_ifp == ifp)
1921 			break;
1922 
1923 	if (ifgm != NULL) {
1924 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
1925 		free(ifgm, M_TEMP, 0);
1926 	}
1927 
1928 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
1929 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
1930 #if NPF > 0
1931 		pfi_detach_ifgroup(ifgl->ifgl_group);
1932 #endif
1933 		free(ifgl->ifgl_group, M_TEMP, 0);
1934 	}
1935 
1936 	free(ifgl, M_TEMP, 0);
1937 
1938 #if NPF > 0
1939 	pfi_group_change(groupname);
1940 #endif
1941 
1942 	return (0);
1943 }
1944 
1945 /*
1946  * Stores all groups from an interface in memory pointed
1947  * to by data
1948  */
1949 int
1950 if_getgroup(caddr_t data, struct ifnet *ifp)
1951 {
1952 	int			 len, error;
1953 	struct ifg_list		*ifgl;
1954 	struct ifg_req		 ifgrq, *ifgp;
1955 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
1956 
1957 	if (ifgr->ifgr_len == 0) {
1958 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
1959 			ifgr->ifgr_len += sizeof(struct ifg_req);
1960 		return (0);
1961 	}
1962 
1963 	len = ifgr->ifgr_len;
1964 	ifgp = ifgr->ifgr_groups;
1965 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
1966 		if (len < sizeof(ifgrq))
1967 			return (EINVAL);
1968 		bzero(&ifgrq, sizeof ifgrq);
1969 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
1970 		    sizeof(ifgrq.ifgrq_group));
1971 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
1972 		    sizeof(struct ifg_req))))
1973 			return (error);
1974 		len -= sizeof(ifgrq);
1975 		ifgp++;
1976 	}
1977 
1978 	return (0);
1979 }
1980 
1981 /*
1982  * Stores all members of a group in memory pointed to by data
1983  */
1984 int
1985 if_getgroupmembers(caddr_t data)
1986 {
1987 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
1988 	struct ifg_group	*ifg;
1989 	struct ifg_member	*ifgm;
1990 	struct ifg_req		 ifgrq, *ifgp;
1991 	int			 len, error;
1992 
1993 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
1994 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
1995 			break;
1996 	if (ifg == NULL)
1997 		return (ENOENT);
1998 
1999 	if (ifgr->ifgr_len == 0) {
2000 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2001 			ifgr->ifgr_len += sizeof(ifgrq);
2002 		return (0);
2003 	}
2004 
2005 	len = ifgr->ifgr_len;
2006 	ifgp = ifgr->ifgr_groups;
2007 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2008 		if (len < sizeof(ifgrq))
2009 			return (EINVAL);
2010 		bzero(&ifgrq, sizeof ifgrq);
2011 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2012 		    sizeof(ifgrq.ifgrq_member));
2013 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2014 		    sizeof(struct ifg_req))))
2015 			return (error);
2016 		len -= sizeof(ifgrq);
2017 		ifgp++;
2018 	}
2019 
2020 	return (0);
2021 }
2022 
2023 int
2024 if_getgroupattribs(caddr_t data)
2025 {
2026 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2027 	struct ifg_group	*ifg;
2028 
2029 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2030 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2031 			break;
2032 	if (ifg == NULL)
2033 		return (ENOENT);
2034 
2035 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2036 
2037 	return (0);
2038 }
2039 
2040 int
2041 if_setgroupattribs(caddr_t data)
2042 {
2043 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2044 	struct ifg_group	*ifg;
2045 	struct ifg_member	*ifgm;
2046 	int			 demote;
2047 
2048 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2049 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2050 			break;
2051 	if (ifg == NULL)
2052 		return (ENOENT);
2053 
2054 	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
2055 	if (demote + ifg->ifg_carp_demoted > 0xff ||
2056 	    demote + ifg->ifg_carp_demoted < 0)
2057 		return (EINVAL);
2058 
2059 	ifg->ifg_carp_demoted += demote;
2060 
2061 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2062 		if (ifgm->ifgm_ifp->if_ioctl)
2063 			ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp,
2064 			    SIOCSIFGATTR, data);
2065 	return (0);
2066 }
2067 
2068 void
2069 if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
2070 {
2071 	switch (dst->sa_family) {
2072 	case AF_INET:
2073 		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
2074 		    mask && (mask->sa_len == 0 ||
2075 		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
2076 			if_group_egress_build();
2077 		break;
2078 #ifdef INET6
2079 	case AF_INET6:
2080 		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
2081 		    &in6addr_any) && mask && (mask->sa_len == 0 ||
2082 		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
2083 		    &in6addr_any)))
2084 			if_group_egress_build();
2085 		break;
2086 #endif
2087 	}
2088 }
2089 
2090 int
2091 if_group_egress_build(void)
2092 {
2093 	struct ifg_group	*ifg;
2094 	struct ifg_member	*ifgm, *next;
2095 	struct sockaddr_in	 sa_in;
2096 #ifdef INET6
2097 	struct sockaddr_in6	 sa_in6;
2098 #endif
2099 	struct rtentry		*rt;
2100 
2101 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2102 		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
2103 			break;
2104 
2105 	if (ifg != NULL)
2106 		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
2107 			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);
2108 
2109 	bzero(&sa_in, sizeof(sa_in));
2110 	sa_in.sin_len = sizeof(sa_in);
2111 	sa_in.sin_family = AF_INET;
2112 	if ((rt = rt_lookup(sintosa(&sa_in), sintosa(&sa_in), 0)) != NULL) {
2113 		do {
2114 			if (rt->rt_ifp)
2115 				if_addgroup(rt->rt_ifp, IFG_EGRESS);
2116 #ifndef SMALL_KERNEL
2117 			rt = rt_mpath_next(rt);
2118 #else
2119 			rt = NULL;
2120 #endif
2121 		} while (rt != NULL);
2122 	}
2123 
2124 #ifdef INET6
2125 	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
2126 	if ((rt = rt_lookup(sin6tosa(&sa_in6), sin6tosa(&sa_in6), 0)) != NULL) {
2127 		do {
2128 			if (rt->rt_ifp)
2129 				if_addgroup(rt->rt_ifp, IFG_EGRESS);
2130 #ifndef SMALL_KERNEL
2131 			rt = rt_mpath_next(rt);
2132 #else
2133 			rt = NULL;
2134 #endif
2135 		} while (rt != NULL);
2136 	}
2137 #endif
2138 
2139 	return (0);
2140 }
2141 
2142 /*
2143  * Set/clear promiscuous mode on interface ifp based on the truth value
2144  * of pswitch.  The calls are reference counted so that only the first
2145  * "on" request actually has an effect, as does the final "off" request.
2146  * Results are undefined if the "off" and "on" requests are not matched.
2147  */
2148 int
2149 ifpromisc(struct ifnet *ifp, int pswitch)
2150 {
2151 	struct ifreq ifr;
2152 
2153 	if (pswitch) {
2154 		/*
2155 		 * If the device is not configured up, we cannot put it in
2156 		 * promiscuous mode.
2157 		 */
2158 		if ((ifp->if_flags & IFF_UP) == 0)
2159 			return (ENETDOWN);
2160 		if (ifp->if_pcount++ != 0)
2161 			return (0);
2162 		ifp->if_flags |= IFF_PROMISC;
2163 	} else {
2164 		if (--ifp->if_pcount > 0)
2165 			return (0);
2166 		ifp->if_flags &= ~IFF_PROMISC;
2167 		/*
2168 		 * If the device is not configured up, we should not need to
2169 		 * turn off promiscuous mode (device should have turned it
2170 		 * off when interface went down; and will look at IFF_PROMISC
2171 		 * again next time interface comes up).
2172 		 */
2173 		if ((ifp->if_flags & IFF_UP) == 0)
2174 			return (0);
2175 	}
2176 	ifr.ifr_flags = ifp->if_flags;
2177 	return ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
2178 }
2179 
2180 int
2181 sysctl_ifq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2182     void *newp, size_t newlen, struct ifqueue *ifq)
2183 {
2184 	/* All sysctl names at this level are terminal. */
2185 	if (namelen != 1)
2186 		return (ENOTDIR);
2187 
2188 	switch (name[0]) {
2189 	case IFQCTL_LEN:
2190 		return (sysctl_rdint(oldp, oldlenp, newp, ifq->ifq_len));
2191 	case IFQCTL_MAXLEN:
2192 		return (sysctl_int(oldp, oldlenp, newp, newlen,
2193 		    &ifq->ifq_maxlen));
2194 	case IFQCTL_DROPS:
2195 		return (sysctl_rdint(oldp, oldlenp, newp, ifq->ifq_drops));
2196 	default:
2197 		return (EOPNOTSUPP);
2198 	}
2199 	/* NOTREACHED */
2200 }
2201 
2202 void
2203 ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
2204 {
2205 	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
2206 	ifa_item_insert(ifa->ifa_addr, ifa, ifp);
2207 	if (ifp->if_flags & IFF_BROADCAST && ifa->ifa_broadaddr)
2208 		ifa_item_insert(ifa->ifa_broadaddr, ifa, ifp);
2209 }
2210 
2211 void
2212 ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
2213 {
2214 	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
2215 	ifa_item_remove(ifa->ifa_addr, ifa, ifp);
2216 	if (ifp->if_flags & IFF_BROADCAST && ifa->ifa_broadaddr)
2217 		ifa_item_remove(ifa->ifa_broadaddr, ifa, ifp);
2218 }
2219 
2220 void
2221 ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
2222 {
2223 	ifa_item_remove(ifa->ifa_broadaddr, ifa, ifp);
2224 	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
2225 		panic("ifa_update_broadaddr does not support dynamic length");
2226 	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
2227 	ifa_item_insert(ifa->ifa_broadaddr, ifa, ifp);
2228 }
2229 
2230 int
2231 ifai_cmp(struct ifaddr_item *a, struct ifaddr_item *b)
2232 {
2233 	if (a->ifai_rdomain != b->ifai_rdomain)
2234 		return (a->ifai_rdomain - b->ifai_rdomain);
2235 	/* safe even with a's sa_len > b's because memcmp aborts early */
2236 	return (memcmp(a->ifai_addr, b->ifai_addr, a->ifai_addr->sa_len));
2237 }
2238 
2239 void
2240 ifa_item_insert(struct sockaddr *sa, struct ifaddr *ifa, struct ifnet *ifp)
2241 {
2242 	struct ifaddr_item	*ifai, *p;
2243 
2244 	ifai = pool_get(&ifaddr_item_pl, PR_WAITOK);
2245 	ifai->ifai_addr = sa;
2246 	ifai->ifai_ifa = ifa;
2247 	ifai->ifai_rdomain = ifp->if_rdomain;
2248 	ifai->ifai_next = NULL;
2249 	if ((p = RB_INSERT(ifaddr_items, &ifaddr_items, ifai)) != NULL) {
2250 		if (sa->sa_family == AF_LINK) {
2251 			RB_REMOVE(ifaddr_items, &ifaddr_items, p);
2252 			ifai->ifai_next = p;
2253 			RB_INSERT(ifaddr_items, &ifaddr_items, ifai);
2254 		} else {
2255 			while(p->ifai_next)
2256 				p = p->ifai_next;
2257 			p->ifai_next = ifai;
2258 		}
2259 	}
2260 }
2261 
2262 void
2263 ifa_item_remove(struct sockaddr *sa, struct ifaddr *ifa, struct ifnet *ifp)
2264 {
2265 	struct ifaddr_item	*ifai, *ifai_first, *ifai_last, key;
2266 
2267 	bzero(&key, sizeof(key));
2268 	key.ifai_addr = sa;
2269 	key.ifai_rdomain = ifp->if_rdomain;
2270 	ifai_first = RB_FIND(ifaddr_items, &ifaddr_items, &key);
2271 	for (ifai = ifai_first; ifai; ifai = ifai->ifai_next) {
2272 		if (ifai->ifai_ifa == ifa)
2273 			break;
2274 		ifai_last = ifai;
2275 	}
2276 	if (!ifai)
2277 		return;
2278 	if (ifai == ifai_first) {
2279 		RB_REMOVE(ifaddr_items, &ifaddr_items, ifai);
2280 		if (ifai->ifai_next)
2281 			RB_INSERT(ifaddr_items, &ifaddr_items, ifai->ifai_next);
2282 	} else
2283 		ifai_last->ifai_next = ifai->ifai_next;
2284 	pool_put(&ifaddr_item_pl, ifai);
2285 }
2286 
2287 #ifndef SMALL_KERNEL
2288 /* debug function, can be called from ddb> */
2289 void
2290 ifa_print_rb(void)
2291 {
2292 	struct ifaddr_item *ifai, *p;
2293 	RB_FOREACH(p, ifaddr_items, &ifaddr_items) {
2294 		for (ifai = p; ifai; ifai = ifai->ifai_next) {
2295 			char addr[INET6_ADDRSTRLEN];
2296 
2297 			switch (ifai->ifai_addr->sa_family) {
2298 			case AF_INET:
2299 				printf("%s", inet_ntop(AF_INET,
2300 				    &satosin(ifai->ifai_addr)->sin_addr,
2301 				    addr, sizeof(addr)));
2302 				break;
2303 #ifdef INET6
2304 			case AF_INET6:
2305 				printf("%s", inet_ntop(AF_INET6,
2306 				    &(satosin6(ifai->ifai_addr))->sin6_addr,
2307 				    addr, sizeof(addr)));
2308 				break;
2309 #endif
2310 			case AF_LINK:
2311 				printf("%s",
2312 				    ether_sprintf(ifai->ifai_addr->sa_data));
2313 				break;
2314 			}
2315 			printf(" on %s\n", ifai->ifai_ifa->ifa_ifp->if_xname);
2316 		}
2317 	}
2318 }
2319 #endif /* SMALL_KERNEL */
2320 
2321 void
2322 ifnewlladdr(struct ifnet *ifp)
2323 {
2324 	struct ifaddr *ifa;
2325 	struct ifreq ifrq;
2326 	short up;
2327 	int s;
2328 
2329 	s = splnet();
2330 	up = ifp->if_flags & IFF_UP;
2331 
2332 	if (up) {
2333 		/* go down for a moment... */
2334 		ifp->if_flags &= ~IFF_UP;
2335 		ifrq.ifr_flags = ifp->if_flags;
2336 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
2337 	}
2338 
2339 	ifp->if_flags |= IFF_UP;
2340 	ifrq.ifr_flags = ifp->if_flags;
2341 	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
2342 
2343 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2344 		if (ifa->ifa_addr != NULL &&
2345 		    ifa->ifa_addr->sa_family == AF_INET)
2346 			arp_ifinit((struct arpcom *)ifp, ifa);
2347 	}
2348 #ifdef INET6
2349 	/* Update the link-local address. Don't do it if we're
2350 	 * a router to avoid confusing hosts on the network. */
2351 	if (!(ifp->if_xflags & IFXF_NOINET6) && !ip6_forwarding) {
2352 		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
2353 		if (ifa) {
2354 			in6_purgeaddr(ifa);
2355 			dohooks(ifp->if_addrhooks, 0);
2356 			in6_ifattach_linklocal(ifp, NULL);
2357 			if (in6if_do_dad(ifp)) {
2358 				ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
2359 				if (ifa)
2360 					nd6_dad_start(ifa, NULL);
2361 			}
2362 		}
2363 	}
2364 #endif
2365 	if (!up) {
2366 		/* go back down */
2367 		ifp->if_flags &= ~IFF_UP;
2368 		ifrq.ifr_flags = ifp->if_flags;
2369 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
2370 	}
2371 	splx(s);
2372 }
2373 
/* value of ticks recorded by the most recent run of net_tick() */
int net_ticks;
/* count of net_tick() runs that found ticks more than 1 past net_ticks */
u_int net_livelocks;
2376 
2377 void
2378 net_tick(void *null)
2379 {
2380 	extern int ticks;
2381 
2382 	if (ticks - net_ticks > 1)
2383 		net_livelocks++;
2384 
2385 	net_ticks = ticks;
2386 
2387 	timeout_add(&net_tick_to, 1);
2388 }
2389 
2390 int
2391 net_livelocked()
2392 {
2393 	extern int ticks;
2394 
2395 	return (ticks - net_ticks > 1);
2396 }
2397 
2398 void
2399 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
2400 {
2401 	extern int ticks;
2402 
2403 	memset(rxr, 0, sizeof(*rxr));
2404 
2405 	rxr->rxr_adjusted = ticks;
2406 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
2407 	rxr->rxr_hwm = hwm;
2408 }
2409 
2410 static inline void
2411 if_rxr_adjust_cwm(struct if_rxring *rxr)
2412 {
2413 	extern int ticks;
2414 
2415 	if (net_livelocked()) {
2416 		if (rxr->rxr_cwm > rxr->rxr_lwm)
2417 			rxr->rxr_cwm--;
2418 		else
2419 			return;
2420 	} else if (rxr->rxr_alive > 4)
2421 		return;
2422 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
2423 		rxr->rxr_cwm++;
2424 
2425 	rxr->rxr_adjusted = ticks;
2426 }
2427 
2428 u_int
2429 if_rxr_get(struct if_rxring *rxr, u_int max)
2430 {
2431 	extern int ticks;
2432 	u_int diff;
2433 
2434 	if (ticks - rxr->rxr_adjusted >= 1) {
2435 		/* we're free to try for an adjustment */
2436 		if_rxr_adjust_cwm(rxr);
2437 	}
2438 
2439 	if (rxr->rxr_alive >= rxr->rxr_cwm)
2440 		return (0);
2441 
2442 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
2443 	rxr->rxr_alive += diff;
2444 
2445 	return (diff);
2446 }
2447 
2448 int
2449 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
2450 {
2451 	struct if_rxrinfo kifri;
2452 	int error;
2453 	u_int n;
2454 
2455 	error = copyin(uifri, &kifri, sizeof(kifri));
2456 	if (error)
2457 		return (error);
2458 
2459 	n = min(t, kifri.ifri_total);
2460 	kifri.ifri_total = t;
2461 
2462 	if (n > 0) {
2463 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
2464 		if (error)
2465 			return (error);
2466 	}
2467 
2468 	return (copyout(&kifri, uifri, sizeof(kifri)));
2469 }
2470 
2471 int
2472 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
2473     struct if_rxring *rxr)
2474 {
2475 	struct if_rxring_info ifr;
2476 
2477 	memset(&ifr, 0, sizeof(ifr));
2478 
2479 	if (name != NULL)
2480 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
2481 
2482 	ifr.ifr_size = size;
2483 	ifr.ifr_info = *rxr;
2484 
2485 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
2486 }
2487