xref: /openbsd-src/sys/net/if.c (revision b99ef4df7fac99f3475b694d6cd4990521c99ae6)
1 /*	$OpenBSD: if.c,v 1.627 2021/02/08 12:30:10 bluhm Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "switch.h"
72 #include "if_wg.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/timeout.h>
80 #include <sys/protosw.h>
81 #include <sys/kernel.h>
82 #include <sys/ioctl.h>
83 #include <sys/domain.h>
84 #include <sys/task.h>
85 #include <sys/atomic.h>
86 #include <sys/percpu.h>
87 #include <sys/proc.h>
88 #include <sys/stdint.h>	/* uintptr_t */
89 #include <sys/rwlock.h>
90 
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/netisr.h>
96 
97 #include <netinet/in.h>
98 #include <netinet/if_ether.h>
99 #include <netinet/igmp.h>
100 #ifdef MROUTING
101 #include <netinet/ip_mroute.h>
102 #endif
103 
104 #ifdef INET6
105 #include <netinet6/in6_var.h>
106 #include <netinet6/in6_ifattach.h>
107 #include <netinet6/nd6.h>
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 #endif
111 
112 #ifdef MPLS
113 #include <netmpls/mpls.h>
114 #endif
115 
116 #if NBPFILTER > 0
117 #include <net/bpf.h>
118 #endif
119 
120 #if NBRIDGE > 0
121 #include <net/if_bridge.h>
122 #endif
123 
124 #if NCARP > 0
125 #include <netinet/ip_carp.h>
126 #endif
127 
128 #if NPF > 0
129 #include <net/pfvar.h>
130 #endif
131 
132 #include <sys/device.h>
133 
134 void	if_attachsetup(struct ifnet *);
135 void	if_attachdomain(struct ifnet *);
136 void	if_attach_common(struct ifnet *);
137 void	if_remove(struct ifnet *);
138 int	if_createrdomain(int, struct ifnet *);
139 int	if_setrdomain(struct ifnet *, int);
140 void	if_slowtimo(void *);
141 
142 void	if_detached_qstart(struct ifqueue *);
143 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
144 
145 int	ifioctl_get(u_long, caddr_t);
146 int	ifconf(caddr_t);
147 static int
148 	if_sffpage_check(const caddr_t);
149 
150 int	if_getgroup(caddr_t, struct ifnet *);
151 int	if_getgroupmembers(caddr_t);
152 int	if_getgroupattribs(caddr_t);
153 int	if_setgroupattribs(caddr_t);
154 int	if_getgrouplist(caddr_t);
155 
156 void	if_linkstate(struct ifnet *);
157 void	if_linkstate_task(void *);
158 
159 int	if_clone_list(struct if_clonereq *);
160 struct if_clone	*if_clone_lookup(const char *, int *);
161 
162 int	if_group_egress_build(void);
163 
164 void	if_watchdog_task(void *);
165 
166 void	if_netisr(void *);
167 
168 #ifdef DDB
169 void	ifa_print_all(void);
170 #endif
171 
172 void	if_qstart_compat(struct ifqueue *);
173 
174 /*
175  * interface index map
176  *
177  * the kernel maintains a mapping of interface indexes to struct ifnet
178  * pointers.
179  *
180  * the map is an array of struct ifnet pointers prefixed by an if_map
181  * structure. the if_map structure stores the length of its array.
182  *
183  * as interfaces are attached to the system, the map is grown on demand
184  * up to USHRT_MAX entries.
185  *
186  * interface index 0 is reserved and represents no interface. this
187  * supports the use of the interface index as the scope for IPv6 link
188  * local addresses, where scope 0 means no scope has been specified.
189  * it also supports the use of interface index as the unique identifier
190  * for network interfaces in SNMP applications as per RFC2863. therefore
191  * if_get(0) returns NULL.
192  */
193 
194 void if_ifp_dtor(void *, void *);
195 void if_map_dtor(void *, void *);
196 struct ifnet *if_ref(struct ifnet *);
197 
198 /*
199  * struct if_map
200  *
201  * bounded array of ifnet srp pointers used to fetch references of live
202  * interfaces with if_get().
203  */
204 
struct if_map {
	unsigned long		 limit;	/* number of srp slots that follow */
	/* followed by limit ifnet srp pointers */
};
209 
210 /*
211  * struct if_idxmap
212  *
213  * infrastructure to manage updates and accesses to the current if_map.
214  */
215 
struct if_idxmap {
	unsigned int		 serial;	/* next index to hand out; 0 reserved */
	unsigned int		 count;		/* interfaces currently in the map */
	struct srp		 map;		/* srp pointer to the live if_map */
};
221 
222 void	if_idxmap_init(unsigned int);
223 void	if_idxmap_insert(struct ifnet *);
224 void	if_idxmap_remove(struct ifnet *);
225 
226 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
227 
228 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
229 int if_cloners_count;
230 
231 struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonerlock");
232 
233 /* hooks should only be added, deleted, and run from a process context */
234 struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
235 void	if_hooks_run(struct task_list *);
236 
237 int	ifq_congestion;
238 
239 int		 netisr;
240 
241 #define	NET_TASKQ	1
242 struct taskq	*nettqmp[NET_TASKQ];
243 
244 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
245 
246 /*
247  * Serialize socket operations to ensure no new sleeping points
248  * are introduced in IP output paths.
249  */
250 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
251 
252 /*
253  * Network interface utility routines.
254  */
void
ifinit(void)
{
	unsigned int	i;

	/*
	 * most machines boot with 4 or 5 interfaces, so size the initial map
	 * to accommodate this
	 */
	if_idxmap_init(8);

	/* create the taskqs the network stack runs deferred work from */
	for (i = 0; i < NET_TASKQ; i++) {
		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
		if (nettqmp[i] == NULL)
			panic("unable to create network taskq %d", i);
	}
}
272 
/* the global interface index map; updates serialised by the kernel lock */
static struct if_idxmap if_idxmap = {
	0,
	0,
	SRP_INITIALIZER()
};
278 
279 struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
280 struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
281 
282 struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
283 
/*
 * Allocate the initial interface index map of `limit' slots and
 * publish it via the if_idxmap srp.  Called once at boot from
 * ifinit(), before any interfaces attach.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int i;

	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */

	/* the srp slot array lives directly after the if_map header */
	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
	    M_IFADDR, M_WAITOK);

	if_map->limit = limit;
	map = (struct srp *)(if_map + 1);
	for (i = 0; i < limit; i++)
		srp_init(&map[i]);

	/* this is called early so there's nothing to race with */
	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
}
304 
/*
 * Assign a free interface index to ifp and publish it in the index
 * map, doubling the map's size first if the candidate index is out
 * of bounds.  Modifications are serialised by the kernel lock.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* indexes are 16 bit wide, so mask the growing serial down */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		/* grow: build a map twice the size and copy the refs over */
		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* the new map takes its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* the old map is torn down by if_map_dtor via srp gc */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index; slot 0 is never handed out */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
368 
/*
 * Clear ifp's slot in the interface index map.  The srp update hands
 * the map's reference to the gc, which releases it via if_ifp_dtor().
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */
}
391 
/*
 * srp_gc callback for ifnet slots in the index map: drop the
 * reference the map held on the interface.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
397 
/*
 * srp_gc callback for a retired if_map: release every ifnet
 * reference the map still holds, then free the map itself.
 */
void
if_map_dtor(void *null, void *m)
{
	struct if_map *if_map = m;
	struct srp *map = (struct srp *)(if_map + 1);
	unsigned int i;

	/*
	 * don't need to serialize the use of update_locked since this is
	 * the last reference to this map. there's nothing to race against.
	 */
	for (i = 0; i < if_map->limit; i++)
		srp_update_locked(&if_ifp_gc, &map[i], NULL);

	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
}
414 
415 /*
416  * Attach an interface to the
417  * list of "active" interfaces.
418  */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	/* every interface is a member of the "all" group */
	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	/* arm the watchdog timer; if_slowtimo reschedules itself */
	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	/* tasks carry the index, not the pointer, so they can if_get() */
	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
449 
450 /*
451  * Allocate the link level name for the specified interface.  This
452  * is an attachment helper.  It must be called after ifp->if_addrlen
453  * is initialized, which may not be the case when if_attach() is
454  * called.
455  */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	/* size the sockaddr_dl for the name plus the link-layer address */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	/* round the allocation up to a multiple of sizeof(long) */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
487 
488 /*
489  * Free the link level name for the specified interface.  This is
490  * a detach helper.  This is called from if_detach() or from
491  * link layer type specific detach functions.
492  */
493 void
494 if_free_sadl(struct ifnet *ifp)
495 {
496 	if (ifp->if_sadl == NULL)
497 		return;
498 
499 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
500 	ifp->if_sadl = NULL;
501 }
502 
/*
 * Give every configured protocol domain a chance to attach its
 * per-interface data to ifp->if_afdata.
 */
void
if_attachdomain(struct ifnet *ifp)
{
	struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
521 
/*
 * Like if_attach(), but insert the interface at the head of the
 * global interface list instead of the tail.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
531 
/*
 * Attach an interface: initialise the common state, link it onto the
 * tail of the global interface list, and finish setup (groups, index
 * map, announcement) under the net lock.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
541 
542 void
543 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
544 {
545 	struct ifqueue **map;
546 	struct ifqueue *ifq;
547 	int i;
548 
549 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
550 	KASSERT(nqs != 0);
551 
552 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
553 
554 	ifp->if_snd.ifq_softc = NULL;
555 	map[0] = &ifp->if_snd;
556 
557 	for (i = 1; i < nqs; i++) {
558 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
559 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
560 		ifq_init(ifq, ifp, i);
561 		map[i] = ifq;
562 	}
563 
564 	ifp->if_ifqs = map;
565 	ifp->if_nifqs = nqs;
566 }
567 
/*
 * Give an interface niqs receive queues.  Slot 0 reuses the built-in
 * if_rcv queue; the remaining slots are allocated here.
 */
void
if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
{
	struct ifiqueue **map;
	struct ifiqueue *ifiq;
	unsigned int i;

	KASSERT(niqs != 0);

	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);

	ifp->if_rcv.ifiq_softc = NULL;
	map[0] = &ifp->if_rcv;

	for (i = 1; i < niqs; i++) {
		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
		ifiq_init(ifiq, ifp, i);
		map[i] = ifiq;
	}

	ifp->if_iqs = map;
	ifp->if_niqs = niqs;
}
591 
/*
 * Initialise the parts of an ifnet that do not need the net lock:
 * address lists, the default transmit/receive queues, hooks, and the
 * fallback function pointers.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	/* legacy if_start drivers get the compat qstart shim */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* a single transmit queue by default; see if_attach_queues() */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* a single receive queue by default; see if_attach_iqueues() */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;
}
635 
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
650 
/*
 * Kick transmission on a legacy (non-MPSAFE) interface by going
 * through the compat qstart wrapper on its single send queue.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy drivers expect the kernel lock and splnet */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
678 
/*
 * Hand an outgoing mbuf to the interface, diverting it through pf's
 * delay machinery or the bridge when applicable.  Returns 0 or an
 * errno; the mbuf is consumed either way.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	/* pf may have asked for this packet to be delayed */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* M_PROTO1 marks packets the bridge has already handled */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
704 
/*
 * Default if_enqueue implementation: pick one of the interface's
 * transmit queues, enqueue the mbuf there, and kick the queue.
 */
int
if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
{
	struct ifqueue *ifq = &ifp->if_snd;
	int error;

	if (ifp->if_nifqs > 1) {
		unsigned int idx;

		/*
		 * use the operations on the first ifq to pick which of
		 * the array gets this mbuf.
		 */

		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
		ifq = ifp->if_ifqs[idx];
	}

	error = ifq_enqueue(ifq, m);
	if (error)
		return (error);

	ifq_start(ifq);

	return (0);
}
731 
/*
 * Default input path for drivers: hand the list of received mbufs to
 * the interface's first receive queue.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
737 
/*
 * Loop a packet back into the local protocol input path for the
 * given address family, updating both output and input counters.
 * Frees the mbuf and returns EAFNOSUPPORT for unknown families.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destinated to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* reset the header but keep broadcast/multicast markings */
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* the packet counts both as sent and as received */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
792 
/*
 * Queue a locally generated packet on one of the interface's receive
 * queues, spreading by flow id when the mbuf carries one.  Returns 0
 * on success or ENOBUFS if the queue rejected the packet.
 */
int
if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	struct ifiqueue *ifiq;
	unsigned int flow = 0;

	m->m_pkthdr.ph_family = af;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	ifiq = ifp->if_iqs[flow % ifp->if_niqs];

	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
}
810 
/*
 * Feed a list of received mbufs into the interface's protocol input
 * routine under the net lock, stirring the random pool on the way in
 * for packets from real (non-cloned) hardware.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists and the socket layer.
	 */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK();
}
839 
/*
 * Input path for virtual interfaces: tag the mbuf with this
 * interface, count it, give bpf a look, and pass it to the
 * interface's input routine.  The mbuf is consumed.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means bpf filtered the packet out */
		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	(*ifp->if_input)(ifp, m);
}
865 
/*
 * Task run to drain the legacy netisr soft-interrupt bits, calling
 * the handler for each protocol whose bit is set in the global
 * netisr word.  Runs under the net lock.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
		/* remember everything serviced for the pfsync pass below */
		t |= n;
	}

#if NPFSYNC > 0
	/* pfsync runs once, after all other handlers are done */
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
921 
/*
 * Run every hook on the list.  The mutex is dropped around each
 * callback; a stack-allocated cursor task (t_func == NULL) marks our
 * place so the list may be modified while the hook runs.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		/* park the cursor after t, then run the hook unlocked */
		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
950 
/*
 * Unpublish an interface: take it off the global list and out of the
 * index map, then wait until all outstanding if_get() references are
 * released so teardown can proceed safely.
 */
void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnet, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
965 
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
979 
/*
 * Register a task to run when ifp is detached.  Inserted at the head
 * so if_deactivate() runs hooks in reverse registration order.
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
987 
/*
 * Unregister a previously added detach hook.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
995 
996 /*
997  * Detach an interface from everything in the kernel.  Also deallocate
998  * private resources.
999  */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	/* Other CPUs must not have a reference before we start destroying. */
	if_remove(ifp);

	ifq_clr_oactive(&ifp->if_snd);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* point the driver entry points at inert stubs */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	/* tear down routing and per-protocol state for this interface */
	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

	/* let each domain release its if_afdata, mirroring if_attachdomain */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* destroy all transmit queues; slot 0 is embedded, the rest malloc'd */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* likewise for the receive queues */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1106 
1107 /*
1108  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1109  */
int
if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
{
	struct ifnet *ifp;
	int connected = 0;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return (0);

	/* trivially connected to itself */
	if (ifp0->if_index == ifp->if_index)
		connected = 1;

#if NBRIDGE > 0
	/* members of the same bridge are connected */
	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
		connected = 1;
#endif
#if NCARP > 0
	/* a carp interface is connected to its parent device (either way) */
	if ((ifp0->if_type == IFT_CARP &&
	    ifp0->if_carpdevidx == ifp->if_index) ||
	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
		connected = 1;
#endif

	if_put(ifp);
	return (connected);
}
1137 
1138 /*
1139  * Create a clone network interface.
1140  */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	rw_enter_write(&if_cloners_lock);

	/* refuse to create an interface that already exists */
	if ((ifp = if_unit(name)) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	/* re-lookup: the create routine is expected to have attached it */
	if (ret != 0 || (ifp = if_unit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);
	if_put(ifp);

	return (ret);
}
1175 
1176 /*
1177  * Destroy a clone network interface.
1178  */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	/* Some cloners do not support destruction at all. */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	/* Serialize against concurrent clone create/destroy. */
	rw_enter_write(&if_cloners_lock);

	/* Look the interface up by its full name. */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (strcmp(ifp->if_xname, name) == 0)
			break;
	}
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	/* Bring the interface down before tearing it apart. */
	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1218 
1219 /*
1220  * Look up a network interface cloner.
1221  */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */

	/* Reject "foo01"-style names: unit numbers are not zero padded. */
	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	/* Find the cloner whose name matches the non-digit prefix. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}

	if (ifc == NULL)
		return (NULL);

	/* Parse the decimal unit number, guarding against int overflow. */
	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}
1264 
1265 /*
1266  * Register a network interface cloner.
1267  */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	/* No locking needed: only main() touches the list, see above. */
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
}
1281 
1282 /*
1283  * Provide list of interface cloners to userspace.
1284  */
int
if_clone_list(struct if_clonereq *ifcr)
{
	char outbuf[IFNAMSIZ], *dst;
	struct if_clone *ifc;
	int count, error = 0;

	if ((dst = ifcr->ifcr_buffer) == NULL) {
		/* Just asking how many there are. */
		ifcr->ifcr_total = if_cloners_count;
		return (0);
	}

	if (ifcr->ifcr_count < 0)
		return (EINVAL);

	/* Report the full total even if the user buffer holds fewer names. */
	ifcr->ifcr_total = if_cloners_count;
	count = MIN(if_cloners_count, ifcr->ifcr_count);

	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (count == 0)
			break;
		/* Zero the buffer so no kernel stack data leaks to userland. */
		bzero(outbuf, sizeof outbuf);
		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
		error = copyout(outbuf, dst, IFNAMSIZ);
		if (error)
			break;
		count--;
		dst += IFNAMSIZ;
	}

	return (error);
}
1318 
1319 /*
1320  * set queue congestion marker
1321  */
void
if_congestion(void)
{
	extern int ticks;

	/* Remember when congestion was last observed. */
	ifq_congestion = ticks;
}
1329 
int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	if (diff < 0) {
		/*
		 * The congestion marker lies in the future, e.g. after
		 * ``ticks'' wrapped.  Move it safely into the past and
		 * report no congestion.
		 */
		ifq_congestion = ticks - hz;
		return (0);
	}

	/* Congested if the marker was set within the last hz/100 ticks. */
	return (diff <= (hz / 100));
}
1344 
/*
 * Sockaddr equality: compare ``a1''s sa_len bytes of both addresses.
 * Since sa_len is the first member of a BSD struct sockaddr, differing
 * lengths never compare equal; ``a2'' is assumed to be at least as
 * long as ``a1'' -- NOTE(review): confirm callers guarantee this.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1348 
1349 /*
1350  * Locate an interface based on a complete address.
1351  */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	u_int rdomain;

	/* Only interfaces within the address' routing domain qualify. */
	rdomain = rtable_l2(rtableid);
	KERNEL_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;

			/* Exact byte-for-byte sockaddr match. */
			if (equal(addr, ifa->ifa_addr)) {
				KERNEL_UNLOCK();
				return (ifa);
			}
		}
	}
	KERNEL_UNLOCK();
	return (NULL);
}
1378 
1379 /*
1380  * Locate the point to point interface with a given destination address.
1381  */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	rdomain = rtable_l2(rdomain);
	KERNEL_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;
		/* Only point-to-point interfaces have destination addrs. */
		if (ifp->if_flags & IFF_POINTOPOINT) {
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				if (ifa->ifa_addr->sa_family !=
				    addr->sa_family || ifa->ifa_dstaddr == NULL)
					continue;
				if (equal(addr, ifa->ifa_dstaddr)) {
					KERNEL_UNLOCK();
					return (ifa);
				}
			}
		}
	}
	KERNEL_UNLOCK();
	return (NULL);
}
1408 
1409 /*
1410  * Find an interface address specific to an interface best matching
1411  * a given address.
1412  */
1413 struct ifaddr *
1414 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1415 {
1416 	struct ifaddr *ifa;
1417 	char *cp, *cp2, *cp3;
1418 	char *cplim;
1419 	struct ifaddr *ifa_maybe = NULL;
1420 	u_int af = addr->sa_family;
1421 
1422 	if (af >= AF_MAX)
1423 		return (NULL);
1424 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1425 		if (ifa->ifa_addr->sa_family != af)
1426 			continue;
1427 		if (ifa_maybe == NULL)
1428 			ifa_maybe = ifa;
1429 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1430 			if (equal(addr, ifa->ifa_addr) ||
1431 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1432 				return (ifa);
1433 			continue;
1434 		}
1435 		cp = addr->sa_data;
1436 		cp2 = ifa->ifa_addr->sa_data;
1437 		cp3 = ifa->ifa_netmask->sa_data;
1438 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1439 		for (; cp3 < cplim; cp3++)
1440 			if ((*cp++ ^ *cp2++) & *cp3)
1441 				break;
1442 		if (cp3 == cplim)
1443 			return (ifa);
1444 	}
1445 	return (ifa_maybe);
1446 }
1447 
/* No-op rtrequest handler for interfaces needing no route bookkeeping. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1452 
1453 /*
1454  * Default action when installing a local route on a point-to-point
1455  * interface.
1456  */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		/* Only local routes get special treatment here. */
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the interface address the route was installed for. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/*
		 * Check that the loopback interface of this routing
		 * domain carries an address of the same family.
		 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		/*
		 * NOTE(review): clearing RTF_LLINFO presumably exempts
		 * the local route from link-layer resolution -- confirm.
		 */
		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1499 
1500 
1501 /*
1502  * Bring down all interfaces
1503  */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* Tell the driver its flags changed; errors are ignored. */
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1520 
1521 /*
1522  * Mark an interface down and notify protocols of
1523  * the transition.
1524  */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Drop any packets still queued for transmission. */
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}
1536 
1537 /*
1538  * Mark an interface up and notify protocols of
1539  * the transition.
1540  */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	/* Propagate the state change to userland and the routing table. */
	if_linkstate(ifp);
}
1557 
1558 /*
1559  * Notify userland, the routing table and hooks owner of
1560  * a link-state transition.
1561  */
void
if_linkstate_task(void *xifidx)
{
	/* The interface index travels through the task cookie. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	/* The interface may be gone by the time the task runs. */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1579 
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Notify routing socket listeners and update the routing table. */
	rtm_ifchg(ifp);
	rt_if_track(ifp);

	/* Finally run the registered link-state hooks. */
	if_hooks_run(&ifp->if_linkstatehooks);
}
1590 
/* Register a task to be run on link-state transitions of ``ifp''. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1598 
/* Remove a previously registered link-state hook. */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1606 
1607 /*
1608  * Schedule a link state change task.
1609  */
void
if_link_state_change(struct ifnet *ifp)
{
	/* Defer the actual work to the interface's network taskq. */
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1615 
1616 /*
1617  * Handle interface watchdog timer routine.  Called
1618  * from softclock, we decrement timer (if set) and
1619  * call the appropriate interface routine on expiration.
1620  */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* Schedule the watchdog task when the timer expires. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Rearm only while the driver provides a watchdog. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1634 
void
if_watchdog_task(void *xifidx)
{
	/* The interface index travels through the task cookie. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	/* Run the driver watchdog under the big lock at splnet. */
	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1655 
1656 /*
1657  * Map interface name to interface structure pointer.
1658  */
struct ifnet *
if_unit(const char *name)
{
	struct ifnet *ifp;

	KERNEL_ASSERT_LOCKED();

	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (strcmp(ifp->if_xname, name) == 0) {
			/* The caller owns the reference: must if_put(). */
			if_ref(ifp);
			return (ifp);
		}
	}

	return (NULL);
}
1675 
1676 /*
1677  * Map interface index to interface structure pointer.
1678  */
struct ifnet *
if_get(unsigned int index)
{
	struct srp_ref sr;
	struct if_map *if_map;
	struct srp *map;
	struct ifnet *ifp = NULL;

	/* Lockless lookup in the index map, protected by SRP. */
	if_map = srp_enter(&sr, &if_idxmap.map);
	if (index < if_map->limit) {
		/* The per-index srp array follows the map header. */
		map = (struct srp *)(if_map + 1);

		ifp = srp_follow(&sr, &map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Take a reference before leaving the SRP section. */
			if_ref(ifp);
		}
	}
	srp_leave(&sr);

	return (ifp);
}
1701 
/* Take an additional reference; returns ``ifp'' for caller convenience. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1709 
1710 void
1711 if_put(struct ifnet *ifp)
1712 {
1713 	if (ifp == NULL)
1714 		return;
1715 
1716 	refcnt_rele_wake(&ifp->if_refcnt);
1717 }
1718 
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	/*
	 * NOTE(review): assumes the interface embeds a struct arpcom
	 * at the start of its softc (Ethernet-like drivers) -- confirm
	 * all callers guarantee this.
	 */
	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1730 
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	/* Create the routing table if it does not exist yet. */
	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is acceptable when ``ifp'' itself is that loopback. */
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	/* Make the new loopback the rdomain's local interface. */
	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1760 
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* The rdomain's own loopback interface may not leave it. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1824 
1825 /*
1826  * Interface ioctls.
1827  */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* Commands handled without looking a specific interface up. */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_create(ifr->ifr_name, 0);
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_destroy(ifr->ifr_name);
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		return (error);
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		/* Read-only commands go through the common getter path. */
		return (ifioctl_get(cmd, data));
	}

	/* Everything below operates on one specific interface. */
	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	/* Snapshot the flags so changes can be detected at the end. */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
		/* Userland may not toggle the IFF_CANTCHANGE bits. */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);

		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error != 0) {
			/* The driver rejected the change: roll it back. */
			ifp->if_flags = oif_flags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;

#endif	/* INET6 */

#ifdef MPLS
		/* Interpose mpls_output() when MPLS is being enabled. */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		/* Restore the original output routine when disabling. */
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			/* Wake-on-LAN requested on incapable hardware. */
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		/* Tell routing socket listeners about the new MTU. */
		if (!error)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			/* Swap the reference to the old label for the new. */
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		/* Create the rdomain on demand, then move ifp into it. */
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			/* Drivers need not handle group changes. */
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		/* Drivers need not handle group changes. */
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		/* Only unicast Ethernet-sized addresses are acceptable. */
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFMEDIA:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
	case SIOCSWSPORTNO:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* Give the socket's protocol a chance to handle it first. */
		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
			(struct mbuf *) cmd, (struct mbuf *) data,
			(struct mbuf *) ifp, p));
		if (error != EOPNOTSUPP)
			break;
		/* Address-changing commands still require privilege. */
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	/* Notify listeners and timestamp if flags actually changed. */
	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
		rtm_ifchg(ifp);

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	if_put(ifp);

	return (error);
}
2241 
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Commands that do not refer to one specific interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK_IN_IOCTL();
		error = ifconf(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupmembers(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupattribs(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK_IN_IOCTL();
		error = if_getgrouplist(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	}

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK_IN_IOCTL();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Kernel-internal bits are hidden from userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only dispatches the commands handled above. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK_IN_IOCTL();

	if_put(ifp);

	return (error);
}
2357 
2358 static int
2359 if_sffpage_check(const caddr_t data)
2360 {
2361 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2362 
2363 	switch (sff->sff_addr) {
2364 	case IFSFF_ADDR_EEPROM:
2365 	case IFSFF_ADDR_DDM:
2366 		break;
2367 	default:
2368 		return (EINVAL);
2369 	}
2370 
2371 	return (0);
2372 }
2373 
2374 int
2375 if_txhprio_l2_check(int hdrprio)
2376 {
2377 	switch (hdrprio) {
2378 	case IF_HDRPRIO_PACKET:
2379 		return (0);
2380 	default:
2381 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2382 			return (0);
2383 		break;
2384 	}
2385 
2386 	return (EINVAL);
2387 }
2388 
2389 int
2390 if_txhprio_l3_check(int hdrprio)
2391 {
2392 	switch (hdrprio) {
2393 	case IF_HDRPRIO_PACKET:
2394 	case IF_HDRPRIO_PAYLOAD:
2395 		return (0);
2396 	default:
2397 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2398 			return (0);
2399 		break;
2400 	}
2401 
2402 	return (EINVAL);
2403 }
2404 
2405 int
2406 if_rxhprio_l2_check(int hdrprio)
2407 {
2408 	switch (hdrprio) {
2409 	case IF_HDRPRIO_PACKET:
2410 	case IF_HDRPRIO_OUTER:
2411 		return (0);
2412 	default:
2413 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2414 			return (0);
2415 		break;
2416 	}
2417 
2418 	return (EINVAL);
2419 }
2420 
2421 int
2422 if_rxhprio_l3_check(int hdrprio)
2423 {
2424 	switch (hdrprio) {
2425 	case IF_HDRPRIO_PACKET:
2426 	case IF_HDRPRIO_PAYLOAD:
2427 	case IF_HDRPRIO_OUTER:
2428 		return (0);
2429 	default:
2430 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2431 			return (0);
2432 		break;
2433 	}
2434 
2435 	return (EINVAL);
2436 }
2437 
2438 /*
2439  * Return interface configuration
2440  * of system.  List may be used
2441  * in later ioctl's (above) to get
2442  * other information.
2443  */
int
ifconf(caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnet, if_list) {
			struct sockaddr *sa;

			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					/* Oversized sockaddrs need extra room. */
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			/* Address-less interfaces get one zeroed entry. */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					/*
					 * The sockaddr does not fit in ifr:
					 * copy name and address separately
					 * and step past the long address.
					 */
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	/* Report how many bytes of the user buffer were actually used. */
	ifc->ifc_len -= space;
	return (error);
}
2519 
/*
 * Allocate the ifc_ncounters stat counters for ifp.  May only be
 * called once per interface: if_counters must still be NULL.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2527 
/*
 * Release the counters allocated by if_counters_alloc() and mark
 * the interface as having none.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2536 
2537 void
2538 if_getdata(struct ifnet *ifp, struct if_data *data)
2539 {
2540 	unsigned int i;
2541 
2542 	*data = ifp->if_data;
2543 
2544 	if (ifp->if_counters != NULL) {
2545 		uint64_t counters[ifc_ncounters];
2546 
2547 		counters_read(ifp->if_counters, counters, nitems(counters));
2548 
2549 		data->ifi_ipackets += counters[ifc_ipackets];
2550 		data->ifi_ierrors += counters[ifc_ierrors];
2551 		data->ifi_opackets += counters[ifc_opackets];
2552 		data->ifi_oerrors += counters[ifc_oerrors];
2553 		data->ifi_collisions += counters[ifc_collisions];
2554 		data->ifi_ibytes += counters[ifc_ibytes];
2555 		data->ifi_obytes += counters[ifc_obytes];
2556 		data->ifi_imcasts += counters[ifc_imcasts];
2557 		data->ifi_omcasts += counters[ifc_omcasts];
2558 		data->ifi_iqdrops += counters[ifc_iqdrops];
2559 		data->ifi_oqdrops += counters[ifc_oqdrops];
2560 		data->ifi_noproto += counters[ifc_noproto];
2561 	}
2562 
2563 	for (i = 0; i < ifp->if_nifqs; i++) {
2564 		struct ifqueue *ifq = ifp->if_ifqs[i];
2565 
2566 		ifq_add_data(ifq, data);
2567 	}
2568 
2569 	for (i = 0; i < ifp->if_niqs; i++) {
2570 		struct ifiqueue *ifiq = ifp->if_iqs[i];
2571 
2572 		ifiq_add_data(ifiq, data);
2573 	}
2574 }
2575 
2576 /*
2577  * Dummy functions replaced in ifnet during detach (if protocols decide to
2578  * fiddle with the if during detach.
2579  */
void
if_detached_qstart(struct ifqueue *ifq)
{
	/* The interface is gone; just drop anything queued on it. */
	ifq_purge(ifq);
}
2585 
2586 int
2587 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2588 {
2589 	return ENODEV;
2590 }
2591 
2592 /*
2593  * Create interface group without members
2594  */
2595 struct ifg_group *
2596 if_creategroup(const char *groupname)
2597 {
2598 	struct ifg_group	*ifg;
2599 
2600 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2601 		return (NULL);
2602 
2603 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2604 	ifg->ifg_refcnt = 1;
2605 	ifg->ifg_carp_demoted = 0;
2606 	TAILQ_INIT(&ifg->ifg_members);
2607 #if NPF > 0
2608 	pfi_attach_ifgroup(ifg);
2609 #endif
2610 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2611 
2612 	return (ifg);
2613 }
2614 
2615 /*
2616  * Add a group to an interface
2617  */
2618 int
2619 if_addgroup(struct ifnet *ifp, const char *groupname)
2620 {
2621 	struct ifg_list		*ifgl;
2622 	struct ifg_group	*ifg = NULL;
2623 	struct ifg_member	*ifgm;
2624 
2625 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2626 	    groupname[strlen(groupname) - 1] <= '9')
2627 		return (EINVAL);
2628 
2629 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2630 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2631 			return (EEXIST);
2632 
2633 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2634 		return (ENOMEM);
2635 
2636 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2637 		free(ifgl, M_TEMP, sizeof(*ifgl));
2638 		return (ENOMEM);
2639 	}
2640 
2641 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2642 		if (!strcmp(ifg->ifg_group, groupname))
2643 			break;
2644 
2645 	if (ifg == NULL) {
2646 		ifg = if_creategroup(groupname);
2647 		if (ifg == NULL) {
2648 			free(ifgl, M_TEMP, sizeof(*ifgl));
2649 			free(ifgm, M_TEMP, sizeof(*ifgm));
2650 			return (ENOMEM);
2651 		}
2652 	} else
2653 		ifg->ifg_refcnt++;
2654 	KASSERT(ifg->ifg_refcnt != 0);
2655 
2656 	ifgl->ifgl_group = ifg;
2657 	ifgm->ifgm_ifp = ifp;
2658 
2659 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2660 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2661 
2662 #if NPF > 0
2663 	pfi_group_addmember(groupname, ifp);
2664 #endif
2665 
2666 	return (0);
2667 }
2668 
2669 /*
2670  * Remove a group from an interface
2671  */
2672 int
2673 if_delgroup(struct ifnet *ifp, const char *groupname)
2674 {
2675 	struct ifg_list		*ifgl;
2676 	struct ifg_member	*ifgm;
2677 
2678 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2679 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2680 			break;
2681 	if (ifgl == NULL)
2682 		return (ENOENT);
2683 
2684 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2685 
2686 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2687 		if (ifgm->ifgm_ifp == ifp)
2688 			break;
2689 
2690 	if (ifgm != NULL) {
2691 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2692 		free(ifgm, M_TEMP, sizeof(*ifgm));
2693 	}
2694 
2695 #if NPF > 0
2696 	pfi_group_change(groupname);
2697 #endif
2698 
2699 	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
2700 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2701 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2702 #if NPF > 0
2703 		pfi_detach_ifgroup(ifgl->ifgl_group);
2704 #endif
2705 		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
2706 	}
2707 
2708 	free(ifgl, M_TEMP, sizeof(*ifgl));
2709 
2710 	return (0);
2711 }
2712 
2713 /*
2714  * Stores all groups from an interface in memory pointed
2715  * to by data
2716  */
2717 int
2718 if_getgroup(caddr_t data, struct ifnet *ifp)
2719 {
2720 	int			 len, error;
2721 	struct ifg_list		*ifgl;
2722 	struct ifg_req		 ifgrq, *ifgp;
2723 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2724 
2725 	if (ifgr->ifgr_len == 0) {
2726 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2727 			ifgr->ifgr_len += sizeof(struct ifg_req);
2728 		return (0);
2729 	}
2730 
2731 	len = ifgr->ifgr_len;
2732 	ifgp = ifgr->ifgr_groups;
2733 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2734 		if (len < sizeof(ifgrq))
2735 			return (EINVAL);
2736 		bzero(&ifgrq, sizeof ifgrq);
2737 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2738 		    sizeof(ifgrq.ifgrq_group));
2739 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2740 		    sizeof(struct ifg_req))))
2741 			return (error);
2742 		len -= sizeof(ifgrq);
2743 		ifgp++;
2744 	}
2745 
2746 	return (0);
2747 }
2748 
2749 /*
2750  * Stores all members of a group in memory pointed to by data
2751  */
2752 int
2753 if_getgroupmembers(caddr_t data)
2754 {
2755 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2756 	struct ifg_group	*ifg;
2757 	struct ifg_member	*ifgm;
2758 	struct ifg_req		 ifgrq, *ifgp;
2759 	int			 len, error;
2760 
2761 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2762 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2763 			break;
2764 	if (ifg == NULL)
2765 		return (ENOENT);
2766 
2767 	if (ifgr->ifgr_len == 0) {
2768 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2769 			ifgr->ifgr_len += sizeof(ifgrq);
2770 		return (0);
2771 	}
2772 
2773 	len = ifgr->ifgr_len;
2774 	ifgp = ifgr->ifgr_groups;
2775 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2776 		if (len < sizeof(ifgrq))
2777 			return (EINVAL);
2778 		bzero(&ifgrq, sizeof ifgrq);
2779 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2780 		    sizeof(ifgrq.ifgrq_member));
2781 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2782 		    sizeof(struct ifg_req))))
2783 			return (error);
2784 		len -= sizeof(ifgrq);
2785 		ifgp++;
2786 	}
2787 
2788 	return (0);
2789 }
2790 
2791 int
2792 if_getgroupattribs(caddr_t data)
2793 {
2794 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2795 	struct ifg_group	*ifg;
2796 
2797 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2798 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2799 			break;
2800 	if (ifg == NULL)
2801 		return (ENOENT);
2802 
2803 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2804 
2805 	return (0);
2806 }
2807 
2808 int
2809 if_setgroupattribs(caddr_t data)
2810 {
2811 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2812 	struct ifg_group	*ifg;
2813 	struct ifg_member	*ifgm;
2814 	int			 demote;
2815 
2816 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2817 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2818 			break;
2819 	if (ifg == NULL)
2820 		return (ENOENT);
2821 
2822 	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
2823 	if (demote + ifg->ifg_carp_demoted > 0xff ||
2824 	    demote + ifg->ifg_carp_demoted < 0)
2825 		return (EINVAL);
2826 
2827 	ifg->ifg_carp_demoted += demote;
2828 
2829 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2830 		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);
2831 
2832 	return (0);
2833 }
2834 
2835 /*
2836  * Stores all groups in memory pointed to by data
2837  */
2838 int
2839 if_getgrouplist(caddr_t data)
2840 {
2841 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2842 	struct ifg_group	*ifg;
2843 	struct ifg_req		 ifgrq, *ifgp;
2844 	int			 len, error;
2845 
2846 	if (ifgr->ifgr_len == 0) {
2847 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2848 			ifgr->ifgr_len += sizeof(ifgrq);
2849 		return (0);
2850 	}
2851 
2852 	len = ifgr->ifgr_len;
2853 	ifgp = ifgr->ifgr_groups;
2854 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2855 		if (len < sizeof(ifgrq))
2856 			return (EINVAL);
2857 		bzero(&ifgrq, sizeof ifgrq);
2858 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2859 		    sizeof(ifgrq.ifgrq_group));
2860 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2861 		    sizeof(struct ifg_req))))
2862 			return (error);
2863 		len -= sizeof(ifgrq);
2864 		ifgp++;
2865 	}
2866 
2867 	return (0);
2868 }
2869 
/*
 * Called on route changes: if the changed route is a default route
 * (destination and mask both "any"), rebuild the "egress" group.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		/* A zero-length mask also counts as "match everything". */
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2891 
/*
 * Rebuild the "egress" interface group from scratch: empty it, then
 * re-add every interface that carries an IPv4 or IPv6 default route.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Remove all current members of the egress group, if it exists. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Re-add every interface with an IPv4 default route. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		/* Walk all multipath routes sharing this destination. */
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same again for interfaces with an IPv6 default route. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2941 
2942 /*
2943  * Set/clear promiscuous mode on interface ifp based on the truth value
2944  * of pswitch.  The calls are reference counted so that only the first
2945  * "on" request actually has an effect, as does the final "off" request.
2946  * Results are undefined if the "off" and "on" requests are not matched.
2947  */
2948 int
2949 ifpromisc(struct ifnet *ifp, int pswitch)
2950 {
2951 	struct ifreq ifr;
2952 	unsigned short oif_flags;
2953 	int oif_pcount, error;
2954 
2955 	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */
2956 
2957 	oif_flags = ifp->if_flags;
2958 	oif_pcount = ifp->if_pcount;
2959 	if (pswitch) {
2960 		if (ifp->if_pcount++ != 0)
2961 			return (0);
2962 		ifp->if_flags |= IFF_PROMISC;
2963 	} else {
2964 		if (--ifp->if_pcount > 0)
2965 			return (0);
2966 		ifp->if_flags &= ~IFF_PROMISC;
2967 	}
2968 
2969 	if ((ifp->if_flags & IFF_UP) == 0)
2970 		return (0);
2971 
2972 	memset(&ifr, 0, sizeof(ifr));
2973 	ifr.ifr_flags = ifp->if_flags;
2974 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
2975 	if (error) {
2976 		ifp->if_flags = oif_flags;
2977 		ifp->if_pcount = oif_pcount;
2978 	}
2979 
2980 	return (error);
2981 }
2982 
/* Append an address to the interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2988 
/* Remove an address from the interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2994 
/*
 * Overwrite an address's broadcast address in place.  The new
 * sockaddr must have exactly the same length as the old one;
 * resizing is not supported.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3002 
3003 #ifdef DDB
3004 /* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* Print every INET/INET6 address configured on every interface. */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
3033 #endif /* DDB */
3034 
/*
 * React to a change of the interface's link-layer address: cycle
 * the interface through an up transition so the driver reprograms
 * its hardware, and regenerate the IPv6 link-local address that is
 * derived from the lladdr.  The original up/down state is restored
 * before returning.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring the interface up so the driver sees the new lladdr. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			/* Purge the old LL address, then re-derive it. */
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3081 
/* Register task t to run when ifp's address configuration changes. */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3089 
/* Unregister a task previously added with if_addrhook_add(). */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3097 
/* Run all registered address-change hooks for ifp. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3103 
3104 void
3105 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3106 {
3107 	extern int ticks;
3108 
3109 	memset(rxr, 0, sizeof(*rxr));
3110 
3111 	rxr->rxr_adjusted = ticks;
3112 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3113 	rxr->rxr_hwm = hwm;
3114 }
3115 
3116 static inline void
3117 if_rxr_adjust_cwm(struct if_rxring *rxr)
3118 {
3119 	extern int ticks;
3120 
3121 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3122 		return;
3123 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3124 		rxr->rxr_cwm++;
3125 
3126 	rxr->rxr_adjusted = ticks;
3127 }
3128 
3129 void
3130 if_rxr_livelocked(struct if_rxring *rxr)
3131 {
3132 	extern int ticks;
3133 
3134 	if (ticks - rxr->rxr_adjusted >= 1) {
3135 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3136 			rxr->rxr_cwm--;
3137 
3138 		rxr->rxr_adjusted = ticks;
3139 	}
3140 }
3141 
3142 u_int
3143 if_rxr_get(struct if_rxring *rxr, u_int max)
3144 {
3145 	extern int ticks;
3146 	u_int diff;
3147 
3148 	if (ticks - rxr->rxr_adjusted >= 1) {
3149 		/* we're free to try for an adjustment */
3150 		if_rxr_adjust_cwm(rxr);
3151 	}
3152 
3153 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3154 		return (0);
3155 
3156 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3157 	rxr->rxr_alive += diff;
3158 
3159 	return (diff);
3160 }
3161 
3162 int
3163 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
3164 {
3165 	struct if_rxrinfo kifri;
3166 	int error;
3167 	u_int n;
3168 
3169 	error = copyin(uifri, &kifri, sizeof(kifri));
3170 	if (error)
3171 		return (error);
3172 
3173 	n = min(t, kifri.ifri_total);
3174 	kifri.ifri_total = t;
3175 
3176 	if (n > 0) {
3177 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
3178 		if (error)
3179 			return (error);
3180 	}
3181 
3182 	return (copyout(&kifri, uifri, sizeof(kifri)));
3183 }
3184 
3185 int
3186 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3187     struct if_rxring *rxr)
3188 {
3189 	struct if_rxring_info ifr;
3190 
3191 	memset(&ifr, 0, sizeof(ifr));
3192 
3193 	if (name != NULL)
3194 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3195 
3196 	ifr.ifr_size = size;
3197 	ifr.ifr_info = *rxr;
3198 
3199 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3200 }
3201 
3202 /*
3203  * Network stack input queues.
3204  */
3205 
/* Initialize a netisr input queue with a length limit and isr number. */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3212 
3213 int
3214 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3215 {
3216 	int rv;
3217 
3218 	rv = mq_enqueue(&niq->ni_q, m);
3219 	if (rv == 0)
3220 		schednetisr(niq->ni_isr);
3221 	else
3222 		if_congestion();
3223 
3224 	return (rv);
3225 }
3226 
3227 int
3228 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3229 {
3230 	int rv;
3231 
3232 	rv = mq_enlist(&niq->ni_q, ml);
3233 	if (rv == 0)
3234 		schednetisr(niq->ni_isr);
3235 	else
3236 		if_congestion();
3237 
3238 	return (rv);
3239 }
3240 
/* Panic on an address family the caller does not know how to handle. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3246 
3247 /*
3248  * XXXSMP This tunable is here to work around the fact that IPsec
3249  * globals aren't ready to be accessed by multiple threads in
3250  * parallel.
3251  */
3252 int		 nettaskqs = NET_TASKQ;
3253 
3254 struct taskq *
3255 net_tq(unsigned int ifindex)
3256 {
3257 	struct taskq *t = NULL;
3258 
3259 	t = nettqmp[ifindex % nettaskqs];
3260 
3261 	return (t);
3262 }
3263