xref: /openbsd-src/sys/net/if.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 /*	$OpenBSD: if.c,v 1.643 2021/07/20 16:32:28 bluhm Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "switch.h"
72 #include "if_wg.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/timeout.h>
80 #include <sys/protosw.h>
81 #include <sys/kernel.h>
82 #include <sys/ioctl.h>
83 #include <sys/domain.h>
84 #include <sys/task.h>
85 #include <sys/atomic.h>
86 #include <sys/percpu.h>
87 #include <sys/proc.h>
88 #include <sys/stdint.h>	/* uintptr_t */
89 #include <sys/rwlock.h>
90 
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/netisr.h>
96 
97 #include <netinet/in.h>
98 #include <netinet/if_ether.h>
99 #include <netinet/igmp.h>
100 #ifdef MROUTING
101 #include <netinet/ip_mroute.h>
102 #endif
103 
104 #ifdef INET6
105 #include <netinet6/in6_var.h>
106 #include <netinet6/in6_ifattach.h>
107 #include <netinet6/nd6.h>
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 #endif
111 
112 #ifdef MPLS
113 #include <netmpls/mpls.h>
114 #endif
115 
116 #if NBPFILTER > 0
117 #include <net/bpf.h>
118 #endif
119 
120 #if NBRIDGE > 0
121 #include <net/if_bridge.h>
122 #endif
123 
124 #if NCARP > 0
125 #include <netinet/ip_carp.h>
126 #endif
127 
128 #if NPF > 0
129 #include <net/pfvar.h>
130 #endif
131 
132 #include <sys/device.h>
133 
134 void	if_attachsetup(struct ifnet *);
135 void	if_attachdomain(struct ifnet *);
136 void	if_attach_common(struct ifnet *);
137 void	if_remove(struct ifnet *);
138 int	if_createrdomain(int, struct ifnet *);
139 int	if_setrdomain(struct ifnet *, int);
140 void	if_slowtimo(void *);
141 
142 void	if_detached_qstart(struct ifqueue *);
143 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
144 
145 int	ifioctl_get(u_long, caddr_t);
146 int	ifconf(caddr_t);
147 static int
148 	if_sffpage_check(const caddr_t);
149 
150 int	if_getgroup(caddr_t, struct ifnet *);
151 int	if_getgroupmembers(caddr_t);
152 int	if_getgroupattribs(caddr_t);
153 int	if_setgroupattribs(caddr_t);
154 int	if_getgrouplist(caddr_t);
155 
156 void	if_linkstate(struct ifnet *);
157 void	if_linkstate_task(void *);
158 
159 int	if_clone_list(struct if_clonereq *);
160 struct if_clone	*if_clone_lookup(const char *, int *);
161 
162 int	if_group_egress_build(void);
163 
164 void	if_watchdog_task(void *);
165 
166 void	if_netisr(void *);
167 
168 #ifdef DDB
169 void	ifa_print_all(void);
170 #endif
171 
172 void	if_qstart_compat(struct ifqueue *);
173 
174 /*
175  * interface index map
176  *
177  * the kernel maintains a mapping of interface indexes to struct ifnet
178  * pointers.
179  *
180  * the map is an array of struct ifnet pointers prefixed by an if_map
181  * structure. the if_map structure stores the length of its array.
182  *
183  * as interfaces are attached to the system, the map is grown on demand
184  * up to USHRT_MAX entries.
185  *
186  * interface index 0 is reserved and represents no interface. this
187  * supports the use of the interface index as the scope for IPv6 link
188  * local addresses, where scope 0 means no scope has been specified.
189  * it also supports the use of interface index as the unique identifier
190  * for network interfaces in SNMP applications as per RFC2863. therefore
191  * if_get(0) returns NULL.
192  */
193 
194 void if_ifp_dtor(void *, void *);
195 void if_map_dtor(void *, void *);
196 struct ifnet *if_ref(struct ifnet *);
197 
198 /*
199  * struct if_map
200  *
201  * bounded array of ifnet srp pointers used to fetch references of live
202  * interfaces with if_get().
203  */
204 
struct if_map {
	unsigned long		 limit;	/* number of srp slots that follow */
	/* followed by limit ifnet srp pointers */
};
209 
210 /*
211  * struct if_idxmap
212  *
213  * infrastructure to manage updates and accesses to the current if_map.
214  */
215 
struct if_idxmap {
	unsigned int		 serial;	/* next candidate index (wraps at USHRT_MAX) */
	unsigned int		 count;		/* number of live interfaces in the map */
	struct srp		 map;		/* the current struct if_map */
};
221 
222 void	if_idxmap_init(unsigned int);
223 void	if_idxmap_insert(struct ifnet *);
224 void	if_idxmap_remove(struct ifnet *);
225 
226 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
227 
228 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
229 int if_cloners_count;
230 
231 struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonelk");
232 
233 /* hooks should only be added, deleted, and run from a process context */
234 struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
235 void	if_hooks_run(struct task_list *);
236 
237 int	ifq_congestion;
238 
239 int		 netisr;
240 
241 #define	NET_TASKQ	1
242 struct taskq	*nettqmp[NET_TASKQ];
243 
244 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
245 
246 /*
247  * Serialize socket operations to ensure no new sleeping points
248  * are introduced in IP output paths.
249  */
250 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
251 
252 /*
253  * Network interface utility routines.
254  */
/*
 * ifinit: bootstrap the interface layer.
 *
 * Sizes the initial interface index map and creates the softnet
 * taskq(s) the network stack runs deferred work on.  Called once
 * at boot, before any interface attaches.
 */
void
ifinit(void)
{
	unsigned int	i;

	/*
	 * most machines boot with 4 or 5 interfaces, so size the initial map
	 * to accommodate this
	 */
	if_idxmap_init(8);

	for (i = 0; i < NET_TASKQ; i++) {
		/* one thread each; MPSAFE so it runs without the kernel lock */
		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
		if (nettqmp[i] == NULL)
			panic("unable to create network taskq %d", i);
	}
}
272 
/* the global index map; modified under the kernel lock, read via srp */
static struct if_idxmap if_idxmap = {
	0,	/* serial; bumped to 1 by if_idxmap_init() so index 0 stays free */
	0,	/* count of live interfaces */
	SRP_INITIALIZER()
};
278 
279 struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
280 struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
281 
282 struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
283 
/*
 * if_idxmap_init: allocate and publish the initial interface index map.
 *
 * Called from ifinit() before other CPUs can look at the map, so the
 * "locked" srp update needs no further serialisation.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int i;

	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */

	/* the srp slot array lives directly after the if_map header */
	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
	    M_IFADDR, M_WAITOK);

	if_map->limit = limit;
	map = (struct srp *)(if_map + 1);
	for (i = 0; i < limit; i++)
		srp_init(&map[i]);

	/* this is called early so there's nothing to race with */
	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
}
304 
/*
 * if_idxmap_insert: assign ifp a free non-zero interface index and
 * publish it in the index map.
 *
 * Initialises the interface refcount, doubles the map when the next
 * candidate index falls outside the current array, then scans for an
 * unused slot.  The map takes its own reference on ifp.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* indexes are 16 bits wide; serial wraps at USHRT_MAX */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		/* grow: copy live slots into a map twice the size */
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* the new map takes its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* the old map is released via if_map_gc once readers drain */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
368 
/*
 * if_idxmap_remove: drop ifp from the interface index map.
 *
 * Clears the interface's slot (the map's reference is released by
 * if_ifp_dtor once srp readers are done) and decrements the count.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */
}
391 
/*
 * if_ifp_dtor: srp_gc destructor for an ifnet slot in the index map.
 * Drops the reference the map held on the interface.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
397 
/*
 * if_map_dtor: srp_gc destructor for a retired index map.
 *
 * Runs once the last reader of an old map is gone; releases the
 * interface references the map held, then frees its storage.
 */
void
if_map_dtor(void *null, void *m)
{
	struct if_map *if_map = m;
	struct srp *map = (struct srp *)(if_map + 1);
	unsigned int i;

	/*
	 * dont need to serialize the use of update_locked since this is
	 * the last reference to this map. there's nothing to race against.
	 */
	for (i = 0; i < if_map->limit; i++)
		srp_update_locked(&if_ifp_gc, &map[i], NULL);

	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
}
414 
415 /*
416  * Attach an interface to the
417  * list of "active" interfaces.
418  */
/*
 * if_attachsetup: finish attaching an interface that is already on the
 * global list.  Joins the "all" group, attaches per-domain data, arms
 * the watchdog, assigns the interface index and announces the arrival
 * over the routing socket.  Runs under the netlock.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	/* arm the per-interface watchdog timer */
	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	/*
	 * NOTE(review): the tasks are given the index, not the pointer —
	 * presumably so they can if_get() and bail out safely if the
	 * interface detaches before they run; confirm against the task
	 * implementations.
	 */
	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
447 
448 /*
449  * Allocate the link level name for the specified interface.  This
450  * is an attachment helper.  It must be called after ifp->if_addrlen
451  * is initialized, which may not be the case when if_attach() is
452  * called.
453  */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* round up to the alignment granularity of sockaddr storage */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;	/* link-layer address length */
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
485 
486 /*
487  * Free the link level name for the specified interface.  This is
488  * a detach helper.  This is called from if_detach() or from
489  * link layer type specific detach functions.
490  */
491 void
492 if_free_sadl(struct ifnet *ifp)
493 {
494 	if (ifp->if_sadl == NULL)
495 		return;
496 
497 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
498 	ifp->if_sadl = NULL;
499 }
500 
/*
 * if_attachdomain: let every configured protocol domain attach its
 * per-family data to ifp->if_afdata.
 */
void
if_attachdomain(struct ifnet *ifp)
{
	const struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
519 
/*
 * if_attachhead: like if_attach(), but insert the interface at the
 * head of the global interface list instead of the tail.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
529 
/*
 * if_attach: attach an interface, appending it to the tail of the
 * global interface list.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
539 
540 void
541 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
542 {
543 	struct ifqueue **map;
544 	struct ifqueue *ifq;
545 	int i;
546 
547 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
548 	KASSERT(nqs != 0);
549 
550 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
551 
552 	ifp->if_snd.ifq_softc = NULL;
553 	map[0] = &ifp->if_snd;
554 
555 	for (i = 1; i < nqs; i++) {
556 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
557 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
558 		ifq_init(ifq, ifp, i);
559 		map[i] = ifq;
560 	}
561 
562 	ifp->if_ifqs = map;
563 	ifp->if_nifqs = nqs;
564 }
565 
/*
 * if_attach_iqueues: give an interface niqs receive queues.
 *
 * Slot 0 is always the builtin if_rcv; the remaining ifiqueues are
 * allocated here.
 */
void
if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
{
	struct ifiqueue **map;
	struct ifiqueue *ifiq;
	unsigned int i;

	KASSERT(niqs != 0);

	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);

	ifp->if_rcv.ifiq_softc = NULL;
	map[0] = &ifp->if_rcv;

	for (i = 1; i < niqs; i++) {
		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
		ifiq_init(ifiq, ifp, i);
		map[i] = ifiq;
	}

	ifp->if_iqs = map;
	ifp->if_niqs = niqs;
}
589 
/*
 * if_attach_common: initialisation shared by if_attach() and
 * if_attachhead().  Sets up the address/group lists, the default
 * single send and receive queues, the hook lists, and fallback
 * function pointers.  Called before the interface is linked into the
 * global list.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);
	TAILQ_INIT(&ifp->if_groups);

	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		/* legacy drivers provide if_start; adapt it to ifqueues */
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* one transmit queue by default; see if_attach_queues() */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* one receive queue by default; see if_attach_iqueues() */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* supply harmless defaults where the driver left NULL */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
#if NBPFILTER > 0
	if (ifp->if_bpf_mtap == NULL)
		ifp->if_bpf_mtap = bpf_mtap_ether;
#endif
	ifp->if_llprio = IFQ_DEFPRIO;
}
638 
/*
 * if_attach_ifq: switch the queueing discipline of an interface.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
653 
/*
 * if_start: legacy transmit kick.  Only valid for non-MPSAFE drivers,
 * whose if_qstart was set to if_qstart_compat by if_attach_common().
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
/*
 * if_qstart_compat: adapt a driver's single-queue if_start routine to
 * the stack's per-ifqueue start interface, under the kernel lock.
 */
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
681 
/*
 * if_enqueue: queue an outgoing packet on ifp.
 *
 * Applies pf packet delay and bridge diversion before handing the
 * mbuf to the interface's enqueue routine.  Returns 0 or an errno.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	/* pf may have asked for this packet to be delayed */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/*
	 * NOTE(review): M_PROTO1 appears to mark packets that already
	 * went through the bridge — confirm against bridge_enqueue().
	 */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
707 
708 int
709 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
710 {
711 	struct ifqueue *ifq = &ifp->if_snd;
712 	int error;
713 
714 	if (ifp->if_nifqs > 1) {
715 		unsigned int idx;
716 
717 		/*
718 		 * use the operations on the first ifq to pick which of
719 		 * the array gets this mbuf.
720 		 */
721 
722 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
723 		ifq = ifp->if_ifqs[idx];
724 	}
725 
726 	error = ifq_enqueue(ifq, m);
727 	if (error)
728 		return (error);
729 
730 	ifq_start(ifq);
731 
732 	return (0);
733 }
734 
/*
 * if_input: hand a list of received packets to the stack via the
 * interface's first receive queue.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
740 
/*
 * if_input_local: loop a packet of address family af back into the
 * local input path of ifp, updating both input and output statistics.
 * Returns 0, or EAFNOSUPPORT (after freeing the mbuf) for an
 * unhandled family.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* reset the header but keep the broadcast/multicast markers */
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* from the stack's view the packet is both sent and received */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
795 
/*
 * if_output_local: enqueue a locally generated packet directly on one
 * of ifp's receive queues, spread by flow id when the packet carries
 * one.  Returns 0 on success or ENOBUFS if the queue rejected it.
 */
int
if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	struct ifiqueue *ifiq;
	unsigned int flow = 0;

	m->m_pkthdr.ph_family = af;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	ifiq = ifp->if_iqs[flow % ifp->if_niqs];

	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
}
813 
/*
 * if_input_process: deliver a list of received packets to ifp's input
 * routine, one at a time, under the exclusive netlock.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* cloned (pseudo) interfaces contribute no hardware entropy */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists and the socket layer.
	 */

	/*
	 * XXXSMP IPsec data structures are not ready to be accessed
	 * by multiple network threads in parallel.  In this case
	 * use an exclusive lock.
	 */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK();
}
848 
849 void
850 if_vinput(struct ifnet *ifp, struct mbuf *m)
851 {
852 #if NBPFILTER > 0
853 	caddr_t if_bpf;
854 #endif
855 
856 	m->m_pkthdr.ph_ifidx = ifp->if_index;
857 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
858 
859 	counters_pkt(ifp->if_counters,
860 	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
861 
862 #if NPF > 0
863 	pf_pkt_addr_changed(m);
864 #endif
865 
866 #if NBPFILTER > 0
867 	if_bpf = ifp->if_bpf;
868 	if (if_bpf) {
869 		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
870 			m_freem(m);
871 			return;
872 		}
873 	}
874 #endif
875 
876 	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR)))
877 		(*ifp->if_input)(ifp, m);
878 }
879 
/*
 * if_netisr: run the legacy soft-interrupt handlers whose bits are
 * set in the global netisr word.  Handlers that are not yet MPSAFE
 * run under the kernel lock; pfsync runs once after the loop drains.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
		/* remember everything serviced for the pfsync check below */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
935 
/*
 * if_hooks_run: invoke every task on a hook list without holding
 * if_hooks_mtx across the callbacks.
 *
 * A stack-allocated cursor (recognised by t_func == NULL) marks our
 * position while the mutex is dropped, so concurrent add/del of other
 * hooks cannot invalidate the traversal.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		/* park the cursor, drop the mutex, run the hook */
		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
964 
/*
 * if_remove: make ifp unreachable — unlink it from the global list
 * and the index map, then wait for all outstanding references to
 * drain before returning.
 */
void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnet, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
979 
/*
 * if_deactivate: run the interface's detach hooks under the netlock.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
993 
/*
 * if_detachhook_add: register t to run when ifp detaches.  Inserted
 * at the head so hooks run in reverse registration order; see
 * if_deactivate().
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1001 
/*
 * if_detachhook_del: unregister a previously added detach hook.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1009 
1010 /*
1011  * Detach an interface from everything in the kernel.  Also deallocate
1012  * private resources.
1013  */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	const struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	/* Other CPUs must not have a reference before we start destroying. */
	if_remove(ifp);

	ifq_clr_oactive(&ifp->if_snd);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* point driver entry points at harmless stubs */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	/* tear down routing, multicast and per-family state */
	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

	/* let each domain free what it attached in if_attachdomain() */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* free the transmit queues (slot 0 is the builtin if_snd) */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* free the receive queues (slot 0 is the builtin if_rcv) */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1120 
1121 /*
1122  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1123  */
int
if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
{
	struct ifnet *ifp;
	int connected = 0;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return (0);

	/* an interface is trivially connected to itself */
	if (ifp0->if_index == ifp->if_index)
		connected = 1;

#if NBRIDGE > 0
	/* both members of the same bridge */
	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
		connected = 1;
#endif
#if NCARP > 0
	/* one is a carp interface on top of the other */
	if ((ifp0->if_type == IFT_CARP &&
	    ifp0->if_carpdevidx == ifp->if_index) ||
	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
		connected = 1;
#endif

	if_put(ifp);
	return (connected);
}
1151 
1152 /*
1153  * Create a clone network interface.
1154  */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	/* serialise against concurrent create/destroy of clones */
	rw_enter_write(&if_cloners_lock);

	if ((ifp = if_unit(name)) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	/* re-lookup: the cloner attached the new ifnet for us */
	if (ret != 0 || (ifp = if_unit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);
	/* if_put(NULL) is a no-op on the error paths */
	if_put(ifp);

	return (ret);
}
1189 
1190 /*
1191  * Destroy a clone network interface.
1192  */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	/* Not every cloner supports destruction. */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	/* Serialize clone creation/destruction against each other. */
	rw_enter_write(&if_cloners_lock);

	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (strcmp(ifp->if_xname, name) == 0)
			break;
	}
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	/* Take the interface down before handing it to the cloner. */
	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1232 
1233 /*
1234  * Look up a network interface cloner.
1235  */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */

	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	/* match the name prefix against the registered cloners */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}

	if (ifc == NULL)
		return (NULL);

	/* parse the unit number, rejecting non-digits and int overflow */
	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}
1278 
1279 /*
1280  * Register a network interface cloner.
1281  */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	/* keep the count in sync for SIOCIFGCLONERS (if_clone_list()) */
	if_cloners_count++;
}
1295 
1296 /*
1297  * Provide list of interface cloners to userspace.
1298  */
1299 int
1300 if_clone_list(struct if_clonereq *ifcr)
1301 {
1302 	char outbuf[IFNAMSIZ], *dst;
1303 	struct if_clone *ifc;
1304 	int count, error = 0;
1305 
1306 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1307 		/* Just asking how many there are. */
1308 		ifcr->ifcr_total = if_cloners_count;
1309 		return (0);
1310 	}
1311 
1312 	if (ifcr->ifcr_count < 0)
1313 		return (EINVAL);
1314 
1315 	ifcr->ifcr_total = if_cloners_count;
1316 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1317 
1318 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1319 		if (count == 0)
1320 			break;
1321 		bzero(outbuf, sizeof outbuf);
1322 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1323 		error = copyout(outbuf, dst, IFNAMSIZ);
1324 		if (error)
1325 			break;
1326 		count--;
1327 		dst += IFNAMSIZ;
1328 	}
1329 
1330 	return (error);
1331 }
1332 
1333 /*
1334  * set queue congestion marker
1335  */
void
if_congestion(void)
{
	extern int ticks;

	/* remember when congestion was last signalled */
	ifq_congestion = ticks;
}
1343 
1344 int
1345 if_congested(void)
1346 {
1347 	extern int ticks;
1348 	int diff;
1349 
1350 	diff = ticks - ifq_congestion;
1351 	if (diff < 0) {
1352 		ifq_congestion = ticks - hz;
1353 		return (0);
1354 	}
1355 
1356 	return (diff <= (hz / 100));
1357 }
1358 
/*
 * Exact sockaddr comparison; only a1's sa_len bytes are compared, so
 * both addresses are assumed to carry matching lengths.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1362 
1363 /*
1364  * Locate an interface based on a complete address.
1365  */
1366 struct ifaddr *
1367 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1368 {
1369 	struct ifnet *ifp;
1370 	struct ifaddr *ifa;
1371 	u_int rdomain;
1372 
1373 	rdomain = rtable_l2(rtableid);
1374 	KERNEL_LOCK();
1375 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1376 		if (ifp->if_rdomain != rdomain)
1377 			continue;
1378 
1379 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1380 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1381 				continue;
1382 
1383 			if (equal(addr, ifa->ifa_addr)) {
1384 				KERNEL_UNLOCK();
1385 				return (ifa);
1386 			}
1387 		}
1388 	}
1389 	KERNEL_UNLOCK();
1390 	return (NULL);
1391 }
1392 
1393 /*
1394  * Locate the point to point interface with a given destination address.
1395  */
1396 struct ifaddr *
1397 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1398 {
1399 	struct ifnet *ifp;
1400 	struct ifaddr *ifa;
1401 
1402 	rdomain = rtable_l2(rdomain);
1403 	KERNEL_LOCK();
1404 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1405 		if (ifp->if_rdomain != rdomain)
1406 			continue;
1407 		if (ifp->if_flags & IFF_POINTOPOINT) {
1408 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1409 				if (ifa->ifa_addr->sa_family !=
1410 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1411 					continue;
1412 				if (equal(addr, ifa->ifa_dstaddr)) {
1413 					KERNEL_UNLOCK();
1414 					return (ifa);
1415 				}
1416 			}
1417 		}
1418 	}
1419 	KERNEL_UNLOCK();
1420 	return (NULL);
1421 }
1422 
1423 /*
1424  * Find an interface address specific to an interface best matching
1425  * a given address.
1426  */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;	/* fallback: first af match */
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		/* No netmask, or p2p link: only exact matches count. */
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/*
		 * Walk the netmask byte by byte; if no masked byte of
		 * addr differs from the interface address, addr belongs
		 * to this address's network.
		 */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1461 
/* No-op if_rtrequest handler for interfaces with no special route needs. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1466 
1467 /*
1468  * Default action when installing a local route on a point-to-point
1469  * interface.
1470  */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the interface address the local route points at. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/*
		 * Look for a same-family address on the routing domain's
		 * loopback interface.
		 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		/* Clear RTF_LLINFO only when loopback can carry the route. */
		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1513 
/* Feed an outgoing/incoming packet to bpf(4) with its address family. */
int
p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
{
#if NBPFILTER > 0
	return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
#else
	/* bpf(4) not compiled into this kernel */
	return (0);
#endif
}
1523 
1524 void
1525 p2p_input(struct ifnet *ifp, struct mbuf *m)
1526 {
1527 	void (*input)(struct ifnet *, struct mbuf *);
1528 
1529 	switch (m->m_pkthdr.ph_family) {
1530 	case AF_INET:
1531 		input = ipv4_input;
1532 		break;
1533 #ifdef INET6
1534 	case AF_INET6:
1535 		input = ipv6_input;
1536 		break;
1537 #endif
1538 #ifdef MPLS
1539 	case AF_MPLS:
1540 		input = mpls_input;
1541 		break;
1542 #endif
1543 	default:
1544 		m_freem(m);
1545 		return;
1546 	}
1547 
1548 	(*input)(ifp, m);
1549 }
1550 
1551 /*
1552  * Bring down all interfaces
1553  */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* tell the driver about the cleared IFF_UP flag */
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1570 
1571 /*
1572  * Mark an interface down and notify protocols of
1573  * the transition.
1574  */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* drop anything still queued for transmission */
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}
1586 
1587 /*
1588  * Mark an interface up and notify protocols of
1589  * the transition.
1590  */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	/* announce the transition */
	if_linkstate(ifp);
}
1607 
1608 /*
1609  * Notify userland, the routing table and hooks owner of
1610  * a link-state transition.
1611  */
void
if_linkstate_task(void *xifidx)
{
	/* the interface index was smuggled through the task argument */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	/* The interface may have been detached while the task was queued. */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1629 
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* notify userland via a routing socket message */
	rtm_ifchg(ifp);
	rt_if_track(ifp);

	/* run tasks registered via if_linkstatehook_add() */
	if_hooks_run(&ifp->if_linkstatehooks);
}
1640 
/* Register a task to run on link-state changes of ifp. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	/* NOTE(review): inserted at head; run order depends on if_hooks_run() */
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1648 
/* Unregister a link-state task previously added with if_linkstatehook_add(). */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1656 
1657 /*
1658  * Schedule a link state change task.
1659  */
void
if_link_state_change(struct ifnet *ifp)
{
	/* defer the work to the interface's network taskq */
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1665 
1666 /*
1667  * Handle interface watchdog timer routine.  Called
1668  * from softclock, we decrement timer (if set) and
1669  * call the appropriate interface routine on expiration.
1670  */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* fire the watchdog task once the countdown hits zero */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* re-arm ourselves only while a watchdog is installed */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1684 
void
if_watchdog_task(void *xifidx)
{
	/* the interface index was smuggled through the task argument */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	/* The interface may be gone by the time the task runs. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1705 
1706 /*
1707  * Map interface name to interface structure pointer.
1708  */
1709 struct ifnet *
1710 if_unit(const char *name)
1711 {
1712 	struct ifnet *ifp;
1713 
1714 	KERNEL_ASSERT_LOCKED();
1715 
1716 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1717 		if (strcmp(ifp->if_xname, name) == 0) {
1718 			if_ref(ifp);
1719 			return (ifp);
1720 		}
1721 	}
1722 
1723 	return (NULL);
1724 }
1725 
1726 /*
1727  * Map interface index to interface structure pointer.
1728  */
struct ifnet *
if_get(unsigned int index)
{
	struct srp_ref sr;
	struct if_map *if_map;
	struct srp *map;
	struct ifnet *ifp = NULL;

	if_map = srp_enter(&sr, &if_idxmap.map);
	if (index < if_map->limit) {
		/* the srp array of interfaces starts right after the map */
		map = (struct srp *)(if_map + 1);

		ifp = srp_follow(&sr, &map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* hand out a reference; the caller must if_put() it */
			if_ref(ifp);
		}
	}
	srp_leave(&sr);

	return (ifp);
}
1751 
/* Take an additional reference on ifp; returns ifp for convenience. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1759 
/* Release a reference taken with if_get()/if_ref(); NULL is allowed. */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1768 
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	/*
	 * NOTE(review): assumes ifp is embedded in a struct arpcom, i.e.
	 * an Ethernet-like interface -- confirm at the call sites.
	 */
	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1780 
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if ((error = rtable_add(rdomain)) != 0)
		return (error);
	/* refuse to take over a routing table that is already in use */
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is acceptable when ifp itself is that loopback interface */
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1810 
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* a routing domain's own loopback interface may not leave it */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1874 
1875 /*
1876  * Interface ioctls.
1877  */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* requests handled without resolving an interface pointer */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_create(ifr->ifr_name, 0);
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_destroy(ifr->ifr_name);
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		return (error);
	/* read-only requests are dispatched to ifioctl_get() */
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		return (ifioctl_get(cmd, data));
	}

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	/* remember the flags to detect changes made below */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		/* turning on IPv6 autoconf requires link-local setup first */
		if ((ISSET(ifr->ifr_flags, IFXF_AUTOCONF6) ||
		    ISSET(ifr->ifr_flags, IFXF_AUTOCONF6TEMP)) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;

#endif	/* INET6 */

#ifdef MPLS
		/* toggling MPLS swaps the interface's output routine */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			/* driver cannot do Wake-on-LAN */
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);

		/* newly enabled autoconf implies bringing the interface up */
		if (!ISSET(ifp->if_flags, IFF_UP) &&
		    ((!ISSET(oif_xflags, IFXF_AUTOCONF4) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF4)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6TEMP) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)))) {
			ifr->ifr_flags = ifp->if_flags | IFF_UP;
			cmd = SIOCSIFFLAGS;
			goto forceup;
		}

		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
forceup:	/* also entered from SIOCSIFXFLAGS with the NET_LOCK held */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error != 0) {
			/* the driver rejected the change; roll it back */
			ifp->if_flags = oif_flags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			/* drop the old label before taking the new one */
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		/* EEXIST means the rdomain already existed; still move ifp */
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		/* require a unicast Ethernet-sized address */
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFMEDIA:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
	case SIOCSWSPORTNO:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* give the protocol a first shot at the request */
		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
			(struct mbuf *) cmd, (struct mbuf *) data,
			(struct mbuf *) ifp, p));
		if (error != EOPNOTSUPP)
			break;
		/* address-changing requests still require privilege */
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags) {
		/* if_up() and if_down() already sent an update, skip here */
		if (((oif_flags ^ ifp->if_flags) & IFF_UP) == 0)
			rtm_ifchg(ifp);
	}

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	if_put(ifp);

	return (error);
}
2312 
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* requests that do not reference a specific interface */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK_IN_IOCTL();
		error = ifconf(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupmembers(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupattribs(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK_IN_IOCTL();
		error = if_getgrouplist(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	}

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK_IN_IOCTL();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* report the transient OACTIVE state of the send queue */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* kernel-internal xflags are hidden from userland */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only forwards the commands handled above */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK_IN_IOCTL();

	if_put(ifp);

	return (error);
}
2428 
2429 static int
2430 if_sffpage_check(const caddr_t data)
2431 {
2432 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2433 
2434 	switch (sff->sff_addr) {
2435 	case IFSFF_ADDR_EEPROM:
2436 	case IFSFF_ADDR_DDM:
2437 		break;
2438 	default:
2439 		return (EINVAL);
2440 	}
2441 
2442 	return (0);
2443 }
2444 
2445 int
2446 if_txhprio_l2_check(int hdrprio)
2447 {
2448 	switch (hdrprio) {
2449 	case IF_HDRPRIO_PACKET:
2450 		return (0);
2451 	default:
2452 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2453 			return (0);
2454 		break;
2455 	}
2456 
2457 	return (EINVAL);
2458 }
2459 
2460 int
2461 if_txhprio_l3_check(int hdrprio)
2462 {
2463 	switch (hdrprio) {
2464 	case IF_HDRPRIO_PACKET:
2465 	case IF_HDRPRIO_PAYLOAD:
2466 		return (0);
2467 	default:
2468 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2469 			return (0);
2470 		break;
2471 	}
2472 
2473 	return (EINVAL);
2474 }
2475 
2476 int
2477 if_rxhprio_l2_check(int hdrprio)
2478 {
2479 	switch (hdrprio) {
2480 	case IF_HDRPRIO_PACKET:
2481 	case IF_HDRPRIO_OUTER:
2482 		return (0);
2483 	default:
2484 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2485 			return (0);
2486 		break;
2487 	}
2488 
2489 	return (EINVAL);
2490 }
2491 
2492 int
2493 if_rxhprio_l3_check(int hdrprio)
2494 {
2495 	switch (hdrprio) {
2496 	case IF_HDRPRIO_PACKET:
2497 	case IF_HDRPRIO_PAYLOAD:
2498 	case IF_HDRPRIO_OUTER:
2499 		return (0);
2500 	default:
2501 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2502 			return (0);
2503 		break;
2504 	}
2505 
2506 	return (EINVAL);
2507 }
2508 
2509 /*
2510  * Return interface configuration
2511  * of system.  List may be used
2512  * in later ioctl's (above) to get
2513  * other information.
2514  */
2515 int
2516 ifconf(caddr_t data)
2517 {
2518 	struct ifconf *ifc = (struct ifconf *)data;
2519 	struct ifnet *ifp;
2520 	struct ifaddr *ifa;
2521 	struct ifreq ifr, *ifrp;
2522 	int space = ifc->ifc_len, error = 0;
2523 
2524 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2525 	if (space == 0) {
2526 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2527 			struct sockaddr *sa;
2528 
2529 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2530 				space += sizeof (ifr);
2531 			else
2532 				TAILQ_FOREACH(ifa,
2533 				    &ifp->if_addrlist, ifa_list) {
2534 					sa = ifa->ifa_addr;
2535 					if (sa->sa_len > sizeof(*sa))
2536 						space += sa->sa_len -
2537 						    sizeof(*sa);
2538 					space += sizeof(ifr);
2539 				}
2540 		}
2541 		ifc->ifc_len = space;
2542 		return (0);
2543 	}
2544 
2545 	ifrp = ifc->ifc_req;
2546 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2547 		if (space < sizeof(ifr))
2548 			break;
2549 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2550 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2551 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2552 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2553 			    sizeof(ifr));
2554 			if (error)
2555 				break;
2556 			space -= sizeof (ifr), ifrp++;
2557 		} else
2558 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2559 				struct sockaddr *sa = ifa->ifa_addr;
2560 
2561 				if (space < sizeof(ifr))
2562 					break;
2563 				if (sa->sa_len <= sizeof(*sa)) {
2564 					ifr.ifr_addr = *sa;
2565 					error = copyout((caddr_t)&ifr,
2566 					    (caddr_t)ifrp, sizeof (ifr));
2567 					ifrp++;
2568 				} else {
2569 					space -= sa->sa_len - sizeof(*sa);
2570 					if (space < sizeof (ifr))
2571 						break;
2572 					error = copyout((caddr_t)&ifr,
2573 					    (caddr_t)ifrp,
2574 					    sizeof(ifr.ifr_name));
2575 					if (error == 0)
2576 						error = copyout((caddr_t)sa,
2577 						    (caddr_t)&ifrp->ifr_addr,
2578 						    sa->sa_len);
2579 					ifrp = (struct ifreq *)(sa->sa_len +
2580 					    (caddr_t)&ifrp->ifr_addr);
2581 				}
2582 				if (error)
2583 					break;
2584 				space -= sizeof (ifr);
2585 			}
2586 	}
2587 	ifc->ifc_len -= space;
2588 	return (error);
2589 }
2590 
/*
 * Attach a block of ifc_ncounters counters to the interface for
 * statistics keeping.  Must be called at most once per interface,
 * before any of the counters are used.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2598 
/*
 * Release the counter block allocated by if_counters_alloc() and
 * clear the pointer so stale reads are caught.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2607 
/*
 * Fill *data with a snapshot of the interface statistics: start from
 * the static if_data, fold in the ifc_* counters if the driver
 * allocated them, and finally add the per-queue tx/rx statistics.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters));

		/* fold each counter into its if_data field */
		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* add the statistics kept by each transmit queue... */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* ...and by each receive queue */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2646 
2647 /*
2648  * Dummy functions replaced in ifnet during detach (if protocols decide to
2649  * fiddle with the if during detach.
2650  */
void
if_detached_qstart(struct ifqueue *ifq)
{
	/* the interface is gone; drop anything still queued */
	ifq_purge(ifq);
}
2656 
2657 int
2658 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2659 {
2660 	return ENODEV;
2661 }
2662 
2663 /*
2664  * Create interface group without members
2665  */
2666 struct ifg_group *
2667 if_creategroup(const char *groupname)
2668 {
2669 	struct ifg_group	*ifg;
2670 
2671 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2672 		return (NULL);
2673 
2674 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2675 	ifg->ifg_refcnt = 1;
2676 	ifg->ifg_carp_demoted = 0;
2677 	TAILQ_INIT(&ifg->ifg_members);
2678 #if NPF > 0
2679 	pfi_attach_ifgroup(ifg);
2680 #endif
2681 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2682 
2683 	return (ifg);
2684 }
2685 
2686 /*
2687  * Add a group to an interface
2688  */
2689 int
2690 if_addgroup(struct ifnet *ifp, const char *groupname)
2691 {
2692 	struct ifg_list		*ifgl;
2693 	struct ifg_group	*ifg = NULL;
2694 	struct ifg_member	*ifgm;
2695 	size_t			 namelen;
2696 
2697 	namelen = strlen(groupname);
2698 	if (namelen == 0 || namelen >= IFNAMSIZ ||
2699 	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
2700 		return (EINVAL);
2701 
2702 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2703 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2704 			return (EEXIST);
2705 
2706 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2707 		return (ENOMEM);
2708 
2709 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2710 		free(ifgl, M_TEMP, sizeof(*ifgl));
2711 		return (ENOMEM);
2712 	}
2713 
2714 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2715 		if (!strcmp(ifg->ifg_group, groupname))
2716 			break;
2717 
2718 	if (ifg == NULL) {
2719 		ifg = if_creategroup(groupname);
2720 		if (ifg == NULL) {
2721 			free(ifgl, M_TEMP, sizeof(*ifgl));
2722 			free(ifgm, M_TEMP, sizeof(*ifgm));
2723 			return (ENOMEM);
2724 		}
2725 	} else
2726 		ifg->ifg_refcnt++;
2727 	KASSERT(ifg->ifg_refcnt != 0);
2728 
2729 	ifgl->ifgl_group = ifg;
2730 	ifgm->ifgm_ifp = ifp;
2731 
2732 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2733 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2734 
2735 #if NPF > 0
2736 	pfi_group_addmember(groupname, ifp);
2737 #endif
2738 
2739 	return (0);
2740 }
2741 
2742 /*
2743  * Remove a group from an interface
2744  */
2745 int
2746 if_delgroup(struct ifnet *ifp, const char *groupname)
2747 {
2748 	struct ifg_list		*ifgl;
2749 	struct ifg_member	*ifgm;
2750 
2751 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2752 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2753 			break;
2754 	if (ifgl == NULL)
2755 		return (ENOENT);
2756 
2757 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2758 
2759 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2760 		if (ifgm->ifgm_ifp == ifp)
2761 			break;
2762 
2763 	if (ifgm != NULL) {
2764 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2765 		free(ifgm, M_TEMP, sizeof(*ifgm));
2766 	}
2767 
2768 #if NPF > 0
2769 	pfi_group_change(groupname);
2770 #endif
2771 
2772 	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
2773 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2774 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2775 #if NPF > 0
2776 		pfi_detach_ifgroup(ifgl->ifgl_group);
2777 #endif
2778 		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
2779 	}
2780 
2781 	free(ifgl, M_TEMP, sizeof(*ifgl));
2782 
2783 	return (0);
2784 }
2785 
2786 /*
2787  * Stores all groups from an interface in memory pointed
2788  * to by data
2789  */
2790 int
2791 if_getgroup(caddr_t data, struct ifnet *ifp)
2792 {
2793 	int			 len, error;
2794 	struct ifg_list		*ifgl;
2795 	struct ifg_req		 ifgrq, *ifgp;
2796 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2797 
2798 	if (ifgr->ifgr_len == 0) {
2799 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2800 			ifgr->ifgr_len += sizeof(struct ifg_req);
2801 		return (0);
2802 	}
2803 
2804 	len = ifgr->ifgr_len;
2805 	ifgp = ifgr->ifgr_groups;
2806 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2807 		if (len < sizeof(ifgrq))
2808 			return (EINVAL);
2809 		bzero(&ifgrq, sizeof ifgrq);
2810 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2811 		    sizeof(ifgrq.ifgrq_group));
2812 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2813 		    sizeof(struct ifg_req))))
2814 			return (error);
2815 		len -= sizeof(ifgrq);
2816 		ifgp++;
2817 	}
2818 
2819 	return (0);
2820 }
2821 
2822 /*
2823  * Stores all members of a group in memory pointed to by data
2824  */
2825 int
2826 if_getgroupmembers(caddr_t data)
2827 {
2828 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2829 	struct ifg_group	*ifg;
2830 	struct ifg_member	*ifgm;
2831 	struct ifg_req		 ifgrq, *ifgp;
2832 	int			 len, error;
2833 
2834 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2835 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2836 			break;
2837 	if (ifg == NULL)
2838 		return (ENOENT);
2839 
2840 	if (ifgr->ifgr_len == 0) {
2841 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2842 			ifgr->ifgr_len += sizeof(ifgrq);
2843 		return (0);
2844 	}
2845 
2846 	len = ifgr->ifgr_len;
2847 	ifgp = ifgr->ifgr_groups;
2848 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2849 		if (len < sizeof(ifgrq))
2850 			return (EINVAL);
2851 		bzero(&ifgrq, sizeof ifgrq);
2852 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2853 		    sizeof(ifgrq.ifgrq_member));
2854 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2855 		    sizeof(struct ifg_req))))
2856 			return (error);
2857 		len -= sizeof(ifgrq);
2858 		ifgp++;
2859 	}
2860 
2861 	return (0);
2862 }
2863 
2864 int
2865 if_getgroupattribs(caddr_t data)
2866 {
2867 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2868 	struct ifg_group	*ifg;
2869 
2870 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2871 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2872 			break;
2873 	if (ifg == NULL)
2874 		return (ENOENT);
2875 
2876 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2877 
2878 	return (0);
2879 }
2880 
/*
 * Adjust the carp demotion counter of the group named in ifgr_name
 * by the (possibly negative) value in ifgr_attrib, then forward the
 * request to every member interface's ioctl handler.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* keep the resulting demotion counter within 0..0xff */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* let every member interface react to the new attributes */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2907 
2908 /*
2909  * Stores all groups in memory pointed to by data
2910  */
2911 int
2912 if_getgrouplist(caddr_t data)
2913 {
2914 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2915 	struct ifg_group	*ifg;
2916 	struct ifg_req		 ifgrq, *ifgp;
2917 	int			 len, error;
2918 
2919 	if (ifgr->ifgr_len == 0) {
2920 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2921 			ifgr->ifgr_len += sizeof(ifgrq);
2922 		return (0);
2923 	}
2924 
2925 	len = ifgr->ifgr_len;
2926 	ifgp = ifgr->ifgr_groups;
2927 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2928 		if (len < sizeof(ifgrq))
2929 			return (EINVAL);
2930 		bzero(&ifgrq, sizeof ifgrq);
2931 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2932 		    sizeof(ifgrq.ifgrq_group));
2933 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2934 		    sizeof(struct ifg_req))))
2935 			return (error);
2936 		len -= sizeof(ifgrq);
2937 		ifgp++;
2938 	}
2939 
2940 	return (0);
2941 }
2942 
/*
 * Called on routing table changes: if a default route (IPv4 or IPv6)
 * was involved, rebuild the "egress" interface group.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		/* 0.0.0.0 with an empty/zero mask is the default route */
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		/* :: with an empty/zero mask is the IPv6 default route */
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2964 
/*
 * Recompute the "egress" interface group: drop all current members,
 * then re-add every interface that carries a default route (IPv4
 * and, if configured, IPv6) in routing table 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* find the egress group, if it exists */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* flush current members; if_delgroup() may free entries (and ifg) */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* add every interface with an IPv4 default route */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* likewise for IPv6 default routes */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3014 
3015 /*
3016  * Set/clear promiscuous mode on interface ifp based on the truth value
3017  * of pswitch.  The calls are reference counted so that only the first
3018  * "on" request actually has an effect, as does the final "off" request.
3019  * Results are undefined if the "off" and "on" requests are not matched.
3020  */
3021 int
3022 ifpromisc(struct ifnet *ifp, int pswitch)
3023 {
3024 	struct ifreq ifr;
3025 	unsigned short oif_flags;
3026 	int oif_pcount, error;
3027 
3028 	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */
3029 
3030 	oif_flags = ifp->if_flags;
3031 	oif_pcount = ifp->if_pcount;
3032 	if (pswitch) {
3033 		if (ifp->if_pcount++ != 0)
3034 			return (0);
3035 		ifp->if_flags |= IFF_PROMISC;
3036 	} else {
3037 		if (--ifp->if_pcount > 0)
3038 			return (0);
3039 		ifp->if_flags &= ~IFF_PROMISC;
3040 	}
3041 
3042 	if ((ifp->if_flags & IFF_UP) == 0)
3043 		return (0);
3044 
3045 	memset(&ifr, 0, sizeof(ifr));
3046 	ifr.ifr_flags = ifp->if_flags;
3047 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
3048 	if (error) {
3049 		ifp->if_flags = oif_flags;
3050 		ifp->if_pcount = oif_pcount;
3051 	}
3052 
3053 	return (error);
3054 }
3055 
/* Append address ifa to ifp's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
3061 
/* Unlink address ifa from ifp's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
3067 
/*
 * Overwrite ifa's broadcast address with sa.  Only fixed-size
 * updates are supported: the new address must have exactly the same
 * length as the current one.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3075 
3076 #ifdef DDB
3077 /* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* walk every address of every interface and print it */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
3106 #endif /* DDB */
3107 
/*
 * The interface's link-layer address has changed: cycle the
 * interface through down/up via SIOCSIFFLAGS so the driver sees the
 * new address, and refresh the IPv6 link-local address.  The
 * original up/down state is restored before returning.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;

	NET_ASSERT_LOCKED();	/* for ioctl and in6 */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* ...and come up so the driver reinitializes with the new lladdr */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			/* purge the old link-local and derive a new one */
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
}
3154 
/* Register task t on ifp's address hook list (run by if_addrhooks_run()). */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3162 
/* Remove task t from ifp's address hook list. */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3170 
/* Run all tasks registered on ifp's address hook list. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3176 
3177 void
3178 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3179 {
3180 	extern int ticks;
3181 
3182 	memset(rxr, 0, sizeof(*rxr));
3183 
3184 	rxr->rxr_adjusted = ticks;
3185 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3186 	rxr->rxr_hwm = hwm;
3187 }
3188 
3189 static inline void
3190 if_rxr_adjust_cwm(struct if_rxring *rxr)
3191 {
3192 	extern int ticks;
3193 
3194 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3195 		return;
3196 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3197 		rxr->rxr_cwm++;
3198 
3199 	rxr->rxr_adjusted = ticks;
3200 }
3201 
3202 void
3203 if_rxr_livelocked(struct if_rxring *rxr)
3204 {
3205 	extern int ticks;
3206 
3207 	if (ticks - rxr->rxr_adjusted >= 1) {
3208 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3209 			rxr->rxr_cwm--;
3210 
3211 		rxr->rxr_adjusted = ticks;
3212 	}
3213 }
3214 
3215 u_int
3216 if_rxr_get(struct if_rxring *rxr, u_int max)
3217 {
3218 	extern int ticks;
3219 	u_int diff;
3220 
3221 	if (ticks - rxr->rxr_adjusted >= 1) {
3222 		/* we're free to try for an adjustment */
3223 		if_rxr_adjust_cwm(rxr);
3224 	}
3225 
3226 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3227 		return (0);
3228 
3229 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3230 	rxr->rxr_alive += diff;
3231 
3232 	return (diff);
3233 }
3234 
/*
 * Copy up to t rx-ring info records from e out to the userland
 * if_rxrinfo structure at uifri, limited by the space userland
 * provided, and report the true total number of rings in ifri_total.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* never copy out more entries than userland made room for */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3257 
/*
 * Convenience wrapper for drivers with a single rx ring: package the
 * ring's name, slot size and state into an if_rxring_info record and
 * hand it to if_rxr_info_ioctl().
 */
int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}
3274 
3275 /*
3276  * Network stack input queues.
3277  */
3278 
/*
 * Initialize a network stack input queue: an mbuf queue of at most
 * maxlen packets, drained by scheduling the soft interrupt isr.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3285 
3286 int
3287 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3288 {
3289 	int rv;
3290 
3291 	rv = mq_enqueue(&niq->ni_q, m);
3292 	if (rv == 0)
3293 		schednetisr(niq->ni_isr);
3294 	else
3295 		if_congestion();
3296 
3297 	return (rv);
3298 }
3299 
3300 int
3301 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3302 {
3303 	int rv;
3304 
3305 	rv = mq_enlist(&niq->ni_q, ml);
3306 	if (rv == 0)
3307 		schednetisr(niq->ni_isr);
3308 	else
3309 		if_congestion();
3310 
3311 	return (rv);
3312 }
3313 
/* Central panic for address families no caller knows how to handle. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3319 
3320 struct taskq *
3321 net_tq(unsigned int ifindex)
3322 {
3323 	struct taskq *t = NULL;
3324 	static int nettaskqs;
3325 
3326 	if (nettaskqs == 0)
3327 		nettaskqs = min(NET_TASKQ, ncpus);
3328 
3329 	t = nettqmp[ifindex % nettaskqs];
3330 
3331 	return (t);
3332 }
3333