xref: /openbsd-src/sys/net/if.c (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1 /*	$OpenBSD: if.c,v 1.621 2020/12/15 03:43:34 dlg Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "if_wg.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/percpu.h>
88 #include <sys/proc.h>
89 #include <sys/stdint.h>	/* uintptr_t */
90 #include <sys/rwlock.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include <netinet/in.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/igmp.h>
101 #ifdef MROUTING
102 #include <netinet/ip_mroute.h>
103 #endif
104 
105 #ifdef INET6
106 #include <netinet6/in6_var.h>
107 #include <netinet6/in6_ifattach.h>
108 #include <netinet6/nd6.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #endif
112 
113 #ifdef MPLS
114 #include <netmpls/mpls.h>
115 #endif
116 
117 #if NBPFILTER > 0
118 #include <net/bpf.h>
119 #endif
120 
121 #if NBRIDGE > 0
122 #include <net/if_bridge.h>
123 #endif
124 
125 #if NCARP > 0
126 #include <netinet/ip_carp.h>
127 #endif
128 
129 #if NPF > 0
130 #include <net/pfvar.h>
131 #endif
132 
133 #include <sys/device.h>
134 
/* forward declarations of file-local functions */
void	if_attachsetup(struct ifnet *);
void	if_attachdomain(struct ifnet *);
void	if_attach_common(struct ifnet *);
int	if_createrdomain(int, struct ifnet *);
int	if_setrdomain(struct ifnet *, int);
void	if_slowtimo(void *);

void	if_detached_qstart(struct ifqueue *);
int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);

int	ifioctl_get(u_long, caddr_t);
int	ifconf(caddr_t);
static int
	if_sffpage_check(const caddr_t);

int	if_getgroup(caddr_t, struct ifnet *);
int	if_getgroupmembers(caddr_t);
int	if_getgroupattribs(caddr_t);
int	if_setgroupattribs(caddr_t);
int	if_getgrouplist(caddr_t);

void	if_linkstate(struct ifnet *);
void	if_linkstate_task(void *);

int	if_clone_list(struct if_clonereq *);
struct if_clone	*if_clone_lookup(const char *, int *);

int	if_group_egress_build(void);

void	if_watchdog_task(void *);

void	if_netisr(void *);

#ifdef DDB
void	ifa_print_all(void);
#endif

void	if_qstart_compat(struct ifqueue *);
173 
174 /*
175  * interface index map
176  *
177  * the kernel maintains a mapping of interface indexes to struct ifnet
178  * pointers.
179  *
180  * the map is an array of struct ifnet pointers prefixed by an if_map
181  * structure. the if_map structure stores the length of its array.
182  *
183  * as interfaces are attached to the system, the map is grown on demand
184  * up to USHRT_MAX entries.
185  *
186  * interface index 0 is reserved and represents no interface. this
187  * supports the use of the interface index as the scope for IPv6 link
188  * local addresses, where scope 0 means no scope has been specified.
189  * it also supports the use of interface index as the unique identifier
190  * for network interfaces in SNMP applications as per RFC2863. therefore
191  * if_get(0) returns NULL.
192  */
193 
194 void if_ifp_dtor(void *, void *);
195 void if_map_dtor(void *, void *);
196 struct ifnet *if_ref(struct ifnet *);
197 
198 /*
199  * struct if_map
200  *
201  * bounded array of ifnet srp pointers used to fetch references of live
202  * interfaces with if_get().
203  */
204 
205 struct if_map {
206 	unsigned long		 limit;
207 	/* followed by limit ifnet srp pointers */
208 };
209 
210 /*
211  * struct if_idxmap
212  *
213  * infrastructure to manage updates and accesses to the current if_map.
214  */
215 
216 struct if_idxmap {
217 	unsigned int		 serial;
218 	unsigned int		 count;
219 	struct srp		 map;
220 };
221 
222 void	if_idxmap_init(unsigned int);
223 void	if_idxmap_insert(struct ifnet *);
224 void	if_idxmap_remove(struct ifnet *);
225 
/* list of all interface groups in the system */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/* registered interface cloners; modifications serialised by if_cloners_lock */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;

struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonerlock");

/* hooks should only be added, deleted, and run from a process context */
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void	if_hooks_run(struct task_list *);

int	ifq_congestion;

/* bitmask of pending legacy soft interrupts, consumed by if_netisr() */
int		 netisr;

#define	NET_TASKQ	1
struct taskq	*nettqmp[NET_TASKQ];	/* softnet taskqs, see ifinit() */

/* task that runs if_netisr() to service the netisr bits */
struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
251 
252 /*
253  * Network interface utility routines.
254  */
255 void
256 ifinit(void)
257 {
258 	unsigned int	i;
259 
260 	/*
261 	 * most machines boot with 4 or 5 interfaces, so size the initial map
262 	 * to accomodate this
263 	 */
264 	if_idxmap_init(8);
265 
266 	for (i = 0; i < NET_TASKQ; i++) {
267 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
268 		if (nettqmp[i] == NULL)
269 			panic("unable to create network taskq %d", i);
270 	}
271 }
272 
273 static struct if_idxmap if_idxmap = {
274 	0,
275 	0,
276 	SRP_INITIALIZER()
277 };
278 
/* srp garbage collectors for interface refs and retired index maps */
struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of all interfaces in the system, in attach order */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
283 
284 void
285 if_idxmap_init(unsigned int limit)
286 {
287 	struct if_map *if_map;
288 	struct srp *map;
289 	unsigned int i;
290 
291 	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
292 
293 	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
294 	    M_IFADDR, M_WAITOK);
295 
296 	if_map->limit = limit;
297 	map = (struct srp *)(if_map + 1);
298 	for (i = 0; i < limit; i++)
299 		srp_init(&map[i]);
300 
301 	/* this is called early so there's nothing to race with */
302 	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
303 }
304 
/*
 * Assign ifp an interface index and publish it in the global index
 * map.  Called with the kernel lock held; doubles the map when the
 * chosen index falls outside the current limit.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	/* this reference is released by if_idxmap_remove() */
	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* indexes wrap at USHRT_MAX; index 0 is never handed out */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		/* grow: build a map twice the size and copy the refs over */
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* the new slot takes its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		/* initialise the newly added upper half of the array */
		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* old map is destroyed by if_map_dtor() via the srp gc */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
368 
/*
 * Remove ifp from the interface index map and wait until every other
 * reference to it has been released.  After this returns, if_get()
 * can no longer find the interface.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* clearing the slot drops the map's reference via if_ifp_gc */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
394 
/*
 * srp_gc callback for ifnet pointers held in an index map slot:
 * release the reference taken by if_ref() when it was installed.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
400 
401 void
402 if_map_dtor(void *null, void *m)
403 {
404 	struct if_map *if_map = m;
405 	struct srp *map = (struct srp *)(if_map + 1);
406 	unsigned int i;
407 
408 	/*
409 	 * dont need to serialize the use of update_locked since this is
410 	 * the last reference to this map. there's nothing to race against.
411 	 */
412 	for (i = 0; i < if_map->limit; i++)
413 		srp_update_locked(&if_ifp_gc, &map[i], NULL);
414 
415 	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
416 }
417 
418 /*
419  * Attach an interface to the
420  * list of "active" interfaces.
421  */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	/* every interface is a member of the "all" group */
	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	/* assigns ifp->if_index, which is needed below */
	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	/* the tasks carry the index, not the pointer, so stale tasks fail
	 * an if_get() lookup instead of touching a freed interface */
	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
452 
453 /*
454  * Allocate the link level name for the specified interface.  This
455  * is an attachment helper.  It must be called after ifp->if_addrlen
456  * is initialized, which may not be the case when if_attach() is
457  * called.
458  */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	/* header plus interface name, before the link-level address */
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* round up to the next multiple of sizeof(long) */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	/* sdl_data holds the name; the address (sdl_alen) follows it */
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
490 
491 /*
492  * Free the link level name for the specified interface.  This is
493  * a detach helper.  This is called from if_detach() or from
494  * link layer type specific detach functions.
495  */
496 void
497 if_free_sadl(struct ifnet *ifp)
498 {
499 	if (ifp->if_sadl == NULL)
500 		return;
501 
502 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
503 	ifp->if_sadl = NULL;
504 }
505 
506 void
507 if_attachdomain(struct ifnet *ifp)
508 {
509 	struct domain *dp;
510 	int i, s;
511 
512 	s = splnet();
513 
514 	/* address family dependent data region */
515 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
516 	for (i = 0; (dp = domains[i]) != NULL; i++) {
517 		if (dp->dom_ifattach)
518 			ifp->if_afdata[dp->dom_family] =
519 			    (*dp->dom_ifattach)(ifp);
520 	}
521 
522 	splx(s);
523 }
524 
525 void
526 if_attachhead(struct ifnet *ifp)
527 {
528 	if_attach_common(ifp);
529 	NET_LOCK();
530 	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
531 	if_attachsetup(ifp);
532 	NET_UNLOCK();
533 }
534 
535 void
536 if_attach(struct ifnet *ifp)
537 {
538 	if_attach_common(ifp);
539 	NET_LOCK();
540 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
541 	if_attachsetup(ifp);
542 	NET_UNLOCK();
543 }
544 
545 void
546 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
547 {
548 	struct ifqueue **map;
549 	struct ifqueue *ifq;
550 	int i;
551 
552 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
553 	KASSERT(nqs != 0);
554 
555 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
556 
557 	ifp->if_snd.ifq_softc = NULL;
558 	map[0] = &ifp->if_snd;
559 
560 	for (i = 1; i < nqs; i++) {
561 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
562 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
563 		ifq_init(ifq, ifp, i);
564 		map[i] = ifq;
565 	}
566 
567 	ifp->if_ifqs = map;
568 	ifp->if_nifqs = nqs;
569 }
570 
571 void
572 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
573 {
574 	struct ifiqueue **map;
575 	struct ifiqueue *ifiq;
576 	unsigned int i;
577 
578 	KASSERT(niqs != 0);
579 
580 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
581 
582 	ifp->if_rcv.ifiq_softc = NULL;
583 	map[0] = &ifp->if_rcv;
584 
585 	for (i = 1; i < niqs; i++) {
586 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
587 		ifiq_init(ifiq, ifp, i);
588 		map[i] = ifiq;
589 	}
590 
591 	ifp->if_iqs = map;
592 	ifp->if_niqs = niqs;
593 }
594 
/*
 * Initialisation shared by if_attach() and if_attachhead(): set up
 * the send/receive queues, hook lists and default handlers of a
 * freshly declared interface.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	/* non-MPSAFE drivers provide if_start; MPSAFE drivers if_qstart */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* one send queue by default; if_attach_queues() can add more */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* one receive queue by default; if_attach_iqueues() can add more */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* fall back to the generic handlers when the driver set none */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;
}
638 
/*
 * Switch the queueing discipline (ifq_ops) used by an interface.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
653 
/*
 * Entry point for restarting transmission on a legacy (non-MPSAFE)
 * driver; funnels through the compat wrapper on the first send queue.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy if_start routines expect the kernel lock and splnet */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
681 
/*
 * Queue a packet for transmission on ifp, diverting it to pf's
 * delay machinery or to the bridge when applicable.  Consumes the
 * mbuf; returns 0 or an errno.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	/* a stale rx timestamp must not leak into the tx path */
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* M_PROTO1 marks packets the bridge has already handled */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
707 
708 int
709 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
710 {
711 	struct ifqueue *ifq = &ifp->if_snd;
712 	int error;
713 
714 	if (ifp->if_nifqs > 1) {
715 		unsigned int idx;
716 
717 		/*
718 		 * use the operations on the first ifq to pick which of
719 		 * the array gets this mbuf.
720 		 */
721 
722 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
723 		ifq = ifp->if_ifqs[idx];
724 	}
725 
726 	error = ifq_enqueue(ifq, m);
727 	if (error)
728 		return (error);
729 
730 	ifq_start(ifq);
731 
732 	return (0);
733 }
734 
/*
 * Hand a list of received packets from a driver to the stack via
 * the interface's first receive queue.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
740 
/*
 * Feed a locally destined packet of address family af back into the
 * input side of ifp.  Consumes the mbuf; returns 0, or EAFNOSUPPORT
 * for address families the stack cannot handle.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destinated to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* looped packets count as both sent and received */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
792 
793 int
794 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
795 {
796 	struct ifiqueue *ifiq;
797 	unsigned int flow = 0;
798 
799 	m->m_pkthdr.ph_family = af;
800 	m->m_pkthdr.ph_ifidx = ifp->if_index;
801 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
802 
803 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
804 		flow = m->m_pkthdr.ph_flowid;
805 
806 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
807 
808 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
809 }
810 
/*
 * Pass every packet on ml to ifp's input handler (e.g. ether_input),
 * holding the NET_LOCK around the whole list for the reasons below.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* packets on cloned (pseudo) interfaces don't add useful entropy */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists and the socket layer.
	 */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK();
}
839 
/*
 * Input path for virtual interfaces: account the packet, give bpf a
 * look at it, and hand it directly to the interface's input handler,
 * bypassing the receive queues.  Consumes the mbuf.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means bpf wants the packet dropped */
		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	(*ifp->if_input)(ifp, m);
}
865 
/*
 * Softnet task that services the legacy netisr protocol handlers:
 * repeatedly snapshot and clear the pending bits in the global
 * netisr word and run the matching handlers.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
		/* accumulate all bits seen so pfsync runs once, below */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
928 
/*
 * Run every task on a hook list.  The list mutex is dropped around
 * each callback; a stack-allocated cursor (a task with a NULL
 * t_func) keeps our position in the list so concurrent insertions
 * and removals stay safe while the mutex is released.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		/* mark our place before letting go of the mutex */
		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
957 
/*
 * Run the detach hooks so pseudo-drivers stacked on this interface
 * can undo their changes before it disappears.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
971 
/*
 * Register a detach hook.  Hooks are inserted at the head so that
 * if_deactivate() runs them in reverse order of registration.
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
979 
/*
 * Unregister a previously added detach hook.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
987 
988 /*
989  * Detach an interface from everything in the kernel.  Also deallocate
990  * private resources.
991  */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	ifq_clr_oactive(&ifp->if_snd);

	/* Other CPUs must not have a reference before we start destroying. */
	if_idxmap_remove(ifp);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* replace the handlers with stubs so late callers fail safely */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Remove the interface from the list of all interfaces.  */
	TAILQ_REMOVE(&ifnet, ifp, if_list);

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	/* all hooks must have been removed by their owners by now */
	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

	/* let each domain tear down its per-interface data */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* free the send queues; slot 0 is the embedded if_snd */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* free the receive queues; slot 0 is the embedded if_rcv */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1101 
1102 /*
1103  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1104  */
1105 int
1106 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1107 {
1108 	struct ifnet *ifp;
1109 	int connected = 0;
1110 
1111 	ifp = if_get(ifidx);
1112 	if (ifp == NULL)
1113 		return (0);
1114 
1115 	if (ifp0->if_index == ifp->if_index)
1116 		connected = 1;
1117 
1118 #if NBRIDGE > 0
1119 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1120 		connected = 1;
1121 #endif
1122 #if NCARP > 0
1123 	if ((ifp0->if_type == IFT_CARP &&
1124 	    ifp0->if_carpdevidx == ifp->if_index) ||
1125 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1126 		connected = 1;
1127 #endif
1128 
1129 	if_put(ifp);
1130 	return (connected);
1131 }
1132 
1133 /*
1134  * Create a clone network interface.
1135  */
/*
 * Create the clone interface named "name" in routing domain rdomain.
 * Returns 0 on success, EINVAL for an unknown cloner, EEXIST if the
 * interface already exists, or the cloner's own error.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	rw_enter_write(&if_cloners_lock);

	if (ifunit(name) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	/* the cloner is expected to have attached an interface by "name" */
	if (ret != 0 || (ifp = ifunit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	/* every clone becomes a member of its cloner's group */
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1169 
1170 /*
1171  * Destroy a clone network interface.
1172  */
/*
 * Destroy the clone interface named "name".  Returns 0 on success,
 * EINVAL for an unknown cloner, EOPNOTSUPP when the cloner cannot
 * destroy interfaces, ENXIO when no such interface exists, or the
 * cloner's own error.
 */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	rw_enter_write(&if_cloners_lock);

	ifp = ifunit(name);
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	/* bring the interface down before tearing it apart */
	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1209 
1210 /*
1211  * Look up a network interface cloner.
1212  */
1213 struct if_clone *
1214 if_clone_lookup(const char *name, int *unitp)
1215 {
1216 	struct if_clone *ifc;
1217 	const char *cp;
1218 	int unit;
1219 
1220 	/* separate interface name from unit */
1221 	for (cp = name;
1222 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1223 	    cp++)
1224 		continue;
1225 
1226 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1227 		return (NULL);	/* No name or unit number */
1228 
1229 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1230 		return (NULL);	/* unit number 0 padded */
1231 
1232 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1233 		if (strlen(ifc->ifc_name) == cp - name &&
1234 		    !strncmp(name, ifc->ifc_name, cp - name))
1235 			break;
1236 	}
1237 
1238 	if (ifc == NULL)
1239 		return (NULL);
1240 
1241 	unit = 0;
1242 	while (cp - name < IFNAMSIZ && *cp) {
1243 		if (*cp < '0' || *cp > '9' ||
1244 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1245 			/* Bogus unit number. */
1246 			return (NULL);
1247 		}
1248 		unit = (unit * 10) + (*cp++ - '0');
1249 	}
1250 
1251 	if (unitp != NULL)
1252 		*unitp = unit;
1253 	return (ifc);
1254 }
1255 
1256 /*
1257  * Register a network interface cloner.
1258  */
1259 void
1260 if_clone_attach(struct if_clone *ifc)
1261 {
1262 	/*
1263 	 * we are called at kernel boot by main(), when pseudo devices are
1264 	 * being attached. The main() is the only guy which may alter the
1265 	 * if_cloners. While system is running and main() is done with
1266 	 * initialization, the if_cloners becomes immutable.
1267 	 */
1268 	KASSERT(pdevinit_done == 0);
1269 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1270 	if_cloners_count++;
1271 }
1272 
1273 /*
1274  * Provide list of interface cloners to userspace.
1275  */
1276 int
1277 if_clone_list(struct if_clonereq *ifcr)
1278 {
1279 	char outbuf[IFNAMSIZ], *dst;
1280 	struct if_clone *ifc;
1281 	int count, error = 0;
1282 
1283 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1284 		/* Just asking how many there are. */
1285 		ifcr->ifcr_total = if_cloners_count;
1286 		return (0);
1287 	}
1288 
1289 	if (ifcr->ifcr_count < 0)
1290 		return (EINVAL);
1291 
1292 	ifcr->ifcr_total = if_cloners_count;
1293 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1294 
1295 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1296 		if (count == 0)
1297 			break;
1298 		bzero(outbuf, sizeof outbuf);
1299 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1300 		error = copyout(outbuf, dst, IFNAMSIZ);
1301 		if (error)
1302 			break;
1303 		count--;
1304 		dst += IFNAMSIZ;
1305 	}
1306 
1307 	return (error);
1308 }
1309 
1310 /*
1311  * set queue congestion marker
1312  */
1313 void
1314 if_congestion(void)
1315 {
1316 	extern int ticks;
1317 
1318 	ifq_congestion = ticks;
1319 }
1320 
1321 int
1322 if_congested(void)
1323 {
1324 	extern int ticks;
1325 	int diff;
1326 
1327 	diff = ticks - ifq_congestion;
1328 	if (diff < 0) {
1329 		ifq_congestion = ticks - hz;
1330 		return (0);
1331 	}
1332 
1333 	return (diff <= (hz / 100));
1334 }
1335 
/*
 * Compare two sockaddrs for byte equality over the length of the
 * first one (a1->sa_len bytes).
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1339 
1340 /*
1341  * Locate an interface based on a complete address.
1342  */
1343 struct ifaddr *
1344 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1345 {
1346 	struct ifnet *ifp;
1347 	struct ifaddr *ifa;
1348 	u_int rdomain;
1349 
1350 	rdomain = rtable_l2(rtableid);
1351 	KERNEL_LOCK();
1352 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1353 		if (ifp->if_rdomain != rdomain)
1354 			continue;
1355 
1356 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1357 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1358 				continue;
1359 
1360 			if (equal(addr, ifa->ifa_addr)) {
1361 				KERNEL_UNLOCK();
1362 				return (ifa);
1363 			}
1364 		}
1365 	}
1366 	KERNEL_UNLOCK();
1367 	return (NULL);
1368 }
1369 
1370 /*
1371  * Locate the point to point interface with a given destination address.
1372  */
1373 struct ifaddr *
1374 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1375 {
1376 	struct ifnet *ifp;
1377 	struct ifaddr *ifa;
1378 
1379 	rdomain = rtable_l2(rdomain);
1380 	KERNEL_LOCK();
1381 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1382 		if (ifp->if_rdomain != rdomain)
1383 			continue;
1384 		if (ifp->if_flags & IFF_POINTOPOINT) {
1385 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1386 				if (ifa->ifa_addr->sa_family !=
1387 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1388 					continue;
1389 				if (equal(addr, ifa->ifa_dstaddr)) {
1390 					KERNEL_UNLOCK();
1391 					return (ifa);
1392 				}
1393 			}
1394 		}
1395 	}
1396 	KERNEL_UNLOCK();
1397 	return (NULL);
1398 }
1399 
1400 /*
1401  * Find an interface address specific to an interface best matching
1402  * a given address.
1403  */
1404 struct ifaddr *
1405 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1406 {
1407 	struct ifaddr *ifa;
1408 	char *cp, *cp2, *cp3;
1409 	char *cplim;
1410 	struct ifaddr *ifa_maybe = NULL;
1411 	u_int af = addr->sa_family;
1412 
1413 	if (af >= AF_MAX)
1414 		return (NULL);
1415 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1416 		if (ifa->ifa_addr->sa_family != af)
1417 			continue;
1418 		if (ifa_maybe == NULL)
1419 			ifa_maybe = ifa;
1420 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1421 			if (equal(addr, ifa->ifa_addr) ||
1422 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1423 				return (ifa);
1424 			continue;
1425 		}
1426 		cp = addr->sa_data;
1427 		cp2 = ifa->ifa_addr->sa_data;
1428 		cp3 = ifa->ifa_netmask->sa_data;
1429 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1430 		for (; cp3 < cplim; cp3++)
1431 			if ((*cp++ ^ *cp2++) & *cp3)
1432 				break;
1433 		if (cp3 == cplim)
1434 			return (ifa);
1435 	}
1436 	return (ifa_maybe);
1437 }
1438 
/* Default no-op if_rtrequest handler for interfaces. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1443 
1444 /*
1445  * Default action when installing a local route on a point-to-point
1446  * interface.
1447  */
1448 void
1449 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1450 {
1451 	struct ifnet *lo0ifp;
1452 	struct ifaddr *ifa, *lo0ifa;
1453 
1454 	switch (req) {
1455 	case RTM_ADD:
1456 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1457 			break;
1458 
1459 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1460 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1461 			    rt_key(rt)->sa_len) == 0)
1462 				break;
1463 		}
1464 
1465 		if (ifa == NULL)
1466 			break;
1467 
1468 		KASSERT(ifa == rt->rt_ifa);
1469 
1470 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1471 		KASSERT(lo0ifp != NULL);
1472 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1473 			if (lo0ifa->ifa_addr->sa_family ==
1474 			    ifa->ifa_addr->sa_family)
1475 				break;
1476 		}
1477 		if_put(lo0ifp);
1478 
1479 		if (lo0ifa == NULL)
1480 			break;
1481 
1482 		rt->rt_flags &= ~RTF_LLINFO;
1483 		break;
1484 	case RTM_DELETE:
1485 	case RTM_RESOLVE:
1486 	default:
1487 		break;
1488 	}
1489 }
1490 
1491 
1492 /*
1493  * Bring down all interfaces
1494  */
1495 void
1496 if_downall(void)
1497 {
1498 	struct ifreq ifrq;	/* XXX only partly built */
1499 	struct ifnet *ifp;
1500 
1501 	NET_LOCK();
1502 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1503 		if ((ifp->if_flags & IFF_UP) == 0)
1504 			continue;
1505 		if_down(ifp);
1506 		ifrq.ifr_flags = ifp->if_flags;
1507 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1508 	}
1509 	NET_UNLOCK();
1510 }
1511 
1512 /*
1513  * Mark an interface down and notify protocols of
1514  * the transition.
1515  */
1516 void
1517 if_down(struct ifnet *ifp)
1518 {
1519 	NET_ASSERT_LOCKED();
1520 
1521 	ifp->if_flags &= ~IFF_UP;
1522 	getmicrotime(&ifp->if_lastchange);
1523 	ifq_purge(&ifp->if_snd);
1524 
1525 	if_linkstate(ifp);
1526 }
1527 
1528 /*
1529  * Mark an interface up and notify protocols of
1530  * the transition.
1531  */
1532 void
1533 if_up(struct ifnet *ifp)
1534 {
1535 	NET_ASSERT_LOCKED();
1536 
1537 	ifp->if_flags |= IFF_UP;
1538 	getmicrotime(&ifp->if_lastchange);
1539 
1540 #ifdef INET6
1541 	/* Userland expects the kernel to set ::1 on default lo(4). */
1542 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1543 		in6_ifattach(ifp);
1544 #endif
1545 
1546 	if_linkstate(ifp);
1547 }
1548 
1549 /*
1550  * Notify userland, the routing table and hooks owner of
1551  * a link-state transition.
1552  */
1553 void
1554 if_linkstate_task(void *xifidx)
1555 {
1556 	unsigned int ifidx = (unsigned long)xifidx;
1557 	struct ifnet *ifp;
1558 
1559 	KERNEL_LOCK();
1560 	NET_LOCK();
1561 
1562 	ifp = if_get(ifidx);
1563 	if (ifp != NULL)
1564 		if_linkstate(ifp);
1565 	if_put(ifp);
1566 
1567 	NET_UNLOCK();
1568 	KERNEL_UNLOCK();
1569 }
1570 
/*
 * Report a link-state change: send a routing message, update routes
 * bound to the interface and run the registered link-state hooks.
 * Must be called with the net lock held.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	rtm_ifchg(ifp);
	rt_if_track(ifp);

	if_hooks_run(&ifp->if_linkstatehooks);
}
1581 
/* Register a task to run on link-state changes of ``ifp''. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1589 
/* Remove a previously registered link-state hook task. */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1597 
1598 /*
1599  * Schedule a link state change task.
1600  */
1601 void
1602 if_link_state_change(struct ifnet *ifp)
1603 {
1604 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1605 }
1606 
1607 /*
1608  * Handle interface watchdog timer routine.  Called
1609  * from softclock, we decrement timer (if set) and
1610  * call the appropriate interface routine on expiration.
1611  */
1612 void
1613 if_slowtimo(void *arg)
1614 {
1615 	struct ifnet *ifp = arg;
1616 	int s = splnet();
1617 
1618 	if (ifp->if_watchdog) {
1619 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1620 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1621 		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
1622 	}
1623 	splx(s);
1624 }
1625 
/*
 * Task body that invokes the driver's watchdog routine.  The
 * interface index is passed through the task argument; the interface
 * may be gone by the time the task runs, hence the if_get().
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1646 
1647 /*
1648  * Map interface name to interface structure pointer.
1649  */
1650 struct ifnet *
1651 ifunit(const char *name)
1652 {
1653 	struct ifnet *ifp;
1654 
1655 	KERNEL_ASSERT_LOCKED();
1656 
1657 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1658 		if (strcmp(ifp->if_xname, name) == 0)
1659 			return (ifp);
1660 	}
1661 	return (NULL);
1662 }
1663 
1664 /*
1665  * Map interface index to interface structure pointer.
1666  */
1667 struct ifnet *
1668 if_get(unsigned int index)
1669 {
1670 	struct srp_ref sr;
1671 	struct if_map *if_map;
1672 	struct srp *map;
1673 	struct ifnet *ifp = NULL;
1674 
1675 	if_map = srp_enter(&sr, &if_idxmap.map);
1676 	if (index < if_map->limit) {
1677 		map = (struct srp *)(if_map + 1);
1678 
1679 		ifp = srp_follow(&sr, &map[index]);
1680 		if (ifp != NULL) {
1681 			KASSERT(ifp->if_index == index);
1682 			if_ref(ifp);
1683 		}
1684 	}
1685 	srp_leave(&sr);
1686 
1687 	return (ifp);
1688 }
1689 
/*
 * Take an additional reference on ``ifp'' and return it, so the
 * call can be used inline in an assignment.
 */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1697 
1698 void
1699 if_put(struct ifnet *ifp)
1700 {
1701 	if (ifp == NULL)
1702 		return;
1703 
1704 	refcnt_rele_wake(&ifp->if_refcnt);
1705 }
1706 
/*
 * Set the link-layer (ethernet) address of ``ifp'' to ``lladdr''.
 * Updates both the arpcom copy and the sockaddr_dl attached to the
 * interface.  NOTE(review): the cast assumes ifp is embedded in a
 * struct arpcom, which callers appear to guarantee via the if_type
 * checks in ifioctl() — confirm before adding new callers.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1718 
/*
 * Create routing domain ``rdomain'', including its loopback
 * interface (lo<rdomain>).  The routing table is created on demand
 * and must be empty.  EEXIST from the loopback clone create is
 * tolerated when ``ifp'' itself is that loopback.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	/* bind the new loopback to the rdomain and move it in */
	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1745 
/*
 * Move ``ifp'' into routing domain ``rdomain''.
 *
 * The rdomain's loopback interface may not be moved out of it.  The
 * target routing table must already exist and be a routing domain
 * (not a mere table).  Moving an interface tears down all of its
 * address configuration; if it was up it is brought back up at the
 * end so the automatic configuration is redone.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* the rdomain's own loopback interface must stay put */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	/* a driver answering anything but ENOTTY handled it itself */
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1809 
1810 /*
1811  * Interface ioctls.
1812  */
1813 int
1814 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1815 {
1816 	struct ifnet *ifp;
1817 	struct ifreq *ifr = (struct ifreq *)data;
1818 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1819 	struct if_afreq *ifar = (struct if_afreq *)data;
1820 	char ifdescrbuf[IFDESCRSIZE];
1821 	char ifrtlabelbuf[RTLABEL_LEN];
1822 	int s, error = 0, oif_xflags;
1823 	size_t bytesdone;
1824 	unsigned short oif_flags;
1825 
1826 	switch (cmd) {
1827 	case SIOCIFCREATE:
1828 		if ((error = suser(p)) != 0)
1829 			return (error);
1830 		error = if_clone_create(ifr->ifr_name, 0);
1831 		return (error);
1832 	case SIOCIFDESTROY:
1833 		if ((error = suser(p)) != 0)
1834 			return (error);
1835 		error = if_clone_destroy(ifr->ifr_name);
1836 		return (error);
1837 	case SIOCSIFGATTR:
1838 		if ((error = suser(p)) != 0)
1839 			return (error);
1840 		NET_LOCK();
1841 		error = if_setgroupattribs(data);
1842 		NET_UNLOCK();
1843 		return (error);
1844 	case SIOCGIFCONF:
1845 	case SIOCIFGCLONERS:
1846 	case SIOCGIFGMEMB:
1847 	case SIOCGIFGATTR:
1848 	case SIOCGIFGLIST:
1849 	case SIOCGIFFLAGS:
1850 	case SIOCGIFXFLAGS:
1851 	case SIOCGIFMETRIC:
1852 	case SIOCGIFMTU:
1853 	case SIOCGIFHARDMTU:
1854 	case SIOCGIFDATA:
1855 	case SIOCGIFDESCR:
1856 	case SIOCGIFRTLABEL:
1857 	case SIOCGIFPRIORITY:
1858 	case SIOCGIFRDOMAIN:
1859 	case SIOCGIFGROUP:
1860 	case SIOCGIFLLPRIO:
1861 		return (ifioctl_get(cmd, data));
1862 	}
1863 
1864 	ifp = ifunit(ifr->ifr_name);
1865 	if (ifp == NULL)
1866 		return (ENXIO);
1867 	oif_flags = ifp->if_flags;
1868 	oif_xflags = ifp->if_xflags;
1869 
1870 	switch (cmd) {
1871 	case SIOCIFAFATTACH:
1872 	case SIOCIFAFDETACH:
1873 		if ((error = suser(p)) != 0)
1874 			break;
1875 		NET_LOCK();
1876 		switch (ifar->ifar_af) {
1877 		case AF_INET:
1878 			/* attach is a noop for AF_INET */
1879 			if (cmd == SIOCIFAFDETACH)
1880 				in_ifdetach(ifp);
1881 			break;
1882 #ifdef INET6
1883 		case AF_INET6:
1884 			if (cmd == SIOCIFAFATTACH)
1885 				error = in6_ifattach(ifp);
1886 			else
1887 				in6_ifdetach(ifp);
1888 			break;
1889 #endif /* INET6 */
1890 		default:
1891 			error = EAFNOSUPPORT;
1892 		}
1893 		NET_UNLOCK();
1894 		break;
1895 
1896 	case SIOCSIFFLAGS:
1897 		if ((error = suser(p)) != 0)
1898 			break;
1899 
1900 		NET_LOCK();
1901 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1902 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1903 
1904 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1905 		if (error != 0) {
1906 			ifp->if_flags = oif_flags;
1907 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
1908 			s = splnet();
1909 			if (ISSET(ifp->if_flags, IFF_UP))
1910 				if_up(ifp);
1911 			else
1912 				if_down(ifp);
1913 			splx(s);
1914 		}
1915 		NET_UNLOCK();
1916 		break;
1917 
1918 	case SIOCSIFXFLAGS:
1919 		if ((error = suser(p)) != 0)
1920 			break;
1921 
1922 		NET_LOCK();
1923 #ifdef INET6
1924 		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
1925 			error = in6_ifattach(ifp);
1926 			if (error != 0) {
1927 				NET_UNLOCK();
1928 				break;
1929 			}
1930 		}
1931 
1932 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1933 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
1934 			ifp->if_xflags |= IFXF_INET6_NOSOII;
1935 
1936 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1937 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
1938 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
1939 
1940 #endif	/* INET6 */
1941 
1942 #ifdef MPLS
1943 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
1944 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
1945 			s = splnet();
1946 			ifp->if_xflags |= IFXF_MPLS;
1947 			ifp->if_ll_output = ifp->if_output;
1948 			ifp->if_output = mpls_output;
1949 			splx(s);
1950 		}
1951 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
1952 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
1953 			s = splnet();
1954 			ifp->if_xflags &= ~IFXF_MPLS;
1955 			ifp->if_output = ifp->if_ll_output;
1956 			ifp->if_ll_output = NULL;
1957 			splx(s);
1958 		}
1959 #endif	/* MPLS */
1960 
1961 #ifndef SMALL_KERNEL
1962 		if (ifp->if_capabilities & IFCAP_WOL) {
1963 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
1964 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
1965 				s = splnet();
1966 				ifp->if_xflags |= IFXF_WOL;
1967 				error = ifp->if_wol(ifp, 1);
1968 				splx(s);
1969 			}
1970 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
1971 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
1972 				s = splnet();
1973 				ifp->if_xflags &= ~IFXF_WOL;
1974 				error = ifp->if_wol(ifp, 0);
1975 				splx(s);
1976 			}
1977 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
1978 			ifr->ifr_flags &= ~IFXF_WOL;
1979 			error = ENOTSUP;
1980 		}
1981 #endif
1982 
1983 		if (error == 0)
1984 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
1985 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
1986 		NET_UNLOCK();
1987 		break;
1988 
1989 	case SIOCSIFMETRIC:
1990 		if ((error = suser(p)) != 0)
1991 			break;
1992 		NET_LOCK();
1993 		ifp->if_metric = ifr->ifr_metric;
1994 		NET_UNLOCK();
1995 		break;
1996 
1997 	case SIOCSIFMTU:
1998 		if ((error = suser(p)) != 0)
1999 			break;
2000 		NET_LOCK();
2001 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2002 		NET_UNLOCK();
2003 		if (!error)
2004 			rtm_ifchg(ifp);
2005 		break;
2006 
2007 	case SIOCSIFDESCR:
2008 		if ((error = suser(p)) != 0)
2009 			break;
2010 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2011 		    IFDESCRSIZE, &bytesdone);
2012 		if (error == 0) {
2013 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2014 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2015 		}
2016 		break;
2017 
2018 	case SIOCSIFRTLABEL:
2019 		if ((error = suser(p)) != 0)
2020 			break;
2021 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2022 		    RTLABEL_LEN, &bytesdone);
2023 		if (error == 0) {
2024 			rtlabel_unref(ifp->if_rtlabelid);
2025 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2026 		}
2027 		break;
2028 
2029 	case SIOCSIFPRIORITY:
2030 		if ((error = suser(p)) != 0)
2031 			break;
2032 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2033 			error = EINVAL;
2034 			break;
2035 		}
2036 		ifp->if_priority = ifr->ifr_metric;
2037 		break;
2038 
2039 	case SIOCSIFRDOMAIN:
2040 		if ((error = suser(p)) != 0)
2041 			break;
2042 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2043 		if (!error || error == EEXIST) {
2044 			NET_LOCK();
2045 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2046 			NET_UNLOCK();
2047 		}
2048 		break;
2049 
2050 	case SIOCAIFGROUP:
2051 		if ((error = suser(p)))
2052 			break;
2053 		NET_LOCK();
2054 		error = if_addgroup(ifp, ifgr->ifgr_group);
2055 		if (error == 0) {
2056 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2057 			if (error == ENOTTY)
2058 				error = 0;
2059 		}
2060 		NET_UNLOCK();
2061 		break;
2062 
2063 	case SIOCDIFGROUP:
2064 		if ((error = suser(p)))
2065 			break;
2066 		NET_LOCK();
2067 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2068 		if (error == ENOTTY)
2069 			error = 0;
2070 		if (error == 0)
2071 			error = if_delgroup(ifp, ifgr->ifgr_group);
2072 		NET_UNLOCK();
2073 		break;
2074 
2075 	case SIOCSIFLLADDR:
2076 		if ((error = suser(p)))
2077 			break;
2078 		if ((ifp->if_sadl == NULL) ||
2079 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2080 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2081 			error = EINVAL;
2082 			break;
2083 		}
2084 		NET_LOCK();
2085 		switch (ifp->if_type) {
2086 		case IFT_ETHER:
2087 		case IFT_CARP:
2088 		case IFT_XETHER:
2089 		case IFT_ISO88025:
2090 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2091 			if (error == ENOTTY)
2092 				error = 0;
2093 			if (error == 0)
2094 				error = if_setlladdr(ifp,
2095 				    ifr->ifr_addr.sa_data);
2096 			break;
2097 		default:
2098 			error = ENODEV;
2099 		}
2100 
2101 		if (error == 0)
2102 			ifnewlladdr(ifp);
2103 		NET_UNLOCK();
2104 		break;
2105 
2106 	case SIOCSIFLLPRIO:
2107 		if ((error = suser(p)))
2108 			break;
2109 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2110 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2111 			error = EINVAL;
2112 			break;
2113 		}
2114 		NET_LOCK();
2115 		ifp->if_llprio = ifr->ifr_llprio;
2116 		NET_UNLOCK();
2117 		break;
2118 
2119 	case SIOCGIFSFFPAGE:
2120 		error = suser(p);
2121 		if (error != 0)
2122 			break;
2123 
2124 		error = if_sffpage_check(data);
2125 		if (error != 0)
2126 			break;
2127 
2128 		/* don't take NET_LOCK because i2c reads take a long time */
2129 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2130 		break;
2131 
2132 	case SIOCSETKALIVE:
2133 	case SIOCDIFPHYADDR:
2134 	case SIOCSLIFPHYADDR:
2135 	case SIOCSLIFPHYRTABLE:
2136 	case SIOCSLIFPHYTTL:
2137 	case SIOCSLIFPHYDF:
2138 	case SIOCSLIFPHYECN:
2139 	case SIOCADDMULTI:
2140 	case SIOCDELMULTI:
2141 	case SIOCSIFMEDIA:
2142 	case SIOCSVNETID:
2143 	case SIOCDVNETID:
2144 	case SIOCSVNETFLOWID:
2145 	case SIOCSTXHPRIO:
2146 	case SIOCSRXHPRIO:
2147 	case SIOCSIFPAIR:
2148 	case SIOCSIFPARENT:
2149 	case SIOCDIFPARENT:
2150 	case SIOCSETMPWCFG:
2151 	case SIOCSETLABEL:
2152 	case SIOCDELLABEL:
2153 	case SIOCSPWE3CTRLWORD:
2154 	case SIOCSPWE3FAT:
2155 	case SIOCSPWE3NEIGHBOR:
2156 	case SIOCDPWE3NEIGHBOR:
2157 #if NBRIDGE > 0
2158 	case SIOCBRDGADD:
2159 	case SIOCBRDGDEL:
2160 	case SIOCBRDGSIFFLGS:
2161 	case SIOCBRDGSCACHE:
2162 	case SIOCBRDGADDS:
2163 	case SIOCBRDGDELS:
2164 	case SIOCBRDGSADDR:
2165 	case SIOCBRDGSTO:
2166 	case SIOCBRDGDADDR:
2167 	case SIOCBRDGFLUSH:
2168 	case SIOCBRDGADDL:
2169 	case SIOCBRDGSIFPROT:
2170 	case SIOCBRDGARL:
2171 	case SIOCBRDGFRL:
2172 	case SIOCBRDGSPRI:
2173 	case SIOCBRDGSHT:
2174 	case SIOCBRDGSFD:
2175 	case SIOCBRDGSMA:
2176 	case SIOCBRDGSIFPRIO:
2177 	case SIOCBRDGSIFCOST:
2178 	case SIOCBRDGSTXHC:
2179 	case SIOCBRDGSPROTO:
2180 	case SIOCSWSPORTNO:
2181 #endif
2182 		if ((error = suser(p)) != 0)
2183 			break;
2184 		/* FALLTHROUGH */
2185 	default:
2186 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
2187 			(struct mbuf *) cmd, (struct mbuf *) data,
2188 			(struct mbuf *) ifp, p));
2189 		if (error != EOPNOTSUPP)
2190 			break;
2191 		switch (cmd) {
2192 		case SIOCAIFADDR:
2193 		case SIOCDIFADDR:
2194 		case SIOCSIFADDR:
2195 		case SIOCSIFNETMASK:
2196 		case SIOCSIFDSTADDR:
2197 		case SIOCSIFBRDADDR:
2198 #ifdef INET6
2199 		case SIOCAIFADDR_IN6:
2200 		case SIOCDIFADDR_IN6:
2201 #endif
2202 			error = suser(p);
2203 			break;
2204 		default:
2205 			error = 0;
2206 			break;
2207 		}
2208 		if (error)
2209 			break;
2210 		NET_LOCK();
2211 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2212 		NET_UNLOCK();
2213 		break;
2214 	}
2215 
2216 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
2217 		rtm_ifchg(ifp);
2218 
2219 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2220 		getmicrotime(&ifp->if_lastchange);
2221 
2222 	return (error);
2223 }
2224 
/*
 * Handle the read-only interface ioctls under the shared net lock.
 * Only called from ifioctl() with one of the commands listed there;
 * anything else panics in the default case below.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* commands that do not refer to a specific interface */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK_IN_IOCTL();
		error = ifconf(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupmembers(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupattribs(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK_IN_IOCTL();
		error = if_getgrouplist(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK_IN_IOCTL();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is derived from the send queue state */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* internal-only bits are hidden from userland */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK_IN_IOCTL();

	return (error);
}
2338 
2339 static int
2340 if_sffpage_check(const caddr_t data)
2341 {
2342 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2343 
2344 	switch (sff->sff_addr) {
2345 	case IFSFF_ADDR_EEPROM:
2346 	case IFSFF_ADDR_DDM:
2347 		break;
2348 	default:
2349 		return (EINVAL);
2350 	}
2351 
2352 	return (0);
2353 }
2354 
2355 int
2356 if_txhprio_l2_check(int hdrprio)
2357 {
2358 	switch (hdrprio) {
2359 	case IF_HDRPRIO_PACKET:
2360 		return (0);
2361 	default:
2362 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2363 			return (0);
2364 		break;
2365 	}
2366 
2367 	return (EINVAL);
2368 }
2369 
2370 int
2371 if_txhprio_l3_check(int hdrprio)
2372 {
2373 	switch (hdrprio) {
2374 	case IF_HDRPRIO_PACKET:
2375 	case IF_HDRPRIO_PAYLOAD:
2376 		return (0);
2377 	default:
2378 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2379 			return (0);
2380 		break;
2381 	}
2382 
2383 	return (EINVAL);
2384 }
2385 
2386 int
2387 if_rxhprio_l2_check(int hdrprio)
2388 {
2389 	switch (hdrprio) {
2390 	case IF_HDRPRIO_PACKET:
2391 	case IF_HDRPRIO_OUTER:
2392 		return (0);
2393 	default:
2394 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2395 			return (0);
2396 		break;
2397 	}
2398 
2399 	return (EINVAL);
2400 }
2401 
2402 int
2403 if_rxhprio_l3_check(int hdrprio)
2404 {
2405 	switch (hdrprio) {
2406 	case IF_HDRPRIO_PACKET:
2407 	case IF_HDRPRIO_PAYLOAD:
2408 	case IF_HDRPRIO_OUTER:
2409 		return (0);
2410 	default:
2411 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2412 			return (0);
2413 		break;
2414 	}
2415 
2416 	return (EINVAL);
2417 }
2418 
2419 /*
2420  * Return interface configuration
2421  * of system.  List may be used
2422  * in later ioctl's (above) to get
2423  * other information.
2424  */
2425 int
2426 ifconf(caddr_t data)
2427 {
2428 	struct ifconf *ifc = (struct ifconf *)data;
2429 	struct ifnet *ifp;
2430 	struct ifaddr *ifa;
2431 	struct ifreq ifr, *ifrp;
2432 	int space = ifc->ifc_len, error = 0;
2433 
2434 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2435 	if (space == 0) {
2436 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2437 			struct sockaddr *sa;
2438 
2439 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2440 				space += sizeof (ifr);
2441 			else
2442 				TAILQ_FOREACH(ifa,
2443 				    &ifp->if_addrlist, ifa_list) {
2444 					sa = ifa->ifa_addr;
2445 					if (sa->sa_len > sizeof(*sa))
2446 						space += sa->sa_len -
2447 						    sizeof(*sa);
2448 					space += sizeof(ifr);
2449 				}
2450 		}
2451 		ifc->ifc_len = space;
2452 		return (0);
2453 	}
2454 
2455 	ifrp = ifc->ifc_req;
2456 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2457 		if (space < sizeof(ifr))
2458 			break;
2459 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2460 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2461 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2462 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2463 			    sizeof(ifr));
2464 			if (error)
2465 				break;
2466 			space -= sizeof (ifr), ifrp++;
2467 		} else
2468 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2469 				struct sockaddr *sa = ifa->ifa_addr;
2470 
2471 				if (space < sizeof(ifr))
2472 					break;
2473 				if (sa->sa_len <= sizeof(*sa)) {
2474 					ifr.ifr_addr = *sa;
2475 					error = copyout((caddr_t)&ifr,
2476 					    (caddr_t)ifrp, sizeof (ifr));
2477 					ifrp++;
2478 				} else {
2479 					space -= sa->sa_len - sizeof(*sa);
2480 					if (space < sizeof (ifr))
2481 						break;
2482 					error = copyout((caddr_t)&ifr,
2483 					    (caddr_t)ifrp,
2484 					    sizeof(ifr.ifr_name));
2485 					if (error == 0)
2486 						error = copyout((caddr_t)sa,
2487 						    (caddr_t)&ifrp->ifr_addr,
2488 						    sa->sa_len);
2489 					ifrp = (struct ifreq *)(sa->sa_len +
2490 					    (caddr_t)&ifrp->ifr_addr);
2491 				}
2492 				if (error)
2493 					break;
2494 				space -= sizeof (ifr);
2495 			}
2496 	}
2497 	ifc->ifc_len -= space;
2498 	return (error);
2499 }
2500 
/*
 * Attach a per-CPU counter set (ifc_ncounters entries) to the interface.
 * Must only be called once, before the counters are first used.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2508 
/*
 * Release the per-CPU counter set attached by if_counters_alloc() and
 * clear the pointer so stat readers fall back to the if_data fields.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2517 
/*
 * Produce a consolidated statistics snapshot for an interface: start
 * from the static if_data fields, then fold in the per-CPU counters
 * (if allocated) and the per-queue send/receive statistics.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters));

		/* Counters are additive on top of the if_data baseline. */
		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* Add per-transmit-queue statistics. */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* Add per-receive-queue statistics. */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2556 
2557 /*
2558  * Dummy functions replaced in ifnet during detach (if protocols decide to
2559  * fiddle with the if during detach.
2560  */
/* Start routine for a detached interface: just drop anything queued. */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2566 
/* Ioctl routine for a detached interface: every request fails. */
int
if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
{
	return ENODEV;
}
2572 
2573 /*
2574  * Create interface group without members
2575  */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group	*ifg;

	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	/* Caller (if_addgroup) takes the first reference. */
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	/* Make pf aware of the new group so rules can match on it. */
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}
2595 
2596 /*
2597  * Add a group to an interface
2598  */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;

	/*
	 * Reject group names ending in a digit, presumably so group
	 * names cannot collide with interface names like "em0" —
	 * confirm against ifconfig(8) group semantics.
	 */
	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	/* Already a member of this group? */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	/* Allocate both link structures up front so failure is clean. */
	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		free(ifgl, M_TEMP, sizeof(*ifgl));
		return (ENOMEM);
	}

	/* Find an existing group of this name, or create one. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
		free(ifgl, M_TEMP, sizeof(*ifgl));
		free(ifgm, M_TEMP, sizeof(*ifgm));
		return (ENOMEM);
	}

	/* Link interface and group together in both directions. */
	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_addmember(groupname, ifp);
#endif

	return (0);
}
2645 
2646 /*
2647  * Remove a group from an interface
2648  */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	/* The interface must actually be a member of the group. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* Drop the matching member entry on the group side. */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		free(ifgm, M_TEMP, sizeof(*ifgm));
	}

#if NPF > 0
	/* Let pf re-evaluate rules referencing this group. */
	pfi_group_change(groupname);
#endif

	/* Last member gone: tear the group itself down. */
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
	}

	free(ifgl, M_TEMP, sizeof(*ifgl));

	return (0);
}
2688 
2689 /*
2690  * Stores all groups from an interface in memory pointed
2691  * to by data
2692  */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int			 len, error;
	struct ifg_list		*ifgl;
	struct ifg_req		 ifgrq, *ifgp;
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;

	/* Size probe: report how many bytes the full list needs. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	/* Copy one ifg_req per group out to the user buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2724 
2725 /*
2726  * Stores all members of a group in memory pointed to by data
2727  */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Look the group up by name. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* Size probe: report how many bytes the member list needs. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy one ifg_req per member interface to the user buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2766 
2767 int
2768 if_getgroupattribs(caddr_t data)
2769 {
2770 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2771 	struct ifg_group	*ifg;
2772 
2773 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2774 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2775 			break;
2776 	if (ifg == NULL)
2777 		return (ENOENT);
2778 
2779 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2780 
2781 	return (0);
2782 }
2783 
/*
 * Adjust a group's carp demotion counter by the (signed) delta in the
 * request and notify every member interface via SIOCSIFGATTR.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* The resulting counter must stay within 0..0xff. */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* Propagate the change to each member's driver. */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2810 
2811 /*
2812  * Stores all groups in memory pointed to by data
2813  */
int
if_getgrouplist(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Size probe: report how many bytes the full list needs. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy one ifg_req per known group to the user buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2845 
/*
 * Called on route changes: when a default route (destination and mask
 * both "any") is added or removed, rebuild the "egress" group so it
 * keeps tracking the interfaces carrying default routes.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2867 
/*
 * Rebuild the IFG_EGRESS group from scratch: empty it, then re-add
 * every interface that carries an IPv4 or IPv6 default route in
 * rtable 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* Remove all current members; SAFE because delgroup frees them. */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk all IPv4 default routes (dst == mask == 0.0.0.0). */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		/* rtable_iterate() releases rt and returns the next dup. */
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same walk for IPv6 default routes (::/0). */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2917 
2918 /*
2919  * Set/clear promiscuous mode on interface ifp based on the truth value
2920  * of pswitch.  The calls are reference counted so that only the first
2921  * "on" request actually has an effect, as does the final "off" request.
2922  * Results are undefined if the "off" and "on" requests are not matched.
2923  */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */

	/* Save state so a failed driver ioctl can be rolled back. */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* Only the 0 -> 1 transition touches IFF_PROMISC. */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* Only the final reference clears IFF_PROMISC. */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/* A down interface picks the flag up when it is brought up. */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
2958 
/* Append an address to the interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2964 
/* Remove an address from the interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2970 
/*
 * Overwrite an address's broadcast address in place.  The new sockaddr
 * must be exactly the same length as the existing one.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
2978 
2979 #ifdef DDB
2980 /* debug function, can be called from ddb> */
/* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* Print every INET/INET6 address of every interface. */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			/* Other families print only the interface name. */
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
3009 #endif /* DDB */
3010 
/*
 * Push a new link-layer address down to the driver by cycling
 * SIOCSIFFLAGS, and regenerate the IPv6 link-local address that is
 * derived from it.  The interface's original up/down state is
 * restored before returning.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring it up so the driver reprograms the new lladdr. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3057 
/* Register a task to run whenever the interface's addresses change. */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3065 
/* Unregister a previously added address-change task. */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3073 
/* Run all registered address-change hooks for the interface. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3079 
3080 void
3081 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3082 {
3083 	extern int ticks;
3084 
3085 	memset(rxr, 0, sizeof(*rxr));
3086 
3087 	rxr->rxr_adjusted = ticks;
3088 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3089 	rxr->rxr_hwm = hwm;
3090 }
3091 
/*
 * Grow the current watermark by one, up to the high watermark, but
 * only when the ring is actually running near-empty (alive below the
 * low watermark would not justify more buffers).
 */
static inline void
if_rxr_adjust_cwm(struct if_rxring *rxr)
{
	extern int ticks;

	if (rxr->rxr_alive >= rxr->rxr_lwm)
		return;
	else if (rxr->rxr_cwm < rxr->rxr_hwm)
		rxr->rxr_cwm++;

	rxr->rxr_adjusted = ticks;
}
3104 
/*
 * Called when the stack detects input livelock: shrink the current
 * watermark by one (never below the low watermark), rate limited to
 * at most once per tick.
 */
void
if_rxr_livelocked(struct if_rxring *rxr)
{
	extern int ticks;

	if (ticks - rxr->rxr_adjusted >= 1) {
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;

		rxr->rxr_adjusted = ticks;
	}
}
3117 
/*
 * Grant the driver up to `max` new rx buffer slots, bounded by the
 * current watermark.  Returns the number of slots granted (0 when the
 * ring already holds cwm live buffers) and accounts them as alive.
 */
u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;

	return (diff);
}
3137 
/*
 * Copy `t` rx ring info entries out to a userland if_rxrinfo request.
 * Writes at most as many entries as the user buffer advertised, and
 * always reports the true total in ifri_total so userland can resize.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* Write no more entries than the caller has room for. */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3160 
/*
 * Convenience wrapper for drivers with a single rx ring: package the
 * ring state into one if_rxring_info entry and hand it to
 * if_rxr_info_ioctl().
 */
int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}
3177 
3178 /*
3179  * Network stack input queues.
3180  */
3181 
/*
 * Initialize a network stack input queue with the given length limit
 * and the soft interrupt to schedule when packets arrive.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3188 
3189 int
3190 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3191 {
3192 	int rv;
3193 
3194 	rv = mq_enqueue(&niq->ni_q, m);
3195 	if (rv == 0)
3196 		schednetisr(niq->ni_isr);
3197 	else
3198 		if_congestion();
3199 
3200 	return (rv);
3201 }
3202 
/*
 * Queue a whole mbuf list on a network stack input queue, scheduling
 * the protocol soft interrupt on success or signalling congestion if
 * the queue overflowed.
 */
int
niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
{
	int rv;

	rv = mq_enlist(&niq->ni_q, ml);
	if (rv == 0)
		schednetisr(niq->ni_isr);
	else
		if_congestion();

	return (rv);
}
3216 
/* Panic on an address family the caller has no case for. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3222 
3223 /*
3224  * XXXSMP This tunable is here to work around the fact that IPsec
3225  * globals aren't ready to be accessed by multiple threads in
3226  * parallel.
3227  */
3228 int		 nettaskqs = NET_TASKQ;
3229 
3230 struct taskq *
3231 net_tq(unsigned int ifindex)
3232 {
3233 	struct taskq *t = NULL;
3234 
3235 	t = nettqmp[ifindex % nettaskqs];
3236 
3237 	return (t);
3238 }
3239