xref: /openbsd-src/sys/net/if.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: if.c,v 1.620 2020/10/03 00:23:55 mvs Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "if_wg.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/percpu.h>
88 #include <sys/proc.h>
89 #include <sys/stdint.h>	/* uintptr_t */
90 #include <sys/rwlock.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include <netinet/in.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/igmp.h>
101 #ifdef MROUTING
102 #include <netinet/ip_mroute.h>
103 #endif
104 
105 #ifdef INET6
106 #include <netinet6/in6_var.h>
107 #include <netinet6/in6_ifattach.h>
108 #include <netinet6/nd6.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #endif
112 
113 #ifdef MPLS
114 #include <netmpls/mpls.h>
115 #endif
116 
117 #if NBPFILTER > 0
118 #include <net/bpf.h>
119 #endif
120 
121 #if NBRIDGE > 0
122 #include <net/if_bridge.h>
123 #endif
124 
125 #if NCARP > 0
126 #include <netinet/ip_carp.h>
127 #endif
128 
129 #if NPF > 0
130 #include <net/pfvar.h>
131 #endif
132 
133 #include <sys/device.h>
134 
/* attach/detach helpers */
void	if_attachsetup(struct ifnet *);
void	if_attachdomain(struct ifnet *);
void	if_attach_common(struct ifnet *);
int	if_createrdomain(int, struct ifnet *);
int	if_setrdomain(struct ifnet *, int);
void	if_slowtimo(void *);

/* stubs installed on an interface once it has been detached */
void	if_detached_qstart(struct ifqueue *);
int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);

/* ioctl plumbing */
int	ifioctl_get(u_long, caddr_t);
int	ifconf(caddr_t);
static int
	if_sffpage_check(const caddr_t);

/* interface group ioctl backends */
int	if_getgroup(caddr_t, struct ifnet *);
int	if_getgroupmembers(caddr_t);
int	if_getgroupattribs(caddr_t);
int	if_setgroupattribs(caddr_t);
int	if_getgrouplist(caddr_t);

/* link state change propagation */
void	if_linkstate(struct ifnet *);
void	if_linkstate_task(void *);

/* interface cloning */
int	if_clone_list(struct if_clonereq *);
struct if_clone	*if_clone_lookup(const char *, int *);

int	if_group_egress_build(void);

void	if_watchdog_task(void *);

void	if_netisr(void *);

#ifdef DDB
void	ifa_print_all(void);
#endif

void	if_qstart_compat(struct ifqueue *);
173 
174 /*
175  * interface index map
176  *
177  * the kernel maintains a mapping of interface indexes to struct ifnet
178  * pointers.
179  *
180  * the map is an array of struct ifnet pointers prefixed by an if_map
181  * structure. the if_map structure stores the length of its array.
182  *
183  * as interfaces are attached to the system, the map is grown on demand
184  * up to USHRT_MAX entries.
185  *
186  * interface index 0 is reserved and represents no interface. this
187  * supports the use of the interface index as the scope for IPv6 link
188  * local addresses, where scope 0 means no scope has been specified.
189  * it also supports the use of interface index as the unique identifier
190  * for network interfaces in SNMP applications as per RFC2863. therefore
191  * if_get(0) returns NULL.
192  */
193 
void if_ifp_dtor(void *, void *);
void if_map_dtor(void *, void *);
struct ifnet *if_ref(struct ifnet *);

/*
 * struct if_map
 *
 * bounded array of ifnet srp pointers used to fetch references of live
 * interfaces with if_get().
 */

struct if_map {
	unsigned long		 limit;
	/* followed by limit ifnet srp pointers */
};

/*
 * struct if_idxmap
 *
 * infrastructure to manage updates and accesses to the current if_map.
 */

struct if_idxmap {
	unsigned int		 serial;	/* next candidate index */
	unsigned int		 count;		/* attached interfaces */
	struct srp		 map;		/* current struct if_map */
};

void	if_idxmap_init(unsigned int);
void	if_idxmap_insert(struct ifnet *);
void	if_idxmap_remove(struct ifnet *);

/* all interface groups in the system */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/* registered interface cloners, protected by if_cloners_lock */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;

struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonerlock");

/* hooks should only be added, deleted, and run from a process context */
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void	if_hooks_run(struct task_list *);

int	ifq_congestion;

int		 netisr;

#define	NET_TASKQ	1
struct taskq	*nettqmp[NET_TASKQ];

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
251 
252 /*
253  * Network interface utility routines.
254  */
/*
 * Bootstrap the network interface layer: create the initial interface
 * index map and the softnet task queues that packet processing runs on.
 */
void
ifinit(void)
{
	unsigned int	i;

	/*
	 * most machines boot with 4 or 5 interfaces, so size the initial map
	 * to accommodate this
	 */
	if_idxmap_init(8);

	for (i = 0; i < NET_TASKQ; i++) {
		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
		if (nettqmp[i] == NULL)
			panic("unable to create network taskq %d", i);
	}
}
272 
/* the one and only index map; serial/count filled in by if_idxmap_init() */
static struct if_idxmap if_idxmap = {
	0,
	0,
	SRP_INITIALIZER()
};

/* garbage collectors for srp-managed ifnet pointers and map arrays */
struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of all attached interfaces */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
283 
/*
 * Allocate the initial interface index map with room for "limit"
 * entries and publish it via srp.  Called once from ifinit() before
 * any interface can attach, so no concurrent readers exist yet.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int i;

	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */

	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
	    M_IFADDR, M_WAITOK);

	if_map->limit = limit;
	/* the srp array immediately follows the if_map header */
	map = (struct srp *)(if_map + 1);
	for (i = 0; i < limit; i++)
		srp_init(&map[i]);

	/* this is called early so there's nothing to race with */
	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
}
304 
/*
 * Pick a free interface index for ifp and enter it into the index map,
 * growing the map first if needed.  The map holds its own reference on
 * ifp (taken via if_ref()) which is released by if_ifp_dtor().
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* indexes are 16 bit; wrap the serial counter at USHRT_MAX */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		/* double the map and copy live entries across */
		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* extra ref for the new map's slot */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		/* i carries over: initialise the newly added slots */
		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* publish; the old map is collected by if_map_dtor() */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
368 
/*
 * Clear ifp's slot in the index map so if_get() can no longer find it,
 * then sleep until every outstanding reference has been released.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* dropping the slot releases the map's reference via if_ifp_dtor() */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
394 
/*
 * srp_gc destructor for ifnet map entries: drop the reference the
 * index map slot held on the interface.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
400 
/*
 * srp_gc destructor for a retired if_map: release every interface
 * reference the map still holds, then free the map itself.
 */
void
if_map_dtor(void *null, void *m)
{
	struct if_map *if_map = m;
	struct srp *map = (struct srp *)(if_map + 1);
	unsigned int i;

	/*
	 * dont need to serialize the use of update_locked since this is
	 * the last reference to this map. there's nothing to race against.
	 */
	for (i = 0; i < if_map->limit; i++)
		srp_update_locked(&if_ifp_gc, &map[i], NULL);

	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
}
417 
418 /*
419  * Attach an interface to the
420  * list of "active" interfaces.
421  */
422 void
423 if_attachsetup(struct ifnet *ifp)
424 {
425 	unsigned long ifidx;
426 
427 	NET_ASSERT_LOCKED();
428 
429 	TAILQ_INIT(&ifp->if_groups);
430 
431 	if_addgroup(ifp, IFG_ALL);
432 
433 	if_attachdomain(ifp);
434 #if NPF > 0
435 	pfi_attach_ifnet(ifp);
436 #endif
437 
438 	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
439 	if_slowtimo(ifp);
440 
441 	if_idxmap_insert(ifp);
442 	KASSERT(if_get(0) == NULL);
443 
444 	ifidx = ifp->if_index;
445 
446 	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
447 	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);
448 
449 	/* Announce the interface. */
450 	rtm_ifannounce(ifp, IFAN_ARRIVAL);
451 }
452 
453 /*
454  * Allocate the link level name for the specified interface.  This
455  * is an attachment helper.  It must be called after ifp->if_addrlen
456  * is initialized, which may not be the case when if_attach() is
457  * called.
458  */
459 void
460 if_alloc_sadl(struct ifnet *ifp)
461 {
462 	unsigned int socksize;
463 	int namelen, masklen;
464 	struct sockaddr_dl *sdl;
465 
466 	/*
467 	 * If the interface already has a link name, release it
468 	 * now.  This is useful for interfaces that can change
469 	 * link types, and thus switch link names often.
470 	 */
471 	if_free_sadl(ifp);
472 
473 	namelen = strlen(ifp->if_xname);
474 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
475 	socksize = masklen + ifp->if_addrlen;
476 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
477 	if (socksize < sizeof(*sdl))
478 		socksize = sizeof(*sdl);
479 	socksize = ROUNDUP(socksize);
480 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
481 	sdl->sdl_len = socksize;
482 	sdl->sdl_family = AF_LINK;
483 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
484 	sdl->sdl_nlen = namelen;
485 	sdl->sdl_alen = ifp->if_addrlen;
486 	sdl->sdl_index = ifp->if_index;
487 	sdl->sdl_type = ifp->if_type;
488 	ifp->if_sadl = sdl;
489 }
490 
491 /*
492  * Free the link level name for the specified interface.  This is
493  * a detach helper.  This is called from if_detach() or from
494  * link layer type specific detach functions.
495  */
496 void
497 if_free_sadl(struct ifnet *ifp)
498 {
499 	if (ifp->if_sadl == NULL)
500 		return;
501 
502 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
503 	ifp->if_sadl = NULL;
504 }
505 
/*
 * Give every configured domain (address family) a chance to attach
 * per-interface state; the results are stored in ifp->if_afdata,
 * indexed by domain family.
 */
void
if_attachdomain(struct ifnet *ifp)
{
	struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
524 
/*
 * Like if_attach(), but inserts the interface at the head of the
 * global interface list instead of the tail.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
534 
/*
 * Attach an interface: initialise the common state, append it to the
 * global interface list and complete the setup under the net lock.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
544 
545 void
546 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
547 {
548 	struct ifqueue **map;
549 	struct ifqueue *ifq;
550 	int i;
551 
552 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
553 	KASSERT(nqs != 0);
554 
555 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
556 
557 	ifp->if_snd.ifq_softc = NULL;
558 	map[0] = &ifp->if_snd;
559 
560 	for (i = 1; i < nqs; i++) {
561 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
562 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
563 		ifq_init(ifq, ifp, i);
564 		map[i] = ifq;
565 	}
566 
567 	ifp->if_ifqs = map;
568 	ifp->if_nifqs = nqs;
569 }
570 
/*
 * Provide an interface with niqs receive queues.  Slot 0 is the
 * interface's built-in if_rcv; the remaining niqs - 1 queues are
 * allocated here.
 */
void
if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
{
	struct ifiqueue **map;
	struct ifiqueue *ifiq;
	unsigned int i;

	KASSERT(niqs != 0);

	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);

	ifp->if_rcv.ifiq_softc = NULL;
	map[0] = &ifp->if_rcv;

	for (i = 1; i < niqs; i++) {
		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
		ifiq_init(ifiq, ifp, i);
		map[i] = ifiq;
	}

	ifp->if_iqs = map;
	ifp->if_niqs = niqs;
}
594 
/*
 * Initialise the driver-independent parts of an interface: address
 * lists, the default send/receive queues, detach/link-state hooks and
 * fallback handlers.  Runs before the interface is globally visible.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	/*
	 * MPSAFE drivers provide if_qstart; legacy drivers provide
	 * if_start and get the if_qstart_compat shim instead.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* single transmit queue by default; see if_attach_queues() */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* single receive queue by default; see if_attach_iqueues() */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* install fallbacks for handlers the driver did not supply */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;
}
638 
/*
 * Switch the queueing discipline used on an interface's transmit path.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
653 
/*
 * Kick transmission on a legacy (non-MPSAFE) interface; only valid
 * when the if_qstart_compat shim is installed.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy drivers expect the kernel lock and splnet protection */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
681 
/*
 * Hand an outgoing packet to ifp.  The packet may be diverted for a
 * pf-scheduled delay or to the bridge before reaching the interface's
 * enqueue handler (if_enqueue_ifq() unless the driver overrides it).
 * Returns 0 on success or an errno; the mbuf is consumed either way.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
#if NPF > 0
	/* pf may have tagged the packet with an artificial delay */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* hand bridge member output to the bridge unless tagged M_PROTO1 */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
705 
/*
 * Default if_enqueue handler: select one of the interface's transmit
 * queues for the mbuf, enqueue it and kick the queue.  Returns 0 on
 * success or the errno from ifq_enqueue().
 */
int
if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
{
	struct ifqueue *ifq = &ifp->if_snd;
	int error;

	if (ifp->if_nifqs > 1) {
		unsigned int idx;

		/*
		 * use the operations on the first ifq to pick which of
		 * the array gets this mbuf.
		 */

		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
		ifq = ifp->if_ifqs[idx];
	}

	error = ifq_enqueue(ifq, m);
	if (error)
		return (error);

	ifq_start(ifq);

	return (0);
}
732 
/*
 * Driver entry point for received packets: feed the list into the
 * interface's default receive queue.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
738 
/*
 * Loop a packet back into the local input path for the given address
 * family, accounting it as both sent and received on ifp.  Returns 0,
 * or EAFNOSUPPORT (freeing the mbuf) for an unhandled family.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* scrub state from the outbound trip before re-input */
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* count the packet on both sides of the loop */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
790 
/*
 * Queue a locally generated packet of the given address family onto
 * one of ifp's receive queues (selected by flow id when present).
 * Returns 0 on success or ENOBUFS if the queue rejected the packet.
 */
int
if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	struct ifiqueue *ifiq;
	unsigned int flow = 0;

	m->m_pkthdr.ph_family = af;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* spread flows across the receive queues when a flow id is set */
	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	ifiq = ifp->if_iqs[flow % ifp->if_niqs];

	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
}
808 
/*
 * Run the interface's input handler over every packet in ml, under
 * the net lock.  Also stirs the random pool with packet arrival data
 * for physical (non-cloned) interfaces.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists and the socket layer.
	 */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK();
}
837 
/*
 * Input path for virtual interfaces: tag the mbuf with the receiving
 * interface, account it, offer it to bpf and pass it to the input
 * handler directly (no receive queue involved).
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means bpf consumed/filtered the packet */
		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	(*ifp->if_input)(ifp, m);
}
863 
/*
 * Drain the netisr soft-interrupt bit mask, dispatching each protocol
 * handler whose bit is set.  Legacy handlers run under the kernel
 * lock; pfsync is handled once, after the drain loop.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we snapshotted; new ones re-loop */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
		/* accumulate every bit seen so pfsync runs at most once */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
926 
/*
 * Run every task on a hook list.  The mutex is dropped around each
 * callback; a cursor entry (recognisable by its NULL t_func) keeps our
 * place in the list so concurrent add/del during the callback is safe.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		/* park the cursor after t, then run t unlocked */
		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
955 
/*
 * Notify everything that has registered a detach hook that the
 * interface is going away.  Called at the start of if_detach().
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
969 
970 void
971 if_detachhook_add(struct ifnet *ifp, struct task *t)
972 {
973 	mtx_enter(&if_hooks_mtx);
974 	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
975 	mtx_leave(&if_hooks_mtx);
976 }
977 
978 void
979 if_detachhook_del(struct ifnet *ifp, struct task *t)
980 {
981 	mtx_enter(&if_hooks_mtx);
982 	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
983 	mtx_leave(&if_hooks_mtx);
984 }
985 
986 /*
987  * Detach an interface from everything in the kernel.  Also deallocate
988  * private resources.
989  */
990 void
991 if_detach(struct ifnet *ifp)
992 {
993 	struct ifaddr *ifa;
994 	struct ifg_list *ifg;
995 	struct domain *dp;
996 	int i, s;
997 
998 	/* Undo pseudo-driver changes. */
999 	if_deactivate(ifp);
1000 
1001 	ifq_clr_oactive(&ifp->if_snd);
1002 
1003 	/* Other CPUs must not have a reference before we start destroying. */
1004 	if_idxmap_remove(ifp);
1005 
1006 #if NBPFILTER > 0
1007 	bpfdetach(ifp);
1008 #endif
1009 
1010 	NET_LOCK();
1011 	s = splnet();
1012 	ifp->if_qstart = if_detached_qstart;
1013 	ifp->if_ioctl = if_detached_ioctl;
1014 	ifp->if_watchdog = NULL;
1015 
1016 	/* Remove the watchdog timeout & task */
1017 	timeout_del(&ifp->if_slowtimo);
1018 	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1019 
1020 	/* Remove the link state task */
1021 	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1022 
1023 	rti_delete(ifp);
1024 #if NETHER > 0 && defined(NFSCLIENT)
1025 	if (ifp->if_index == revarp_ifidx)
1026 		revarp_ifidx = 0;
1027 #endif
1028 #ifdef MROUTING
1029 	vif_delete(ifp);
1030 #endif
1031 	in_ifdetach(ifp);
1032 #ifdef INET6
1033 	in6_ifdetach(ifp);
1034 #endif
1035 #if NPF > 0
1036 	pfi_detach_ifnet(ifp);
1037 #endif
1038 
1039 	/* Remove the interface from the list of all interfaces.  */
1040 	TAILQ_REMOVE(&ifnet, ifp, if_list);
1041 
1042 	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
1043 		if_delgroup(ifp, ifg->ifgl_group->ifg_group);
1044 
1045 	if_free_sadl(ifp);
1046 
1047 	/* We should not have any address left at this point. */
1048 	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
1049 #ifdef DIAGNOSTIC
1050 		printf("%s: address list non empty\n", ifp->if_xname);
1051 #endif
1052 		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
1053 			ifa_del(ifp, ifa);
1054 			ifa->ifa_ifp = NULL;
1055 			ifafree(ifa);
1056 		}
1057 	}
1058 
1059 	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
1060 	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
1061 	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));
1062 
1063 	for (i = 0; (dp = domains[i]) != NULL; i++) {
1064 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
1065 			(*dp->dom_ifdetach)(ifp,
1066 			    ifp->if_afdata[dp->dom_family]);
1067 	}
1068 
1069 	/* Announce that the interface is gone. */
1070 	rtm_ifannounce(ifp, IFAN_DEPARTURE);
1071 	splx(s);
1072 	NET_UNLOCK();
1073 
1074 	if (ifp->if_counters != NULL)
1075 		if_counters_free(ifp);
1076 
1077 	for (i = 0; i < ifp->if_nifqs; i++)
1078 		ifq_destroy(ifp->if_ifqs[i]);
1079 	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
1080 		for (i = 1; i < ifp->if_nifqs; i++) {
1081 			free(ifp->if_ifqs[i], M_DEVBUF,
1082 			    sizeof(struct ifqueue));
1083 		}
1084 		free(ifp->if_ifqs, M_DEVBUF,
1085 		    sizeof(struct ifqueue *) * ifp->if_nifqs);
1086 	}
1087 
1088 	for (i = 0; i < ifp->if_niqs; i++)
1089 		ifiq_destroy(ifp->if_iqs[i]);
1090 	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
1091 		for (i = 1; i < ifp->if_niqs; i++) {
1092 			free(ifp->if_iqs[i], M_DEVBUF,
1093 			    sizeof(struct ifiqueue));
1094 		}
1095 		free(ifp->if_iqs, M_DEVBUF,
1096 		    sizeof(struct ifiqueue *) * ifp->if_niqs);
1097 	}
1098 }
1099 
1100 /*
1101  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1102  */
1103 int
1104 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1105 {
1106 	struct ifnet *ifp;
1107 	int connected = 0;
1108 
1109 	ifp = if_get(ifidx);
1110 	if (ifp == NULL)
1111 		return (0);
1112 
1113 	if (ifp0->if_index == ifp->if_index)
1114 		connected = 1;
1115 
1116 #if NBRIDGE > 0
1117 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1118 		connected = 1;
1119 #endif
1120 #if NCARP > 0
1121 	if ((ifp0->if_type == IFT_CARP &&
1122 	    ifp0->if_carpdevidx == ifp->if_index) ||
1123 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1124 		connected = 1;
1125 #endif
1126 
1127 	if_put(ifp);
1128 	return (connected);
1129 }
1130 
1131 /*
1132  * Create a clone network interface.
1133  */
1134 int
1135 if_clone_create(const char *name, int rdomain)
1136 {
1137 	struct if_clone *ifc;
1138 	struct ifnet *ifp;
1139 	int unit, ret;
1140 
1141 	ifc = if_clone_lookup(name, &unit);
1142 	if (ifc == NULL)
1143 		return (EINVAL);
1144 
1145 	rw_enter_write(&if_cloners_lock);
1146 
1147 	if (ifunit(name) != NULL) {
1148 		ret = EEXIST;
1149 		goto unlock;
1150 	}
1151 
1152 	ret = (*ifc->ifc_create)(ifc, unit);
1153 
1154 	if (ret != 0 || (ifp = ifunit(name)) == NULL)
1155 		goto unlock;
1156 
1157 	NET_LOCK();
1158 	if_addgroup(ifp, ifc->ifc_name);
1159 	if (rdomain != 0)
1160 		if_setrdomain(ifp, rdomain);
1161 	NET_UNLOCK();
1162 unlock:
1163 	rw_exit_write(&if_cloners_lock);
1164 
1165 	return (ret);
1166 }
1167 
1168 /*
1169  * Destroy a clone network interface.
1170  */
1171 int
1172 if_clone_destroy(const char *name)
1173 {
1174 	struct if_clone *ifc;
1175 	struct ifnet *ifp;
1176 	int ret;
1177 
1178 	ifc = if_clone_lookup(name, NULL);
1179 	if (ifc == NULL)
1180 		return (EINVAL);
1181 
1182 	if (ifc->ifc_destroy == NULL)
1183 		return (EOPNOTSUPP);
1184 
1185 	rw_enter_write(&if_cloners_lock);
1186 
1187 	ifp = ifunit(name);
1188 	if (ifp == NULL) {
1189 		rw_exit_write(&if_cloners_lock);
1190 		return (ENXIO);
1191 	}
1192 
1193 	NET_LOCK();
1194 	if (ifp->if_flags & IFF_UP) {
1195 		int s;
1196 		s = splnet();
1197 		if_down(ifp);
1198 		splx(s);
1199 	}
1200 	NET_UNLOCK();
1201 	ret = (*ifc->ifc_destroy)(ifp);
1202 
1203 	rw_exit_write(&if_cloners_lock);
1204 
1205 	return (ret);
1206 }
1207 
1208 /*
1209  * Look up a network interface cloner.
1210  */
1211 struct if_clone *
1212 if_clone_lookup(const char *name, int *unitp)
1213 {
1214 	struct if_clone *ifc;
1215 	const char *cp;
1216 	int unit;
1217 
1218 	/* separate interface name from unit */
1219 	for (cp = name;
1220 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1221 	    cp++)
1222 		continue;
1223 
1224 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1225 		return (NULL);	/* No name or unit number */
1226 
1227 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1228 		return (NULL);	/* unit number 0 padded */
1229 
1230 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1231 		if (strlen(ifc->ifc_name) == cp - name &&
1232 		    !strncmp(name, ifc->ifc_name, cp - name))
1233 			break;
1234 	}
1235 
1236 	if (ifc == NULL)
1237 		return (NULL);
1238 
1239 	unit = 0;
1240 	while (cp - name < IFNAMSIZ && *cp) {
1241 		if (*cp < '0' || *cp > '9' ||
1242 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1243 			/* Bogus unit number. */
1244 			return (NULL);
1245 		}
1246 		unit = (unit * 10) + (*cp++ - '0');
1247 	}
1248 
1249 	if (unitp != NULL)
1250 		*unitp = unit;
1251 	return (ifc);
1252 }
1253 
1254 /*
1255  * Register a network interface cloner.
1256  */
1257 void
1258 if_clone_attach(struct if_clone *ifc)
1259 {
1260 	/*
1261 	 * we are called at kernel boot by main(), when pseudo devices are
1262 	 * being attached. The main() is the only guy which may alter the
1263 	 * if_cloners. While system is running and main() is done with
1264 	 * initialization, the if_cloners becomes immutable.
1265 	 */
1266 	KASSERT(pdevinit_done == 0);
1267 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1268 	if_cloners_count++;
1269 }
1270 
1271 /*
1272  * Provide list of interface cloners to userspace.
1273  */
1274 int
1275 if_clone_list(struct if_clonereq *ifcr)
1276 {
1277 	char outbuf[IFNAMSIZ], *dst;
1278 	struct if_clone *ifc;
1279 	int count, error = 0;
1280 
1281 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1282 		/* Just asking how many there are. */
1283 		ifcr->ifcr_total = if_cloners_count;
1284 		return (0);
1285 	}
1286 
1287 	if (ifcr->ifcr_count < 0)
1288 		return (EINVAL);
1289 
1290 	ifcr->ifcr_total = if_cloners_count;
1291 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1292 
1293 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1294 		if (count == 0)
1295 			break;
1296 		bzero(outbuf, sizeof outbuf);
1297 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1298 		error = copyout(outbuf, dst, IFNAMSIZ);
1299 		if (error)
1300 			break;
1301 		count--;
1302 		dst += IFNAMSIZ;
1303 	}
1304 
1305 	return (error);
1306 }
1307 
1308 /*
1309  * set queue congestion marker
1310  */
1311 void
1312 if_congestion(void)
1313 {
1314 	extern int ticks;
1315 
1316 	ifq_congestion = ticks;
1317 }
1318 
/*
 * Returns non-zero while within hz/100 ticks of the last call to
 * if_congestion().
 */
int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	if (diff < 0) {
		/* Tick counter wrapped; reset the marker into the past. */
		ifq_congestion = ticks - hz;
		return (0);
	}

	return (diff <= (hz / 100));
}
1333 
/* Byte-wise sockaddr comparison over the first argument's sa_len. */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1337 
1338 /*
1339  * Locate an interface based on a complete address.
1340  */
1341 struct ifaddr *
1342 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1343 {
1344 	struct ifnet *ifp;
1345 	struct ifaddr *ifa;
1346 	u_int rdomain;
1347 
1348 	rdomain = rtable_l2(rtableid);
1349 	KERNEL_LOCK();
1350 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1351 		if (ifp->if_rdomain != rdomain)
1352 			continue;
1353 
1354 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1355 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1356 				continue;
1357 
1358 			if (equal(addr, ifa->ifa_addr)) {
1359 				KERNEL_UNLOCK();
1360 				return (ifa);
1361 			}
1362 		}
1363 	}
1364 	KERNEL_UNLOCK();
1365 	return (NULL);
1366 }
1367 
1368 /*
1369  * Locate the point to point interface with a given destination address.
1370  */
1371 struct ifaddr *
1372 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1373 {
1374 	struct ifnet *ifp;
1375 	struct ifaddr *ifa;
1376 
1377 	rdomain = rtable_l2(rdomain);
1378 	KERNEL_LOCK();
1379 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1380 		if (ifp->if_rdomain != rdomain)
1381 			continue;
1382 		if (ifp->if_flags & IFF_POINTOPOINT) {
1383 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1384 				if (ifa->ifa_addr->sa_family !=
1385 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1386 					continue;
1387 				if (equal(addr, ifa->ifa_dstaddr)) {
1388 					KERNEL_UNLOCK();
1389 					return (ifa);
1390 				}
1391 			}
1392 		}
1393 	}
1394 	KERNEL_UNLOCK();
1395 	return (NULL);
1396 }
1397 
1398 /*
1399  * Find an interface address specific to an interface best matching
1400  * a given address.
1401  */
1402 struct ifaddr *
1403 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1404 {
1405 	struct ifaddr *ifa;
1406 	char *cp, *cp2, *cp3;
1407 	char *cplim;
1408 	struct ifaddr *ifa_maybe = NULL;
1409 	u_int af = addr->sa_family;
1410 
1411 	if (af >= AF_MAX)
1412 		return (NULL);
1413 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1414 		if (ifa->ifa_addr->sa_family != af)
1415 			continue;
1416 		if (ifa_maybe == NULL)
1417 			ifa_maybe = ifa;
1418 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1419 			if (equal(addr, ifa->ifa_addr) ||
1420 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1421 				return (ifa);
1422 			continue;
1423 		}
1424 		cp = addr->sa_data;
1425 		cp2 = ifa->ifa_addr->sa_data;
1426 		cp3 = ifa->ifa_netmask->sa_data;
1427 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1428 		for (; cp3 < cplim; cp3++)
1429 			if ((*cp++ ^ *cp2++) & *cp3)
1430 				break;
1431 		if (cp3 == cplim)
1432 			return (ifa);
1433 	}
1434 	return (ifa_maybe);
1435 }
1436 
/* No-op if_rtrequest handler for interfaces with no special needs. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1441 
1442 /*
1443  * Default action when installing a local route on a point-to-point
1444  * interface.
1445  */
1446 void
1447 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1448 {
1449 	struct ifnet *lo0ifp;
1450 	struct ifaddr *ifa, *lo0ifa;
1451 
1452 	switch (req) {
1453 	case RTM_ADD:
1454 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1455 			break;
1456 
1457 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1458 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1459 			    rt_key(rt)->sa_len) == 0)
1460 				break;
1461 		}
1462 
1463 		if (ifa == NULL)
1464 			break;
1465 
1466 		KASSERT(ifa == rt->rt_ifa);
1467 
1468 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1469 		KASSERT(lo0ifp != NULL);
1470 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1471 			if (lo0ifa->ifa_addr->sa_family ==
1472 			    ifa->ifa_addr->sa_family)
1473 				break;
1474 		}
1475 		if_put(lo0ifp);
1476 
1477 		if (lo0ifa == NULL)
1478 			break;
1479 
1480 		rt->rt_flags &= ~RTF_LLINFO;
1481 		break;
1482 	case RTM_DELETE:
1483 	case RTM_RESOLVE:
1484 	default:
1485 		break;
1486 	}
1487 }
1488 
1489 
1490 /*
1491  * Bring down all interfaces
1492  */
1493 void
1494 if_downall(void)
1495 {
1496 	struct ifreq ifrq;	/* XXX only partly built */
1497 	struct ifnet *ifp;
1498 
1499 	NET_LOCK();
1500 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1501 		if ((ifp->if_flags & IFF_UP) == 0)
1502 			continue;
1503 		if_down(ifp);
1504 		ifrq.ifr_flags = ifp->if_flags;
1505 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1506 	}
1507 	NET_UNLOCK();
1508 }
1509 
1510 /*
1511  * Mark an interface down and notify protocols of
1512  * the transition.
1513  */
1514 void
1515 if_down(struct ifnet *ifp)
1516 {
1517 	NET_ASSERT_LOCKED();
1518 
1519 	ifp->if_flags &= ~IFF_UP;
1520 	getmicrotime(&ifp->if_lastchange);
1521 	ifq_purge(&ifp->if_snd);
1522 
1523 	if_linkstate(ifp);
1524 }
1525 
1526 /*
1527  * Mark an interface up and notify protocols of
1528  * the transition.
1529  */
1530 void
1531 if_up(struct ifnet *ifp)
1532 {
1533 	NET_ASSERT_LOCKED();
1534 
1535 	ifp->if_flags |= IFF_UP;
1536 	getmicrotime(&ifp->if_lastchange);
1537 
1538 #ifdef INET6
1539 	/* Userland expects the kernel to set ::1 on default lo(4). */
1540 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1541 		in6_ifattach(ifp);
1542 #endif
1543 
1544 	if_linkstate(ifp);
1545 }
1546 
1547 /*
1548  * Notify userland, the routing table and hooks owner of
1549  * a link-state transition.
1550  */
1551 void
1552 if_linkstate_task(void *xifidx)
1553 {
1554 	unsigned int ifidx = (unsigned long)xifidx;
1555 	struct ifnet *ifp;
1556 
1557 	KERNEL_LOCK();
1558 	NET_LOCK();
1559 
1560 	ifp = if_get(ifidx);
1561 	if (ifp != NULL)
1562 		if_linkstate(ifp);
1563 	if_put(ifp);
1564 
1565 	NET_UNLOCK();
1566 	KERNEL_UNLOCK();
1567 }
1568 
/*
 * Report a link-state change: routing socket message, route tracking
 * and the interface's registered link-state hooks, in that order.
 * The caller must hold the net lock.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	rtm_ifchg(ifp);
	rt_if_track(ifp);

	if_hooks_run(&ifp->if_linkstatehooks);
}
1579 
/* Register ``t'' to run on link-state changes of ``ifp''. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1587 
/* Remove a previously registered link-state hook from ``ifp''. */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1595 
1596 /*
1597  * Schedule a link state change task.
1598  */
1599 void
1600 if_link_state_change(struct ifnet *ifp)
1601 {
1602 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1603 }
1604 
1605 /*
1606  * Handle interface watchdog timer routine.  Called
1607  * from softclock, we decrement timer (if set) and
1608  * call the appropriate interface routine on expiration.
1609  */
1610 void
1611 if_slowtimo(void *arg)
1612 {
1613 	struct ifnet *ifp = arg;
1614 	int s = splnet();
1615 
1616 	if (ifp->if_watchdog) {
1617 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1618 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1619 		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
1620 	}
1621 	splx(s);
1622 }
1623 
/*
 * Task body that invokes a driver's watchdog routine.  The interface
 * index is passed through the task argument so a vanished interface
 * is detected via if_get() instead of being dereferenced.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1644 
1645 /*
1646  * Map interface name to interface structure pointer.
1647  */
1648 struct ifnet *
1649 ifunit(const char *name)
1650 {
1651 	struct ifnet *ifp;
1652 
1653 	KERNEL_ASSERT_LOCKED();
1654 
1655 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1656 		if (strcmp(ifp->if_xname, name) == 0)
1657 			return (ifp);
1658 	}
1659 	return (NULL);
1660 }
1661 
1662 /*
1663  * Map interface index to interface structure pointer.
1664  */
1665 struct ifnet *
1666 if_get(unsigned int index)
1667 {
1668 	struct srp_ref sr;
1669 	struct if_map *if_map;
1670 	struct srp *map;
1671 	struct ifnet *ifp = NULL;
1672 
1673 	if_map = srp_enter(&sr, &if_idxmap.map);
1674 	if (index < if_map->limit) {
1675 		map = (struct srp *)(if_map + 1);
1676 
1677 		ifp = srp_follow(&sr, &map[index]);
1678 		if (ifp != NULL) {
1679 			KASSERT(ifp->if_index == index);
1680 			if_ref(ifp);
1681 		}
1682 	}
1683 	srp_leave(&sr);
1684 
1685 	return (ifp);
1686 }
1687 
/* Take an additional reference on ``ifp'' and return it for chaining. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1695 
/* Release a reference taken with if_get()/if_ref(); NULL is a no-op. */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1704 
/*
 * Set the link-layer (Ethernet) address of ``ifp''.
 *
 * NOTE(review): the cast assumes every caller passes an Ethernet-like
 * interface embedded in a struct arpcom — confirm at the call sites
 * (ifioctl() restricts this to IFT_ETHER/IFT_CARP/IFT_XETHER/
 * IFT_ISO88025).
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1716 
/*
 * Create routing domain ``rdomain'' and its loopback interface.
 *
 * The rdomain's routing table is created if missing and must be
 * empty; a matching lo(4) with unit == rdomain is cloned and becomes
 * the domain's loopback.  ``ifp'' is the interface being moved, used
 * only to tolerate EEXIST when it is itself that loopback.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when the pre-existing loopback is ifp itself. */
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1743 
/*
 * Move ``ifp'' into routing domain ``rdomain''.
 *
 * Refuses to move a domain's own loopback (EPERM), non-existent
 * tables (ESRCH) and tables that are not rdomain roots (EINVAL).
 * On an actual change the interface is torn down (routes, multicast
 * routing, INET/INET6 state) and, if it was up, brought back up so
 * all address configuration is recreated in the new domain.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1807 
1808 /*
1809  * Interface ioctls.
1810  */
1811 int
1812 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1813 {
1814 	struct ifnet *ifp;
1815 	struct ifreq *ifr = (struct ifreq *)data;
1816 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1817 	struct if_afreq *ifar = (struct if_afreq *)data;
1818 	char ifdescrbuf[IFDESCRSIZE];
1819 	char ifrtlabelbuf[RTLABEL_LEN];
1820 	int s, error = 0, oif_xflags;
1821 	size_t bytesdone;
1822 	unsigned short oif_flags;
1823 
1824 	switch (cmd) {
1825 	case SIOCIFCREATE:
1826 		if ((error = suser(p)) != 0)
1827 			return (error);
1828 		error = if_clone_create(ifr->ifr_name, 0);
1829 		return (error);
1830 	case SIOCIFDESTROY:
1831 		if ((error = suser(p)) != 0)
1832 			return (error);
1833 		error = if_clone_destroy(ifr->ifr_name);
1834 		return (error);
1835 	case SIOCSIFGATTR:
1836 		if ((error = suser(p)) != 0)
1837 			return (error);
1838 		NET_LOCK();
1839 		error = if_setgroupattribs(data);
1840 		NET_UNLOCK();
1841 		return (error);
1842 	case SIOCGIFCONF:
1843 	case SIOCIFGCLONERS:
1844 	case SIOCGIFGMEMB:
1845 	case SIOCGIFGATTR:
1846 	case SIOCGIFGLIST:
1847 	case SIOCGIFFLAGS:
1848 	case SIOCGIFXFLAGS:
1849 	case SIOCGIFMETRIC:
1850 	case SIOCGIFMTU:
1851 	case SIOCGIFHARDMTU:
1852 	case SIOCGIFDATA:
1853 	case SIOCGIFDESCR:
1854 	case SIOCGIFRTLABEL:
1855 	case SIOCGIFPRIORITY:
1856 	case SIOCGIFRDOMAIN:
1857 	case SIOCGIFGROUP:
1858 	case SIOCGIFLLPRIO:
1859 		return (ifioctl_get(cmd, data));
1860 	}
1861 
1862 	ifp = ifunit(ifr->ifr_name);
1863 	if (ifp == NULL)
1864 		return (ENXIO);
1865 	oif_flags = ifp->if_flags;
1866 	oif_xflags = ifp->if_xflags;
1867 
1868 	switch (cmd) {
1869 	case SIOCIFAFATTACH:
1870 	case SIOCIFAFDETACH:
1871 		if ((error = suser(p)) != 0)
1872 			break;
1873 		NET_LOCK();
1874 		switch (ifar->ifar_af) {
1875 		case AF_INET:
1876 			/* attach is a noop for AF_INET */
1877 			if (cmd == SIOCIFAFDETACH)
1878 				in_ifdetach(ifp);
1879 			break;
1880 #ifdef INET6
1881 		case AF_INET6:
1882 			if (cmd == SIOCIFAFATTACH)
1883 				error = in6_ifattach(ifp);
1884 			else
1885 				in6_ifdetach(ifp);
1886 			break;
1887 #endif /* INET6 */
1888 		default:
1889 			error = EAFNOSUPPORT;
1890 		}
1891 		NET_UNLOCK();
1892 		break;
1893 
1894 	case SIOCSIFFLAGS:
1895 		if ((error = suser(p)) != 0)
1896 			break;
1897 
1898 		NET_LOCK();
1899 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1900 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1901 
1902 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1903 		if (error != 0) {
1904 			ifp->if_flags = oif_flags;
1905 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
1906 			s = splnet();
1907 			if (ISSET(ifp->if_flags, IFF_UP))
1908 				if_up(ifp);
1909 			else
1910 				if_down(ifp);
1911 			splx(s);
1912 		}
1913 		NET_UNLOCK();
1914 		break;
1915 
1916 	case SIOCSIFXFLAGS:
1917 		if ((error = suser(p)) != 0)
1918 			break;
1919 
1920 		NET_LOCK();
1921 #ifdef INET6
1922 		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
1923 			error = in6_ifattach(ifp);
1924 			if (error != 0) {
1925 				NET_UNLOCK();
1926 				break;
1927 			}
1928 		}
1929 
1930 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1931 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
1932 			ifp->if_xflags |= IFXF_INET6_NOSOII;
1933 
1934 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1935 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
1936 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
1937 
1938 #endif	/* INET6 */
1939 
1940 #ifdef MPLS
1941 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
1942 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
1943 			s = splnet();
1944 			ifp->if_xflags |= IFXF_MPLS;
1945 			ifp->if_ll_output = ifp->if_output;
1946 			ifp->if_output = mpls_output;
1947 			splx(s);
1948 		}
1949 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
1950 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
1951 			s = splnet();
1952 			ifp->if_xflags &= ~IFXF_MPLS;
1953 			ifp->if_output = ifp->if_ll_output;
1954 			ifp->if_ll_output = NULL;
1955 			splx(s);
1956 		}
1957 #endif	/* MPLS */
1958 
1959 #ifndef SMALL_KERNEL
1960 		if (ifp->if_capabilities & IFCAP_WOL) {
1961 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
1962 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
1963 				s = splnet();
1964 				ifp->if_xflags |= IFXF_WOL;
1965 				error = ifp->if_wol(ifp, 1);
1966 				splx(s);
1967 			}
1968 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
1969 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
1970 				s = splnet();
1971 				ifp->if_xflags &= ~IFXF_WOL;
1972 				error = ifp->if_wol(ifp, 0);
1973 				splx(s);
1974 			}
1975 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
1976 			ifr->ifr_flags &= ~IFXF_WOL;
1977 			error = ENOTSUP;
1978 		}
1979 #endif
1980 
1981 		if (error == 0)
1982 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
1983 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
1984 		NET_UNLOCK();
1985 		break;
1986 
1987 	case SIOCSIFMETRIC:
1988 		if ((error = suser(p)) != 0)
1989 			break;
1990 		NET_LOCK();
1991 		ifp->if_metric = ifr->ifr_metric;
1992 		NET_UNLOCK();
1993 		break;
1994 
1995 	case SIOCSIFMTU:
1996 		if ((error = suser(p)) != 0)
1997 			break;
1998 		NET_LOCK();
1999 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2000 		NET_UNLOCK();
2001 		if (!error)
2002 			rtm_ifchg(ifp);
2003 		break;
2004 
2005 	case SIOCSIFDESCR:
2006 		if ((error = suser(p)) != 0)
2007 			break;
2008 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2009 		    IFDESCRSIZE, &bytesdone);
2010 		if (error == 0) {
2011 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2012 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2013 		}
2014 		break;
2015 
2016 	case SIOCSIFRTLABEL:
2017 		if ((error = suser(p)) != 0)
2018 			break;
2019 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2020 		    RTLABEL_LEN, &bytesdone);
2021 		if (error == 0) {
2022 			rtlabel_unref(ifp->if_rtlabelid);
2023 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2024 		}
2025 		break;
2026 
2027 	case SIOCSIFPRIORITY:
2028 		if ((error = suser(p)) != 0)
2029 			break;
2030 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2031 			error = EINVAL;
2032 			break;
2033 		}
2034 		ifp->if_priority = ifr->ifr_metric;
2035 		break;
2036 
2037 	case SIOCSIFRDOMAIN:
2038 		if ((error = suser(p)) != 0)
2039 			break;
2040 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2041 		if (!error || error == EEXIST) {
2042 			NET_LOCK();
2043 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2044 			NET_UNLOCK();
2045 		}
2046 		break;
2047 
2048 	case SIOCAIFGROUP:
2049 		if ((error = suser(p)))
2050 			break;
2051 		NET_LOCK();
2052 		error = if_addgroup(ifp, ifgr->ifgr_group);
2053 		if (error == 0) {
2054 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2055 			if (error == ENOTTY)
2056 				error = 0;
2057 		}
2058 		NET_UNLOCK();
2059 		break;
2060 
2061 	case SIOCDIFGROUP:
2062 		if ((error = suser(p)))
2063 			break;
2064 		NET_LOCK();
2065 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2066 		if (error == ENOTTY)
2067 			error = 0;
2068 		if (error == 0)
2069 			error = if_delgroup(ifp, ifgr->ifgr_group);
2070 		NET_UNLOCK();
2071 		break;
2072 
2073 	case SIOCSIFLLADDR:
2074 		if ((error = suser(p)))
2075 			break;
2076 		if ((ifp->if_sadl == NULL) ||
2077 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2078 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2079 			error = EINVAL;
2080 			break;
2081 		}
2082 		NET_LOCK();
2083 		switch (ifp->if_type) {
2084 		case IFT_ETHER:
2085 		case IFT_CARP:
2086 		case IFT_XETHER:
2087 		case IFT_ISO88025:
2088 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2089 			if (error == ENOTTY)
2090 				error = 0;
2091 			if (error == 0)
2092 				error = if_setlladdr(ifp,
2093 				    ifr->ifr_addr.sa_data);
2094 			break;
2095 		default:
2096 			error = ENODEV;
2097 		}
2098 
2099 		if (error == 0)
2100 			ifnewlladdr(ifp);
2101 		NET_UNLOCK();
2102 		break;
2103 
2104 	case SIOCSIFLLPRIO:
2105 		if ((error = suser(p)))
2106 			break;
2107 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2108 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2109 			error = EINVAL;
2110 			break;
2111 		}
2112 		NET_LOCK();
2113 		ifp->if_llprio = ifr->ifr_llprio;
2114 		NET_UNLOCK();
2115 		break;
2116 
2117 	case SIOCGIFSFFPAGE:
2118 		error = suser(p);
2119 		if (error != 0)
2120 			break;
2121 
2122 		error = if_sffpage_check(data);
2123 		if (error != 0)
2124 			break;
2125 
2126 		/* don't take NET_LOCK because i2c reads take a long time */
2127 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2128 		break;
2129 
2130 	case SIOCSETKALIVE:
2131 	case SIOCDIFPHYADDR:
2132 	case SIOCSLIFPHYADDR:
2133 	case SIOCSLIFPHYRTABLE:
2134 	case SIOCSLIFPHYTTL:
2135 	case SIOCSLIFPHYDF:
2136 	case SIOCSLIFPHYECN:
2137 	case SIOCADDMULTI:
2138 	case SIOCDELMULTI:
2139 	case SIOCSIFMEDIA:
2140 	case SIOCSVNETID:
2141 	case SIOCDVNETID:
2142 	case SIOCSVNETFLOWID:
2143 	case SIOCSTXHPRIO:
2144 	case SIOCSRXHPRIO:
2145 	case SIOCSIFPAIR:
2146 	case SIOCSIFPARENT:
2147 	case SIOCDIFPARENT:
2148 	case SIOCSETMPWCFG:
2149 	case SIOCSETLABEL:
2150 	case SIOCDELLABEL:
2151 	case SIOCSPWE3CTRLWORD:
2152 	case SIOCSPWE3FAT:
2153 	case SIOCSPWE3NEIGHBOR:
2154 	case SIOCDPWE3NEIGHBOR:
2155 #if NBRIDGE > 0
2156 	case SIOCBRDGADD:
2157 	case SIOCBRDGDEL:
2158 	case SIOCBRDGSIFFLGS:
2159 	case SIOCBRDGSCACHE:
2160 	case SIOCBRDGADDS:
2161 	case SIOCBRDGDELS:
2162 	case SIOCBRDGSADDR:
2163 	case SIOCBRDGSTO:
2164 	case SIOCBRDGDADDR:
2165 	case SIOCBRDGFLUSH:
2166 	case SIOCBRDGADDL:
2167 	case SIOCBRDGSIFPROT:
2168 	case SIOCBRDGARL:
2169 	case SIOCBRDGFRL:
2170 	case SIOCBRDGSPRI:
2171 	case SIOCBRDGSHT:
2172 	case SIOCBRDGSFD:
2173 	case SIOCBRDGSMA:
2174 	case SIOCBRDGSIFPRIO:
2175 	case SIOCBRDGSIFCOST:
2176 	case SIOCBRDGSTXHC:
2177 	case SIOCBRDGSPROTO:
2178 	case SIOCSWSPORTNO:
2179 #endif
2180 		if ((error = suser(p)) != 0)
2181 			break;
2182 		/* FALLTHROUGH */
2183 	default:
2184 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
2185 			(struct mbuf *) cmd, (struct mbuf *) data,
2186 			(struct mbuf *) ifp, p));
2187 		if (error != EOPNOTSUPP)
2188 			break;
2189 		switch (cmd) {
2190 		case SIOCAIFADDR:
2191 		case SIOCDIFADDR:
2192 		case SIOCSIFADDR:
2193 		case SIOCSIFNETMASK:
2194 		case SIOCSIFDSTADDR:
2195 		case SIOCSIFBRDADDR:
2196 #ifdef INET6
2197 		case SIOCAIFADDR_IN6:
2198 		case SIOCDIFADDR_IN6:
2199 #endif
2200 			error = suser(p);
2201 			break;
2202 		default:
2203 			error = 0;
2204 			break;
2205 		}
2206 		if (error)
2207 			break;
2208 		NET_LOCK();
2209 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2210 		NET_UNLOCK();
2211 		break;
2212 	}
2213 
2214 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
2215 		rtm_ifchg(ifp);
2216 
2217 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2218 		getmicrotime(&ifp->if_lastchange);
2219 
2220 	return (error);
2221 }
2222 
/*
 * Handle all read-only interface ioctls.
 *
 * Runs unprivileged under the shared (read) net lock; ifioctl() only
 * dispatches the SIOCG* requests listed in its early switch here, so
 * an unknown ``cmd'' is a programming error and panics.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Requests that are not about one specific interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK_IN_IOCTL();
		error = ifconf(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupmembers(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupattribs(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK_IN_IOCTL();
		error = if_getgrouplist(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK_IN_IOCTL();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Kernel-internal xflags are hidden from userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK_IN_IOCTL();

	return (error);
}
2336 
2337 static int
2338 if_sffpage_check(const caddr_t data)
2339 {
2340 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2341 
2342 	switch (sff->sff_addr) {
2343 	case IFSFF_ADDR_EEPROM:
2344 	case IFSFF_ADDR_DDM:
2345 		break;
2346 	default:
2347 		return (EINVAL);
2348 	}
2349 
2350 	return (0);
2351 }
2352 
2353 int
2354 if_txhprio_l2_check(int hdrprio)
2355 {
2356 	switch (hdrprio) {
2357 	case IF_HDRPRIO_PACKET:
2358 		return (0);
2359 	default:
2360 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2361 			return (0);
2362 		break;
2363 	}
2364 
2365 	return (EINVAL);
2366 }
2367 
2368 int
2369 if_txhprio_l3_check(int hdrprio)
2370 {
2371 	switch (hdrprio) {
2372 	case IF_HDRPRIO_PACKET:
2373 	case IF_HDRPRIO_PAYLOAD:
2374 		return (0);
2375 	default:
2376 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2377 			return (0);
2378 		break;
2379 	}
2380 
2381 	return (EINVAL);
2382 }
2383 
2384 int
2385 if_rxhprio_l2_check(int hdrprio)
2386 {
2387 	switch (hdrprio) {
2388 	case IF_HDRPRIO_PACKET:
2389 	case IF_HDRPRIO_OUTER:
2390 		return (0);
2391 	default:
2392 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2393 			return (0);
2394 		break;
2395 	}
2396 
2397 	return (EINVAL);
2398 }
2399 
2400 int
2401 if_rxhprio_l3_check(int hdrprio)
2402 {
2403 	switch (hdrprio) {
2404 	case IF_HDRPRIO_PACKET:
2405 	case IF_HDRPRIO_PAYLOAD:
2406 	case IF_HDRPRIO_OUTER:
2407 		return (0);
2408 	default:
2409 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2410 			return (0);
2411 		break;
2412 	}
2413 
2414 	return (EINVAL);
2415 }
2416 
2417 /*
2418  * Return interface configuration
2419  * of system.  List may be used
2420  * in later ioctl's (above) to get
2421  * other information.
2422  */
2423 int
2424 ifconf(caddr_t data)
2425 {
2426 	struct ifconf *ifc = (struct ifconf *)data;
2427 	struct ifnet *ifp;
2428 	struct ifaddr *ifa;
2429 	struct ifreq ifr, *ifrp;
2430 	int space = ifc->ifc_len, error = 0;
2431 
2432 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2433 	if (space == 0) {
2434 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2435 			struct sockaddr *sa;
2436 
2437 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2438 				space += sizeof (ifr);
2439 			else
2440 				TAILQ_FOREACH(ifa,
2441 				    &ifp->if_addrlist, ifa_list) {
2442 					sa = ifa->ifa_addr;
2443 					if (sa->sa_len > sizeof(*sa))
2444 						space += sa->sa_len -
2445 						    sizeof(*sa);
2446 					space += sizeof(ifr);
2447 				}
2448 		}
2449 		ifc->ifc_len = space;
2450 		return (0);
2451 	}
2452 
2453 	ifrp = ifc->ifc_req;
2454 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2455 		if (space < sizeof(ifr))
2456 			break;
2457 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2458 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2459 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2460 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2461 			    sizeof(ifr));
2462 			if (error)
2463 				break;
2464 			space -= sizeof (ifr), ifrp++;
2465 		} else
2466 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2467 				struct sockaddr *sa = ifa->ifa_addr;
2468 
2469 				if (space < sizeof(ifr))
2470 					break;
2471 				if (sa->sa_len <= sizeof(*sa)) {
2472 					ifr.ifr_addr = *sa;
2473 					error = copyout((caddr_t)&ifr,
2474 					    (caddr_t)ifrp, sizeof (ifr));
2475 					ifrp++;
2476 				} else {
2477 					space -= sa->sa_len - sizeof(*sa);
2478 					if (space < sizeof (ifr))
2479 						break;
2480 					error = copyout((caddr_t)&ifr,
2481 					    (caddr_t)ifrp,
2482 					    sizeof(ifr.ifr_name));
2483 					if (error == 0)
2484 						error = copyout((caddr_t)sa,
2485 						    (caddr_t)&ifrp->ifr_addr,
2486 						    sa->sa_len);
2487 					ifrp = (struct ifreq *)(sa->sa_len +
2488 					    (caddr_t)&ifrp->ifr_addr);
2489 				}
2490 				if (error)
2491 					break;
2492 				space -= sizeof (ifr);
2493 			}
2494 	}
2495 	ifc->ifc_len -= space;
2496 	return (error);
2497 }
2498 
/*
 * Allocate the per-CPU counters for ifp, one slot per ifc_* index.
 * May only be called once per interface.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2506 
/*
 * Release the per-CPU counters allocated by if_counters_alloc() and
 * mark them gone so if_getdata() falls back to if_data only.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2515 
/*
 * Fill *data with a snapshot of ifp's statistics: the legacy if_data
 * fields, plus the per-CPU counters (when allocated) and the per-queue
 * input/output statistics folded on top.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters));

		/* Accumulate on top of if_data, don't overwrite it. */
		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* Fold in the per-output-queue statistics. */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* Fold in the per-input-queue statistics. */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2554 
2555 /*
2556  * Dummy functions replaced in ifnet during detach (if protocols decide to
2557  * fiddle with the if during detach.
2558  */
void
if_detached_qstart(struct ifqueue *ifq)
{
	/* The interface is gone; drop anything still queued for output. */
	ifq_purge(ifq);
}
2564 
2565 int
2566 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2567 {
2568 	return ENODEV;
2569 }
2570 
2571 /*
2572  * Create interface group without members
2573  */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group	*ifg;

	/* Allocation may be called from contexts that cannot sleep. */
	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	/* The caller (if_addgroup) takes the first reference. */
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}
2593 
2594 /*
2595  * Add a group to an interface
2596  */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;

	/*
	 * Reject group names ending in a digit, presumably so they
	 * cannot be confused with interface names like "em0".
	 */
	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	/* Refuse to join the same group twice. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	/* Allocate both list entries up front to simplify unwinding. */
	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		free(ifgl, M_TEMP, sizeof(*ifgl));
		return (ENOMEM);
	}

	/* Find the group; create it on first membership. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
		free(ifgl, M_TEMP, sizeof(*ifgl));
		free(ifgm, M_TEMP, sizeof(*ifgm));
		return (ENOMEM);
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_addmember(groupname, ifp);
#endif

	return (0);
}
2643 
2644 /*
2645  * Remove a group from an interface
2646  */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	/* Find this interface's membership entry for the group. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* Drop the matching member entry on the group side, if any. */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		free(ifgm, M_TEMP, sizeof(*ifgm));
	}

#if NPF > 0
	pfi_group_change(groupname);
#endif

	/* Destroy the group itself when the last member leaves. */
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
	}

	free(ifgl, M_TEMP, sizeof(*ifgl));

	return (0);
}
2686 
2687 /*
2688  * Stores all groups from an interface in memory pointed
2689  * to by data
2690  */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int			 len, error;
	struct ifg_list		*ifgl;
	struct ifg_req		 ifgrq, *ifgp;
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;

	/* A zero length is a size query: report the space needed. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	/* Copy out one ifg_req per group, bounded by the user's buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2722 
2723 /*
2724  * Stores all members of a group in memory pointed to by data
2725  */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Look the group up by name. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* A zero length is a size query: report the space needed. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy out one ifg_req per member, bounded by the user's buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2764 
/*
 * Report the attributes (currently only the carp demotion counter)
 * of the group named in ifgr_name.
 */
int
if_getgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;

	return (0);
}
2781 
/*
 * Adjust the carp demotion counter of the group named in ifgr_name
 * by the (possibly negative) delta in ifg_carp_demoted, and notify
 * every member interface via SIOCSIFGATTR.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* Keep the resulting counter within 0..0xff. */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2808 
2809 /*
2810  * Stores all groups in memory pointed to by data
2811  */
int
if_getgrouplist(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* A zero length is a size query: report the space needed. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy out one ifg_req per group, bounded by the user's buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2843 
/*
 * Called on route changes: if the affected destination/mask denote a
 * default route (0.0.0.0/0 or ::/0), rebuild the "egress" group.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2865 
/*
 * Recompute membership of the "egress" group: remove all current
 * members, then re-add every interface that carries a default route
 * (IPv4, and IPv6 when configured).
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* SAFE iteration: if_delgroup() frees the member entries. */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk all IPv4 default routes (0.0.0.0 in table 0). */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same for IPv6 default routes (::). */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2915 
2916 /*
2917  * Set/clear promiscuous mode on interface ifp based on the truth value
2918  * of pswitch.  The calls are reference counted so that only the first
2919  * "on" request actually has an effect, as does the final "off" request.
2920  * Results are undefined if the "off" and "on" requests are not matched.
2921  */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */

	/* Remember the old state so it can be rolled back on error. */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* Only the first reference actually sets the flag. */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* Only the last reference actually clears the flag. */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/* Interface is down: nothing to tell the driver yet. */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		/* Roll back both the flags and the reference count. */
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
2956 
/* Append an address to ifp's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2962 
/* Remove an address from ifp's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2968 
/*
 * Overwrite ifa's broadcast address in place with sa.  Only fixed
 * length updates are supported; the lengths must match exactly.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
2976 
#ifdef DDB
/* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* Print every IPv4/IPv6 address configured on every interface. */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
3008 
/*
 * Propagate a new link-layer address: bounce the interface through
 * SIOCSIFFLAGS so the driver reinitializes with the new address
 * (NOTE(review): relies on drivers reprogramming the hardware on
 * SIOCSIFFLAGS — confirm per driver), then regenerate the IPv6
 * link-local address derived from it.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring the interface up so the driver picks up the change. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	/* Restore the original administrative state. */
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3055 
/* Register a task to be run when ifp's addresses change. */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3063 
/* Unregister a task added with if_addrhook_add(). */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3071 
/* Run all tasks registered for address changes on ifp. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3077 
3078 void
3079 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3080 {
3081 	extern int ticks;
3082 
3083 	memset(rxr, 0, sizeof(*rxr));
3084 
3085 	rxr->rxr_adjusted = ticks;
3086 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3087 	rxr->rxr_hwm = hwm;
3088 }
3089 
3090 static inline void
3091 if_rxr_adjust_cwm(struct if_rxring *rxr)
3092 {
3093 	extern int ticks;
3094 
3095 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3096 		return;
3097 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3098 		rxr->rxr_cwm++;
3099 
3100 	rxr->rxr_adjusted = ticks;
3101 }
3102 
3103 void
3104 if_rxr_livelocked(struct if_rxring *rxr)
3105 {
3106 	extern int ticks;
3107 
3108 	if (ticks - rxr->rxr_adjusted >= 1) {
3109 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3110 			rxr->rxr_cwm--;
3111 
3112 		rxr->rxr_adjusted = ticks;
3113 	}
3114 }
3115 
/*
 * Grant the caller up to "max" new receive slots, bounded by the
 * current watermark.  Returns the number of slots granted (0 when
 * the ring is already full up to the watermark).
 */
u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	/* Already filled up to the current watermark. */
	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;

	return (diff);
}
3135 
/*
 * Copy up to "t" receive ring descriptions from "e" out to the
 * userland if_rxrinfo at "uifri", limited by the space userland
 * offered, and report the total number available in ifri_total.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* Copy no more entries than userland has room for. */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3158 
/*
 * Convenience wrapper for drivers with a single receive ring: build
 * one if_rxring_info from name/size/rxr and hand it to
 * if_rxr_info_ioctl().
 */
int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}
3175 
3176 /*
3177  * Network stack input queues.
3178  */
3179 
/*
 * Initialise a network stack input queue with the given length limit
 * and the softintr (netisr) to schedule when packets arrive.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3186 
3187 int
3188 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3189 {
3190 	int rv;
3191 
3192 	rv = mq_enqueue(&niq->ni_q, m);
3193 	if (rv == 0)
3194 		schednetisr(niq->ni_isr);
3195 	else
3196 		if_congestion();
3197 
3198 	return (rv);
3199 }
3200 
3201 int
3202 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3203 {
3204 	int rv;
3205 
3206 	rv = mq_enlist(&niq->ni_q, ml);
3207 	if (rv == 0)
3208 		schednetisr(niq->ni_isr);
3209 	else
3210 		if_congestion();
3211 
3212 	return (rv);
3213 }
3214 
/*
 * Catch-all for address families no caller is expected to pass.
 * Reaching here is a programming error, so bring the system down.
 */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3220 
3221 /*
3222  * XXXSMP This tunable is here to work around the fact that IPsec
3223  * globals aren't ready to be accessed by multiple threads in
3224  * parallel.
3225  */
3226 int		 nettaskqs = NET_TASKQ;
3227 
3228 struct taskq *
3229 net_tq(unsigned int ifindex)
3230 {
3231 	struct taskq *t = NULL;
3232 
3233 	t = nettqmp[ifindex % nettaskqs];
3234 
3235 	return (t);
3236 }
3237