xref: /openbsd-src/sys/net/if.c (revision a119297bd5e44b38d77070a02315a6e337fde1cb)
1 /*	$OpenBSD: if.c,v 1.629 2021/02/11 20:28:01 mvs Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "switch.h"
72 #include "if_wg.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/timeout.h>
80 #include <sys/protosw.h>
81 #include <sys/kernel.h>
82 #include <sys/ioctl.h>
83 #include <sys/domain.h>
84 #include <sys/task.h>
85 #include <sys/atomic.h>
86 #include <sys/percpu.h>
87 #include <sys/proc.h>
88 #include <sys/stdint.h>	/* uintptr_t */
89 #include <sys/rwlock.h>
90 
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/netisr.h>
96 
97 #include <netinet/in.h>
98 #include <netinet/if_ether.h>
99 #include <netinet/igmp.h>
100 #ifdef MROUTING
101 #include <netinet/ip_mroute.h>
102 #endif
103 
104 #ifdef INET6
105 #include <netinet6/in6_var.h>
106 #include <netinet6/in6_ifattach.h>
107 #include <netinet6/nd6.h>
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 #endif
111 
112 #ifdef MPLS
113 #include <netmpls/mpls.h>
114 #endif
115 
116 #if NBPFILTER > 0
117 #include <net/bpf.h>
118 #endif
119 
120 #if NBRIDGE > 0
121 #include <net/if_bridge.h>
122 #endif
123 
124 #if NCARP > 0
125 #include <netinet/ip_carp.h>
126 #endif
127 
128 #if NPF > 0
129 #include <net/pfvar.h>
130 #endif
131 
132 #include <sys/device.h>
133 
134 void	if_attachsetup(struct ifnet *);
135 void	if_attachdomain(struct ifnet *);
136 void	if_attach_common(struct ifnet *);
137 void	if_remove(struct ifnet *);
138 int	if_createrdomain(int, struct ifnet *);
139 int	if_setrdomain(struct ifnet *, int);
140 void	if_slowtimo(void *);
141 
142 void	if_detached_qstart(struct ifqueue *);
143 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
144 
145 int	ifioctl_get(u_long, caddr_t);
146 int	ifconf(caddr_t);
147 static int
148 	if_sffpage_check(const caddr_t);
149 
150 int	if_getgroup(caddr_t, struct ifnet *);
151 int	if_getgroupmembers(caddr_t);
152 int	if_getgroupattribs(caddr_t);
153 int	if_setgroupattribs(caddr_t);
154 int	if_getgrouplist(caddr_t);
155 
156 void	if_linkstate(struct ifnet *);
157 void	if_linkstate_task(void *);
158 
159 int	if_clone_list(struct if_clonereq *);
160 struct if_clone	*if_clone_lookup(const char *, int *);
161 
162 int	if_group_egress_build(void);
163 
164 void	if_watchdog_task(void *);
165 
166 void	if_netisr(void *);
167 
168 #ifdef DDB
169 void	ifa_print_all(void);
170 #endif
171 
172 void	if_qstart_compat(struct ifqueue *);
173 
174 /*
175  * interface index map
176  *
177  * the kernel maintains a mapping of interface indexes to struct ifnet
178  * pointers.
179  *
180  * the map is an array of struct ifnet pointers prefixed by an if_map
181  * structure. the if_map structure stores the length of its array.
182  *
183  * as interfaces are attached to the system, the map is grown on demand
184  * up to USHRT_MAX entries.
185  *
186  * interface index 0 is reserved and represents no interface. this
187  * supports the use of the interface index as the scope for IPv6 link
188  * local addresses, where scope 0 means no scope has been specified.
189  * it also supports the use of interface index as the unique identifier
190  * for network interfaces in SNMP applications as per RFC2863. therefore
191  * if_get(0) returns NULL.
192  */
193 
194 void if_ifp_dtor(void *, void *);
195 void if_map_dtor(void *, void *);
196 struct ifnet *if_ref(struct ifnet *);
197 
198 /*
199  * struct if_map
200  *
201  * bounded array of ifnet srp pointers used to fetch references of live
202  * interfaces with if_get().
203  */
204 
205 struct if_map {
206 	unsigned long		 limit;
207 	/* followed by limit ifnet srp pointers */
208 };
209 
210 /*
211  * struct if_idxmap
212  *
213  * infrastructure to manage updates and accesses to the current if_map.
214  */
215 
216 struct if_idxmap {
217 	unsigned int		 serial;
218 	unsigned int		 count;
219 	struct srp		 map;
220 };
221 
222 void	if_idxmap_init(unsigned int);
223 void	if_idxmap_insert(struct ifnet *);
224 void	if_idxmap_remove(struct ifnet *);
225 
226 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
227 
228 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
229 int if_cloners_count;
230 
231 struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonerlock");
232 
233 /* hooks should only be added, deleted, and run from a process context */
234 struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
235 void	if_hooks_run(struct task_list *);
236 
237 int	ifq_congestion;
238 
239 int		 netisr;
240 
241 #define	NET_TASKQ	1
242 struct taskq	*nettqmp[NET_TASKQ];
243 
244 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
245 
246 /*
247  * Serialize socket operations to ensure no new sleeping points
248  * are introduced in IP output paths.
249  */
250 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
251 
252 /*
253  * Network interface utility routines.
254  */
255 void
256 ifinit(void)
257 {
258 	unsigned int	i;
259 
260 	/*
261 	 * most machines boot with 4 or 5 interfaces, so size the initial map
262 	 * to accomodate this
263 	 */
264 	if_idxmap_init(8);
265 
266 	for (i = 0; i < NET_TASKQ; i++) {
267 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
268 		if (nettqmp[i] == NULL)
269 			panic("unable to create network taskq %d", i);
270 	}
271 }
272 
273 static struct if_idxmap if_idxmap = {
274 	0,
275 	0,
276 	SRP_INITIALIZER()
277 };
278 
279 struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
280 struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
281 
282 struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
283 
284 void
285 if_idxmap_init(unsigned int limit)
286 {
287 	struct if_map *if_map;
288 	struct srp *map;
289 	unsigned int i;
290 
291 	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
292 
293 	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
294 	    M_IFADDR, M_WAITOK);
295 
296 	if_map->limit = limit;
297 	map = (struct srp *)(if_map + 1);
298 	for (i = 0; i < limit; i++)
299 		srp_init(&map[i]);
300 
301 	/* this is called early so there's nothing to race with */
302 	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
303 }
304 
/*
 * Allocate an interface index for ifp and publish it in the index map.
 *
 * Candidate indexes come from a monotonically increasing serial
 * number masked to 16 bits; index 0 is skipped so that if_get(0)
 * returns NULL.  The backing array is doubled when the serial number
 * crosses its current limit.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	/* start refcounting; the map slot itself takes a reference below */
	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* first candidate: 16-bit wrap of the ever-increasing serial */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		/*
		 * grow the map: allocate a double-sized array and copy
		 * the live pointers over, taking a new reference on
		 * each interface for the new array's slots.
		 */
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		/* initialise the newly added upper half of the array */
		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/*
		 * publish the new map; the old one is torn down by
		 * if_map_dtor() once its last reader has left.
		 */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index, skipping 0 and occupied slots */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit: the slot holds its own reference on ifp */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
368 
/*
 * Clear ifp's slot in the interface index map.  The slot's reference
 * on the interface is released through if_ifp_gc -> if_ifp_dtor().
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* dropping the srp pointer releases the slot's reference */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */
}
391 
/*
 * SRP garbage-collector callback: drop the reference that an index
 * map slot held on an interface.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	struct ifnet *oifp = ifp;

	if_put(oifp);
}
397 
398 void
399 if_map_dtor(void *null, void *m)
400 {
401 	struct if_map *if_map = m;
402 	struct srp *map = (struct srp *)(if_map + 1);
403 	unsigned int i;
404 
405 	/*
406 	 * dont need to serialize the use of update_locked since this is
407 	 * the last reference to this map. there's nothing to race against.
408 	 */
409 	for (i = 0; i < if_map->limit; i++)
410 		srp_update_locked(&if_ifp_gc, &map[i], NULL);
411 
412 	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
413 }
414 
/*
 * Attach an interface to the
 * list of "active" interfaces: join the "all" group, set up
 * per-domain data, register with pf, allocate an interface index
 * and announce the arrival over the routing socket.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	/* assigns ifp->if_index, which the tasks below capture */
	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	/* the deferred tasks are handed the index, not the pointer */
	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
447 
448 /*
449  * Allocate the link level name for the specified interface.  This
450  * is an attachment helper.  It must be called after ifp->if_addrlen
451  * is initialized, which may not be the case when if_attach() is
452  * called.
453  */
454 void
455 if_alloc_sadl(struct ifnet *ifp)
456 {
457 	unsigned int socksize;
458 	int namelen, masklen;
459 	struct sockaddr_dl *sdl;
460 
461 	/*
462 	 * If the interface already has a link name, release it
463 	 * now.  This is useful for interfaces that can change
464 	 * link types, and thus switch link names often.
465 	 */
466 	if_free_sadl(ifp);
467 
468 	namelen = strlen(ifp->if_xname);
469 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
470 	socksize = masklen + ifp->if_addrlen;
471 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
472 	if (socksize < sizeof(*sdl))
473 		socksize = sizeof(*sdl);
474 	socksize = ROUNDUP(socksize);
475 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
476 	sdl->sdl_len = socksize;
477 	sdl->sdl_family = AF_LINK;
478 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
479 	sdl->sdl_nlen = namelen;
480 	sdl->sdl_alen = ifp->if_addrlen;
481 	sdl->sdl_index = ifp->if_index;
482 	sdl->sdl_type = ifp->if_type;
483 	ifp->if_sadl = sdl;
484 }
485 
486 /*
487  * Free the link level name for the specified interface.  This is
488  * a detach helper.  This is called from if_detach() or from
489  * link layer type specific detach functions.
490  */
491 void
492 if_free_sadl(struct ifnet *ifp)
493 {
494 	if (ifp->if_sadl == NULL)
495 		return;
496 
497 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
498 	ifp->if_sadl = NULL;
499 }
500 
501 void
502 if_attachdomain(struct ifnet *ifp)
503 {
504 	struct domain *dp;
505 	int i, s;
506 
507 	s = splnet();
508 
509 	/* address family dependent data region */
510 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
511 	for (i = 0; (dp = domains[i]) != NULL; i++) {
512 		if (dp->dom_ifattach)
513 			ifp->if_afdata[dp->dom_family] =
514 			    (*dp->dom_ifattach)(ifp);
515 	}
516 
517 	splx(s);
518 }
519 
/*
 * Attach an interface at the head of the global interface list.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
529 
/*
 * Attach an interface at the tail of the global interface list.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
539 
540 void
541 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
542 {
543 	struct ifqueue **map;
544 	struct ifqueue *ifq;
545 	int i;
546 
547 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
548 	KASSERT(nqs != 0);
549 
550 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
551 
552 	ifp->if_snd.ifq_softc = NULL;
553 	map[0] = &ifp->if_snd;
554 
555 	for (i = 1; i < nqs; i++) {
556 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
557 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
558 		ifq_init(ifq, ifp, i);
559 		map[i] = ifq;
560 	}
561 
562 	ifp->if_ifqs = map;
563 	ifp->if_nifqs = nqs;
564 }
565 
566 void
567 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
568 {
569 	struct ifiqueue **map;
570 	struct ifiqueue *ifiq;
571 	unsigned int i;
572 
573 	KASSERT(niqs != 0);
574 
575 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
576 
577 	ifp->if_rcv.ifiq_softc = NULL;
578 	map[0] = &ifp->if_rcv;
579 
580 	for (i = 1; i < niqs; i++) {
581 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
582 		ifiq_init(ifiq, ifp, i);
583 		map[i] = ifiq;
584 	}
585 
586 	ifp->if_iqs = map;
587 	ifp->if_niqs = niqs;
588 }
589 
/*
 * Initialisation shared by if_attach() and if_attachhead(): address,
 * multicast and group lists, default transmit/receive queues, hook
 * lists and fallback function pointers.  The driver must have set
 * if_ioctl before calling.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);
	TAILQ_INIT(&ifp->if_groups);

	/*
	 * non-MPSAFE drivers provide if_start; wrap it with
	 * if_qstart_compat so the rest of the stack can treat every
	 * driver as if it had if_qstart.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* single transmit queue by default; see if_attach_queues() */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* single receive queue by default; see if_attach_iqueues() */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* provide defaults where the driver did not */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;
}
634 
/*
 * Attach an alternative packet scheduler (ifq_ops) to an interface.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq.  because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
649 
/*
 * Legacy transmit kick: only valid for interfaces using the
 * if_qstart_compat shim, which forwards to the driver's if_start.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
/*
 * Bridge from the ifq start machinery to a legacy driver's if_start.
 */
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy drivers expect the kernel lock and splnet protection */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
677 
/*
 * Transmit entry point for the stack: hand an mbuf to the interface,
 * after giving pf-controlled delay and bridge output a chance to
 * take the packet first.  Returns 0 or an errno from the enqueue op.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	/* receive timestamps make no sense on the transmit side */
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* divert to the bridge unless the packet is flagged M_PROTO1 */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
703 
704 int
705 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
706 {
707 	struct ifqueue *ifq = &ifp->if_snd;
708 	int error;
709 
710 	if (ifp->if_nifqs > 1) {
711 		unsigned int idx;
712 
713 		/*
714 		 * use the operations on the first ifq to pick which of
715 		 * the array gets this mbuf.
716 		 */
717 
718 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
719 		ifq = ifp->if_ifqs[idx];
720 	}
721 
722 	error = ifq_enqueue(ifq, m);
723 	if (error)
724 		return (error);
725 
726 	ifq_start(ifq);
727 
728 	return (0);
729 }
730 
/*
 * Feed a list of received packets into the interface's first input
 * queue.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
736 
/*
 * Loop a packet back into the stack as if it had been received on
 * ifp.  Both output and input counters are bumped since the packet
 * is "sent" and "received" on the same interface.  Returns 0, or
 * EAFNOSUPPORT for address families with no input path compiled in.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* reset the packet header but keep broadcast/multicast marks */
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
791 
792 int
793 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
794 {
795 	struct ifiqueue *ifiq;
796 	unsigned int flow = 0;
797 
798 	m->m_pkthdr.ph_family = af;
799 	m->m_pkthdr.ph_ifidx = ifp->if_index;
800 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
801 
802 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
803 		flow = m->m_pkthdr.ph_flowid;
804 
805 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
806 
807 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
808 }
809 
/*
 * Dequeue the packets on ml and run each through the interface's
 * if_input handler under the net lock.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* skip entropy collection for cloned interfaces */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists and the socket layer.
	 */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK();
}
838 
/*
 * Alternative input path: count and bpf-tap a single packet, then
 * call the interface's if_input handler directly, bypassing the
 * per-interface receive queues.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means the filter consumed the packet */
		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	(*ifp->if_input)(ifp, m);
}
864 
/*
 * Task that dispatches legacy protocol interrupt (netisr) handlers.
 * Each set bit in the global `netisr' word requests one handler; the
 * bits are cleared before dispatching so requests posted while the
 * handlers run are picked up by the next loop iteration.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
		/* remember every bit seen; pfsync runs once at the end */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
920 
/*
 * Run every hook on the given list.  The hooks mutex is not held
 * while a hook runs; instead a cursor entry is inserted after the
 * current hook before the mutex is dropped, and the scan resumes
 * from the cursor afterwards, so the hook that was just run may be
 * removed concurrently without breaking the traversal.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	/* t_func == NULL marks this entry as a cursor, never run */
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		/* call the hook without the mutex held */
		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
949 
/*
 * Make an interface unreachable for the rest of the kernel: unlink
 * it from the global interface list and the index map, then sleep
 * until every outstanding reference has been released.
 */
void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnet, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
964 
/*
 * Run the interface's detach hooks so pseudo-drivers can undo their
 * changes before the interface itself is torn down.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
978 
/*
 * Register a detach hook.  Hooks are inserted at the head so that
 * if_deactivate() runs them in reverse registration order.
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
986 
/*
 * Unregister a previously added detach hook.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
994 
/*
 * Detach an interface from everything in the kernel.  Also deallocate
 * private resources.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	/* Other CPUs must not have a reference before we start destroying. */
	if_remove(ifp);

	ifq_clr_oactive(&ifp->if_snd);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* point the driver hooks at stubs for anything still calling in */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

	/* tear down the per-domain data set up by if_attachdomain() */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* destroy transmit queues; slot 0 is the embedded if_snd */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* destroy receive queues; slot 0 is the embedded if_rcv */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1105 
/*
 * Returns true if ``ifp0'' is connected to the interface with index
 * ``ifidx''.  Interfaces count as connected when they are the same,
 * when they are members of the same bridge, or when one is the carp
 * device of the other.
 */
int
if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
{
	struct ifnet *ifp;
	int connected = 0;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return (0);

	if (ifp0->if_index == ifp->if_index)
		connected = 1;

#if NBRIDGE > 0
	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
		connected = 1;
#endif
#if NCARP > 0
	if ((ifp0->if_type == IFT_CARP &&
	    ifp0->if_carpdevidx == ifp->if_index) ||
	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
		connected = 1;
#endif

	if_put(ifp);
	return (connected);
}
1136 
/*
 * Create a clone network interface.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	rw_enter_write(&if_cloners_lock);

	/* refuse to create a duplicate */
	if ((ifp = if_unit(name)) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	/* re-resolve the name: the create hook attaches the ifnet */
	if (ret != 0 || (ifp = if_unit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);
	/* ifp may be NULL here when creation failed -- if_put()
	 * apparently tolerates that; confirm against its definition */
	if_put(ifp);

	return (ret);
}
1174 
1175 /*
1176  * Destroy a clone network interface.
1177  */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	/* Only the cloner prefix matters here; the unit is not needed. */
	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	/* Some cloners do not support destruction at all. */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	/* Serialize against concurrent clone create/destroy. */
	rw_enter_write(&if_cloners_lock);

	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (strcmp(ifp->if_xname, name) == 0)
			break;
	}
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	/* Take the interface down before tearing it apart. */
	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1217 
1218 /*
1219  * Look up a network interface cloner.
1220  */
/*
 * Parse ``name'' into a cloner (matched on the non-numeric prefix) and,
 * if ``unitp'' is non-NULL, the decimal unit number that follows it.
 * Returns NULL on any malformed name: missing unit, zero-padded unit,
 * non-digit suffix, or a unit that would overflow an int.
 */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */

	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	/* Find the cloner whose name matches the prefix exactly. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}

	if (ifc == NULL)
		return (NULL);

	/* Parse the decimal unit, guarding against int overflow. */
	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}
1263 
1264 /*
1265  * Register a network interface cloner.
1266  */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	/* No lock needed: single-threaded at this point in boot. */
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
}
1280 
1281 /*
1282  * Provide list of interface cloners to userspace.
1283  */
1284 int
1285 if_clone_list(struct if_clonereq *ifcr)
1286 {
1287 	char outbuf[IFNAMSIZ], *dst;
1288 	struct if_clone *ifc;
1289 	int count, error = 0;
1290 
1291 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1292 		/* Just asking how many there are. */
1293 		ifcr->ifcr_total = if_cloners_count;
1294 		return (0);
1295 	}
1296 
1297 	if (ifcr->ifcr_count < 0)
1298 		return (EINVAL);
1299 
1300 	ifcr->ifcr_total = if_cloners_count;
1301 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1302 
1303 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1304 		if (count == 0)
1305 			break;
1306 		bzero(outbuf, sizeof outbuf);
1307 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1308 		error = copyout(outbuf, dst, IFNAMSIZ);
1309 		if (error)
1310 			break;
1311 		count--;
1312 		dst += IFNAMSIZ;
1313 	}
1314 
1315 	return (error);
1316 }
1317 
1318 /*
1319  * set queue congestion marker
1320  */
void
if_congestion(void)
{
	extern int ticks;

	/* Remember when congestion was last observed. */
	ifq_congestion = ticks;
}
1328 
/*
 * Returns non-zero if a queue congestion event was recorded within the
 * last hz/100 ticks.
 */
int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	if (diff < 0) {
		/* Tick counter wrapped: push the marker safely in the past. */
		ifq_congestion = ticks - hz;
		return (0);
	}

	return (diff <= (hz / 100));
}
1343 
/*
 * Byte-wise sockaddr equality; uses the first argument's sa_len as the
 * comparison length, so the first argument must be the reference address.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1347 
1348 /*
1349  * Locate an interface based on a complete address.
1350  */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	u_int rdomain;

	/* Only interfaces in the same routing domain are considered. */
	rdomain = rtable_l2(rtableid);
	KERNEL_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;

			/* Exact match on the full sockaddr. */
			if (equal(addr, ifa->ifa_addr)) {
				KERNEL_UNLOCK();
				return (ifa);
			}
		}
	}
	KERNEL_UNLOCK();
	return (NULL);
}
1377 
1378 /*
1379  * Locate the point to point interface with a given destination address.
1380  */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	rdomain = rtable_l2(rdomain);
	KERNEL_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;
		/* Only point-to-point interfaces carry peer addresses. */
		if (ifp->if_flags & IFF_POINTOPOINT) {
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				if (ifa->ifa_addr->sa_family !=
				    addr->sa_family || ifa->ifa_dstaddr == NULL)
					continue;
				/* Match against the remote (dst) address. */
				if (equal(addr, ifa->ifa_dstaddr)) {
					KERNEL_UNLOCK();
					return (ifa);
				}
			}
		}
	}
	KERNEL_UNLOCK();
	return (NULL);
}
1407 
1408 /*
1409  * Find an interface address specific to an interface best matching
1410  * a given address.
1411  */
1412 struct ifaddr *
1413 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1414 {
1415 	struct ifaddr *ifa;
1416 	char *cp, *cp2, *cp3;
1417 	char *cplim;
1418 	struct ifaddr *ifa_maybe = NULL;
1419 	u_int af = addr->sa_family;
1420 
1421 	if (af >= AF_MAX)
1422 		return (NULL);
1423 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1424 		if (ifa->ifa_addr->sa_family != af)
1425 			continue;
1426 		if (ifa_maybe == NULL)
1427 			ifa_maybe = ifa;
1428 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1429 			if (equal(addr, ifa->ifa_addr) ||
1430 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1431 				return (ifa);
1432 			continue;
1433 		}
1434 		cp = addr->sa_data;
1435 		cp2 = ifa->ifa_addr->sa_data;
1436 		cp3 = ifa->ifa_netmask->sa_data;
1437 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1438 		for (; cp3 < cplim; cp3++)
1439 			if ((*cp++ ^ *cp2++) & *cp3)
1440 				break;
1441 		if (cp3 == cplim)
1442 			return (ifa);
1443 	}
1444 	return (ifa_maybe);
1445 }
1446 
/* No-op rtrequest handler for interfaces with no special route needs. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1451 
1452 /*
1453  * Default action when installing a local route on a point-to-point
1454  * interface.
1455  */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		/* Only local routes get the special treatment below. */
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the ifa whose address is the route's key. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* The local route must go via this rdomain's loopback. */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		/* Loopback has no address of this family yet; leave as-is. */
		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1498 
1499 
1500 /*
1501  * Bring down all interfaces
1502  */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* Tell the driver its flags changed (IFF_UP now clear). */
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1519 
1520 /*
1521  * Mark an interface down and notify protocols of
1522  * the transition.
1523  */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Drop anything still queued for transmission. */
	ifq_purge(&ifp->if_snd);

	/* Notify routing table, userland and hook owners. */
	if_linkstate(ifp);
}
1535 
1536 /*
1537  * Mark an interface up and notify protocols of
1538  * the transition.
1539  */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	/* Notify routing table, userland and hook owners. */
	if_linkstate(ifp);
}
1556 
1557 /*
1558  * Notify userland, the routing table and hooks owner of
1559  * a link-state transition.
1560  */
/*
 * Taskq wrapper for if_linkstate(); ``xifidx'' is the interface index
 * smuggled through the task's void * argument.
 */
void
if_linkstate_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	/* The interface may have been destroyed before the task ran. */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1578 
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Announce the change to routing socket listeners ... */
	rtm_ifchg(ifp);
	/* ... update routes bound to this interface ... */
	rt_if_track(ifp);

	/* ... and run the registered link-state hooks. */
	if_hooks_run(&ifp->if_linkstatehooks);
}
1589 
/* Register ``t'' to run on link-state transitions of ``ifp''. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1597 
/* Unregister a previously added link-state hook. */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1605 
1606 /*
1607  * Schedule a link state change task.
1608  */
void
if_link_state_change(struct ifnet *ifp)
{
	/* Defer the actual work to the interface's network taskq. */
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1614 
1615 /*
1616  * Handle interface watchdog timer routine.  Called
1617  * from softclock, we decrement timer (if set) and
1618  * call the appropriate interface routine on expiration.
1619  */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* Fire the watchdog task when the timer expires. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Re-arm ourselves while a watchdog is installed. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1633 
/*
 * Taskq context for running a driver's watchdog routine; ``xifidx'' is
 * the interface index passed through the task's void * argument.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	/* The interface may be gone by the time the task runs. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1654 
1655 /*
1656  * Map interface name to interface structure pointer.
1657  */
1658 struct ifnet *
1659 if_unit(const char *name)
1660 {
1661 	struct ifnet *ifp;
1662 
1663 	KERNEL_ASSERT_LOCKED();
1664 
1665 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1666 		if (strcmp(ifp->if_xname, name) == 0) {
1667 			if_ref(ifp);
1668 			return (ifp);
1669 		}
1670 	}
1671 
1672 	return (NULL);
1673 }
1674 
1675 /*
1676  * Map interface index to interface structure pointer.
1677  */
struct ifnet *
if_get(unsigned int index)
{
	struct srp_ref sr;
	struct if_map *if_map;
	struct srp *map;
	struct ifnet *ifp = NULL;

	/* Lock-free lookup in the index map via SRP. */
	if_map = srp_enter(&sr, &if_idxmap.map);
	if (index < if_map->limit) {
		/* The srp array lives directly after the if_map header. */
		map = (struct srp *)(if_map + 1);

		ifp = srp_follow(&sr, &map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Take a reference before leaving the SRP section. */
			if_ref(ifp);
		}
	}
	srp_leave(&sr);

	return (ifp);
}
1700 
/* Take a reference on ``ifp''; returns ``ifp'' for caller convenience. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1708 
/* Release a reference taken with if_get()/if_ref(); NULL is a no-op. */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1717 
/*
 * Set the link-layer (Ethernet) address of ``ifp'' to ``lladdr''.
 * Updates both the arpcom copy and the sockaddr_dl in if_sadl.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1729 
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	/* Refuse to take over a routing table already in use. */
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine if ``ifp'' itself is the loopback in question. */
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1759 
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* An rdomain's own loopback interface may never leave it. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1823 
1824 /*
1825  * Interface ioctls.
1826  */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* Commands that do not operate on a specific interface. */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_create(ifr->ifr_name, 0);
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_destroy(ifr->ifr_name);
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		return (error);
	/* All read-only commands are handled by ifioctl_get(). */
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		return (ifioctl_get(cmd, data));
	}

	/* Everything below needs a referenced interface. */
	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	/* Remember the flags so changes can be announced at the end. */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
		/* IFF_CANTCHANGE bits are preserved from the old flags. */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);

		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error != 0) {
			/* Driver rejected the change; restore old flags. */
			ifp->if_flags = oif_flags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;

#endif	/* INET6 */

#ifdef MPLS
		/* Enabling MPLS interposes mpls_output on the output path. */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			/* Hardware cannot do Wake-on-LAN. */
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		if (!error)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			/* Drop the old label reference before replacing it. */
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		/* EEXIST just means the rdomain already exists. */
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			/* Drivers may not handle this ioctl; that is fine. */
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		/* Only valid unicast Ethernet addresses are accepted. */
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFMEDIA:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
	case SIOCSWSPORTNO:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* Give the protocol a shot at the command first. */
		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
			(struct mbuf *) cmd, (struct mbuf *) data,
			(struct mbuf *) ifp, p));
		if (error != EOPNOTSUPP)
			break;
		/* Address-changing commands still need root. */
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	/* Announce flag changes to routing socket listeners. */
	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
		rtm_ifchg(ifp);

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	if_put(ifp);

	return (error);
}
2240 
/*
 * Read-only interface ioctls; none of these require root and all take
 * only the shared (read) net lock.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Commands that do not name a specific interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK_IN_IOCTL();
		error = ifconf(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupmembers(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupattribs(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK_IN_IOCTL();
		error = if_getgrouplist(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	}

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK_IN_IOCTL();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Kernel-internal xflags are hidden from userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only dispatches the commands handled above. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK_IN_IOCTL();

	if_put(ifp);

	return (error);
}
2356 
2357 static int
2358 if_sffpage_check(const caddr_t data)
2359 {
2360 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2361 
2362 	switch (sff->sff_addr) {
2363 	case IFSFF_ADDR_EEPROM:
2364 	case IFSFF_ADDR_DDM:
2365 		break;
2366 	default:
2367 		return (EINVAL);
2368 	}
2369 
2370 	return (0);
2371 }
2372 
2373 int
2374 if_txhprio_l2_check(int hdrprio)
2375 {
2376 	switch (hdrprio) {
2377 	case IF_HDRPRIO_PACKET:
2378 		return (0);
2379 	default:
2380 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2381 			return (0);
2382 		break;
2383 	}
2384 
2385 	return (EINVAL);
2386 }
2387 
2388 int
2389 if_txhprio_l3_check(int hdrprio)
2390 {
2391 	switch (hdrprio) {
2392 	case IF_HDRPRIO_PACKET:
2393 	case IF_HDRPRIO_PAYLOAD:
2394 		return (0);
2395 	default:
2396 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2397 			return (0);
2398 		break;
2399 	}
2400 
2401 	return (EINVAL);
2402 }
2403 
2404 int
2405 if_rxhprio_l2_check(int hdrprio)
2406 {
2407 	switch (hdrprio) {
2408 	case IF_HDRPRIO_PACKET:
2409 	case IF_HDRPRIO_OUTER:
2410 		return (0);
2411 	default:
2412 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2413 			return (0);
2414 		break;
2415 	}
2416 
2417 	return (EINVAL);
2418 }
2419 
2420 int
2421 if_rxhprio_l3_check(int hdrprio)
2422 {
2423 	switch (hdrprio) {
2424 	case IF_HDRPRIO_PACKET:
2425 	case IF_HDRPRIO_PAYLOAD:
2426 	case IF_HDRPRIO_OUTER:
2427 		return (0);
2428 	default:
2429 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2430 			return (0);
2431 		break;
2432 	}
2433 
2434 	return (EINVAL);
2435 }
2436 
2437 /*
2438  * Return interface configuration
2439  * of system.  List may be used
2440  * in later ioctl's (above) to get
2441  * other information.
2442  */
int
ifconf(caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnet, if_list) {
			struct sockaddr *sa;

			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					/* Oversized sockaddrs need extra room. */
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			/* Address-less interface: emit a zeroed ifr_addr. */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					/*
					 * The sockaddr overflows ifr_addr:
					 * copy the name and the full sockaddr
					 * separately, then advance ifrp past
					 * the variable-length record.
					 */
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	/* Report how many bytes were actually used. */
	ifc->ifc_len -= space;
	return (error);
}
2518 
void
if_counters_alloc(struct ifnet *ifp)
{
	/*
	 * Allocate the optional counter set for an interface.  May only
	 * be called once; if_getdata() folds these counters into the
	 * reported if_data when they are present.
	 */
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2526 
void
if_counters_free(struct ifnet *ifp)
{
	/*
	 * Release the counter set allocated by if_counters_alloc() and
	 * clear the pointer so a stale reference cannot be used.
	 */
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2535 
/*
 * Fill *data with a snapshot of the interface statistics: start from
 * the static if_data, then fold in the optional counter set and the
 * per-queue transmit/receive statistics.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		/* VLA sized by the number of interface counters. */
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters));

		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* Add per-ifqueue (output) statistics. */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* Add per-ifiqueue (input) statistics. */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2574 
2575 /*
2576  * Dummy functions replaced in ifnet during detach (if protocols decide to
2577  * fiddle with the if during detach.
2578  */
void
if_detached_qstart(struct ifqueue *ifq)
{
	/* Detached interface: discard anything queued for transmit. */
	ifq_purge(ifq);
}
2584 
2585 int
2586 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2587 {
2588 	return ENODEV;
2589 }
2590 
2591 /*
2592  * Create interface group without members
2593  */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group	*ifg;

	/* M_NOWAIT: may be called in contexts where sleeping is unsafe. */
	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	/* The caller's reference; dropped in if_delgroup(). */
	ifg->ifg_refcnt = 1;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}
2613 
2614 /*
2615  * Add a group to an interface
2616  */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;
	size_t			 namelen;

	/*
	 * Reject empty/overlong names and names ending in a digit:
	 * trailing digits are reserved for interface unit numbers.
	 */
	namelen = strlen(groupname);
	if (namelen == 0 || namelen >= IFNAMSIZ ||
	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
		return (EINVAL);

	/* Already a member of this group? */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	/* Allocate both link structures before touching any lists. */
	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		free(ifgl, M_TEMP, sizeof(*ifgl));
		return (ENOMEM);
	}

	/* Find an existing group of this name, or create one. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL) {
		ifg = if_creategroup(groupname);
		if (ifg == NULL) {
			free(ifgl, M_TEMP, sizeof(*ifgl));
			free(ifgm, M_TEMP, sizeof(*ifgm));
			return (ENOMEM);
		}
	} else
		ifg->ifg_refcnt++;
	KASSERT(ifg->ifg_refcnt != 0);

	/* Link the interface into the group and vice versa. */
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_addmember(groupname, ifp);
#endif

	return (0);
}
2669 
2670 /*
2671  * Remove a group from an interface
2672  */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	/* Find the interface's membership entry for this group. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* Remove the interface from the group's member list. */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		free(ifgm, M_TEMP, sizeof(*ifgm));
	}

#if NPF > 0
	pfi_group_change(groupname);
#endif

	/* Drop the group reference; destroy the group on last release. */
	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
	}

	free(ifgl, M_TEMP, sizeof(*ifgl));

	return (0);
}
2713 
2714 /*
2715  * Stores all groups from an interface in memory pointed
2716  * to by data
2717  */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int			 len, error;
	struct ifg_list		*ifgl;
	struct ifg_req		 ifgrq, *ifgp;
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;

	/* Size query: report how much space is needed and return. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	/* Copy one ifg_req per group out to the user buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2749 
2750 /*
2751  * Stores all members of a group in memory pointed to by data
2752  */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Look the group up by name. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* Size query: report how much space is needed and return. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy one ifg_req per member out to the user buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2791 
2792 int
2793 if_getgroupattribs(caddr_t data)
2794 {
2795 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2796 	struct ifg_group	*ifg;
2797 
2798 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2799 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2800 			break;
2801 	if (ifg == NULL)
2802 		return (ENOENT);
2803 
2804 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2805 
2806 	return (0);
2807 }
2808 
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	/* Look the group up by name. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/*
	 * The request carries a signed delta; reject any adjustment
	 * that would push the counter outside [0, 0xff].
	 */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* Notify every member interface of the attribute change. */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2835 
2836 /*
2837  * Stores all groups in memory pointed to by data
2838  */
int
if_getgrouplist(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Size query: report how much space is needed and return. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy one ifg_req per existing group out to the user buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2870 
/*
 * Called on route changes: if the affected route is a default route
 * (destination and mask are the any-address), rebuild the "egress"
 * interface group to track which interfaces carry a default route.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2892 
/*
 * Rebuild the IFG_EGRESS group from scratch: drop all current members,
 * then re-add every interface that carries a default route (IPv4 and,
 * if configured, IPv6) in routing table 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Find the existing egress group, if any. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* _SAFE iteration: if_delgroup() frees the member entries. */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk all IPv4 default routes in rtable 0. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same for IPv6 default routes. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2942 
2943 /*
2944  * Set/clear promiscuous mode on interface ifp based on the truth value
2945  * of pswitch.  The calls are reference counted so that only the first
2946  * "on" request actually has an effect, as does the final "off" request.
2947  * Results are undefined if the "off" and "on" requests are not matched.
2948  */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */

	/* Save state so a failed driver ioctl can be rolled back. */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* Only the first "on" request changes the flag. */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* Only the last "off" request changes the flag. */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/* A down interface picks up the new flags when brought up. */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	/* Push the new flags down to the driver. */
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
2983 
/* Append an address to the interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2989 
/* Remove an address from the interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2995 
/*
 * Overwrite an address's broadcast address in place.  The new sockaddr
 * must have exactly the same length as the existing one; resizing the
 * stored broadaddr is not supported.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3003 
3004 #ifdef DDB
3005 /* debug function, can be called from ddb> */
/*
 * Print every INET/INET6 address of every interface to the console.
 * Debug helper only; intended to be invoked from ddb(4).
 */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			/* Other address families are silently skipped. */
			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
3034 #endif /* DDB */
3035 
/*
 * Propagate a link-layer address change: bounce the interface through
 * a SIOCSIFFLAGS cycle so the driver reprograms its hardware, and
 * regenerate the IPv6 link-local address when acting as a host.
 * NOTE(review): only ifrq.ifr_flags is initialized before the ioctl
 * calls; the rest of ifrq is stack garbage — presumably drivers only
 * read ifr_flags for SIOCSIFFLAGS, but worth confirming.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring the interface up so the driver applies the new lladdr. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3082 
/* Register a task to run when the interface's addresses change. */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3090 
/* Unregister a previously added address-change task. */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3098 
/* Run all registered address-change hooks for the interface. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3104 
3105 void
3106 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3107 {
3108 	extern int ticks;
3109 
3110 	memset(rxr, 0, sizeof(*rxr));
3111 
3112 	rxr->rxr_adjusted = ticks;
3113 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3114 	rxr->rxr_hwm = hwm;
3115 }
3116 
3117 static inline void
3118 if_rxr_adjust_cwm(struct if_rxring *rxr)
3119 {
3120 	extern int ticks;
3121 
3122 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3123 		return;
3124 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3125 		rxr->rxr_cwm++;
3126 
3127 	rxr->rxr_adjusted = ticks;
3128 }
3129 
3130 void
3131 if_rxr_livelocked(struct if_rxring *rxr)
3132 {
3133 	extern int ticks;
3134 
3135 	if (ticks - rxr->rxr_adjusted >= 1) {
3136 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3137 			rxr->rxr_cwm--;
3138 
3139 		rxr->rxr_adjusted = ticks;
3140 	}
3141 }
3142 
3143 u_int
3144 if_rxr_get(struct if_rxring *rxr, u_int max)
3145 {
3146 	extern int ticks;
3147 	u_int diff;
3148 
3149 	if (ticks - rxr->rxr_adjusted >= 1) {
3150 		/* we're free to try for an adjustment */
3151 		if_rxr_adjust_cwm(rxr);
3152 	}
3153 
3154 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3155 		return (0);
3156 
3157 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3158 	rxr->rxr_alive += diff;
3159 
3160 	return (diff);
3161 }
3162 
/*
 * Copy "t" receive-ring descriptors out to the userland if_rxrinfo
 * structure, limited by the space the caller provided, and report the
 * total number available in ifri_total.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* Copy out no more entries than the caller has room for. */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3185 
/*
 * Convenience wrapper for drivers with a single receive ring: build
 * one if_rxring_info record (optional name, buffer size, ring state)
 * and hand it to if_rxr_info_ioctl().
 */
int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}
3202 
3203 /*
3204  * Network stack input queues.
3205  */
3206 
/* Initialize a network-stack input queue with a bounded mbuf queue
 * and the netisr bit to schedule when packets are queued. */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3213 
3214 int
3215 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3216 {
3217 	int rv;
3218 
3219 	rv = mq_enqueue(&niq->ni_q, m);
3220 	if (rv == 0)
3221 		schednetisr(niq->ni_isr);
3222 	else
3223 		if_congestion();
3224 
3225 	return (rv);
3226 }
3227 
3228 int
3229 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3230 {
3231 	int rv;
3232 
3233 	rv = mq_enlist(&niq->ni_q, ml);
3234 	if (rv == 0)
3235 		schednetisr(niq->ni_isr);
3236 	else
3237 		if_congestion();
3238 
3239 	return (rv);
3240 }
3241 
/* Panic on an address family the caller has no case for. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3247 
3248 /*
3249  * XXXSMP This tunable is here to work around the fact that IPsec
3250  * globals aren't ready to be accessed by multiple threads in
3251  * parallel.
3252  */
3253 int		 nettaskqs = NET_TASKQ;
3254 
3255 struct taskq *
3256 net_tq(unsigned int ifindex)
3257 {
3258 	struct taskq *t = NULL;
3259 
3260 	t = nettqmp[ifindex % nettaskqs];
3261 
3262 	return (t);
3263 }
3264