xref: /openbsd-src/sys/net/if.c (revision be691f3bb6417f04a68938fadbcaee2d5795e764)
1 /*	$OpenBSD: if.c,v 1.644 2021/11/11 10:03:10 claudio Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "if_wg.h"
72 
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/socketvar.h>
78 #include <sys/timeout.h>
79 #include <sys/protosw.h>
80 #include <sys/kernel.h>
81 #include <sys/ioctl.h>
82 #include <sys/domain.h>
83 #include <sys/task.h>
84 #include <sys/atomic.h>
85 #include <sys/percpu.h>
86 #include <sys/proc.h>
87 #include <sys/stdint.h>	/* uintptr_t */
88 #include <sys/rwlock.h>
89 
90 #include <net/if.h>
91 #include <net/if_dl.h>
92 #include <net/if_types.h>
93 #include <net/route.h>
94 #include <net/netisr.h>
95 
96 #include <netinet/in.h>
97 #include <netinet/if_ether.h>
98 #include <netinet/igmp.h>
99 #ifdef MROUTING
100 #include <netinet/ip_mroute.h>
101 #endif
102 
103 #ifdef INET6
104 #include <netinet6/in6_var.h>
105 #include <netinet6/in6_ifattach.h>
106 #include <netinet6/nd6.h>
107 #include <netinet/ip6.h>
108 #include <netinet6/ip6_var.h>
109 #endif
110 
111 #ifdef MPLS
112 #include <netmpls/mpls.h>
113 #endif
114 
115 #if NBPFILTER > 0
116 #include <net/bpf.h>
117 #endif
118 
119 #if NBRIDGE > 0
120 #include <net/if_bridge.h>
121 #endif
122 
123 #if NCARP > 0
124 #include <netinet/ip_carp.h>
125 #endif
126 
127 #if NPF > 0
128 #include <net/pfvar.h>
129 #endif
130 
131 #include <sys/device.h>
132 
133 void	if_attachsetup(struct ifnet *);
134 void	if_attachdomain(struct ifnet *);
135 void	if_attach_common(struct ifnet *);
136 void	if_remove(struct ifnet *);
137 int	if_createrdomain(int, struct ifnet *);
138 int	if_setrdomain(struct ifnet *, int);
139 void	if_slowtimo(void *);
140 
141 void	if_detached_qstart(struct ifqueue *);
142 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
143 
144 int	ifioctl_get(u_long, caddr_t);
145 int	ifconf(caddr_t);
146 static int
147 	if_sffpage_check(const caddr_t);
148 
149 int	if_getgroup(caddr_t, struct ifnet *);
150 int	if_getgroupmembers(caddr_t);
151 int	if_getgroupattribs(caddr_t);
152 int	if_setgroupattribs(caddr_t);
153 int	if_getgrouplist(caddr_t);
154 
155 void	if_linkstate(struct ifnet *);
156 void	if_linkstate_task(void *);
157 
158 int	if_clone_list(struct if_clonereq *);
159 struct if_clone	*if_clone_lookup(const char *, int *);
160 
161 int	if_group_egress_build(void);
162 
163 void	if_watchdog_task(void *);
164 
165 void	if_netisr(void *);
166 
167 #ifdef DDB
168 void	ifa_print_all(void);
169 #endif
170 
171 void	if_qstart_compat(struct ifqueue *);
172 
173 /*
174  * interface index map
175  *
176  * the kernel maintains a mapping of interface indexes to struct ifnet
177  * pointers.
178  *
179  * the map is an array of struct ifnet pointers prefixed by an if_map
180  * structure. the if_map structure stores the length of its array.
181  *
182  * as interfaces are attached to the system, the map is grown on demand
183  * up to USHRT_MAX entries.
184  *
185  * interface index 0 is reserved and represents no interface. this
186  * supports the use of the interface index as the scope for IPv6 link
187  * local addresses, where scope 0 means no scope has been specified.
188  * it also supports the use of interface index as the unique identifier
189  * for network interfaces in SNMP applications as per RFC2863. therefore
190  * if_get(0) returns NULL.
191  */
192 
193 void if_ifp_dtor(void *, void *);
194 void if_map_dtor(void *, void *);
195 struct ifnet *if_ref(struct ifnet *);
196 
197 /*
198  * struct if_map
199  *
200  * bounded array of ifnet srp pointers used to fetch references of live
201  * interfaces with if_get().
202  */
203 
204 struct if_map {
205 	unsigned long		 limit;
206 	/* followed by limit ifnet srp pointers */
207 };
208 
209 /*
210  * struct if_idxmap
211  *
212  * infrastructure to manage updates and accesses to the current if_map.
213  */
214 
215 struct if_idxmap {
216 	unsigned int		 serial;
217 	unsigned int		 count;
218 	struct srp		 map;
219 };
220 
221 void	if_idxmap_init(unsigned int);
222 void	if_idxmap_insert(struct ifnet *);
223 void	if_idxmap_remove(struct ifnet *);
224 
225 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
226 
227 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
228 int if_cloners_count;
229 
230 struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonelk");
231 
232 /* hooks should only be added, deleted, and run from a process context */
233 struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
234 void	if_hooks_run(struct task_list *);
235 
236 int	ifq_congestion;
237 
238 int		 netisr;
239 
240 #define	NET_TASKQ	1
241 struct taskq	*nettqmp[NET_TASKQ];
242 
243 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
244 
245 /*
246  * Serialize socket operations to ensure no new sleeping points
247  * are introduced in IP output paths.
248  */
249 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
250 
251 /*
252  * Network interface utility routines.
253  */
254 void
255 ifinit(void)
256 {
257 	unsigned int	i;
258 
259 	/*
260 	 * most machines boot with 4 or 5 interfaces, so size the initial map
261 	 * to accomodate this
262 	 */
263 	if_idxmap_init(8);
264 
265 	for (i = 0; i < NET_TASKQ; i++) {
266 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
267 		if (nettqmp[i] == NULL)
268 			panic("unable to create network taskq %d", i);
269 	}
270 }
271 
272 static struct if_idxmap if_idxmap = {
273 	0,
274 	0,
275 	SRP_INITIALIZER()
276 };
277 
278 struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
279 struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
280 
281 struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
282 
283 void
284 if_idxmap_init(unsigned int limit)
285 {
286 	struct if_map *if_map;
287 	struct srp *map;
288 	unsigned int i;
289 
290 	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
291 
292 	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
293 	    M_IFADDR, M_WAITOK);
294 
295 	if_map->limit = limit;
296 	map = (struct srp *)(if_map + 1);
297 	for (i = 0; i < limit; i++)
298 		srp_init(&map[i]);
299 
300 	/* this is called early so there's nothing to race with */
301 	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
302 }
303 
/*
 * Allocate an interface index for ifp and publish it in the index map.
 * Grows the map (doubling its size) when the next serial index falls
 * outside the current array.  The map takes its own reference on ifp.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* indices are 16 bit, so mask the growing serial down to that range */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		/* build a doubled map and copy live entries across */
		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* the new map takes its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		/* the tail of the new array starts out empty */
		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* the old map is released via if_map_gc once readers drain */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
367 
368 void
369 if_idxmap_remove(struct ifnet *ifp)
370 {
371 	struct if_map *if_map;
372 	struct srp *map;
373 	unsigned int index;
374 
375 	index = ifp->if_index;
376 
377 	/* the kernel lock guarantees serialised modifications to if_idxmap */
378 	KERNEL_ASSERT_LOCKED();
379 
380 	if_map = srp_get_locked(&if_idxmap.map);
381 	KASSERT(index < if_map->limit);
382 
383 	map = (struct srp *)(if_map + 1);
384 	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));
385 
386 	srp_update_locked(&if_ifp_gc, &map[index], NULL);
387 	if_idxmap.count--;
388 	/* end of if_idxmap modifications */
389 }
390 
/* srp_gc callback: release a map slot's reference on an ifnet */
void
if_ifp_dtor(void *null, void *v)
{
	struct ifnet *ifp = v;

	if_put(ifp);
}
396 
397 void
398 if_map_dtor(void *null, void *m)
399 {
400 	struct if_map *if_map = m;
401 	struct srp *map = (struct srp *)(if_map + 1);
402 	unsigned int i;
403 
404 	/*
405 	 * dont need to serialize the use of update_locked since this is
406 	 * the last reference to this map. there's nothing to race against.
407 	 */
408 	for (i = 0; i < if_map->limit; i++)
409 		srp_update_locked(&if_ifp_gc, &map[i], NULL);
410 
411 	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
412 }
413 
414 /*
415  * Attach an interface to the
416  * list of "active" interfaces.
417  */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	/* every interface is a member of the "all" group */
	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	/* arm the per-interface slow timer and run it once immediately */
	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	/* allocate ifp->if_index; index 0 must stay unused */
	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	/*
	 * the tasks carry the index, not the pointer — presumably so a
	 * late-running task can look the interface up again safely after
	 * a detach; TODO confirm against if_watchdog_task/if_linkstate_task
	 */
	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
446 
447 /*
448  * Allocate the link level name for the specified interface.  This
449  * is an attachment helper.  It must be called after ifp->if_addrlen
450  * is initialized, which may not be the case when if_attach() is
451  * called.
452  */
453 void
454 if_alloc_sadl(struct ifnet *ifp)
455 {
456 	unsigned int socksize;
457 	int namelen, masklen;
458 	struct sockaddr_dl *sdl;
459 
460 	/*
461 	 * If the interface already has a link name, release it
462 	 * now.  This is useful for interfaces that can change
463 	 * link types, and thus switch link names often.
464 	 */
465 	if_free_sadl(ifp);
466 
467 	namelen = strlen(ifp->if_xname);
468 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
469 	socksize = masklen + ifp->if_addrlen;
470 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
471 	if (socksize < sizeof(*sdl))
472 		socksize = sizeof(*sdl);
473 	socksize = ROUNDUP(socksize);
474 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
475 	sdl->sdl_len = socksize;
476 	sdl->sdl_family = AF_LINK;
477 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
478 	sdl->sdl_nlen = namelen;
479 	sdl->sdl_alen = ifp->if_addrlen;
480 	sdl->sdl_index = ifp->if_index;
481 	sdl->sdl_type = ifp->if_type;
482 	ifp->if_sadl = sdl;
483 }
484 
485 /*
486  * Free the link level name for the specified interface.  This is
487  * a detach helper.  This is called from if_detach() or from
488  * link layer type specific detach functions.
489  */
490 void
491 if_free_sadl(struct ifnet *ifp)
492 {
493 	if (ifp->if_sadl == NULL)
494 		return;
495 
496 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
497 	ifp->if_sadl = NULL;
498 }
499 
500 void
501 if_attachdomain(struct ifnet *ifp)
502 {
503 	const struct domain *dp;
504 	int i, s;
505 
506 	s = splnet();
507 
508 	/* address family dependent data region */
509 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
510 	for (i = 0; (dp = domains[i]) != NULL; i++) {
511 		if (dp->dom_ifattach)
512 			ifp->if_afdata[dp->dom_family] =
513 			    (*dp->dom_ifattach)(ifp);
514 	}
515 
516 	splx(s);
517 }
518 
/* Attach ifp, inserting it at the head of the global interface list. */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
528 
/* Attach ifp, appending it to the tail of the global interface list. */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
538 
539 void
540 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
541 {
542 	struct ifqueue **map;
543 	struct ifqueue *ifq;
544 	int i;
545 
546 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
547 	KASSERT(nqs != 0);
548 
549 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
550 
551 	ifp->if_snd.ifq_softc = NULL;
552 	map[0] = &ifp->if_snd;
553 
554 	for (i = 1; i < nqs; i++) {
555 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
556 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
557 		ifq_init(ifq, ifp, i);
558 		map[i] = ifq;
559 	}
560 
561 	ifp->if_ifqs = map;
562 	ifp->if_nifqs = nqs;
563 }
564 
565 void
566 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
567 {
568 	struct ifiqueue **map;
569 	struct ifiqueue *ifiq;
570 	unsigned int i;
571 
572 	KASSERT(niqs != 0);
573 
574 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
575 
576 	ifp->if_rcv.ifiq_softc = NULL;
577 	map[0] = &ifp->if_rcv;
578 
579 	for (i = 1; i < niqs; i++) {
580 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
581 		ifiq_init(ifiq, ifp, i);
582 		map[i] = ifiq;
583 	}
584 
585 	ifp->if_iqs = map;
586 	ifp->if_niqs = niqs;
587 }
588 
/*
 * Initialisation shared by if_attach() and if_attachhead(); runs
 * before those callers take the NET_LOCK.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);
	TAILQ_INIT(&ifp->if_groups);

	/*
	 * non-MPSAFE drivers provide if_start and get the compat
	 * wrapper installed as if_qstart; MPSAFE drivers must provide
	 * if_qstart themselves and must not set if_start.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* every interface starts with a single transmit queue... */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* ...and a single receive queue */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* provide harmless defaults for optional driver hooks */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
#if NBPFILTER > 0
	if (ifp->if_bpf_mtap == NULL)
		ifp->if_bpf_mtap = bpf_mtap_ether;
#endif
	ifp->if_llprio = IFQ_DEFPRIO;
}
637 
/* Switch the packet scheduling discipline on an interface. */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
652 
/*
 * Legacy transmit kick: only valid for drivers the stack set up with
 * the if_qstart_compat wrapper (non-MPSAFE, single queue).
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
/*
 * Bridge from the per-queue transmit API back to an old-style driver's
 * if_start routine, under the kernel lock at splnet.
 */
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
680 
/*
 * Transmit entry point: hand an mbuf to ifp for output, diverting it
 * through pf delay and bridge processing first where applicable.
 * Consumes the mbuf; returns 0 or an errno.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	/* any previously recorded timestamp is stale on this tx path */
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	/* pf may have asked for this packet to be delayed */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/*
	 * bridge members hand their output to the bridge instead,
	 * unless M_PROTO1 is set — presumably marking packets already
	 * processed by the bridge; TODO confirm against bridge_enqueue()
	 */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
706 
707 int
708 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
709 {
710 	struct ifqueue *ifq = &ifp->if_snd;
711 	int error;
712 
713 	if (ifp->if_nifqs > 1) {
714 		unsigned int idx;
715 
716 		/*
717 		 * use the operations on the first ifq to pick which of
718 		 * the array gets this mbuf.
719 		 */
720 
721 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
722 		ifq = ifp->if_ifqs[idx];
723 	}
724 
725 	error = ifq_enqueue(ifq, m);
726 	if (error)
727 		return (error);
728 
729 	ifq_start(ifq);
730 
731 	return (0);
732 }
733 
/* Hand a list of received packets to the stack via the first rx queue. */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
739 
/*
 * Loop a packet back into the stack on behalf of ifp.  The packet is
 * counted as both output and input on the interface, then dispatched
 * to the protocol input routine for af.  Consumes the mbuf; returns 0
 * or EAFNOSUPPORT for unhandled address families.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* reset the header but keep the broadcast/multicast markers */
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
794 
795 int
796 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
797 {
798 	struct ifiqueue *ifiq;
799 	unsigned int flow = 0;
800 
801 	m->m_pkthdr.ph_family = af;
802 	m->m_pkthdr.ph_ifidx = ifp->if_index;
803 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
804 
805 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
806 		flow = m->m_pkthdr.ph_flowid;
807 
808 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
809 
810 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
811 }
812 
/*
 * Feed each packet on ml to ifp's protocol input routine, holding the
 * NET_LOCK for the whole batch.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* cloned (pseudo) interfaces carry no fresh hardware entropy */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists and the socket layer.
	 */

	/*
	 * XXXSMP IPsec data structures are not ready to be accessed
	 * by multiple network threads in parallel.  In this case
	 * use an exclusive lock.
	 */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK();
}
847 
848 void
849 if_vinput(struct ifnet *ifp, struct mbuf *m)
850 {
851 #if NBPFILTER > 0
852 	caddr_t if_bpf;
853 #endif
854 
855 	m->m_pkthdr.ph_ifidx = ifp->if_index;
856 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
857 
858 	counters_pkt(ifp->if_counters,
859 	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
860 
861 #if NPF > 0
862 	pf_pkt_addr_changed(m);
863 #endif
864 
865 #if NBPFILTER > 0
866 	if_bpf = ifp->if_bpf;
867 	if (if_bpf) {
868 		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
869 			m_freem(m);
870 			return;
871 		}
872 	}
873 #endif
874 
875 	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR)))
876 		(*ifp->if_input)(ifp, m);
877 }
878 
/*
 * Task handler that drains the legacy netisr soft-interrupt bits.
 * Bits are cleared before their handlers run; pfsync work is deferred
 * until everything else queued during the loop has been handled.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
		/* accumulate the bits seen for the post-loop pfsync check */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
927 
/*
 * Run every task on a hook list without holding if_hooks_mtx across
 * the callbacks.  A stack-allocated cursor (recognisable by its NULL
 * t_func) is inserted after the current entry before the mutex is
 * dropped, so the iteration point stays valid even if the list is
 * modified while the hook runs.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		/* the hook runs unlocked; the cursor keeps our place */
		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
956 
/*
 * Make an interface unreachable: unlink it from the global list and
 * the index map, then sleep until every in-flight reference has been
 * released.  After this returns no other CPU can find ifp.
 */
void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnet, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
971 
/*
 * Run the detach hooks so pseudo-drivers stacked on ifp can undo
 * their changes before the interface is torn down.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
985 
/*
 * Register a task to run when ifp is detached.  Hooks are inserted at
 * the head so if_deactivate() runs them in reverse registration order.
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
993 
/* Unregister a detach hook previously added with if_detachhook_add(). */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1001 
1002 /*
1003  * Detach an interface from everything in the kernel.  Also deallocate
1004  * private resources.
1005  */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	const struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	/* Other CPUs must not have a reference before we start destroying. */
	if_remove(ifp);

	ifq_clr_oactive(&ifp->if_snd);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* stub out the driver entry points so late callers fail safely */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	/* withdraw the interface from the routing and protocol layers */
	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

	/* let each domain free the per-interface data it attached */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/*
	 * tear down the tx/rx queue arrays; slot 0 is embedded in the
	 * ifnet, so only slots allocated by if_attach_{queues,iqueues}()
	 * are freed here
	 */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1112 
1113 /*
1114  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1115  */
1116 int
1117 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1118 {
1119 	struct ifnet *ifp;
1120 	int connected = 0;
1121 
1122 	ifp = if_get(ifidx);
1123 	if (ifp == NULL)
1124 		return (0);
1125 
1126 	if (ifp0->if_index == ifp->if_index)
1127 		connected = 1;
1128 
1129 #if NBRIDGE > 0
1130 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1131 		connected = 1;
1132 #endif
1133 #if NCARP > 0
1134 	if ((ifp0->if_type == IFT_CARP &&
1135 	    ifp0->if_carpdevidx == ifp->if_index) ||
1136 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1137 		connected = 1;
1138 #endif
1139 
1140 	if_put(ifp);
1141 	return (connected);
1142 }
1143 
1144 /*
1145  * Create a clone network interface.
1146  */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	rw_enter_write(&if_cloners_lock);

	/* refuse to create an interface that already exists */
	if ((ifp = if_unit(name)) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	/* ifp stays NULL if creation failed or the unit never appeared */
	if (ret != 0 || (ifp = if_unit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);
	/* NOTE(review): ifp may be NULL here — relies on if_put(NULL)
	 * being a no-op; confirm in if_put() */
	if_put(ifp);

	return (ret);
}
1181 
1182 /*
1183  * Destroy a clone network interface.
1184  */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	/* The name must match a registered cloner... */
	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	/* ...and that cloner must support destruction. */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	/* Serialize against concurrent create/destroy. */
	rw_enter_write(&if_cloners_lock);

	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (strcmp(ifp->if_xname, name) == 0)
			break;
	}
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	/* Bring the interface down before tearing it apart. */
	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1224 
1225 /*
1226  * Look up a network interface cloner.
1227  */
1228 struct if_clone *
1229 if_clone_lookup(const char *name, int *unitp)
1230 {
1231 	struct if_clone *ifc;
1232 	const char *cp;
1233 	int unit;
1234 
1235 	/* separate interface name from unit */
1236 	for (cp = name;
1237 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1238 	    cp++)
1239 		continue;
1240 
1241 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1242 		return (NULL);	/* No name or unit number */
1243 
1244 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1245 		return (NULL);	/* unit number 0 padded */
1246 
1247 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1248 		if (strlen(ifc->ifc_name) == cp - name &&
1249 		    !strncmp(name, ifc->ifc_name, cp - name))
1250 			break;
1251 	}
1252 
1253 	if (ifc == NULL)
1254 		return (NULL);
1255 
1256 	unit = 0;
1257 	while (cp - name < IFNAMSIZ && *cp) {
1258 		if (*cp < '0' || *cp > '9' ||
1259 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1260 			/* Bogus unit number. */
1261 			return (NULL);
1262 		}
1263 		unit = (unit * 10) + (*cp++ - '0');
1264 	}
1265 
1266 	if (unitp != NULL)
1267 		*unitp = unit;
1268 	return (ifc);
1269 }
1270 
1271 /*
1272  * Register a network interface cloner.
1273  */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	/* No lock needed: single threaded at this point of boot. */
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
}
1287 
1288 /*
1289  * Provide list of interface cloners to userspace.
1290  */
1291 int
1292 if_clone_list(struct if_clonereq *ifcr)
1293 {
1294 	char outbuf[IFNAMSIZ], *dst;
1295 	struct if_clone *ifc;
1296 	int count, error = 0;
1297 
1298 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1299 		/* Just asking how many there are. */
1300 		ifcr->ifcr_total = if_cloners_count;
1301 		return (0);
1302 	}
1303 
1304 	if (ifcr->ifcr_count < 0)
1305 		return (EINVAL);
1306 
1307 	ifcr->ifcr_total = if_cloners_count;
1308 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1309 
1310 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1311 		if (count == 0)
1312 			break;
1313 		bzero(outbuf, sizeof outbuf);
1314 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1315 		error = copyout(outbuf, dst, IFNAMSIZ);
1316 		if (error)
1317 			break;
1318 		count--;
1319 		dst += IFNAMSIZ;
1320 	}
1321 
1322 	return (error);
1323 }
1324 
1325 /*
1326  * set queue congestion marker
1327  */
void
if_congestion(void)
{
	extern int ticks;

	/* Remember when congestion was last signalled. */
	ifq_congestion = ticks;
}
1335 
1336 int
1337 if_congested(void)
1338 {
1339 	extern int ticks;
1340 	int diff;
1341 
1342 	diff = ticks - ifq_congestion;
1343 	if (diff < 0) {
1344 		ifq_congestion = ticks - hz;
1345 		return (0);
1346 	}
1347 
1348 	return (diff <= (hz / 100));
1349 }
1350 
/* Byte-for-byte sockaddr comparison over a1's sa_len bytes. */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1354 
1355 /*
1356  * Locate an interface based on a complete address.
1357  */
1358 struct ifaddr *
1359 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1360 {
1361 	struct ifnet *ifp;
1362 	struct ifaddr *ifa;
1363 	u_int rdomain;
1364 
1365 	rdomain = rtable_l2(rtableid);
1366 	KERNEL_LOCK();
1367 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1368 		if (ifp->if_rdomain != rdomain)
1369 			continue;
1370 
1371 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1372 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1373 				continue;
1374 
1375 			if (equal(addr, ifa->ifa_addr)) {
1376 				KERNEL_UNLOCK();
1377 				return (ifa);
1378 			}
1379 		}
1380 	}
1381 	KERNEL_UNLOCK();
1382 	return (NULL);
1383 }
1384 
1385 /*
1386  * Locate the point to point interface with a given destination address.
1387  */
1388 struct ifaddr *
1389 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1390 {
1391 	struct ifnet *ifp;
1392 	struct ifaddr *ifa;
1393 
1394 	rdomain = rtable_l2(rdomain);
1395 	KERNEL_LOCK();
1396 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1397 		if (ifp->if_rdomain != rdomain)
1398 			continue;
1399 		if (ifp->if_flags & IFF_POINTOPOINT) {
1400 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1401 				if (ifa->ifa_addr->sa_family !=
1402 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1403 					continue;
1404 				if (equal(addr, ifa->ifa_dstaddr)) {
1405 					KERNEL_UNLOCK();
1406 					return (ifa);
1407 				}
1408 			}
1409 		}
1410 	}
1411 	KERNEL_UNLOCK();
1412 	return (NULL);
1413 }
1414 
1415 /*
1416  * Find an interface address specific to an interface best matching
1417  * a given address.
1418  */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;	/* first af match, fallback */
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the first address of the right family. */
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			/*
			 * No netmask (or point-to-point link): only an
			 * exact match on the address or the destination
			 * address counts.
			 */
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/* Compare address bytes under the netmask. */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		/* Reached the end of the mask without a mismatch: match. */
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1453 
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
	/* Intentionally empty: default no-op route request handler. */
}
1458 
1459 /*
1460  * Default action when installing a local route on a point-to-point
1461  * interface.
1462  */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		/* Only local routes get special treatment. */
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the interface address matching the route key. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* Look at the loopback interface of this rdomain. */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		/*
		 * Without a loopback address of the same family there is
		 * nothing to do; lo0ifa is only NULL-tested afterwards.
		 */
		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1505 
int
p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
{
#if NBPFILTER > 0
	/* Tap with the address family stored in the packet header. */
	return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
#else
	/* No bpf(4) in this kernel: nothing to do. */
	return (0);
#endif
}
1515 
1516 void
1517 p2p_input(struct ifnet *ifp, struct mbuf *m)
1518 {
1519 	void (*input)(struct ifnet *, struct mbuf *);
1520 
1521 	switch (m->m_pkthdr.ph_family) {
1522 	case AF_INET:
1523 		input = ipv4_input;
1524 		break;
1525 #ifdef INET6
1526 	case AF_INET6:
1527 		input = ipv6_input;
1528 		break;
1529 #endif
1530 #ifdef MPLS
1531 	case AF_MPLS:
1532 		input = mpls_input;
1533 		break;
1534 #endif
1535 	default:
1536 		m_freem(m);
1537 		return;
1538 	}
1539 
1540 	(*input)(ifp, m);
1541 }
1542 
1543 /*
1544  * Bring down all interfaces
1545  */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* Let the driver see the cleared IFF_UP flag. */
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1562 
1563 /*
1564  * Mark an interface down and notify protocols of
1565  * the transition.
1566  */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Drop any packets still queued for transmission. */
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}
1578 
1579 /*
1580  * Mark an interface up and notify protocols of
1581  * the transition.
1582  */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	if_linkstate(ifp);
}
1599 
1600 /*
1601  * Notify userland, the routing table and hooks owner of
1602  * a link-state transition.
1603  */
void
if_linkstate_task(void *xifidx)
{
	/* The interface index is smuggled through the task cookie. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	/* The interface may have been destroyed since the task was queued. */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1621 
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Tell routing socket listeners, then update the routing table. */
	rtm_ifchg(ifp);
	rt_if_track(ifp);

	/* Finally run the registered link-state hooks. */
	if_hooks_run(&ifp->if_linkstatehooks);
}
1632 
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1640 
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1648 
1649 /*
1650  * Schedule a link state change task.
1651  */
void
if_link_state_change(struct ifnet *ifp)
{
	/* Defer the actual work to if_linkstate_task() on a net taskq. */
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1657 
1658 /*
1659  * Handle interface watchdog timer routine.  Called
1660  * from softclock, we decrement timer (if set) and
1661  * call the appropriate interface routine on expiration.
1662  */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* Fire the watchdog task when the timer expires. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Re-arm ourselves while a watchdog is installed. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1676 
void
if_watchdog_task(void *xifidx)
{
	/* The interface index is smuggled through the task cookie. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	/* The interface may be gone by the time the task runs. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1697 
1698 /*
1699  * Map interface name to interface structure pointer.
1700  */
1701 struct ifnet *
1702 if_unit(const char *name)
1703 {
1704 	struct ifnet *ifp;
1705 
1706 	KERNEL_ASSERT_LOCKED();
1707 
1708 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1709 		if (strcmp(ifp->if_xname, name) == 0) {
1710 			if_ref(ifp);
1711 			return (ifp);
1712 		}
1713 	}
1714 
1715 	return (NULL);
1716 }
1717 
1718 /*
1719  * Map interface index to interface structure pointer.
1720  */
struct ifnet *
if_get(unsigned int index)
{
	struct srp_ref sr;
	struct if_map *if_map;
	struct srp *map;
	struct ifnet *ifp = NULL;

	/* Read the index map under an SRP reference. */
	if_map = srp_enter(&sr, &if_idxmap.map);
	if (index < if_map->limit) {
		/* The slot array lives directly behind the map header. */
		map = (struct srp *)(if_map + 1);

		ifp = srp_follow(&sr, &map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Convert the SRP reference into a refcount. */
			if_ref(ifp);
		}
	}
	srp_leave(&sr);

	return (ifp);
}
1743 
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	/* Return the same pointer so calls can be chained. */
	return (ifp);
}
1751 
1752 void
1753 if_put(struct ifnet *ifp)
1754 {
1755 	if (ifp == NULL)
1756 		return;
1757 
1758 	refcnt_rele_wake(&ifp->if_refcnt);
1759 }
1760 
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	/* Update both the arpcom copy and the link-level sockaddr. */
	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1772 
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if ((error = rtable_add(rdomain)) != 0)
		return (error);
	/* Refuse to hijack a routing table that is already populated. */
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when "ifp" itself is that loopback interface. */
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	/* Bind the new rdomain to its loopback interface. */
	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1802 
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* The rdomain's own loopback interface may never leave it. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1866 
1867 /*
1868  * Interface ioctls.
1869  */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* Requests that are not bound to one existing interface. */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_create(ifr->ifr_name, 0);
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_destroy(ifr->ifr_name);
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		return (error);
	/* Read-only requests are handled by ifioctl_get(). */
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		return (ifioctl_get(cmd, data));
	}

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	/* Snapshot flags so changes can be reported when we are done. */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		/* Enabling IPv6 autoconf attaches the INET6 bits first. */
		if ((ISSET(ifr->ifr_flags, IFXF_AUTOCONF6) ||
		    ISSET(ifr->ifr_flags, IFXF_AUTOCONF6TEMP)) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;

#endif	/* INET6 */

#ifdef MPLS
		/* Toggling MPLS swaps the output routine in or out. */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		/* Wake-on-LAN can only be toggled if the driver offers it. */
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);

		/*
		 * Newly enabling autoconf on a down interface implies
		 * bringing it up: fall into the SIOCSIFFLAGS path.
		 */
		if (!ISSET(ifp->if_flags, IFF_UP) &&
		    ((!ISSET(oif_xflags, IFXF_AUTOCONF4) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF4)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6TEMP) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)))) {
			ifr->ifr_flags = ifp->if_flags | IFF_UP;
			cmd = SIOCSIFFLAGS;
			goto forceup;
		}

		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
forceup:
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error != 0) {
			/* Roll the flags back on driver failure. */
			ifp->if_flags = oif_flags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		/* Tell routing socket listeners about the new MTU. */
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			/* Swap the reference from the old label to the new. */
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		/* Create the rdomain on demand, then move the interface. */
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			/* Drivers that do not care return ENOTTY. */
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		/* The new lladdr must be a plausible unicast MAC address. */
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFMEDIA:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* Give the protocol a first chance at the request. */
		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
			(struct mbuf *) cmd, (struct mbuf *) data,
			(struct mbuf *) ifp, p));
		if (error != EOPNOTSUPP)
			break;
		/* Address-changing requests still require privilege. */
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags) {
		/* if_up() and if_down() already sent an update, skip here */
		if (((oif_flags ^ ifp->if_flags) & IFF_UP) == 0)
			rtm_ifchg(ifp);
	}

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	if_put(ifp);

	return (error);
}
2303 
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Requests that are not bound to a single interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK_IN_IOCTL();
		error = ifconf(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupmembers(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupattribs(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK_IN_IOCTL();
		error = if_getgrouplist(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	}

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	/* A shared lock suffices: everything below only reads. */
	NET_RLOCK_IN_IOCTL();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only dispatches the cases handled above. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK_IN_IOCTL();

	if_put(ifp);

	return (error);
}
2419 
2420 static int
2421 if_sffpage_check(const caddr_t data)
2422 {
2423 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2424 
2425 	switch (sff->sff_addr) {
2426 	case IFSFF_ADDR_EEPROM:
2427 	case IFSFF_ADDR_DDM:
2428 		break;
2429 	default:
2430 		return (EINVAL);
2431 	}
2432 
2433 	return (0);
2434 }
2435 
2436 int
2437 if_txhprio_l2_check(int hdrprio)
2438 {
2439 	switch (hdrprio) {
2440 	case IF_HDRPRIO_PACKET:
2441 		return (0);
2442 	default:
2443 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2444 			return (0);
2445 		break;
2446 	}
2447 
2448 	return (EINVAL);
2449 }
2450 
2451 int
2452 if_txhprio_l3_check(int hdrprio)
2453 {
2454 	switch (hdrprio) {
2455 	case IF_HDRPRIO_PACKET:
2456 	case IF_HDRPRIO_PAYLOAD:
2457 		return (0);
2458 	default:
2459 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2460 			return (0);
2461 		break;
2462 	}
2463 
2464 	return (EINVAL);
2465 }
2466 
2467 int
2468 if_rxhprio_l2_check(int hdrprio)
2469 {
2470 	switch (hdrprio) {
2471 	case IF_HDRPRIO_PACKET:
2472 	case IF_HDRPRIO_OUTER:
2473 		return (0);
2474 	default:
2475 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2476 			return (0);
2477 		break;
2478 	}
2479 
2480 	return (EINVAL);
2481 }
2482 
2483 int
2484 if_rxhprio_l3_check(int hdrprio)
2485 {
2486 	switch (hdrprio) {
2487 	case IF_HDRPRIO_PACKET:
2488 	case IF_HDRPRIO_PAYLOAD:
2489 	case IF_HDRPRIO_OUTER:
2490 		return (0);
2491 	default:
2492 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2493 			return (0);
2494 		break;
2495 	}
2496 
2497 	return (EINVAL);
2498 }
2499 
2500 /*
2501  * Return interface configuration
2502  * of system.  List may be used
2503  * in later ioctl's (above) to get
2504  * other information.
2505  */
2506 int
2507 ifconf(caddr_t data)
2508 {
2509 	struct ifconf *ifc = (struct ifconf *)data;
2510 	struct ifnet *ifp;
2511 	struct ifaddr *ifa;
2512 	struct ifreq ifr, *ifrp;
2513 	int space = ifc->ifc_len, error = 0;
2514 
2515 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2516 	if (space == 0) {
2517 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2518 			struct sockaddr *sa;
2519 
2520 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2521 				space += sizeof (ifr);
2522 			else
2523 				TAILQ_FOREACH(ifa,
2524 				    &ifp->if_addrlist, ifa_list) {
2525 					sa = ifa->ifa_addr;
2526 					if (sa->sa_len > sizeof(*sa))
2527 						space += sa->sa_len -
2528 						    sizeof(*sa);
2529 					space += sizeof(ifr);
2530 				}
2531 		}
2532 		ifc->ifc_len = space;
2533 		return (0);
2534 	}
2535 
2536 	ifrp = ifc->ifc_req;
2537 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2538 		if (space < sizeof(ifr))
2539 			break;
2540 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2541 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2542 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2543 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2544 			    sizeof(ifr));
2545 			if (error)
2546 				break;
2547 			space -= sizeof (ifr), ifrp++;
2548 		} else
2549 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2550 				struct sockaddr *sa = ifa->ifa_addr;
2551 
2552 				if (space < sizeof(ifr))
2553 					break;
2554 				if (sa->sa_len <= sizeof(*sa)) {
2555 					ifr.ifr_addr = *sa;
2556 					error = copyout((caddr_t)&ifr,
2557 					    (caddr_t)ifrp, sizeof (ifr));
2558 					ifrp++;
2559 				} else {
2560 					space -= sa->sa_len - sizeof(*sa);
2561 					if (space < sizeof (ifr))
2562 						break;
2563 					error = copyout((caddr_t)&ifr,
2564 					    (caddr_t)ifrp,
2565 					    sizeof(ifr.ifr_name));
2566 					if (error == 0)
2567 						error = copyout((caddr_t)sa,
2568 						    (caddr_t)&ifrp->ifr_addr,
2569 						    sa->sa_len);
2570 					ifrp = (struct ifreq *)(sa->sa_len +
2571 					    (caddr_t)&ifrp->ifr_addr);
2572 				}
2573 				if (error)
2574 					break;
2575 				space -= sizeof (ifr);
2576 			}
2577 	}
2578 	ifc->ifc_len -= space;
2579 	return (error);
2580 }
2581 
/*
 * Allocate the per-CPU packet/byte/error counters for ifp.  Must be
 * called at most once per interface, before counting starts.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2589 
/*
 * Release the per-CPU counters allocated by if_counters_alloc() and
 * clear the pointer so a stale reference cannot be used.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2598 
/*
 * Fill *data with a statistics snapshot for ifp: the static if_data
 * contents, plus the per-CPU counters (if allocated), plus the
 * statistics of every transmit and receive queue.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		/* collapse the per-CPU counters into one array */
		counters_read(ifp->if_counters, counters, nitems(counters));

		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* add per transmit queue statistics */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* add per receive queue statistics */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2637 
2638 /*
2639  * Dummy functions replaced in ifnet during detach (if protocols decide to
2640  * fiddle with the if during detach.
2641  */
/* Discard anything still queued for transmit on a detached interface. */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2647 
2648 int
2649 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2650 {
2651 	return ENODEV;
2652 }
2653 
2654 /*
2655  * Create interface group without members
2656  */
2657 struct ifg_group *
2658 if_creategroup(const char *groupname)
2659 {
2660 	struct ifg_group	*ifg;
2661 
2662 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2663 		return (NULL);
2664 
2665 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2666 	ifg->ifg_refcnt = 1;
2667 	ifg->ifg_carp_demoted = 0;
2668 	TAILQ_INIT(&ifg->ifg_members);
2669 #if NPF > 0
2670 	pfi_attach_ifgroup(ifg);
2671 #endif
2672 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2673 
2674 	return (ifg);
2675 }
2676 
2677 /*
2678  * Add a group to an interface
2679  */
2680 int
2681 if_addgroup(struct ifnet *ifp, const char *groupname)
2682 {
2683 	struct ifg_list		*ifgl;
2684 	struct ifg_group	*ifg = NULL;
2685 	struct ifg_member	*ifgm;
2686 	size_t			 namelen;
2687 
2688 	namelen = strlen(groupname);
2689 	if (namelen == 0 || namelen >= IFNAMSIZ ||
2690 	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
2691 		return (EINVAL);
2692 
2693 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2694 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2695 			return (EEXIST);
2696 
2697 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2698 		return (ENOMEM);
2699 
2700 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2701 		free(ifgl, M_TEMP, sizeof(*ifgl));
2702 		return (ENOMEM);
2703 	}
2704 
2705 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2706 		if (!strcmp(ifg->ifg_group, groupname))
2707 			break;
2708 
2709 	if (ifg == NULL) {
2710 		ifg = if_creategroup(groupname);
2711 		if (ifg == NULL) {
2712 			free(ifgl, M_TEMP, sizeof(*ifgl));
2713 			free(ifgm, M_TEMP, sizeof(*ifgm));
2714 			return (ENOMEM);
2715 		}
2716 	} else
2717 		ifg->ifg_refcnt++;
2718 	KASSERT(ifg->ifg_refcnt != 0);
2719 
2720 	ifgl->ifgl_group = ifg;
2721 	ifgm->ifgm_ifp = ifp;
2722 
2723 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2724 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2725 
2726 #if NPF > 0
2727 	pfi_group_addmember(groupname, ifp);
2728 #endif
2729 
2730 	return (0);
2731 }
2732 
2733 /*
2734  * Remove a group from an interface
2735  */
2736 int
2737 if_delgroup(struct ifnet *ifp, const char *groupname)
2738 {
2739 	struct ifg_list		*ifgl;
2740 	struct ifg_member	*ifgm;
2741 
2742 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2743 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2744 			break;
2745 	if (ifgl == NULL)
2746 		return (ENOENT);
2747 
2748 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2749 
2750 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2751 		if (ifgm->ifgm_ifp == ifp)
2752 			break;
2753 
2754 	if (ifgm != NULL) {
2755 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2756 		free(ifgm, M_TEMP, sizeof(*ifgm));
2757 	}
2758 
2759 #if NPF > 0
2760 	pfi_group_change(groupname);
2761 #endif
2762 
2763 	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
2764 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2765 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2766 #if NPF > 0
2767 		pfi_detach_ifgroup(ifgl->ifgl_group);
2768 #endif
2769 		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
2770 	}
2771 
2772 	free(ifgl, M_TEMP, sizeof(*ifgl));
2773 
2774 	return (0);
2775 }
2776 
2777 /*
2778  * Stores all groups from an interface in memory pointed
2779  * to by data
2780  */
2781 int
2782 if_getgroup(caddr_t data, struct ifnet *ifp)
2783 {
2784 	int			 len, error;
2785 	struct ifg_list		*ifgl;
2786 	struct ifg_req		 ifgrq, *ifgp;
2787 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2788 
2789 	if (ifgr->ifgr_len == 0) {
2790 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2791 			ifgr->ifgr_len += sizeof(struct ifg_req);
2792 		return (0);
2793 	}
2794 
2795 	len = ifgr->ifgr_len;
2796 	ifgp = ifgr->ifgr_groups;
2797 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2798 		if (len < sizeof(ifgrq))
2799 			return (EINVAL);
2800 		bzero(&ifgrq, sizeof ifgrq);
2801 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2802 		    sizeof(ifgrq.ifgrq_group));
2803 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2804 		    sizeof(struct ifg_req))))
2805 			return (error);
2806 		len -= sizeof(ifgrq);
2807 		ifgp++;
2808 	}
2809 
2810 	return (0);
2811 }
2812 
2813 /*
2814  * Stores all members of a group in memory pointed to by data
2815  */
2816 int
2817 if_getgroupmembers(caddr_t data)
2818 {
2819 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2820 	struct ifg_group	*ifg;
2821 	struct ifg_member	*ifgm;
2822 	struct ifg_req		 ifgrq, *ifgp;
2823 	int			 len, error;
2824 
2825 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2826 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2827 			break;
2828 	if (ifg == NULL)
2829 		return (ENOENT);
2830 
2831 	if (ifgr->ifgr_len == 0) {
2832 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2833 			ifgr->ifgr_len += sizeof(ifgrq);
2834 		return (0);
2835 	}
2836 
2837 	len = ifgr->ifgr_len;
2838 	ifgp = ifgr->ifgr_groups;
2839 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2840 		if (len < sizeof(ifgrq))
2841 			return (EINVAL);
2842 		bzero(&ifgrq, sizeof ifgrq);
2843 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2844 		    sizeof(ifgrq.ifgrq_member));
2845 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2846 		    sizeof(struct ifg_req))))
2847 			return (error);
2848 		len -= sizeof(ifgrq);
2849 		ifgp++;
2850 	}
2851 
2852 	return (0);
2853 }
2854 
/*
 * Report a group's attributes (currently the carp demotion counter)
 * into the ifgroupreq pointed to by data.  Returns ENOENT for an
 * unknown group.
 */
int
if_getgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;

	return (0);
}
2871 
/*
 * Adjust a group's carp demotion counter by the (signed) delta in the
 * request and notify every member interface via SIOCSIFGATTR.  The
 * resulting counter must stay within [0, 0xff].
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* reject deltas that would over- or underflow the counter */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* tell every member about the change */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2898 
2899 /*
2900  * Stores all groups in memory pointed to by data
2901  */
2902 int
2903 if_getgrouplist(caddr_t data)
2904 {
2905 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2906 	struct ifg_group	*ifg;
2907 	struct ifg_req		 ifgrq, *ifgp;
2908 	int			 len, error;
2909 
2910 	if (ifgr->ifgr_len == 0) {
2911 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2912 			ifgr->ifgr_len += sizeof(ifgrq);
2913 		return (0);
2914 	}
2915 
2916 	len = ifgr->ifgr_len;
2917 	ifgp = ifgr->ifgr_groups;
2918 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2919 		if (len < sizeof(ifgrq))
2920 			return (EINVAL);
2921 		bzero(&ifgrq, sizeof ifgrq);
2922 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2923 		    sizeof(ifgrq.ifgrq_group));
2924 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2925 		    sizeof(struct ifg_req))))
2926 			return (error);
2927 		len -= sizeof(ifgrq);
2928 		ifgp++;
2929 	}
2930 
2931 	return (0);
2932 }
2933 
/*
 * Called on route changes: if the affected route is a default route
 * (destination and mask are the unspecified address), rebuild the
 * "egress" interface group membership.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2955 
/*
 * Rebuild the "egress" group from scratch: remove all current
 * members, then add every interface that carries a default route
 * (IPv4 and, if enabled, IPv6) in rtable 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* flush the current membership */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* walk all IPv4 default routes and re-add their interfaces */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* same for IPv6 default routes */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3005 
3006 /*
3007  * Set/clear promiscuous mode on interface ifp based on the truth value
3008  * of pswitch.  The calls are reference counted so that only the first
3009  * "on" request actually has an effect, as does the final "off" request.
3010  * Results are undefined if the "off" and "on" requests are not matched.
3011  */
3012 int
3013 ifpromisc(struct ifnet *ifp, int pswitch)
3014 {
3015 	struct ifreq ifr;
3016 	unsigned short oif_flags;
3017 	int oif_pcount, error;
3018 
3019 	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */
3020 
3021 	oif_flags = ifp->if_flags;
3022 	oif_pcount = ifp->if_pcount;
3023 	if (pswitch) {
3024 		if (ifp->if_pcount++ != 0)
3025 			return (0);
3026 		ifp->if_flags |= IFF_PROMISC;
3027 	} else {
3028 		if (--ifp->if_pcount > 0)
3029 			return (0);
3030 		ifp->if_flags &= ~IFF_PROMISC;
3031 	}
3032 
3033 	if ((ifp->if_flags & IFF_UP) == 0)
3034 		return (0);
3035 
3036 	memset(&ifr, 0, sizeof(ifr));
3037 	ifr.ifr_flags = ifp->if_flags;
3038 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
3039 	if (error) {
3040 		ifp->if_flags = oif_flags;
3041 		ifp->if_pcount = oif_pcount;
3042 	}
3043 
3044 	return (error);
3045 }
3046 
/* Append an address to the interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
3052 
/* Remove an address from the interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
3058 
/*
 * Overwrite an ifaddr's broadcast address in place.  The new sockaddr
 * must have exactly the same length as the existing one; resizing is
 * not supported.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3066 
#ifdef DDB
/*
 * Debug function, can be called from ddb>: print every configured
 * IPv4/IPv6 address together with its interface name.
 */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
3098 
/*
 * Notify an interface that its link-layer address has changed: bounce
 * the interface through an up/down cycle so the driver reprograms its
 * hardware, and regenerate the IPv6 link-local address (which is
 * derived from the lladdr) unless we are forwarding.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;

	NET_ASSERT_LOCKED();	/* for ioctl and in6 */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* ...then (re)raise the interface so the driver reinitializes */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
}
3145 
/* Register a task to run when the interface's addresses change. */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3153 
/* Unregister a previously added address-change task. */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3161 
/* Run all registered address-change hooks for the interface. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3167 
/*
 * Initialize a receive ring accounting structure with the given low
 * and high watermarks.  The current watermark starts at the low one
 * and adapts between the two at runtime.
 */
void
if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
{
	extern int ticks;

	memset(rxr, 0, sizeof(*rxr));

	rxr->rxr_adjusted = ticks;
	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
	rxr->rxr_hwm = hwm;
}
3179 
3180 static inline void
3181 if_rxr_adjust_cwm(struct if_rxring *rxr)
3182 {
3183 	extern int ticks;
3184 
3185 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3186 		return;
3187 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3188 		rxr->rxr_cwm++;
3189 
3190 	rxr->rxr_adjusted = ticks;
3191 }
3192 
3193 void
3194 if_rxr_livelocked(struct if_rxring *rxr)
3195 {
3196 	extern int ticks;
3197 
3198 	if (ticks - rxr->rxr_adjusted >= 1) {
3199 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3200 			rxr->rxr_cwm--;
3201 
3202 		rxr->rxr_adjusted = ticks;
3203 	}
3204 }
3205 
3206 u_int
3207 if_rxr_get(struct if_rxring *rxr, u_int max)
3208 {
3209 	extern int ticks;
3210 	u_int diff;
3211 
3212 	if (ticks - rxr->rxr_adjusted >= 1) {
3213 		/* we're free to try for an adjustment */
3214 		if_rxr_adjust_cwm(rxr);
3215 	}
3216 
3217 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3218 		return (0);
3219 
3220 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3221 	rxr->rxr_alive += diff;
3222 
3223 	return (diff);
3224 }
3225 
/*
 * Copy up to t if_rxring_info entries from e out to the userland
 * if_rxrinfo structure uifri, limited by the space the caller
 * offered, and report the total number available in ifri_total.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* copy no more entries than the user asked for */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3248 
/*
 * Convenience wrapper for drivers with a single receive ring: build
 * one if_rxring_info entry from name/size/rxr and hand it to
 * if_rxr_info_ioctl().
 */
int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}
3265 
3266 /*
3267  * Network stack input queues.
3268  */
3269 
/*
 * Initialize a network stack input queue with a maximum length and
 * the netisr softintr to schedule when packets arrive.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3276 
/*
 * Enqueue a single mbuf on an input queue.  On success the associated
 * netisr is scheduled; on failure (queue full) congestion is signalled
 * to the stack.  Returns the mq_enqueue() result.
 */
int
niq_enqueue(struct niqueue *niq, struct mbuf *m)
{
	int rv;

	rv = mq_enqueue(&niq->ni_q, m);
	if (rv == 0)
		schednetisr(niq->ni_isr);
	else
		if_congestion();

	return (rv);
}
3290 
/*
 * Enqueue a whole mbuf list on an input queue.  Same scheduling and
 * congestion semantics as niq_enqueue().
 */
int
niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
{
	int rv;

	rv = mq_enlist(&niq->ni_q, ml);
	if (rv == 0)
		schednetisr(niq->ni_isr);
	else
		if_congestion();

	return (rv);
}
3304 
/* Panic on an address family the caller has no handler for. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3310 
/*
 * Map an interface index to one of the network taskqs, spreading
 * interfaces across at most min(NET_TASKQ, ncpus) queues.
 */
struct taskq *
net_tq(unsigned int ifindex)
{
	struct taskq *t = NULL;
	static int nettaskqs;

	/*
	 * Lazily computed on first call.  NOTE(review): this static is
	 * written without synchronization; presumably all callers race
	 * to the same value so this is benign — confirm against the
	 * locking rules for this file.
	 */
	if (nettaskqs == 0)
		nettaskqs = min(NET_TASKQ, ncpus);

	t = nettqmp[ifindex % nettaskqs];

	return (t);
}
3324