xref: /openbsd-src/sys/net/if.c (revision cba26e98faa2b48aa4705f205ed876af460243a2)
1 /*	$OpenBSD: if.c,v 1.624 2021/01/09 14:55:21 bluhm Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "switch.h"
72 #include "if_wg.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/timeout.h>
80 #include <sys/protosw.h>
81 #include <sys/kernel.h>
82 #include <sys/ioctl.h>
83 #include <sys/domain.h>
84 #include <sys/task.h>
85 #include <sys/atomic.h>
86 #include <sys/percpu.h>
87 #include <sys/proc.h>
88 #include <sys/stdint.h>	/* uintptr_t */
89 #include <sys/rwlock.h>
90 
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/netisr.h>
96 
97 #include <netinet/in.h>
98 #include <netinet/if_ether.h>
99 #include <netinet/igmp.h>
100 #ifdef MROUTING
101 #include <netinet/ip_mroute.h>
102 #endif
103 
104 #ifdef INET6
105 #include <netinet6/in6_var.h>
106 #include <netinet6/in6_ifattach.h>
107 #include <netinet6/nd6.h>
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 #endif
111 
112 #ifdef MPLS
113 #include <netmpls/mpls.h>
114 #endif
115 
116 #if NBPFILTER > 0
117 #include <net/bpf.h>
118 #endif
119 
120 #if NBRIDGE > 0
121 #include <net/if_bridge.h>
122 #endif
123 
124 #if NCARP > 0
125 #include <netinet/ip_carp.h>
126 #endif
127 
128 #if NPF > 0
129 #include <net/pfvar.h>
130 #endif
131 
132 #include <sys/device.h>
133 
134 void	if_attachsetup(struct ifnet *);
135 void	if_attachdomain(struct ifnet *);
136 void	if_attach_common(struct ifnet *);
137 int	if_createrdomain(int, struct ifnet *);
138 int	if_setrdomain(struct ifnet *, int);
139 void	if_slowtimo(void *);
140 
141 void	if_detached_qstart(struct ifqueue *);
142 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
143 
144 int	ifioctl_get(u_long, caddr_t);
145 int	ifconf(caddr_t);
146 static int
147 	if_sffpage_check(const caddr_t);
148 
149 int	if_getgroup(caddr_t, struct ifnet *);
150 int	if_getgroupmembers(caddr_t);
151 int	if_getgroupattribs(caddr_t);
152 int	if_setgroupattribs(caddr_t);
153 int	if_getgrouplist(caddr_t);
154 
155 void	if_linkstate(struct ifnet *);
156 void	if_linkstate_task(void *);
157 
158 int	if_clone_list(struct if_clonereq *);
159 struct if_clone	*if_clone_lookup(const char *, int *);
160 
161 int	if_group_egress_build(void);
162 
163 void	if_watchdog_task(void *);
164 
165 void	if_netisr(void *);
166 
167 #ifdef DDB
168 void	ifa_print_all(void);
169 #endif
170 
171 void	if_qstart_compat(struct ifqueue *);
172 
173 /*
174  * interface index map
175  *
176  * the kernel maintains a mapping of interface indexes to struct ifnet
177  * pointers.
178  *
179  * the map is an array of struct ifnet pointers prefixed by an if_map
180  * structure. the if_map structure stores the length of its array.
181  *
182  * as interfaces are attached to the system, the map is grown on demand
183  * up to USHRT_MAX entries.
184  *
185  * interface index 0 is reserved and represents no interface. this
186  * supports the use of the interface index as the scope for IPv6 link
187  * local addresses, where scope 0 means no scope has been specified.
188  * it also supports the use of interface index as the unique identifier
189  * for network interfaces in SNMP applications as per RFC2863. therefore
190  * if_get(0) returns NULL.
191  */
192 
193 void if_ifp_dtor(void *, void *);
194 void if_map_dtor(void *, void *);
195 struct ifnet *if_ref(struct ifnet *);
196 
197 /*
198  * struct if_map
199  *
200  * bounded array of ifnet srp pointers used to fetch references of live
201  * interfaces with if_get().
202  */
203 
204 struct if_map {
205 	unsigned long		 limit;
206 	/* followed by limit ifnet srp pointers */
207 };
208 
209 /*
210  * struct if_idxmap
211  *
212  * infrastructure to manage updates and accesses to the current if_map.
213  */
214 
215 struct if_idxmap {
216 	unsigned int		 serial;
217 	unsigned int		 count;
218 	struct srp		 map;
219 };
220 
221 void	if_idxmap_init(unsigned int);
222 void	if_idxmap_insert(struct ifnet *);
223 void	if_idxmap_remove(struct ifnet *);
224 
/* list of all interface groups in the system */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/* registered interface cloners and their count */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;

/* serializes creation and destruction of cloned interfaces */
struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonerlock");

/* hooks should only be added, deleted, and run from a process context */
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void	if_hooks_run(struct task_list *);

int	ifq_congestion;

/* bitmask of pending legacy protocol handlers; see if_netisr() */
int		 netisr;

#define	NET_TASKQ	1
struct taskq	*nettqmp[NET_TASKQ];

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
250 
251 /*
252  * Network interface utility routines.
253  */
254 void
255 ifinit(void)
256 {
257 	unsigned int	i;
258 
259 	/*
260 	 * most machines boot with 4 or 5 interfaces, so size the initial map
261 	 * to accomodate this
262 	 */
263 	if_idxmap_init(8);
264 
265 	for (i = 0; i < NET_TASKQ; i++) {
266 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
267 		if (nettqmp[i] == NULL)
268 			panic("unable to create network taskq %d", i);
269 	}
270 }
271 
/*
 * the live interface index map; modified only under the kernel lock
 * by if_idxmap_insert()/if_idxmap_remove().
 */
static struct if_idxmap if_idxmap = {
	0,
	0,
	SRP_INITIALIZER()
};

/* srp garbage collectors for retired ifnet pointers and retired maps */
struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of all interfaces in the system */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
282 
283 void
284 if_idxmap_init(unsigned int limit)
285 {
286 	struct if_map *if_map;
287 	struct srp *map;
288 	unsigned int i;
289 
290 	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
291 
292 	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
293 	    M_IFADDR, M_WAITOK);
294 
295 	if_map->limit = limit;
296 	map = (struct srp *)(if_map + 1);
297 	for (i = 0; i < limit; i++)
298 		srp_init(&map[i]);
299 
300 	/* this is called early so there's nothing to race with */
301 	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
302 }
303 
/*
 * Allocate a free interface index for ifp and publish the interface
 * in the index map so if_get() can return it.  The map is grown
 * (doubled) when the candidate index falls beyond its current limit.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* candidate index; serial wraps at USHRT_MAX */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		/* copy live entries across, taking a reference for the new map */
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		/* initialise the newly added slots */
		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* the old map is released via if_map_dtor once readers drain */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
367 
/*
 * Withdraw ifp from the index map so if_get() can no longer find it,
 * then wait for all outstanding references to be released.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* clearing the slot hands the map's reference to the srp gc */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
393 
/*
 * srp_gc callback for ifnet pointers retired from the index map:
 * drop the if_ref() reference taken when the pointer was published.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
399 
400 void
401 if_map_dtor(void *null, void *m)
402 {
403 	struct if_map *if_map = m;
404 	struct srp *map = (struct srp *)(if_map + 1);
405 	unsigned int i;
406 
407 	/*
408 	 * dont need to serialize the use of update_locked since this is
409 	 * the last reference to this map. there's nothing to race against.
410 	 */
411 	for (i = 0; i < if_map->limit; i++)
412 		srp_update_locked(&if_ifp_gc, &map[i], NULL);
413 
414 	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
415 }
416 
/*
 * Attach an interface to the
 * list of "active" interfaces.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	/* every interface is a member of the "all" group */
	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	/* set up and kick the watchdog timeout */
	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	/* publish the interface in the index map so if_get() can find it */
	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	/*
	 * the tasks get the interface index, not the pointer;
	 * NOTE(review): presumably the handlers resolve it via if_get() --
	 * confirm in if_watchdog_task()/if_linkstate_task().
	 */
	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
451 
/*
 * Allocate the link level name for the specified interface.  This
 * is an attachment helper.  It must be called after ifp->if_addrlen
 * is initialized, which may not be the case when if_attach() is
 * called.
 */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* round up to a multiple of sizeof(long) */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
489 
490 /*
491  * Free the link level name for the specified interface.  This is
492  * a detach helper.  This is called from if_detach() or from
493  * link layer type specific detach functions.
494  */
495 void
496 if_free_sadl(struct ifnet *ifp)
497 {
498 	if (ifp->if_sadl == NULL)
499 		return;
500 
501 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
502 	ifp->if_sadl = NULL;
503 }
504 
/*
 * Initialise ifp's per-address-family data by invoking each domain's
 * dom_ifattach hook.  Runs at splnet().
 */
void
if_attachdomain(struct ifnet *ifp)
{
	struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
523 
/*
 * Attach ifp, inserting it at the head of the global interface list.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
533 
/*
 * Attach ifp, inserting it at the tail of the global interface list.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
543 
544 void
545 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
546 {
547 	struct ifqueue **map;
548 	struct ifqueue *ifq;
549 	int i;
550 
551 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
552 	KASSERT(nqs != 0);
553 
554 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
555 
556 	ifp->if_snd.ifq_softc = NULL;
557 	map[0] = &ifp->if_snd;
558 
559 	for (i = 1; i < nqs; i++) {
560 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
561 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
562 		ifq_init(ifq, ifp, i);
563 		map[i] = ifq;
564 	}
565 
566 	ifp->if_ifqs = map;
567 	ifp->if_nifqs = nqs;
568 }
569 
570 void
571 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
572 {
573 	struct ifiqueue **map;
574 	struct ifiqueue *ifiq;
575 	unsigned int i;
576 
577 	KASSERT(niqs != 0);
578 
579 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
580 
581 	ifp->if_rcv.ifiq_softc = NULL;
582 	map[0] = &ifp->if_rcv;
583 
584 	for (i = 1; i < niqs; i++) {
585 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
586 		ifiq_init(ifiq, ifp, i);
587 		map[i] = ifiq;
588 	}
589 
590 	ifp->if_iqs = map;
591 	ifp->if_niqs = niqs;
592 }
593 
/*
 * Attachment work shared by if_attach() and if_attachhead():
 * initialise address lists, default queues, hook lists and fallback
 * handlers.  Callers still insert the interface into the global list
 * and call if_attachsetup().
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	/*
	 * MPSAFE drivers must provide if_qstart; legacy drivers provide
	 * if_start and get the kernel-locked compat wrapper installed.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* start with a single transmit queue: the embedded if_snd */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* and a single receive queue: the embedded if_rcv */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* install safe defaults for handlers the driver did not set */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;
}
637 
/*
 * Install a queueing discipline (ifq_ops) on an interface's send
 * queue.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
652 
/*
 * Legacy transmit entry point: run the driver's if_start through the
 * compatibility qstart routine on the default send queue.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
/*
 * qstart handler for pre-MPSAFE drivers: translate a queue start
 * request back into the driver's if_start, run under the kernel lock
 * at splnet().
 */
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
680 
/*
 * Queue a packet for transmission on ifp.  Bridge member interfaces
 * divert the packet to the bridge instead; otherwise the interface's
 * own if_enqueue handler runs.  Returns 0 or an errno.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	/* pf may have asked for this packet's transmission to be delayed */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/*
	 * bridge members hand their output to the bridge, unless M_PROTO1
	 * is set (NOTE(review): presumably set by the bridge itself to
	 * break the loop -- confirm in bridge_enqueue()).
	 */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
706 
707 int
708 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
709 {
710 	struct ifqueue *ifq = &ifp->if_snd;
711 	int error;
712 
713 	if (ifp->if_nifqs > 1) {
714 		unsigned int idx;
715 
716 		/*
717 		 * use the operations on the first ifq to pick which of
718 		 * the array gets this mbuf.
719 		 */
720 
721 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
722 		ifq = ifp->if_ifqs[idx];
723 	}
724 
725 	error = ifq_enqueue(ifq, m);
726 	if (error)
727 		return (error);
728 
729 	ifq_start(ifq);
730 
731 	return (0);
732 }
733 
734 void
735 if_input(struct ifnet *ifp, struct mbuf_list *ml)
736 {
737 	ifiq_input(&ifp->if_rcv, ml);
738 }
739 
/*
 * Loop a packet back into the stack as if it had been received on
 * ifp.  Used for loopback output and for duplicating packets on
 * SIMPLEX interfaces.  Returns EAFNOSUPPORT for unhandled families.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* reset the packet header but preserve broadcast/multicast marks */
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* the packet counts as both sent and received on this interface */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
794 
795 int
796 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
797 {
798 	struct ifiqueue *ifiq;
799 	unsigned int flow = 0;
800 
801 	m->m_pkthdr.ph_family = af;
802 	m->m_pkthdr.ph_ifidx = ifp->if_index;
803 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
804 
805 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
806 		flow = m->m_pkthdr.ph_flowid;
807 
808 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
809 
810 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
811 }
812 
/*
 * Drain a list of received packets into ifp's if_input handler,
 * holding the exclusive netlock across the whole batch.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* feed packet arrival data to the randomness pool (real hw only) */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists and the socket layer.
	 */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK();
}
841 
/*
 * Deliver a single received mbuf directly to ifp's input handler,
 * updating the input counters and tapping bpf on the way.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* claim the packet for this interface and its routing domain */
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* drop the packet if the bpf tap rejects it */
		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	(*ifp->if_input)(ifp, m);
}
867 
/*
 * Softnet task that runs the legacy netisr protocol handlers whose
 * bits are set in the global ``netisr'' word.  Handlers that are not
 * yet MP safe run under the kernel lock.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the pending bits; new ones may be posted meanwhile */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
		/* accumulate all bits handled for the pfsync check below */
		t |= n;
	}

#if NPFSYNC > 0
	/* pfsync is handled once, after all other netisrs are drained */
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
923 
/*
 * Run every task on a hook list.  The list mutex is dropped while each
 * hook executes; a cursor task (t_func == NULL) is left in the list to
 * mark our position so concurrent add/del cannot strand the iteration.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		/* park the cursor after t so we can resume from there */
		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		/* call the hook with the mutex released */
		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
952 
/*
 * Run the detach hooks registered on ifp, e.g. so pseudo-drivers
 * stacked on top of it can undo their changes.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
966 
/*
 * Register a task to run when ifp is detached.  Hooks are inserted at
 * the head so if_deactivate() runs them in reverse order of addition.
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
974 
/*
 * Remove a previously registered detach hook from ifp.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
982 
/*
 * Detach an interface from everything in the kernel.  Also deallocate
 * private resources.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	ifq_clr_oactive(&ifp->if_snd);

	/* Other CPUs must not have a reference before we start destroying. */
	if_idxmap_remove(ifp);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* point the handlers at detached stubs so late callers fail safely */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	/* withdraw the interface from the routing and protocol layers */
	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Remove the interface from the list of all interfaces.  */
	TAILQ_REMOVE(&ifnet, ifp, if_list);

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

	/* release the per-domain data set up by if_attachdomain() */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* tear down the queue arrays built by if_attach_queues() */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* and the receive queue arrays built by if_attach_iqueues() */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1096 
1097 /*
1098  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1099  */
1100 int
1101 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1102 {
1103 	struct ifnet *ifp;
1104 	int connected = 0;
1105 
1106 	ifp = if_get(ifidx);
1107 	if (ifp == NULL)
1108 		return (0);
1109 
1110 	if (ifp0->if_index == ifp->if_index)
1111 		connected = 1;
1112 
1113 #if NBRIDGE > 0
1114 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1115 		connected = 1;
1116 #endif
1117 #if NCARP > 0
1118 	if ((ifp0->if_type == IFT_CARP &&
1119 	    ifp0->if_carpdevidx == ifp->if_index) ||
1120 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1121 		connected = 1;
1122 #endif
1123 
1124 	if_put(ifp);
1125 	return (connected);
1126 }
1127 
/*
 * Create a clone network interface.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	/* serialize against other clone create/destroy operations */
	rw_enter_write(&if_cloners_lock);

	if (ifunit(name) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	/* the cloner's create hook is expected to attach the ifnet */
	if (ret != 0 || (ifp = ifunit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	/* new clones join the group named after their cloner */
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1164 
/*
 * Destroy a clone network interface.
 */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	/* not every cloner supports destruction */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	rw_enter_write(&if_cloners_lock);

	ifp = ifunit(name);
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	NET_LOCK();
	/* bring the interface down before handing it to the driver */
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1204 
/*
 * Look up a network interface cloner.
 *
 * Splits ``name'' into a cloner name and a decimal unit number, finds the
 * matching cloner on the if_cloners list, and stores the unit in ``*unitp''
 * if non-NULL.  Returns NULL for a malformed name (no unit, zero-padded
 * unit, unit overflow) or when no cloner matches.
 */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */

	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	/* Match on the name prefix only; exact length must agree. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}

	if (ifc == NULL)
		return (NULL);

	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		/* Reject non-digits and values that would overflow an int. */
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}
1250 
/*
 * Register a network interface cloner.
 */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	if_cloners_count++;
}
1267 
1268 /*
1269  * Provide list of interface cloners to userspace.
1270  */
1271 int
1272 if_clone_list(struct if_clonereq *ifcr)
1273 {
1274 	char outbuf[IFNAMSIZ], *dst;
1275 	struct if_clone *ifc;
1276 	int count, error = 0;
1277 
1278 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1279 		/* Just asking how many there are. */
1280 		ifcr->ifcr_total = if_cloners_count;
1281 		return (0);
1282 	}
1283 
1284 	if (ifcr->ifcr_count < 0)
1285 		return (EINVAL);
1286 
1287 	ifcr->ifcr_total = if_cloners_count;
1288 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1289 
1290 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1291 		if (count == 0)
1292 			break;
1293 		bzero(outbuf, sizeof outbuf);
1294 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1295 		error = copyout(outbuf, dst, IFNAMSIZ);
1296 		if (error)
1297 			break;
1298 		count--;
1299 		dst += IFNAMSIZ;
1300 	}
1301 
1302 	return (error);
1303 }
1304 
/*
 * set queue congestion marker
 *
 * Records the current tick count; if_congested() compares against this
 * to decide whether congestion happened "recently".
 */
void
if_congestion(void)
{
	extern int ticks;

	ifq_congestion = ticks;
}
1315 
1316 int
1317 if_congested(void)
1318 {
1319 	extern int ticks;
1320 	int diff;
1321 
1322 	diff = ticks - ifq_congestion;
1323 	if (diff < 0) {
1324 		ifq_congestion = ticks - hz;
1325 		return (0);
1326 	}
1327 
1328 	return (diff <= (hz / 100));
1329 }
1330 
/*
 * Compare two sockaddrs for equality.  The length of ``a1'' governs how
 * many bytes are compared, so callers must check sa_family (and implicitly
 * sa_len) compatibility first.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1334 
1335 /*
1336  * Locate an interface based on a complete address.
1337  */
1338 struct ifaddr *
1339 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1340 {
1341 	struct ifnet *ifp;
1342 	struct ifaddr *ifa;
1343 	u_int rdomain;
1344 
1345 	rdomain = rtable_l2(rtableid);
1346 	KERNEL_LOCK();
1347 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1348 		if (ifp->if_rdomain != rdomain)
1349 			continue;
1350 
1351 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1352 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1353 				continue;
1354 
1355 			if (equal(addr, ifa->ifa_addr)) {
1356 				KERNEL_UNLOCK();
1357 				return (ifa);
1358 			}
1359 		}
1360 	}
1361 	KERNEL_UNLOCK();
1362 	return (NULL);
1363 }
1364 
1365 /*
1366  * Locate the point to point interface with a given destination address.
1367  */
1368 struct ifaddr *
1369 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1370 {
1371 	struct ifnet *ifp;
1372 	struct ifaddr *ifa;
1373 
1374 	rdomain = rtable_l2(rdomain);
1375 	KERNEL_LOCK();
1376 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1377 		if (ifp->if_rdomain != rdomain)
1378 			continue;
1379 		if (ifp->if_flags & IFF_POINTOPOINT) {
1380 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1381 				if (ifa->ifa_addr->sa_family !=
1382 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1383 					continue;
1384 				if (equal(addr, ifa->ifa_dstaddr)) {
1385 					KERNEL_UNLOCK();
1386 					return (ifa);
1387 				}
1388 			}
1389 		}
1390 	}
1391 	KERNEL_UNLOCK();
1392 	return (NULL);
1393 }
1394 
/*
 * Find an interface address specific to an interface best matching
 * a given address.
 *
 * Exact matches win; otherwise the first address whose netmask covers
 * ``addr'' is returned; failing that, the first address of the right
 * family (``ifa_maybe'') is returned as a fallback, or NULL.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the first same-family address as a fallback. */
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			/* No mask: only an exact (or dstaddr) match counts. */
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/*
		 * Compare the address bytes under the netmask; the mask's
		 * own sa_len bounds the comparison.
		 */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1433 
/* No-op rtrequest handler for interfaces without special route needs. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1438 
/*
 * Default action when installing a local route on a point-to-point
 * interface.
 */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the local address this route was installed for. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/*
		 * Only proceed if the routing domain's loopback interface
		 * has an address of the same family.
		 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1485 
1486 
1487 /*
1488  * Bring down all interfaces
1489  */
1490 void
1491 if_downall(void)
1492 {
1493 	struct ifreq ifrq;	/* XXX only partly built */
1494 	struct ifnet *ifp;
1495 
1496 	NET_LOCK();
1497 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1498 		if ((ifp->if_flags & IFF_UP) == 0)
1499 			continue;
1500 		if_down(ifp);
1501 		ifrq.ifr_flags = ifp->if_flags;
1502 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1503 	}
1504 	NET_UNLOCK();
1505 }
1506 
/*
 * Mark an interface down and notify protocols of
 * the transition.  Pending output in if_snd is discarded.
 */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}
1522 
/*
 * Mark an interface up and notify protocols of
 * the transition.
 */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	if_linkstate(ifp);
}
1543 
/*
 * Notify userland, the routing table and hooks owner of
 * a link-state transition.
 *
 * Runs as a deferred task (see if_link_state_change()); ``xifidx'' is
 * the interface index smuggled through the task's void * argument.
 */
void
if_linkstate_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	/* The interface may have been detached since the task was queued. */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1565 
/*
 * Announce a link-state change: routing socket message, route table
 * revalidation, then any registered link-state hooks.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	rtm_ifchg(ifp);
	rt_if_track(ifp);

	if_hooks_run(&ifp->if_linkstatehooks);
}
1576 
/* Register a task to run on link-state changes of ``ifp''. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1584 
/* Unregister a link-state hook previously added with if_linkstatehook_add(). */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1592 
/*
 * Schedule a link state change task.
 * The actual work happens in if_linkstate_task() on the net taskq.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1601 
/*
 * Handle interface watchdog timer routine.  Called
 * from softclock, we decrement timer (if set) and
 * call the appropriate interface routine on expiration.
 */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* if_timer counts down in IFNET_SLOWTIMO steps. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Re-arm only while a watchdog handler is installed. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1620 
/*
 * Deferred watchdog invocation; ``xifidx'' is the interface index passed
 * through the task's void * argument (see if_slowtimo()).
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	/* The interface may have been detached since the task was queued. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1641 
1642 /*
1643  * Map interface name to interface structure pointer.
1644  */
1645 struct ifnet *
1646 ifunit(const char *name)
1647 {
1648 	struct ifnet *ifp;
1649 
1650 	KERNEL_ASSERT_LOCKED();
1651 
1652 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1653 		if (strcmp(ifp->if_xname, name) == 0)
1654 			return (ifp);
1655 	}
1656 	return (NULL);
1657 }
1658 
/*
 * Map interface index to interface structure pointer.
 *
 * Takes a reference on the returned interface; the caller must release
 * it with if_put().  Lookups go through the SRP-protected index map so
 * they are safe without the kernel lock.
 */
struct ifnet *
if_get(unsigned int index)
{
	struct srp_ref sr;
	struct if_map *if_map;
	struct srp *map;
	struct ifnet *ifp = NULL;

	if_map = srp_enter(&sr, &if_idxmap.map);
	if (index < if_map->limit) {
		/* The per-index SRP array immediately follows the header. */
		map = (struct srp *)(if_map + 1);

		ifp = srp_follow(&sr, &map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			if_ref(ifp);
		}
	}
	srp_leave(&sr);

	return (ifp);
}
1684 
/* Take an additional reference on ``ifp''; paired with if_put(). */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1692 
1693 void
1694 if_put(struct ifnet *ifp)
1695 {
1696 	if (ifp == NULL)
1697 		return;
1698 
1699 	refcnt_rele_wake(&ifp->if_refcnt);
1700 }
1701 
/*
 * Install a new link-layer (Ethernet) address on ``ifp''.
 *
 * NOTE(review): the cast assumes ifp is embedded in a struct arpcom;
 * callers (ifioctl SIOCSIFLLADDR) gate on if_type before calling here —
 * confirm for any new caller.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1713 
/*
 * Create routing domain ``rdomain'' and its loopback interface.
 *
 * The routing table is created if missing and must otherwise be empty.
 * Returns 0 on success, EEXIST if the rdomain is already populated,
 * ENXIO if the loopback interface cannot be found after creation, or a
 * propagated error.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when ``ifp'' itself is that loopback interface. */
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1740 
/*
 * Move ``ifp'' into routing domain ``rdomain''.
 *
 * The target routing table must exist and be a real rdomain (its own
 * layer-2 table).  A default loopback interface may not leave its
 * rdomain.  All addresses and routes of the interface are torn down and
 * the interface is bounced if it was up.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1804 
/*
 * Interface ioctls.
 *
 * Privileged set-operations require suser(); read-only operations are
 * dispatched to ifioctl_get().  A routing-socket message is generated
 * when flags changed, and if_lastchange is updated on IFF_UP toggles.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* Commands that do not operate on an existing interface. */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_create(ifr->ifr_name, 0);
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_destroy(ifr->ifr_name);
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		return (error);
	/* Read-only commands are handled under the shared net lock. */
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		return (ifioctl_get(cmd, data));
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	/* Remember flags so changes can be reported at the end. */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
		/* Only non-IFF_CANTCHANGE bits may be set by userland. */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);

		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error != 0) {
			/* Driver rejected the change: restore old flags. */
			ifp->if_flags = oif_flags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;

#endif	/* INET6 */

#ifdef MPLS
		/* Toggling IFXF_MPLS swaps the output routine in/out. */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			/* WoL requested on hardware that can't do it. */
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		if (!error)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			/* Drop the old label's reference before replacing. */
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		/* Create the rdomain on demand, then move the interface. */
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			/* Drivers without a handler return ENOTTY: ignore. */
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		/* Only unicast Ethernet-sized addresses are acceptable. */
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	/* Privileged commands passed straight to the protocol/driver. */
	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFMEDIA:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
	case SIOCSWSPORTNO:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* Give the protocol a shot first. */
		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
			(struct mbuf *) cmd, (struct mbuf *) data,
			(struct mbuf *) ifp, p));
		if (error != EOPNOTSUPP)
			break;
		/* Address-changing commands still require privilege. */
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
		rtm_ifchg(ifp);

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	return (error);
}
2219 
/*
 * Handle the read-only interface ioctls.  These never modify interface
 * state and run under the shared (read) net lock; no privilege checks
 * are required.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Commands that do not reference a particular interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK_IN_IOCTL();
		error = ifconf(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupmembers(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK_IN_IOCTL();
		error = if_getgroupattribs(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK_IN_IOCTL();
		error = if_getgrouplist(data);
		NET_RUNLOCK_IN_IOCTL();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK_IN_IOCTL();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is derived from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Internal bits are not exposed to userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only dispatches the commands handled above. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK_IN_IOCTL();

	return (error);
}
2333 
2334 static int
2335 if_sffpage_check(const caddr_t data)
2336 {
2337 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2338 
2339 	switch (sff->sff_addr) {
2340 	case IFSFF_ADDR_EEPROM:
2341 	case IFSFF_ADDR_DDM:
2342 		break;
2343 	default:
2344 		return (EINVAL);
2345 	}
2346 
2347 	return (0);
2348 }
2349 
2350 int
2351 if_txhprio_l2_check(int hdrprio)
2352 {
2353 	switch (hdrprio) {
2354 	case IF_HDRPRIO_PACKET:
2355 		return (0);
2356 	default:
2357 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2358 			return (0);
2359 		break;
2360 	}
2361 
2362 	return (EINVAL);
2363 }
2364 
2365 int
2366 if_txhprio_l3_check(int hdrprio)
2367 {
2368 	switch (hdrprio) {
2369 	case IF_HDRPRIO_PACKET:
2370 	case IF_HDRPRIO_PAYLOAD:
2371 		return (0);
2372 	default:
2373 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2374 			return (0);
2375 		break;
2376 	}
2377 
2378 	return (EINVAL);
2379 }
2380 
2381 int
2382 if_rxhprio_l2_check(int hdrprio)
2383 {
2384 	switch (hdrprio) {
2385 	case IF_HDRPRIO_PACKET:
2386 	case IF_HDRPRIO_OUTER:
2387 		return (0);
2388 	default:
2389 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2390 			return (0);
2391 		break;
2392 	}
2393 
2394 	return (EINVAL);
2395 }
2396 
2397 int
2398 if_rxhprio_l3_check(int hdrprio)
2399 {
2400 	switch (hdrprio) {
2401 	case IF_HDRPRIO_PACKET:
2402 	case IF_HDRPRIO_PAYLOAD:
2403 	case IF_HDRPRIO_OUTER:
2404 		return (0);
2405 	default:
2406 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2407 			return (0);
2408 		break;
2409 	}
2410 
2411 	return (EINVAL);
2412 }
2413 
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * With ifc_len == 0 only the required buffer size is reported.
 * Otherwise one struct ifreq is copied out per address (or per
 * address-less interface); sockaddrs longer than the embedded
 * ifr_addr spill past the end of their ifreq.
 */
int
ifconf(caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnet, if_list) {
			struct sockaddr *sa;

			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					/* Oversized sockaddrs need extra room. */
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			/* Address-less interface: emit a zeroed ifr_addr. */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					/*
					 * Sockaddr is larger than ifr_addr:
					 * copy the name, then the sockaddr
					 * in place, and advance past it.
					 */
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	/* Report how many bytes were actually used. */
	ifc->ifc_len -= space;
	return (error);
}
2495 
/*
 * Allocate the counters(9) statistics block for an interface.
 * Must be called at most once per interface, before any counter
 * is incremented.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2503 
/*
 * Release the counters(9) block allocated by if_counters_alloc()
 * and clear the pointer so a stale reference cannot be used.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2512 
/*
 * Snapshot the interface statistics into *data: start from the
 * static if_data, then fold in the counters(9) values (when the
 * interface allocated them) and the per-queue send/receive stats.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters));

		/* Accumulate into the legacy if_data fields. */
		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* Add per-transmit-queue statistics. */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* Add per-receive-queue statistics. */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2551 
2552 /*
2553  * Dummy functions replaced in ifnet during detach (if protocols decide to
2554  * fiddle with the if during detach.
2555  */
/*
 * qstart handler for a detaching interface: drop everything that is
 * still queued for transmission instead of touching the hardware.
 */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2561 
2562 int
2563 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2564 {
2565 	return ENODEV;
2566 }
2567 
2568 /*
2569  * Create interface group without members
2570  */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group	*ifg;

	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	/* Reference count starts at 0; if_addgroup() takes the first ref. */
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	/* Let pf(4) know about the new group so it can track it. */
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}
2590 
2591 /*
2592  * Add a group to an interface
2593  */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;

	/*
	 * Group names may not end in a digit: such names are reserved
	 * for the implicit per-interface groups (e.g. "em0").
	 */
	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	/* Refuse to add the interface to the same group twice. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	/* Allocate both link structures before committing anything. */
	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		free(ifgl, M_TEMP, sizeof(*ifgl));
		return (ENOMEM);
	}

	/* Reuse an existing group of that name if there is one. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
		free(ifgl, M_TEMP, sizeof(*ifgl));
		free(ifgm, M_TEMP, sizeof(*ifgm));
		return (ENOMEM);
	}

	/* Link interface and group to each other and take a group ref. */
	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_addmember(groupname, ifp);
#endif

	return (0);
}
2640 
2641 /*
2642  * Remove a group from an interface
2643  */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	/* Find the interface's link to the named group. */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* Remove this interface from the group's member list. */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		free(ifgm, M_TEMP, sizeof(*ifgm));
	}

#if NPF > 0
	pfi_group_change(groupname);
#endif

	/* Drop the group reference; destroy the group on the last one. */
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
	}

	free(ifgl, M_TEMP, sizeof(*ifgl));

	return (0);
}
2683 
2684 /*
2685  * Stores all groups from an interface in memory pointed
2686  * to by data
2687  */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int			 len, error;
	struct ifg_list		*ifgl;
	struct ifg_req		 ifgrq, *ifgp;
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;

	/* Size query: report how many bytes a full listing would need. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	/* Copy one ifg_req per group into the user-supplied buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2719 
2720 /*
2721  * Stores all members of a group in memory pointed to by data
2722  */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Look the group up by name. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* Size query: report how many bytes a full listing would need. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy one ifg_req per member into the user-supplied buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2761 
/*
 * Return the attributes (currently only the carp demotion counter)
 * of the group named in the request.
 */
int
if_getgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;

	return (0);
}
2778 
/*
 * Adjust the carp demotion counter of a group by the (possibly
 * negative) delta in the request and notify every member interface
 * via its SIOCSIFGATTR ioctl handler.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* Keep the resulting counter within the [0, 0xff] range. */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2805 
2806 /*
2807  * Stores all groups in memory pointed to by data
2808  */
int
if_getgrouplist(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Size query: report how many bytes a full listing would need. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy one ifg_req per existing group to userland. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2840 
/*
 * Called on route changes: if the affected route is a default route
 * (destination and mask are the unspecified address), rebuild the
 * "egress" interface group membership.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2862 
/*
 * Rebuild the IFG_EGRESS group from scratch: empty it, then add
 * every interface that a default route (IPv4 and, if configured,
 * IPv6) in routing table 0 points at.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* Drop all current members; _SAFE because if_delgroup frees them. */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk all IPv4 default routes in rtable 0. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same for IPv6 default routes. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2912 
2913 /*
2914  * Set/clear promiscuous mode on interface ifp based on the truth value
2915  * of pswitch.  The calls are reference counted so that only the first
2916  * "on" request actually has an effect, as does the final "off" request.
2917  * Results are undefined if the "off" and "on" requests are not matched.
2918  */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */

	/* Save state so a failed driver ioctl can be rolled back. */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* Only the first "on" request changes the flag. */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* Only the final "off" request changes the flag. */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/*
	 * If the device is not up, defer the SIOCSIFFLAGS to whoever
	 * brings it up; the flag change alone is enough for now.
	 */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		/* Driver refused: restore the previous state. */
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
2953 
/* Append an address to an interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2959 
/* Remove an address from an interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2965 
/*
 * Overwrite an address's broadcast address in place.  The new
 * sockaddr must have exactly the same length as the old one since
 * the storage is not reallocated.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
2973 
2974 #ifdef DDB
2975 /* debug function, can be called from ddb> */
/* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* Print every configured IPv4/IPv6 address on every interface. */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
3004 #endif /* DDB */
3005 
/*
 * Propagate a link-layer address change to the driver and to the
 * IPv6 stack by bouncing the interface: briefly bring it down (if it
 * was up), bring it up so the driver reprograms the hardware, then
 * restore the original up/down state.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* ...and bring up so the driver reinitializes with the new lladdr. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			/* Replace the old EUI-64 derived link-local. */
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3052 
/*
 * Register a task to run whenever the interface's addresses change.
 * The list is serialized by if_hooks_mtx.
 */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3060 
/*
 * Unregister an address-change task previously added with
 * if_addrhook_add().  The list is serialized by if_hooks_mtx.
 */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3068 
/* Run all registered address-change hooks for the interface. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3074 
/*
 * Initialize a receive ring accounting structure with a low and
 * high watermark.  The current watermark starts at the low one and
 * is adjusted over time by if_rxr_get()/if_rxr_livelocked().
 */
void
if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
{
	extern int ticks;

	memset(rxr, 0, sizeof(*rxr));

	rxr->rxr_adjusted = ticks;
	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
	rxr->rxr_hwm = hwm;
}
3086 
/*
 * Possibly grow the current watermark.  If the number of buffers
 * still alive on the ring is at or above the low watermark there is
 * no pressure and nothing is changed; otherwise the current
 * watermark is raised by one, up to the high watermark.
 */
static inline void
if_rxr_adjust_cwm(struct if_rxring *rxr)
{
	extern int ticks;

	if (rxr->rxr_alive >= rxr->rxr_lwm)
		return;
	else if (rxr->rxr_cwm < rxr->rxr_hwm)
		rxr->rxr_cwm++;

	/* Remember when we last adjusted, to rate-limit changes. */
	rxr->rxr_adjusted = ticks;
}
3099 
/*
 * Called when the input path detects a livelock: shrink the current
 * watermark by one (never below the low watermark), at most once
 * per tick.
 */
void
if_rxr_livelocked(struct if_rxring *rxr)
{
	extern int ticks;

	if (ticks - rxr->rxr_adjusted >= 1) {
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;

		rxr->rxr_adjusted = ticks;
	}
}
3112 
/*
 * Ask for permission to put up to "max" new buffers on the receive
 * ring.  Returns how many the caller may add, limited by the current
 * watermark, and records them as alive.  At most once per tick the
 * watermark itself is given a chance to grow.
 */
u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	/* Ring already filled up to the current watermark. */
	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;

	return (diff);
}
3132 
/*
 * Copy "t" receive ring descriptions out to the userland if_rxrinfo
 * structure.  Only as many entries as the user buffer holds are
 * copied; ifri_total is always updated so the caller can learn the
 * required size.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* Never copy out more entries than the user asked for. */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3155 
/*
 * Convenience wrapper for drivers with a single receive ring:
 * package name, buffer size and ring state into one
 * if_rxring_info entry and hand it to if_rxr_info_ioctl().
 */
int
if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
    struct if_rxring *rxr)
{
	struct if_rxring_info ifr;

	memset(&ifr, 0, sizeof(ifr));

	if (name != NULL)
		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));

	ifr.ifr_size = size;
	ifr.ifr_info = *rxr;

	return (if_rxr_info_ioctl(ifri, 1, &ifr));
}
3172 
3173 /*
3174  * Network stack input queues.
3175  */
3176 
/*
 * Initialize a network stack input queue with a maximum length and
 * the soft interrupt (netisr) to schedule when packets arrive.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3183 
3184 int
3185 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3186 {
3187 	int rv;
3188 
3189 	rv = mq_enqueue(&niq->ni_q, m);
3190 	if (rv == 0)
3191 		schednetisr(niq->ni_isr);
3192 	else
3193 		if_congestion();
3194 
3195 	return (rv);
3196 }
3197 
/*
 * Queue a whole mbuf list on a network stack input queue and
 * schedule the matching soft interrupt.  On failure the list was
 * dropped and congestion is signalled.
 */
int
niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
{
	int rv;

	rv = mq_enlist(&niq->ni_q, ml);
	if (rv == 0)
		schednetisr(niq->ni_isr);
	else
		if_congestion();

	return (rv);
}
3211 
/*
 * Panic when a switch over an address family reaches a case the
 * caller does not implement.  Never returns.
 */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3217 
3218 /*
3219  * XXXSMP This tunable is here to work around the fact that IPsec
3220  * globals aren't ready to be accessed by multiple threads in
3221  * parallel.
3222  */
3223 int		 nettaskqs = NET_TASKQ;
3224 
3225 struct taskq *
3226 net_tq(unsigned int ifindex)
3227 {
3228 	struct taskq *t = NULL;
3229 
3230 	t = nettqmp[ifindex % nettaskqs];
3231 
3232 	return (t);
3233 }
3234