xref: /openbsd-src/sys/net/if.c (revision 7350f337b9e3eb4461d99580e625c7ef148d107c)
1 /*	$OpenBSD: if.c,v 1.585 2019/06/15 17:05:21 mpi Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "trunk.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/percpu.h>
88 #include <sys/proc.h>
89 
90 #include <dev/rndvar.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include <netinet/in.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/igmp.h>
101 #ifdef MROUTING
102 #include <netinet/ip_mroute.h>
103 #endif
104 
105 #ifdef INET6
106 #include <netinet6/in6_var.h>
107 #include <netinet6/in6_ifattach.h>
108 #include <netinet6/nd6.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #endif
112 
113 #ifdef MPLS
114 #include <netmpls/mpls.h>
115 #endif
116 
117 #if NBPFILTER > 0
118 #include <net/bpf.h>
119 #endif
120 
121 #if NBRIDGE > 0
122 #include <net/if_bridge.h>
123 #endif
124 
125 #if NCARP > 0
126 #include <netinet/ip_carp.h>
127 #endif
128 
129 #if NPF > 0
130 #include <net/pfvar.h>
131 #endif
132 
133 #include <sys/device.h>
134 
135 void	if_attachsetup(struct ifnet *);
136 void	if_attachdomain(struct ifnet *);
137 void	if_attach_common(struct ifnet *);
138 int	if_createrdomain(int, struct ifnet *);
139 int	if_setrdomain(struct ifnet *, int);
140 void	if_slowtimo(void *);
141 
142 void	if_detached_qstart(struct ifqueue *);
143 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
144 
145 int	ifioctl_get(u_long, caddr_t);
146 int	ifconf(caddr_t);
147 static int
148 	if_sffpage_check(const caddr_t);
149 
150 int	if_getgroup(caddr_t, struct ifnet *);
151 int	if_getgroupmembers(caddr_t);
152 int	if_getgroupattribs(caddr_t);
153 int	if_setgroupattribs(caddr_t);
154 int	if_getgrouplist(caddr_t);
155 
156 void	if_linkstate(struct ifnet *);
157 void	if_linkstate_task(void *);
158 
159 int	if_clone_list(struct if_clonereq *);
160 struct if_clone	*if_clone_lookup(const char *, int *);
161 
162 int	if_group_egress_build(void);
163 
164 void	if_watchdog_task(void *);
165 
166 void	if_netisr(void *);
167 
168 #ifdef DDB
169 void	ifa_print_all(void);
170 #endif
171 
172 void	if_qstart_compat(struct ifqueue *);
173 
174 /*
175  * interface index map
176  *
177  * the kernel maintains a mapping of interface indexes to struct ifnet
178  * pointers.
179  *
180  * the map is an array of struct ifnet pointers prefixed by an if_map
181  * structure. the if_map structure stores the length of its array.
182  *
183  * as interfaces are attached to the system, the map is grown on demand
184  * up to USHRT_MAX entries.
185  *
186  * interface index 0 is reserved and represents no interface. this
187  * supports the use of the interface index as the scope for IPv6 link
188  * local addresses, where scope 0 means no scope has been specified.
189  * it also supports the use of interface index as the unique identifier
190  * for network interfaces in SNMP applications as per RFC2863. therefore
191  * if_get(0) returns NULL.
192  */
193 
194 void if_ifp_dtor(void *, void *);
195 void if_map_dtor(void *, void *);
196 struct ifnet *if_ref(struct ifnet *);
197 
198 /*
199  * struct if_map
200  *
201  * bounded array of ifnet srp pointers used to fetch references of live
202  * interfaces with if_get().
203  */
204 
205 struct if_map {
206 	unsigned long		 limit;
207 	/* followed by limit ifnet srp pointers */
208 };
209 
210 /*
211  * struct if_idxmap
212  *
213  * infrastructure to manage updates and accesses to the current if_map.
214  */
215 
216 struct if_idxmap {
217 	unsigned int		 serial;
218 	unsigned int		 count;
219 	struct srp		 map;
220 };
221 
222 void	if_idxmap_init(unsigned int);
223 void	if_idxmap_insert(struct ifnet *);
224 void	if_idxmap_remove(struct ifnet *);
225 
226 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
227 
228 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
229 int if_cloners_count;
230 
231 struct timeout net_tick_to;
232 void	net_tick(void *);
233 int	net_livelocked(void);
234 int	ifq_congestion;
235 
236 int		 netisr;
237 
238 #define	NET_TASKQ	1
239 struct taskq	*nettqmp[NET_TASKQ];
240 
241 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
242 
243 /*
244  * Serialize socket operations to ensure no new sleeping points
245  * are introduced in IP output paths.
246  */
247 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
248 
/*
 * Network interface utility routines.
 */

/*
 * Bootstrap the interface layer: the interface index map, the
 * net_tick timeout and the softnet task queues.  Called once at boot.
 */
void
ifinit(void)
{
	unsigned int	i;

	/*
	 * most machines boot with 4 or 5 interfaces, so size the initial map
	 * to accommodate this
	 */
	if_idxmap_init(8);

	timeout_set(&net_tick_to, net_tick, &net_tick_to);

	for (i = 0; i < NET_TASKQ; i++) {
		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
		if (nettqmp[i] == NULL)
			panic("unable to create network taskq %d", i);
	}

	/* run once by hand; net_tick() reschedules itself via the timeout */
	net_tick(&net_tick_to);
}
273 
274 static struct if_idxmap if_idxmap = {
275 	0,
276 	0,
277 	SRP_INITIALIZER()
278 };
279 
280 struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
281 struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
282 
283 struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
284 
/*
 * Allocate and publish the initial interface index map with room for
 * `limit' entries.  Called once from ifinit() before any interface
 * attaches, so the update does not race with readers.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int i;

	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */

	/* the srp array lives immediately after the if_map header */
	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
	    M_IFADDR, M_WAITOK);

	if_map->limit = limit;
	map = (struct srp *)(if_map + 1);
	for (i = 0; i < limit; i++)
		srp_init(&map[i]);

	/* this is called early so there's nothing to race with */
	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
}
305 
/*
 * Assign ifp a unique interface index and publish it in the idxmap so
 * if_get() can find it.  Grows the map (doubling, up to USHRT_MAX
 * entries) when the next index would fall off the end.  Also
 * initialises if_refcnt; the map holds one reference on the interface.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		/* double the map and copy the live entries across */
		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* the new map takes its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* old map is torn down by if_map_dtor once readers drain */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index, skipping the reserved index 0 */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
369 
/*
 * Remove ifp from the idxmap and sleep until every other reference to
 * the interface has been released.  After this returns, no other CPU
 * can reach the interface via if_get().
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* drops the map's reference via if_ifp_gc -> if_put() */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
395 
/*
 * SRP GC destructor for ifnet pointers stored in the index map:
 * release the reference the map slot held on the interface.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
401 
/*
 * SRP GC destructor for a whole if_map: drop every interface
 * reference still held in its slots, then free the map itself.
 */
void
if_map_dtor(void *null, void *m)
{
	struct if_map *if_map = m;
	struct srp *map = (struct srp *)(if_map + 1);
	unsigned int i;

	/*
	 * dont need to serialize the use of update_locked since this is
	 * the last reference to this map. there's nothing to race against.
	 */
	for (i = 0; i < if_map->limit; i++)
		srp_update_locked(&if_ifp_gc, &map[i], NULL);

	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
}
418 
/*
 * Second phase of attachment: hook the interface into the stack
 * (groups, per-domain data, pf, watchdog, index map) and announce it
 * on the routing socket.  Called with the netlock held.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	/* every interface is a member of the "all" group */
	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	/* arm the watchdog timer; if_slowtimo() reschedules itself */
	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	/*
	 * the tasks carry the index rather than the pointer so they can
	 * safely look the interface up (or fail) later
	 */
	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
453 
454 /*
455  * Allocate the link level name for the specified interface.  This
456  * is an attachment helper.  It must be called after ifp->if_addrlen
457  * is initialized, which may not be the case when if_attach() is
458  * called.
459  */
460 void
461 if_alloc_sadl(struct ifnet *ifp)
462 {
463 	unsigned int socksize;
464 	int namelen, masklen;
465 	struct sockaddr_dl *sdl;
466 
467 	/*
468 	 * If the interface already has a link name, release it
469 	 * now.  This is useful for interfaces that can change
470 	 * link types, and thus switch link names often.
471 	 */
472 	if_free_sadl(ifp);
473 
474 	namelen = strlen(ifp->if_xname);
475 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
476 	socksize = masklen + ifp->if_addrlen;
477 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
478 	if (socksize < sizeof(*sdl))
479 		socksize = sizeof(*sdl);
480 	socksize = ROUNDUP(socksize);
481 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
482 	sdl->sdl_len = socksize;
483 	sdl->sdl_family = AF_LINK;
484 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
485 	sdl->sdl_nlen = namelen;
486 	sdl->sdl_alen = ifp->if_addrlen;
487 	sdl->sdl_index = ifp->if_index;
488 	sdl->sdl_type = ifp->if_type;
489 	ifp->if_sadl = sdl;
490 }
491 
492 /*
493  * Free the link level name for the specified interface.  This is
494  * a detach helper.  This is called from if_detach() or from
495  * link layer type specific detach functions.
496  */
497 void
498 if_free_sadl(struct ifnet *ifp)
499 {
500 	if (ifp->if_sadl == NULL)
501 		return;
502 
503 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
504 	ifp->if_sadl = NULL;
505 }
506 
507 void
508 if_attachdomain(struct ifnet *ifp)
509 {
510 	struct domain *dp;
511 	int i, s;
512 
513 	s = splnet();
514 
515 	/* address family dependent data region */
516 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
517 	for (i = 0; (dp = domains[i]) != NULL; i++) {
518 		if (dp->dom_ifattach)
519 			ifp->if_afdata[dp->dom_family] =
520 			    (*dp->dom_ifattach)(ifp);
521 	}
522 
523 	splx(s);
524 }
525 
/*
 * Like if_attach(), but inserts the interface at the head of the
 * global interface list.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
535 
/*
 * Attach an interface: common setup, then append it to the global
 * interface list and finish attachment under the netlock.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
545 
546 void
547 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
548 {
549 	struct ifqueue **map;
550 	struct ifqueue *ifq;
551 	int i;
552 
553 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
554 	KASSERT(nqs != 0);
555 
556 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
557 
558 	ifp->if_snd.ifq_softc = NULL;
559 	map[0] = &ifp->if_snd;
560 
561 	for (i = 1; i < nqs; i++) {
562 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
563 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
564 		ifq_init(ifq, ifp, i);
565 		map[i] = ifq;
566 	}
567 
568 	ifp->if_ifqs = map;
569 	ifp->if_nifqs = nqs;
570 }
571 
572 void
573 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
574 {
575 	struct ifiqueue **map;
576 	struct ifiqueue *ifiq;
577 	unsigned int i;
578 
579 	KASSERT(niqs != 0);
580 
581 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
582 
583 	ifp->if_rcv.ifiq_softc = NULL;
584 	map[0] = &ifp->if_rcv;
585 
586 	for (i = 1; i < niqs; i++) {
587 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
588 		ifiq_init(ifiq, ifp, i);
589 		map[i] = ifiq;
590 	}
591 
592 	ifp->if_iqs = map;
593 	ifp->if_niqs = niqs;
594 }
595 
/*
 * Interface-independent first phase of attachment: address lists,
 * default transmit/receive queues, hook lists and default handlers.
 * Runs before the interface is published, so no locking is needed.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	/*
	 * MPSAFE drivers provide if_qstart; legacy drivers provide
	 * if_start and get the KERNEL_LOCKed compat wrapper instead.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* a single transmit queue by default; see if_attach_queues() */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* a single receive queue by default; see if_attach_iqueues() */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_addrhooks);
	ifp->if_linkstatehooks = malloc(sizeof(*ifp->if_linkstatehooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_linkstatehooks);
	ifp->if_detachhooks = malloc(sizeof(*ifp->if_detachhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_detachhooks);

	/* fall back to the default handlers where the driver set none */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;

	SRPL_INIT(&ifp->if_inputs);
}
647 
/*
 * Install a different queueing discipline on the transmit side of an
 * interface.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq.  because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
662 
/*
 * Kick the transmit side of a legacy (non-MPSAFE) interface.  Only
 * valid while the compat wrapper is installed as if_qstart.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
/*
 * qstart shim for drivers that still provide the single-queue
 * if_start entry point; runs it under the kernel lock at splnet.
 */
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
 	 */

	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
690 
/*
 * Hand an outgoing packet to the interface, after giving pf's delay
 * mechanism and the bridge a chance to divert it.  Consumes the mbuf;
 * returns 0 or an errno.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
#if NPF > 0
	/* pf may have marked this packet for delayed transmission */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* M_PROTO1 marks packets the bridge has already handled */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	/* if_enqueue_ifq() unless the driver overrode it */
	return ((*ifp->if_enqueue)(ifp, m));
}
714 
715 int
716 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
717 {
718 	struct ifqueue *ifq = &ifp->if_snd;
719 	int error;
720 
721 	if (ifp->if_nifqs > 1) {
722 		unsigned int idx;
723 
724 		/*
725 		 * use the operations on the first ifq to pick which of
726 		 * the array gets this mbuf.
727 		 */
728 
729 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
730 		ifq = ifp->if_ifqs[idx];
731 	}
732 
733 	error = ifq_enqueue(ifq, m);
734 	if (error)
735 		return (error);
736 
737 	ifq_start(ifq);
738 
739 	return (0);
740 }
741 
/*
 * Default driver input path: queue a list of received packets on the
 * interface's first receive queue.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
747 
/*
 * Loop a packet of address family ``af'' back into the local input
 * path (loopback traffic, and local copies on SIMPLEX interfaces).
 * Consumes the mbuf; returns 0, or EAFNOSUPPORT for families the
 * stack cannot handle.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* strip stale header state; the packet re-enters the stack fresh */
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* looped packets count as both sent and received */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
799 
/*
 * Enqueue a locally generated packet directly on one of the
 * interface's receive queues, selected by the mbuf's flow id when one
 * is set.  Returns 0 on success or ENOBUFS if the queue rejected it.
 */
int
if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	struct ifiqueue *ifiq;
	unsigned int flow = 0;

	m->m_pkthdr.ph_family = af;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	/* map the flow onto the available receive queues */
	ifiq = ifp->if_iqs[flow % ifp->if_niqs];

	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
}
817 
/*
 * Registered packet input handler, linked off ifp->if_inputs.
 */
struct ifih {
	SRPL_ENTRY(ifih)	  ifih_next;	/* if_inputs list linkage */
	int			(*ifih_input)(struct ifnet *, struct mbuf *,
				      void *);
	void			 *ifih_cookie;	/* argument for ifih_input */
	int			  ifih_refcnt;	/* insert/remove balance */
	struct refcnt		  ifih_srpcnt;	/* SRP readers in flight */
};
826 
827 void	if_ih_ref(void *, void *);
828 void	if_ih_unref(void *, void *);
829 
830 struct srpl_rc ifih_rc = SRPL_RC_INITIALIZER(if_ih_ref, if_ih_unref, NULL);
831 
/*
 * Register an input handler on an interface.  Registering the same
 * function/cookie pair again just takes another reference on the
 * existing handler.
 */
void
if_ih_insert(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie) {
			ifih->ifih_refcnt++;
			break;
		}
	}

	if (ifih == NULL) {
		ifih = malloc(sizeof(*ifih), M_DEVBUF, M_WAITOK);

		ifih->ifih_input = input;
		ifih->ifih_cookie = cookie;
		ifih->ifih_refcnt = 1;
		refcnt_init(&ifih->ifih_srpcnt);
		/* new handlers go first; if_ih_input() walks from the head */
		SRPL_INSERT_HEAD_LOCKED(&ifih_rc, &ifp->if_inputs,
		    ifih, ifih_next);
	}
}
859 
/*
 * SRP list enter hook: account for a reader referencing an ifih.
 */
void
if_ih_ref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_take(&ifih->ifih_srpcnt);
}
867 
/*
 * SRP list leave hook: drop a reader's reference, waking anyone in
 * refcnt_finalize() waiting for readers to drain.
 */
void
if_ih_unref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_rele_wake(&ifih->ifih_srpcnt);
}
875 
/*
 * Drop a reference on a registered input handler; on the last
 * reference unlink it, wait for in-flight SRP readers to drain, and
 * free it.  The function/cookie pair must have been registered.
 */
void
if_ih_remove(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie)
			break;
	}

	KASSERT(ifih != NULL);

	if (--ifih->ifih_refcnt == 0) {
		SRPL_REMOVE_LOCKED(&ifih_rc, &ifp->if_inputs, ifih,
		    ifih, ifih_next);

		/* wait for readers still running this handler */
		refcnt_finalize(&ifih->ifih_srpcnt, "ifihrm");
		free(ifih, M_DEVBUF, sizeof(*ifih));
	}
}
900 
/*
 * Offer an mbuf to each of the interface's input handlers in turn; a
 * handler returning non-zero has consumed the packet.  If no handler
 * takes it, the packet is freed here.
 */
static void
if_ih_input(struct ifnet *ifp, struct mbuf *m)
{
	struct ifih *ifih;
	struct srp_ref sr;

	/*
	 * Pass this mbuf to all input handlers of its
	 * interface until it is consumed.
	 */
	SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
		if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
			break;
	}
	SRPL_LEAVE(&sr);

	/* the loop ran off the end: nobody claimed the packet */
	if (ifih == NULL)
		m_freem(m);
}
920 
/*
 * Run a list of received packets through the interface's input
 * handlers, under the shared netlock.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* packet arrival on real (non-cloned) interfaces feeds entropy */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists.
	 */
	NET_RLOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		if_ih_input(ifp, m);
	NET_RUNLOCK();
}
949 
/*
 * Single-packet input path for virtual interfaces: tag the mbuf with
 * the interface, count it, give bpf a copy, then run the input
 * handlers.  Uses ifp->if_counters, so the interface must have
 * per-CPU counters attached.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return from bpf means drop the packet */
		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT)) {
			m_freem(m);
			return;
		}
	}
#endif

	if_ih_input(ifp, m);
}
975 
/*
 * Softnet task: dispatch every protocol interrupt routine whose bit
 * is set in the global ``netisr'' word, looping until no new bits are
 * raised.  Handlers that are not MP safe run under the kernel lock.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_RLOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_RUNLOCK();
			yield();
			NET_RLOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX)) {
			KERNEL_LOCK();
			pipexintr();
			KERNEL_UNLOCK();
		}
#endif
		/* accumulate all serviced bits for the post-loop work */
		t |= n;
	}

#if NPFSYNC > 0
	/* pfsync runs once, after every other netisr bit has drained */
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_RUNLOCK();
}
1051 
/*
 * Tell pseudo-drivers stacked on this interface to detach from it by
 * running (and consuming) its detach hooks.
 */
void
if_deactivate(struct ifnet *ifp)
{
	NET_LOCK();
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */
	dohooks(ifp->if_detachhooks, HOOK_REMOVE | HOOK_FREE);

	NET_UNLOCK();
}
1065 
/*
 * Detach an interface from everything in the kernel.  Also deallocate
 * private resources.  The reverse of if_attach(): after the index map
 * removal no other CPU can obtain a new reference to ifp.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	ifq_clr_oactive(&ifp->if_snd);

	/* Other CPUs must not have a reference before we start destroying. */
	if_idxmap_remove(ifp);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* neuter the interface so any stray callers fail safely */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Remove the interface from the list of all interfaces.  */
	TAILQ_REMOVE(&ifnet, ifp, if_list);

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	free(ifp->if_addrhooks, M_TEMP, sizeof(*ifp->if_addrhooks));
	free(ifp->if_linkstatehooks, M_TEMP, sizeof(*ifp->if_linkstatehooks));
	free(ifp->if_detachhooks, M_TEMP, sizeof(*ifp->if_detachhooks));

	/* undo if_attachdomain() */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* tear down the transmit queues; slot 0 may be the builtin if_snd */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* likewise for the receive queues */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1179 
1180 /*
1181  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1182  */
1183 int
1184 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1185 {
1186 	struct ifnet *ifp;
1187 	int connected = 0;
1188 
1189 	ifp = if_get(ifidx);
1190 	if (ifp == NULL)
1191 		return (0);
1192 
1193 	if (ifp0->if_index == ifp->if_index)
1194 		connected = 1;
1195 
1196 #if NBRIDGE > 0
1197 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1198 		connected = 1;
1199 #endif
1200 #if NCARP > 0
1201 	if ((ifp0->if_type == IFT_CARP && ifp0->if_carpdev == ifp) ||
1202 	    (ifp->if_type == IFT_CARP && ifp->if_carpdev == ifp0))
1203 		connected = 1;
1204 #endif
1205 
1206 	if_put(ifp);
1207 	return (connected);
1208 }
1209 
1210 /*
1211  * Create a clone network interface.
1212  */
1213 int
1214 if_clone_create(const char *name, int rdomain)
1215 {
1216 	struct if_clone *ifc;
1217 	struct ifnet *ifp;
1218 	int unit, ret;
1219 
1220 	ifc = if_clone_lookup(name, &unit);
1221 	if (ifc == NULL)
1222 		return (EINVAL);
1223 
1224 	if (ifunit(name) != NULL)
1225 		return (EEXIST);
1226 
1227 	ret = (*ifc->ifc_create)(ifc, unit);
1228 
1229 	if (ret != 0 || (ifp = ifunit(name)) == NULL)
1230 		return (ret);
1231 
1232 	NET_LOCK();
1233 	if_addgroup(ifp, ifc->ifc_name);
1234 	if (rdomain != 0)
1235 		if_setrdomain(ifp, rdomain);
1236 	NET_UNLOCK();
1237 
1238 	return (ret);
1239 }
1240 
1241 /*
1242  * Destroy a clone network interface.
1243  */
1244 int
1245 if_clone_destroy(const char *name)
1246 {
1247 	struct if_clone *ifc;
1248 	struct ifnet *ifp;
1249 	int ret;
1250 
1251 	ifc = if_clone_lookup(name, NULL);
1252 	if (ifc == NULL)
1253 		return (EINVAL);
1254 
1255 	ifp = ifunit(name);
1256 	if (ifp == NULL)
1257 		return (ENXIO);
1258 
1259 	if (ifc->ifc_destroy == NULL)
1260 		return (EOPNOTSUPP);
1261 
1262 	NET_LOCK();
1263 	if (ifp->if_flags & IFF_UP) {
1264 		int s;
1265 		s = splnet();
1266 		if_down(ifp);
1267 		splx(s);
1268 	}
1269 	NET_UNLOCK();
1270 	ret = (*ifc->ifc_destroy)(ifp);
1271 
1272 	return (ret);
1273 }
1274 
1275 /*
1276  * Look up a network interface cloner.
1277  */
1278 struct if_clone *
1279 if_clone_lookup(const char *name, int *unitp)
1280 {
1281 	struct if_clone *ifc;
1282 	const char *cp;
1283 	int unit;
1284 
1285 	/* separate interface name from unit */
1286 	for (cp = name;
1287 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1288 	    cp++)
1289 		continue;
1290 
1291 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1292 		return (NULL);	/* No name or unit number */
1293 
1294 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1295 		return (NULL);	/* unit number 0 padded */
1296 
1297 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1298 		if (strlen(ifc->ifc_name) == cp - name &&
1299 		    !strncmp(name, ifc->ifc_name, cp - name))
1300 			break;
1301 	}
1302 
1303 	if (ifc == NULL)
1304 		return (NULL);
1305 
1306 	unit = 0;
1307 	while (cp - name < IFNAMSIZ && *cp) {
1308 		if (*cp < '0' || *cp > '9' ||
1309 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1310 			/* Bogus unit number. */
1311 			return (NULL);
1312 		}
1313 		unit = (unit * 10) + (*cp++ - '0');
1314 	}
1315 
1316 	if (unitp != NULL)
1317 		*unitp = unit;
1318 	return (ifc);
1319 }
1320 
/*
 * Register a network interface cloner.
 */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	/* Keep the count in sync with the list for if_clone_list(). */
	if_cloners_count++;
}
1337 
1338 /*
1339  * Provide list of interface cloners to userspace.
1340  */
1341 int
1342 if_clone_list(struct if_clonereq *ifcr)
1343 {
1344 	char outbuf[IFNAMSIZ], *dst;
1345 	struct if_clone *ifc;
1346 	int count, error = 0;
1347 
1348 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1349 		/* Just asking how many there are. */
1350 		ifcr->ifcr_total = if_cloners_count;
1351 		return (0);
1352 	}
1353 
1354 	if (ifcr->ifcr_count < 0)
1355 		return (EINVAL);
1356 
1357 	ifcr->ifcr_total = if_cloners_count;
1358 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1359 
1360 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1361 		if (count == 0)
1362 			break;
1363 		bzero(outbuf, sizeof outbuf);
1364 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1365 		error = copyout(outbuf, dst, IFNAMSIZ);
1366 		if (error)
1367 			break;
1368 		count--;
1369 		dst += IFNAMSIZ;
1370 	}
1371 
1372 	return (error);
1373 }
1374 
/*
 * set queue congestion marker
 */
void
if_congestion(void)
{
	extern int ticks;

	/* Record the current tick so if_congested() can judge recency. */
	ifq_congestion = ticks;
}
1385 
/*
 * Return non-zero if the congestion marker was set within the last
 * hz/100 ticks.
 */
int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	if (diff < 0) {
		/* The tick counter wrapped: rearm the marker in the past. */
		ifq_congestion = ticks - hz;
		return (0);
	}

	return (diff <= (hz / 100));
}
1400 
/*
 * Exact sockaddr comparison over a1's sa_len bytes (this includes the
 * sa_len and sa_family fields themselves).
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1404 
1405 /*
1406  * Locate an interface based on a complete address.
1407  */
1408 struct ifaddr *
1409 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1410 {
1411 	struct ifnet *ifp;
1412 	struct ifaddr *ifa;
1413 	u_int rdomain;
1414 
1415 	rdomain = rtable_l2(rtableid);
1416 	KERNEL_LOCK();
1417 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1418 		if (ifp->if_rdomain != rdomain)
1419 			continue;
1420 
1421 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1422 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1423 				continue;
1424 
1425 			if (equal(addr, ifa->ifa_addr)) {
1426 				KERNEL_UNLOCK();
1427 				return (ifa);
1428 			}
1429 		}
1430 	}
1431 	KERNEL_UNLOCK();
1432 	return (NULL);
1433 }
1434 
1435 /*
1436  * Locate the point to point interface with a given destination address.
1437  */
1438 struct ifaddr *
1439 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1440 {
1441 	struct ifnet *ifp;
1442 	struct ifaddr *ifa;
1443 
1444 	rdomain = rtable_l2(rdomain);
1445 	KERNEL_LOCK();
1446 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1447 		if (ifp->if_rdomain != rdomain)
1448 			continue;
1449 		if (ifp->if_flags & IFF_POINTOPOINT) {
1450 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1451 				if (ifa->ifa_addr->sa_family !=
1452 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1453 					continue;
1454 				if (equal(addr, ifa->ifa_dstaddr)) {
1455 					KERNEL_UNLOCK();
1456 					return (ifa);
1457 				}
1458 			}
1459 		}
1460 	}
1461 	KERNEL_UNLOCK();
1462 	return (NULL);
1463 }
1464 
/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;	/* fallback: first family match */
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the first address of the right family. */
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		/*
		 * Without a netmask (or on a point-to-point interface)
		 * only an exact match against the local or peer address
		 * is meaningful.
		 */
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/*
		 * Walk the netmask byte by byte; any masked bit that
		 * differs between the candidate and the given address
		 * means ``addr'' is not on this prefix.
		 */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		/* Full walk without mismatch: the prefix matches. */
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1503 
/*
 * No-op rtrequest handler for interfaces that need no per-route
 * setup or teardown.
 */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1508 
/*
 * Default action when installing a local route on a point-to-point
 * interface.
 */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the interface address the local route points at. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/*
		 * Check whether the loopback interface of this rdomain
		 * carries an address of the same family.  lo0ifa is only
		 * tested for NULL after the loop, never dereferenced, so
		 * releasing lo0ifp first is safe.
		 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		/* Local routes need no link-level address resolution. */
		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1555 
1556 
/*
 * Bring down all interfaces
 */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* Tell the driver its administrative state changed. */
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1576 
/*
 * Mark an interface down and notify protocols of
 * the transition.
 */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Drop packets still queued for transmission. */
	IFQ_PURGE(&ifp->if_snd);

	if_linkstate(ifp);
}
1592 
/*
 * Mark an interface up and notify protocols of
 * the transition.
 */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	/* Propagate the state change to routes, userland and hooks. */
	if_linkstate(ifp);
}
1613 
1614 /*
1615  * Notify userland, the routing table and hooks owner of
1616  * a link-state transition.
1617  */
1618 void
1619 if_linkstate_task(void *xifidx)
1620 {
1621 	unsigned int ifidx = (unsigned long)xifidx;
1622 	struct ifnet *ifp;
1623 
1624 	KERNEL_LOCK();
1625 	NET_LOCK();
1626 
1627 	ifp = if_get(ifidx);
1628 	if (ifp != NULL)
1629 		if_linkstate(ifp);
1630 	if_put(ifp);
1631 
1632 	NET_UNLOCK();
1633 	KERNEL_UNLOCK();
1634 }
1635 
/*
 * Process a link-state transition: notify the routing socket, update
 * routes bound to the interface and run the registered hooks.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	rtm_ifchg(ifp);
	rt_if_track(ifp);
	dohooks(ifp->if_linkstatehooks, 0);
}
1645 
/*
 * Schedule a link state change task.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	/* Deferred to the softnet taskq; see if_linkstate_task(). */
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1654 
/*
 * Handle interface watchdog timer routine.  Called
 * from softclock, we decrement timer (if set) and
 * call the appropriate interface routine on expiration.
 */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* On expiry run the watchdog from the softnet taskq. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Rearm for the next tick. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1673 
/*
 * Run an interface's watchdog handler.  Scheduled from if_slowtimo()
 * when the interface transmit timer expires.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	/* The interface may have been detached before the task ran. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1694 
1695 /*
1696  * Map interface name to interface structure pointer.
1697  */
1698 struct ifnet *
1699 ifunit(const char *name)
1700 {
1701 	struct ifnet *ifp;
1702 
1703 	KERNEL_ASSERT_LOCKED();
1704 
1705 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1706 		if (strcmp(ifp->if_xname, name) == 0)
1707 			return (ifp);
1708 	}
1709 	return (NULL);
1710 }
1711 
/*
 * Map interface index to interface structure pointer.
 */
struct ifnet *
if_get(unsigned int index)
{
	struct srp_ref sr;
	struct if_map *if_map;
	struct srp *map;
	struct ifnet *ifp = NULL;

	/* The index map is read under an SRP reference. */
	if_map = srp_enter(&sr, &if_idxmap.map);
	if (index < if_map->limit) {
		/* The array of per-index srp slots follows the header. */
		map = (struct srp *)(if_map + 1);

		ifp = srp_follow(&sr, &map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Take a reference the caller drops via if_put(). */
			if_ref(ifp);
		}
	}
	srp_leave(&sr);

	return (ifp);
}
1737 
/*
 * Take an additional reference on ``ifp''; pair with if_put().
 */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1745 
/*
 * Release a reference obtained via if_get() or if_ref().
 * A NULL pointer is accepted and ignored.
 */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1754 
/*
 * Set the link-level address of ``ifp'' to the ETHER_ADDR_LEN bytes
 * at ``lladdr''.
 * NOTE(review): the unconditional cast assumes ifp is embedded in a
 * struct arpcom (ethernet-style interface) -- confirm all callers
 * guarantee this.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1766 
/*
 * Create routing domain ``rdomain'' together with its loopback
 * interface lo<rdomain>, if they do not already exist.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	/* Only an empty table may be claimed as a new rdomain. */
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when ``ifp'' itself is that loopback. */
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1793 
/*
 * Move ``ifp'' into routing domain ``rdomain''.  The rdomain must
 * already exist (see if_createrdomain()).
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* An rdomain's own loopback interface may not leave it. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1857 
/*
 * Interface ioctls.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* Commands that do not resolve an interface from ifr_name. */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_create(ifr->ifr_name, 0);
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_destroy(ifr->ifr_name);
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		return (error);
	/* Read-only commands are dispatched to ifioctl_get(). */
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		return (ifioctl_get(cmd, data));
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	/* Snapshot the flags so changes can be reported on exit. */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);

		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error != 0) {
			/* The driver refused the change: restore flags. */
			ifp->if_flags = oif_flags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
			ifp->if_xflags |= IFXF_INET6_NOSOII;
			in6_soiiupdate(ifp);
		}

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
			in6_soiiupdate(ifp);
		}

#endif	/* INET6 */

#ifdef MPLS
		/* Toggling MPLS swaps the output routine in and out. */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		if (!error)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			/* Drivers without group support are not an error. */
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFMEDIA:
	case SIOCSVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* Hand unknown commands to the protocol, then the driver. */
		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
			(struct mbuf *) cmd, (struct mbuf *) data,
			(struct mbuf *) ifp, p));
		if (error == EOPNOTSUPP) {
			NET_LOCK();
			error = ((*ifp->if_ioctl)(ifp, cmd, data));
			NET_UNLOCK();
		}
		break;
	}

	/* Report any flag change on the routing socket. */
	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
		rtm_ifchg(ifp);

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	return (error);
}
2231 
/*
 * Read-only interface ioctls, dispatched from ifioctl().  Only the
 * shared netlock is taken.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Commands that do not resolve an interface from ifr_name. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK();
		error = ifconf(data);
		NET_RUNLOCK();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK();
		error = if_getgroupmembers(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK();
		error = if_getgroupattribs(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK();
		error = if_getgrouplist(data);
		NET_RUNLOCK();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Internal-only bits are hidden from userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only dispatches the commands handled above. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK();

	return (error);
}
2345 
2346 static int
2347 if_sffpage_check(const caddr_t data)
2348 {
2349 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2350 
2351 	switch (sff->sff_addr) {
2352 	case IFSFF_ADDR_EEPROM:
2353 	case IFSFF_ADDR_DDM:
2354 		break;
2355 	default:
2356 		return (EINVAL);
2357 	}
2358 
2359 	return (0);
2360 }
2361 
2362 int
2363 if_txhprio_l2_check(int hdrprio)
2364 {
2365 	switch (hdrprio) {
2366 	case IF_HDRPRIO_PACKET:
2367 		return (0);
2368 	default:
2369 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2370 			return (0);
2371 		break;
2372 	}
2373 
2374 	return (EINVAL);
2375 }
2376 
2377 int
2378 if_txhprio_l3_check(int hdrprio)
2379 {
2380 	switch (hdrprio) {
2381 	case IF_HDRPRIO_PACKET:
2382 	case IF_HDRPRIO_PAYLOAD:
2383 		return (0);
2384 	default:
2385 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2386 			return (0);
2387 		break;
2388 	}
2389 
2390 	return (EINVAL);
2391 }
2392 
2393 int
2394 if_rxhprio_l2_check(int hdrprio)
2395 {
2396 	switch (hdrprio) {
2397 	case IF_HDRPRIO_PACKET:
2398 	case IF_HDRPRIO_OUTER:
2399 		return (0);
2400 	default:
2401 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2402 			return (0);
2403 		break;
2404 	}
2405 
2406 	return (EINVAL);
2407 }
2408 
2409 int
2410 if_rxhprio_l3_check(int hdrprio)
2411 {
2412 	switch (hdrprio) {
2413 	case IF_HDRPRIO_PACKET:
2414 	case IF_HDRPRIO_PAYLOAD:
2415 	case IF_HDRPRIO_OUTER:
2416 		return (0);
2417 	default:
2418 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2419 			return (0);
2420 		break;
2421 	}
2422 
2423 	return (EINVAL);
2424 }
2425 
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 */
int
ifconf(caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnet, if_list) {
			struct sockaddr *sa;

			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					/* Oversized sockaddrs take extra room. */
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			/* Interface without addresses: one zeroed entry. */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					/*
					 * The sockaddr does not fit in a
					 * struct ifreq: copy name and
					 * sockaddr separately, then advance
					 * ifrp past the variable-sized
					 * entry.
					 */
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	/* Report how many bytes of the buffer were actually used. */
	ifc->ifc_len -= space;
	return (error);
}
2507 
/*
 * Attach a per-cpu counter set to the interface.  Must be called at
 * most once per interface; if_getdata() folds these counters into
 * the if_data statistics.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2515 
/*
 * Release the per-cpu counter set previously attached with
 * if_counters_alloc().
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2524 
2525 void
2526 if_getdata(struct ifnet *ifp, struct if_data *data)
2527 {
2528 	unsigned int i;
2529 
2530 	*data = ifp->if_data;
2531 
2532 	if (ifp->if_counters != NULL) {
2533 		uint64_t counters[ifc_ncounters];
2534 
2535 		counters_read(ifp->if_counters, counters, nitems(counters));
2536 
2537 		data->ifi_ipackets += counters[ifc_ipackets];
2538 		data->ifi_ierrors += counters[ifc_ierrors];
2539 		data->ifi_opackets += counters[ifc_opackets];
2540 		data->ifi_oerrors += counters[ifc_oerrors];
2541 		data->ifi_collisions += counters[ifc_collisions];
2542 		data->ifi_ibytes += counters[ifc_ibytes];
2543 		data->ifi_obytes += counters[ifc_obytes];
2544 		data->ifi_imcasts += counters[ifc_imcasts];
2545 		data->ifi_omcasts += counters[ifc_omcasts];
2546 		data->ifi_iqdrops += counters[ifc_iqdrops];
2547 		data->ifi_oqdrops += counters[ifc_oqdrops];
2548 		data->ifi_noproto += counters[ifc_noproto];
2549 	}
2550 
2551 	for (i = 0; i < ifp->if_nifqs; i++) {
2552 		struct ifqueue *ifq = ifp->if_ifqs[i];
2553 
2554 		ifq_add_data(ifq, data);
2555 	}
2556 
2557 	for (i = 0; i < ifp->if_niqs; i++) {
2558 		struct ifiqueue *ifiq = ifp->if_iqs[i];
2559 
2560 		ifiq_add_data(ifiq, data);
2561 	}
2562 }
2563 
2564 /*
2565  * Dummy functions replaced in ifnet during detach (if protocols decide to
2566  * fiddle with the if during detach.
2567  */
2568 void
2569 if_detached_qstart(struct ifqueue *ifq)
2570 {
2571 	ifq_purge(ifq);
2572 }
2573 
2574 int
2575 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2576 {
2577 	return ENODEV;
2578 }
2579 
2580 /*
2581  * Create interface group without members
2582  */
2583 struct ifg_group *
2584 if_creategroup(const char *groupname)
2585 {
2586 	struct ifg_group	*ifg;
2587 
2588 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2589 		return (NULL);
2590 
2591 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2592 	ifg->ifg_refcnt = 0;
2593 	ifg->ifg_carp_demoted = 0;
2594 	TAILQ_INIT(&ifg->ifg_members);
2595 #if NPF > 0
2596 	pfi_attach_ifgroup(ifg);
2597 #endif
2598 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2599 
2600 	return (ifg);
2601 }
2602 
2603 /*
2604  * Add a group to an interface
2605  */
2606 int
2607 if_addgroup(struct ifnet *ifp, const char *groupname)
2608 {
2609 	struct ifg_list		*ifgl;
2610 	struct ifg_group	*ifg = NULL;
2611 	struct ifg_member	*ifgm;
2612 
2613 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2614 	    groupname[strlen(groupname) - 1] <= '9')
2615 		return (EINVAL);
2616 
2617 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2618 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2619 			return (EEXIST);
2620 
2621 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2622 		return (ENOMEM);
2623 
2624 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2625 		free(ifgl, M_TEMP, sizeof(*ifgl));
2626 		return (ENOMEM);
2627 	}
2628 
2629 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2630 		if (!strcmp(ifg->ifg_group, groupname))
2631 			break;
2632 
2633 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
2634 		free(ifgl, M_TEMP, sizeof(*ifgl));
2635 		free(ifgm, M_TEMP, sizeof(*ifgm));
2636 		return (ENOMEM);
2637 	}
2638 
2639 	ifg->ifg_refcnt++;
2640 	ifgl->ifgl_group = ifg;
2641 	ifgm->ifgm_ifp = ifp;
2642 
2643 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2644 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2645 
2646 #if NPF > 0
2647 	pfi_group_addmember(groupname, ifp);
2648 #endif
2649 
2650 	return (0);
2651 }
2652 
2653 /*
2654  * Remove a group from an interface
2655  */
2656 int
2657 if_delgroup(struct ifnet *ifp, const char *groupname)
2658 {
2659 	struct ifg_list		*ifgl;
2660 	struct ifg_member	*ifgm;
2661 
2662 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2663 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2664 			break;
2665 	if (ifgl == NULL)
2666 		return (ENOENT);
2667 
2668 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2669 
2670 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2671 		if (ifgm->ifgm_ifp == ifp)
2672 			break;
2673 
2674 	if (ifgm != NULL) {
2675 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2676 		free(ifgm, M_TEMP, sizeof(*ifgm));
2677 	}
2678 
2679 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2680 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2681 #if NPF > 0
2682 		pfi_detach_ifgroup(ifgl->ifgl_group);
2683 #endif
2684 		free(ifgl->ifgl_group, M_TEMP, 0);
2685 	}
2686 
2687 	free(ifgl, M_TEMP, sizeof(*ifgl));
2688 
2689 #if NPF > 0
2690 	pfi_group_change(groupname);
2691 #endif
2692 
2693 	return (0);
2694 }
2695 
2696 /*
2697  * Stores all groups from an interface in memory pointed
2698  * to by data
2699  */
2700 int
2701 if_getgroup(caddr_t data, struct ifnet *ifp)
2702 {
2703 	int			 len, error;
2704 	struct ifg_list		*ifgl;
2705 	struct ifg_req		 ifgrq, *ifgp;
2706 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2707 
2708 	if (ifgr->ifgr_len == 0) {
2709 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2710 			ifgr->ifgr_len += sizeof(struct ifg_req);
2711 		return (0);
2712 	}
2713 
2714 	len = ifgr->ifgr_len;
2715 	ifgp = ifgr->ifgr_groups;
2716 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2717 		if (len < sizeof(ifgrq))
2718 			return (EINVAL);
2719 		bzero(&ifgrq, sizeof ifgrq);
2720 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2721 		    sizeof(ifgrq.ifgrq_group));
2722 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2723 		    sizeof(struct ifg_req))))
2724 			return (error);
2725 		len -= sizeof(ifgrq);
2726 		ifgp++;
2727 	}
2728 
2729 	return (0);
2730 }
2731 
2732 /*
2733  * Stores all members of a group in memory pointed to by data
2734  */
2735 int
2736 if_getgroupmembers(caddr_t data)
2737 {
2738 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2739 	struct ifg_group	*ifg;
2740 	struct ifg_member	*ifgm;
2741 	struct ifg_req		 ifgrq, *ifgp;
2742 	int			 len, error;
2743 
2744 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2745 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2746 			break;
2747 	if (ifg == NULL)
2748 		return (ENOENT);
2749 
2750 	if (ifgr->ifgr_len == 0) {
2751 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2752 			ifgr->ifgr_len += sizeof(ifgrq);
2753 		return (0);
2754 	}
2755 
2756 	len = ifgr->ifgr_len;
2757 	ifgp = ifgr->ifgr_groups;
2758 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2759 		if (len < sizeof(ifgrq))
2760 			return (EINVAL);
2761 		bzero(&ifgrq, sizeof ifgrq);
2762 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2763 		    sizeof(ifgrq.ifgrq_member));
2764 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2765 		    sizeof(struct ifg_req))))
2766 			return (error);
2767 		len -= sizeof(ifgrq);
2768 		ifgp++;
2769 	}
2770 
2771 	return (0);
2772 }
2773 
2774 int
2775 if_getgroupattribs(caddr_t data)
2776 {
2777 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2778 	struct ifg_group	*ifg;
2779 
2780 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2781 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2782 			break;
2783 	if (ifg == NULL)
2784 		return (ENOENT);
2785 
2786 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2787 
2788 	return (0);
2789 }
2790 
2791 int
2792 if_setgroupattribs(caddr_t data)
2793 {
2794 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2795 	struct ifg_group	*ifg;
2796 	struct ifg_member	*ifgm;
2797 	int			 demote;
2798 
2799 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2800 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2801 			break;
2802 	if (ifg == NULL)
2803 		return (ENOENT);
2804 
2805 	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
2806 	if (demote + ifg->ifg_carp_demoted > 0xff ||
2807 	    demote + ifg->ifg_carp_demoted < 0)
2808 		return (EINVAL);
2809 
2810 	ifg->ifg_carp_demoted += demote;
2811 
2812 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2813 		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);
2814 
2815 	return (0);
2816 }
2817 
2818 /*
2819  * Stores all groups in memory pointed to by data
2820  */
2821 int
2822 if_getgrouplist(caddr_t data)
2823 {
2824 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2825 	struct ifg_group	*ifg;
2826 	struct ifg_req		 ifgrq, *ifgp;
2827 	int			 len, error;
2828 
2829 	if (ifgr->ifgr_len == 0) {
2830 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2831 			ifgr->ifgr_len += sizeof(ifgrq);
2832 		return (0);
2833 	}
2834 
2835 	len = ifgr->ifgr_len;
2836 	ifgp = ifgr->ifgr_groups;
2837 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2838 		if (len < sizeof(ifgrq))
2839 			return (EINVAL);
2840 		bzero(&ifgrq, sizeof ifgrq);
2841 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2842                     sizeof(ifgrq.ifgrq_group));
2843 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2844                     sizeof(struct ifg_req))))
2845 			return (error);
2846 		len -= sizeof(ifgrq);
2847 		ifgp++;
2848 	}
2849 
2850 	return (0);
2851 }
2852 
/*
 * Route change hook: when a default route (IPv4 or IPv6) is added or
 * removed, rebuild the "egress" interface group.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		/* 0.0.0.0 with an empty or all-zero mask: default route. */
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		/* :: with an empty or all-zero mask: v6 default route. */
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2874 
/*
 * Recompute the "egress" group from scratch: remove all current
 * members, then re-add every interface that carries a default route
 * (IPv4 and, when built in, IPv6) in routing table 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Find the egress group, if it exists at all. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* Flush current members; _SAFE since if_delgroup frees entries. */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk every IPv4 default route and re-add its interface. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		/* rtable_iterate() moves to the next multipath route. */
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same for IPv6 default routes. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2924 
2925 /*
2926  * Set/clear promiscuous mode on interface ifp based on the truth value
2927  * of pswitch.  The calls are reference counted so that only the first
2928  * "on" request actually has an effect, as does the final "off" request.
2929  * Results are undefined if the "off" and "on" requests are not matched.
2930  */
2931 int
2932 ifpromisc(struct ifnet *ifp, int pswitch)
2933 {
2934 	struct ifreq ifr;
2935 	unsigned short oif_flags;
2936 	int oif_pcount, error;
2937 
2938 	oif_flags = ifp->if_flags;
2939 	oif_pcount = ifp->if_pcount;
2940 	if (pswitch) {
2941 		if (ifp->if_pcount++ != 0)
2942 			return (0);
2943 		ifp->if_flags |= IFF_PROMISC;
2944 	} else {
2945 		if (--ifp->if_pcount > 0)
2946 			return (0);
2947 		ifp->if_flags &= ~IFF_PROMISC;
2948 	}
2949 
2950 	if ((ifp->if_flags & IFF_UP) == 0)
2951 		return (0);
2952 
2953 	memset(&ifr, 0, sizeof(ifr));
2954 	ifr.ifr_flags = ifp->if_flags;
2955 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
2956 	if (error) {
2957 		ifp->if_flags = oif_flags;
2958 		ifp->if_pcount = oif_pcount;
2959 	}
2960 
2961 	return (error);
2962 }
2963 
/*
 * Append an address to the interface's address list.
 */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2969 
/*
 * Remove an address from the interface's address list.
 */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2975 
/*
 * Overwrite an address's broadcast sockaddr in place.  The new
 * sockaddr must have the same length as the old one, since the
 * storage is not reallocated.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
2983 
2984 #ifdef DDB
2985 /* debug function, can be called from ddb> */
2986 void
2987 ifa_print_all(void)
2988 {
2989 	struct ifnet *ifp;
2990 	struct ifaddr *ifa;
2991 
2992 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2993 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2994 			char addr[INET6_ADDRSTRLEN];
2995 
2996 			switch (ifa->ifa_addr->sa_family) {
2997 			case AF_INET:
2998 				printf("%s", inet_ntop(AF_INET,
2999 				    &satosin(ifa->ifa_addr)->sin_addr,
3000 				    addr, sizeof(addr)));
3001 				break;
3002 #ifdef INET6
3003 			case AF_INET6:
3004 				printf("%s", inet_ntop(AF_INET6,
3005 				    &(satosin6(ifa->ifa_addr))->sin6_addr,
3006 				    addr, sizeof(addr)));
3007 				break;
3008 #endif
3009 			}
3010 			printf(" on %s\n", ifp->if_xname);
3011 		}
3012 	}
3013 }
3014 #endif /* DDB */
3015 
/*
 * React to a link-layer address change: cycle the interface's UP
 * state so the driver reprograms its hardware filters, and refresh
 * the IPv6 link-local address, which is derived from the lladdr.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring it up: the driver picks up the new lladdr on the way. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 *
	 * NOTE(review): taking &(...)->ia_ifa of the lookup result
	 * appears to rely on ia_ifa being the first member of
	 * struct in6_ifaddr so that a NULL return still yields a
	 * NULL ifa here — confirm against netinet6 headers.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			dohooks(ifp->if_addrhooks, 0);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3062 
int net_ticks;		/* "ticks" value seen by the last net_tick() run */
u_int net_livelocks;	/* number of times net_tick() found itself delayed */

/*
 * Periodic watchdog: reschedules itself every tick and counts how
 * often more than one tick elapsed between runs, which indicates the
 * system could not keep up (a livelock).
 */
void
net_tick(void *null)
{
	extern int ticks;

	if (ticks - net_ticks > 1)
		net_livelocks++;

	net_ticks = ticks;

	timeout_add(&net_tick_to, 1);
}
3078 
3079 int
3080 net_livelocked(void)
3081 {
3082 	extern int ticks;
3083 
3084 	return (ticks - net_ticks > 1);
3085 }
3086 
3087 void
3088 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3089 {
3090 	extern int ticks;
3091 
3092 	memset(rxr, 0, sizeof(*rxr));
3093 
3094 	rxr->rxr_adjusted = ticks;
3095 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3096 	rxr->rxr_hwm = hwm;
3097 }
3098 
/*
 * Adapt the ring's current watermark: back off by one while the
 * system is livelocked, grow by one (up to the high watermark) while
 * the ring is running below the low watermark.  Records when the
 * last adjustment was made.
 */
static inline void
if_rxr_adjust_cwm(struct if_rxring *rxr)
{
	extern int ticks;

	if (net_livelocked()) {
		/* Under pressure: shrink, but never below the low mark. */
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;
		else
			return;
	} else if (rxr->rxr_alive >= rxr->rxr_lwm)
		return;
	else if (rxr->rxr_cwm < rxr->rxr_hwm)
		rxr->rxr_cwm++;

	rxr->rxr_adjusted = ticks;
}
3116 
/*
 * Called by a driver that detected overload itself: shrink the
 * ring's current watermark, at most once per tick.
 */
void
if_rxr_livelocked(struct if_rxring *rxr)
{
	extern int ticks;

	if (ticks - rxr->rxr_adjusted >= 1) {
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;

		rxr->rxr_adjusted = ticks;
	}
}
3129 
/*
 * Grant the driver up to "max" rx slots to fill, bounded by the
 * ring's current watermark.  Returns the number of slots granted
 * (possibly 0) and accounts them as alive.
 */
u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	/* Ring already holds its full allowance. */
	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;

	return (diff);
}
3149 
3150 int
3151 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
3152 {
3153 	struct if_rxrinfo kifri;
3154 	int error;
3155 	u_int n;
3156 
3157 	error = copyin(uifri, &kifri, sizeof(kifri));
3158 	if (error)
3159 		return (error);
3160 
3161 	n = min(t, kifri.ifri_total);
3162 	kifri.ifri_total = t;
3163 
3164 	if (n > 0) {
3165 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
3166 		if (error)
3167 			return (error);
3168 	}
3169 
3170 	return (copyout(&kifri, uifri, sizeof(kifri)));
3171 }
3172 
3173 int
3174 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3175     struct if_rxring *rxr)
3176 {
3177 	struct if_rxring_info ifr;
3178 
3179 	memset(&ifr, 0, sizeof(ifr));
3180 
3181 	if (name != NULL)
3182 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3183 
3184 	ifr.ifr_size = size;
3185 	ifr.ifr_info = *rxr;
3186 
3187 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3188 }
3189 
3190 /*
3191  * Network stack input queues.
3192  */
3193 
/*
 * Initialize a network stack input queue: a bounded mbuf queue plus
 * the netisr to schedule when packets are enqueued.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3200 
3201 int
3202 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3203 {
3204 	int rv;
3205 
3206 	rv = mq_enqueue(&niq->ni_q, m);
3207 	if (rv == 0)
3208 		schednetisr(niq->ni_isr);
3209 	else
3210 		if_congestion();
3211 
3212 	return (rv);
3213 }
3214 
3215 int
3216 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3217 {
3218 	int rv;
3219 
3220 	rv = mq_enlist(&niq->ni_q, ml);
3221 	if (rv == 0)
3222 		schednetisr(niq->ni_isr);
3223 	else
3224 		if_congestion();
3225 
3226 	return (rv);
3227 }
3228 
/*
 * Central panic for address families a caller does not know how to
 * handle; reaching this is a programming error, not a runtime
 * condition.
 */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3234 
3235 /*
3236  * XXXSMP This tunable is here to work around the fact that IPsec
3237  * globals aren't ready to be accessed by multiple threads in
3238  * parallel.
3239  */
3240 int		 nettaskqs = NET_TASKQ;
3241 
3242 struct taskq *
3243 net_tq(unsigned int ifindex)
3244 {
3245 	struct taskq *t = NULL;
3246 
3247 	t = nettqmp[ifindex % nettaskqs];
3248 
3249 	return (t);
3250 }
3251