xref: /openbsd-src/sys/net/if.c (revision e603c72f713dd59b67030a9b97ec661800da159e)
1 /*	$OpenBSD: if.c,v 1.583 2019/05/12 16:38:02 sashan Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "trunk.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/percpu.h>
88 #include <sys/proc.h>
89 
90 #include <dev/rndvar.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include <netinet/in.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/igmp.h>
101 #ifdef MROUTING
102 #include <netinet/ip_mroute.h>
103 #endif
104 
105 #ifdef INET6
106 #include <netinet6/in6_var.h>
107 #include <netinet6/in6_ifattach.h>
108 #include <netinet6/nd6.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #endif
112 
113 #ifdef MPLS
114 #include <netmpls/mpls.h>
115 #endif
116 
117 #if NBPFILTER > 0
118 #include <net/bpf.h>
119 #endif
120 
121 #if NBRIDGE > 0
122 #include <net/if_bridge.h>
123 #endif
124 
125 #if NCARP > 0
126 #include <netinet/ip_carp.h>
127 #endif
128 
129 #if NPF > 0
130 #include <net/pfvar.h>
131 #endif
132 
133 #include <sys/device.h>
134 
/* Forward declarations. */

/* attach path; if_slowtimo is armed as a timeout in if_attachsetup() */
void	if_attachsetup(struct ifnet *);
void	if_attachdomain(struct ifnet *);
void	if_attach_common(struct ifnet *);
int	if_createrdomain(int, struct ifnet *);
int	if_setrdomain(struct ifnet *, int);
void	if_slowtimo(void *);

/* installed by if_detach() as if_qstart and if_ioctl respectively */
void	if_detached_qstart(struct ifqueue *);
int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);

int	ifioctl_get(u_long, caddr_t);
int	ifconf(caddr_t);
static int
	if_sffpage_check(const caddr_t);

int	if_getgroup(caddr_t, struct ifnet *);
int	if_getgroupmembers(caddr_t);
int	if_getgroupattribs(caddr_t);
int	if_setgroupattribs(caddr_t);
int	if_getgrouplist(caddr_t);

/* if_linkstate_task is registered per interface in if_attachsetup() */
void	if_linkstate(struct ifnet *);
void	if_linkstate_task(void *);

int	if_clone_list(struct if_clonereq *);
struct if_clone	*if_clone_lookup(const char *, int *);

int	if_group_egress_build(void);

/* registered per interface in if_attachsetup() */
void	if_watchdog_task(void *);

/* handler of the if_input_task_locked task */
void	if_netisr(void *);

#ifdef DDB
void	ifa_print_all(void);
#endif

/* default if_qstart for drivers without IFXF_MPSAFE, see if_attach_common() */
void	if_qstart_compat(struct ifqueue *);
173 
174 /*
175  * interface index map
176  *
177  * the kernel maintains a mapping of interface indexes to struct ifnet
178  * pointers.
179  *
180  * the map is an array of struct ifnet pointers prefixed by an if_map
181  * structure. the if_map structure stores the length of its array.
182  *
183  * as interfaces are attached to the system, the map is grown on demand
184  * up to USHRT_MAX entries.
185  *
186  * interface index 0 is reserved and represents no interface. this
187  * supports the use of the interface index as the scope for IPv6 link
188  * local addresses, where scope 0 means no scope has been specified.
189  * it also supports the use of interface index as the unique identifier
190  * for network interfaces in SNMP applications as per RFC2863. therefore
191  * if_get(0) returns NULL.
192  */
193 
/* srp garbage-collection callbacks for if_ifp_gc and if_map_gc */
void if_ifp_dtor(void *, void *);
void if_map_dtor(void *, void *);
/* used when a map slot takes its own reference to an interface */
struct ifnet *if_ref(struct ifnet *);
197 
198 /*
199  * struct if_map
200  *
201  * bounded array of ifnet srp pointers used to fetch references of live
202  * interfaces with if_get().
203  */
204 
struct if_map {
	/* number of srp slots allocated directly behind this header */
	unsigned long		 limit;
	/* followed by limit ifnet srp pointers */
};
209 
210 /*
211  * struct if_idxmap
212  *
213  * infrastructure to manage updates and accesses to the current if_map.
214  */
215 
struct if_idxmap {
	/* source of candidate indexes; masked with USHRT_MAX on use */
	unsigned int		 serial;
	/* interfaces currently in the map; bounded by USHRT_MAX */
	unsigned int		 count;
	/* srp reference to the current struct if_map */
	struct srp		 map;
};
221 
void	if_idxmap_init(unsigned int);
void	if_idxmap_insert(struct ifnet *);
void	if_idxmap_remove(struct ifnet *);

/* presumably the list of all interface groups -- not used in this part */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;

/* timeout set up and first fired by hand from ifinit() */
struct timeout net_tick_to;
void	net_tick(void *);
int	net_livelocked(void);
int	ifq_congestion;

/* bitmask of pending NETISR_* work, drained by if_netisr() */
int		 netisr;

/* the softnet task queues, created in ifinit() */
#define	NET_TASKQ	1
struct taskq	*nettqmp[NET_TASKQ];

/* task whose handler is if_netisr() */
struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
248 
249 /*
250  * Network interface utility routines.
251  */
252 void
253 ifinit(void)
254 {
255 	unsigned int	i;
256 
257 	/*
258 	 * most machines boot with 4 or 5 interfaces, so size the initial map
259 	 * to accomodate this
260 	 */
261 	if_idxmap_init(8);
262 
263 	timeout_set(&net_tick_to, net_tick, &net_tick_to);
264 
265 	for (i = 0; i < NET_TASKQ; i++) {
266 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
267 		if (nettqmp[i] == NULL)
268 			panic("unable to create network taskq %d", i);
269 	}
270 
271 	net_tick(&net_tick_to);
272 }
273 
/*
 * The one interface index map.  The initializer is positional:
 * serial, count, map.  if_idxmap_init() sets serial to 1 before use.
 */
static struct if_idxmap if_idxmap = {
	0,
	0,
	SRP_INITIALIZER()
};

/* gc for the per-slot ifnet references and for whole maps, respectively */
struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of attached interfaces; see if_attach() and if_attachhead() */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
284 
285 void
286 if_idxmap_init(unsigned int limit)
287 {
288 	struct if_map *if_map;
289 	struct srp *map;
290 	unsigned int i;
291 
292 	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
293 
294 	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
295 	    M_IFADDR, M_WAITOK);
296 
297 	if_map->limit = limit;
298 	map = (struct srp *)(if_map + 1);
299 	for (i = 0; i < limit; i++)
300 		srp_init(&map[i]);
301 
302 	/* this is called early so there's nothing to race with */
303 	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
304 }
305 
306 void
307 if_idxmap_insert(struct ifnet *ifp)
308 {
309 	struct if_map *if_map;
310 	struct srp *map;
311 	unsigned int index, i;
312 
313 	refcnt_init(&ifp->if_refcnt);
314 
315 	/* the kernel lock guarantees serialised modifications to if_idxmap */
316 	KERNEL_ASSERT_LOCKED();
317 
318 	if (++if_idxmap.count > USHRT_MAX)
319 		panic("too many interfaces");
320 
321 	if_map = srp_get_locked(&if_idxmap.map);
322 	map = (struct srp *)(if_map + 1);
323 
324 	index = if_idxmap.serial++ & USHRT_MAX;
325 
326 	if (index >= if_map->limit) {
327 		struct if_map *nif_map;
328 		struct srp *nmap;
329 		unsigned int nlimit;
330 		struct ifnet *nifp;
331 
332 		nlimit = if_map->limit * 2;
333 		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
334 		    M_IFADDR, M_WAITOK);
335 		nmap = (struct srp *)(nif_map + 1);
336 
337 		nif_map->limit = nlimit;
338 		for (i = 0; i < if_map->limit; i++) {
339 			srp_init(&nmap[i]);
340 			nifp = srp_get_locked(&map[i]);
341 			if (nifp != NULL) {
342 				srp_update_locked(&if_ifp_gc, &nmap[i],
343 				    if_ref(nifp));
344 			}
345 		}
346 
347 		while (i < nlimit) {
348 			srp_init(&nmap[i]);
349 			i++;
350 		}
351 
352 		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
353 		if_map = nif_map;
354 		map = nmap;
355 	}
356 
357 	/* pick the next free index */
358 	for (i = 0; i < USHRT_MAX; i++) {
359 		if (index != 0 && srp_get_locked(&map[index]) == NULL)
360 			break;
361 
362 		index = if_idxmap.serial++ & USHRT_MAX;
363 	}
364 
365 	/* commit */
366 	ifp->if_index = index;
367 	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
368 }
369 
370 void
371 if_idxmap_remove(struct ifnet *ifp)
372 {
373 	struct if_map *if_map;
374 	struct srp *map;
375 	unsigned int index;
376 
377 	index = ifp->if_index;
378 
379 	/* the kernel lock guarantees serialised modifications to if_idxmap */
380 	KERNEL_ASSERT_LOCKED();
381 
382 	if_map = srp_get_locked(&if_idxmap.map);
383 	KASSERT(index < if_map->limit);
384 
385 	map = (struct srp *)(if_map + 1);
386 	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));
387 
388 	srp_update_locked(&if_ifp_gc, &map[index], NULL);
389 	if_idxmap.count--;
390 	/* end of if_idxmap modifications */
391 
392 	/* sleep until the last reference is released */
393 	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
394 }
395 
/*
 * srp gc callback for map slots: drop the interface reference that the
 * slot was holding.  The first argument is unused.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	struct ifnet *held = ifp;

	if_put(held);
}
401 
402 void
403 if_map_dtor(void *null, void *m)
404 {
405 	struct if_map *if_map = m;
406 	struct srp *map = (struct srp *)(if_map + 1);
407 	unsigned int i;
408 
409 	/*
410 	 * dont need to serialize the use of update_locked since this is
411 	 * the last reference to this map. there's nothing to race against.
412 	 */
413 	for (i = 0; i < if_map->limit; i++)
414 		srp_update_locked(&if_ifp_gc, &map[i], NULL);
415 
416 	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
417 }
418 
419 /*
420  * Attach an interface to the
421  * list of "active" interfaces.
422  */
423 void
424 if_attachsetup(struct ifnet *ifp)
425 {
426 	unsigned long ifidx;
427 
428 	NET_ASSERT_LOCKED();
429 
430 	TAILQ_INIT(&ifp->if_groups);
431 
432 	if_addgroup(ifp, IFG_ALL);
433 
434 	if_attachdomain(ifp);
435 #if NPF > 0
436 	pfi_attach_ifnet(ifp);
437 #endif
438 
439 	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
440 	if_slowtimo(ifp);
441 
442 	if_idxmap_insert(ifp);
443 	KASSERT(if_get(0) == NULL);
444 
445 	ifidx = ifp->if_index;
446 
447 	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
448 	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);
449 
450 	/* Announce the interface. */
451 	rtm_ifannounce(ifp, IFAN_ARRIVAL);
452 }
453 
454 /*
455  * Allocate the link level name for the specified interface.  This
456  * is an attachment helper.  It must be called after ifp->if_addrlen
457  * is initialized, which may not be the case when if_attach() is
458  * called.
459  */
460 void
461 if_alloc_sadl(struct ifnet *ifp)
462 {
463 	unsigned int socksize;
464 	int namelen, masklen;
465 	struct sockaddr_dl *sdl;
466 
467 	/*
468 	 * If the interface already has a link name, release it
469 	 * now.  This is useful for interfaces that can change
470 	 * link types, and thus switch link names often.
471 	 */
472 	if (ifp->if_sadl != NULL)
473 		if_free_sadl(ifp);
474 
475 	namelen = strlen(ifp->if_xname);
476 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
477 	socksize = masklen + ifp->if_addrlen;
478 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
479 	if (socksize < sizeof(*sdl))
480 		socksize = sizeof(*sdl);
481 	socksize = ROUNDUP(socksize);
482 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
483 	sdl->sdl_len = socksize;
484 	sdl->sdl_family = AF_LINK;
485 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
486 	sdl->sdl_nlen = namelen;
487 	sdl->sdl_alen = ifp->if_addrlen;
488 	sdl->sdl_index = ifp->if_index;
489 	sdl->sdl_type = ifp->if_type;
490 	ifp->if_sadl = sdl;
491 }
492 
493 /*
494  * Free the link level name for the specified interface.  This is
495  * a detach helper.  This is called from if_detach() or from
496  * link layer type specific detach functions.
497  */
498 void
499 if_free_sadl(struct ifnet *ifp)
500 {
501 	free(ifp->if_sadl, M_IFADDR, 0);
502 	ifp->if_sadl = NULL;
503 }
504 
505 void
506 if_attachdomain(struct ifnet *ifp)
507 {
508 	struct domain *dp;
509 	int i, s;
510 
511 	s = splnet();
512 
513 	/* address family dependent data region */
514 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
515 	for (i = 0; (dp = domains[i]) != NULL; i++) {
516 		if (dp->dom_ifattach)
517 			ifp->if_afdata[dp->dom_family] =
518 			    (*dp->dom_ifattach)(ifp);
519 	}
520 
521 	splx(s);
522 }
523 
524 void
525 if_attachhead(struct ifnet *ifp)
526 {
527 	if_attach_common(ifp);
528 	NET_LOCK();
529 	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
530 	if_attachsetup(ifp);
531 	NET_UNLOCK();
532 }
533 
534 void
535 if_attach(struct ifnet *ifp)
536 {
537 	if_attach_common(ifp);
538 	NET_LOCK();
539 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
540 	if_attachsetup(ifp);
541 	NET_UNLOCK();
542 }
543 
544 void
545 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
546 {
547 	struct ifqueue **map;
548 	struct ifqueue *ifq;
549 	int i;
550 
551 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
552 	KASSERT(nqs != 0);
553 
554 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
555 
556 	ifp->if_snd.ifq_softc = NULL;
557 	map[0] = &ifp->if_snd;
558 
559 	for (i = 1; i < nqs; i++) {
560 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
561 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
562 		ifq_init(ifq, ifp, i);
563 		map[i] = ifq;
564 	}
565 
566 	ifp->if_ifqs = map;
567 	ifp->if_nifqs = nqs;
568 }
569 
570 void
571 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
572 {
573 	struct ifiqueue **map;
574 	struct ifiqueue *ifiq;
575 	unsigned int i;
576 
577 	KASSERT(niqs != 0);
578 
579 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
580 
581 	ifp->if_rcv.ifiq_softc = NULL;
582 	map[0] = &ifp->if_rcv;
583 
584 	for (i = 1; i < niqs; i++) {
585 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
586 		ifiq_init(ifiq, ifp, i);
587 		map[i] = ifiq;
588 	}
589 
590 	ifp->if_iqs = map;
591 	ifp->if_niqs = niqs;
592 }
593 
594 void
595 if_attach_common(struct ifnet *ifp)
596 {
597 	KASSERT(ifp->if_ioctl != NULL);
598 
599 	TAILQ_INIT(&ifp->if_addrlist);
600 	TAILQ_INIT(&ifp->if_maddrlist);
601 
602 	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
603 		KASSERTMSG(ifp->if_qstart == NULL,
604 		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
605 		ifp->if_qstart = if_qstart_compat;
606 	} else {
607 		KASSERTMSG(ifp->if_start == NULL,
608 		    "%s: if_start set with MPSAFE set", ifp->if_xname);
609 		KASSERTMSG(ifp->if_qstart != NULL,
610 		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
611 	}
612 
613 	ifq_init(&ifp->if_snd, ifp, 0);
614 
615 	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
616 	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
617 	ifp->if_nifqs = 1;
618 	if (ifp->if_txmit == 0)
619 		ifp->if_txmit = IF_TXMIT_DEFAULT;
620 
621 	ifiq_init(&ifp->if_rcv, ifp, 0);
622 
623 	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
624 	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
625 	ifp->if_niqs = 1;
626 
627 	ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
628 	    M_TEMP, M_WAITOK);
629 	TAILQ_INIT(ifp->if_addrhooks);
630 	ifp->if_linkstatehooks = malloc(sizeof(*ifp->if_linkstatehooks),
631 	    M_TEMP, M_WAITOK);
632 	TAILQ_INIT(ifp->if_linkstatehooks);
633 	ifp->if_detachhooks = malloc(sizeof(*ifp->if_detachhooks),
634 	    M_TEMP, M_WAITOK);
635 	TAILQ_INIT(ifp->if_detachhooks);
636 
637 	if (ifp->if_rtrequest == NULL)
638 		ifp->if_rtrequest = if_rtrequest_dummy;
639 	if (ifp->if_enqueue == NULL)
640 		ifp->if_enqueue = if_enqueue_ifq;
641 	ifp->if_llprio = IFQ_DEFPRIO;
642 
643 	SRPL_INIT(&ifp->if_inputs);
644 }
645 
646 void
647 if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
648 {
649 	/*
650 	 * only switch the ifq_ops on the first ifq on an interface.
651 	 *
652 	 * the only ifq_ops we provide priq and hfsc, and hfsc only
653 	 * works on a single ifq. because the code uses the ifq_ops
654 	 * on the first ifq (if_snd) to select a queue for an mbuf,
655 	 * by switching only the first one we change both the algorithm
656 	 * and force the routing of all new packets to it.
657 	 */
658 	ifq_attach(&ifp->if_snd, newops, args);
659 }
660 
661 void
662 if_start(struct ifnet *ifp)
663 {
664 	KASSERT(ifp->if_qstart == if_qstart_compat);
665 	if_qstart_compat(&ifp->if_snd);
666 }
667 void
668 if_qstart_compat(struct ifqueue *ifq)
669 {
670 	struct ifnet *ifp = ifq->ifq_if;
671 	int s;
672 
673 	/*
674 	 * the stack assumes that an interface can have multiple
675 	 * transmit rings, but a lot of drivers are still written
676 	 * so that interfaces and send rings have a 1:1 mapping.
677 	 * this provides compatability between the stack and the older
678 	 * drivers by translating from the only queue they have
679 	 * (ifp->if_snd) back to the interface and calling if_start.
680  	 */
681 
682 	KERNEL_LOCK();
683 	s = splnet();
684 	(*ifp->if_start)(ifp);
685 	splx(s);
686 	KERNEL_UNLOCK();
687 }
688 
689 int
690 if_enqueue(struct ifnet *ifp, struct mbuf *m)
691 {
692 #if NPF > 0
693 	if (m->m_pkthdr.pf.delay > 0)
694 		return (pf_delay_pkt(m, ifp->if_index));
695 #endif
696 
697 #if NBRIDGE > 0
698 	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
699 		int error;
700 
701 		error = bridge_enqueue(ifp, m);
702 		return (error);
703 	}
704 #endif
705 
706 #if NPF > 0
707 	pf_pkt_addr_changed(m);
708 #endif	/* NPF > 0 */
709 
710 	return ((*ifp->if_enqueue)(ifp, m));
711 }
712 
713 int
714 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
715 {
716 	struct ifqueue *ifq = &ifp->if_snd;
717 	int error;
718 
719 	if (ifp->if_nifqs > 1) {
720 		unsigned int idx;
721 
722 		/*
723 		 * use the operations on the first ifq to pick which of
724 		 * the array gets this mbuf.
725 		 */
726 
727 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
728 		ifq = ifp->if_ifqs[idx];
729 	}
730 
731 	error = ifq_enqueue(ifq, m);
732 	if (error)
733 		return (error);
734 
735 	ifq_start(ifq);
736 
737 	return (0);
738 }
739 
740 void
741 if_input(struct ifnet *ifp, struct mbuf_list *ml)
742 {
743 	ifiq_input(&ifp->if_rcv, ml);
744 }
745 
746 int
747 if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
748 {
749 #if NBPFILTER > 0
750 	/*
751 	 * Only send packets to bpf if they are destinated to local
752 	 * addresses.
753 	 *
754 	 * if_input_local() is also called for SIMPLEX interfaces to
755 	 * duplicate packets for local use.  But don't dup them to bpf.
756 	 */
757 	if (ifp->if_flags & IFF_LOOPBACK) {
758 		caddr_t if_bpf = ifp->if_bpf;
759 
760 		if (if_bpf)
761 			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
762 	}
763 #endif
764 	m_resethdr(m);
765 	m->m_flags |= M_LOOP;
766 	m->m_pkthdr.ph_ifidx = ifp->if_index;
767 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
768 
769 	ifp->if_opackets++;
770 	ifp->if_obytes += m->m_pkthdr.len;
771 
772 	ifp->if_ipackets++;
773 	ifp->if_ibytes += m->m_pkthdr.len;
774 
775 	switch (af) {
776 	case AF_INET:
777 		ipv4_input(ifp, m);
778 		break;
779 #ifdef INET6
780 	case AF_INET6:
781 		ipv6_input(ifp, m);
782 		break;
783 #endif /* INET6 */
784 #ifdef MPLS
785 	case AF_MPLS:
786 		mpls_input(ifp, m);
787 		break;
788 #endif /* MPLS */
789 	default:
790 		printf("%s: can't handle af%d\n", ifp->if_xname, af);
791 		m_freem(m);
792 		return (EAFNOSUPPORT);
793 	}
794 
795 	return (0);
796 }
797 
798 int
799 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
800 {
801 	struct ifiqueue *ifiq;
802 	unsigned int flow = 0;
803 
804 	m->m_pkthdr.ph_family = af;
805 	m->m_pkthdr.ph_ifidx = ifp->if_index;
806 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
807 
808 	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
809 		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;
810 
811 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
812 
813 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
814 }
815 
/*
 * One input handler registered on an interface's if_inputs list.
 */
struct ifih {
	/* linkage in ifp->if_inputs */
	SRPL_ENTRY(ifih)	  ifih_next;
	/* handler; a non-zero return stops further handlers, see if_ih_input() */
	int			(*ifih_input)(struct ifnet *, struct mbuf *,
				      void *);
	/* passed back to ifih_input as its third argument */
	void			 *ifih_cookie;
	/* number of if_ih_insert() calls for this handler/cookie pair */
	int			  ifih_refcnt;
	/* references taken and dropped through ifih_rc */
	struct refcnt		  ifih_srpcnt;
};

void	if_ih_ref(void *, void *);
void	if_ih_unref(void *, void *);

/* reference counting callbacks for entries on if_inputs lists */
struct srpl_rc ifih_rc = SRPL_RC_INITIALIZER(if_ih_ref, if_ih_unref, NULL);
829 
830 void
831 if_ih_insert(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
832     void *), void *cookie)
833 {
834 	struct ifih *ifih;
835 
836 	/* the kernel lock guarantees serialised modifications to if_inputs */
837 	KERNEL_ASSERT_LOCKED();
838 
839 	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
840 		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie) {
841 			ifih->ifih_refcnt++;
842 			break;
843 		}
844 	}
845 
846 	if (ifih == NULL) {
847 		ifih = malloc(sizeof(*ifih), M_DEVBUF, M_WAITOK);
848 
849 		ifih->ifih_input = input;
850 		ifih->ifih_cookie = cookie;
851 		ifih->ifih_refcnt = 1;
852 		refcnt_init(&ifih->ifih_srpcnt);
853 		SRPL_INSERT_HEAD_LOCKED(&ifih_rc, &ifp->if_inputs,
854 		    ifih, ifih_next);
855 	}
856 }
857 
858 void
859 if_ih_ref(void *null, void *i)
860 {
861 	struct ifih *ifih = i;
862 
863 	refcnt_take(&ifih->ifih_srpcnt);
864 }
865 
866 void
867 if_ih_unref(void *null, void *i)
868 {
869 	struct ifih *ifih = i;
870 
871 	refcnt_rele_wake(&ifih->ifih_srpcnt);
872 }
873 
874 void
875 if_ih_remove(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
876     void *), void *cookie)
877 {
878 	struct ifih *ifih;
879 
880 	/* the kernel lock guarantees serialised modifications to if_inputs */
881 	KERNEL_ASSERT_LOCKED();
882 
883 	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
884 		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie)
885 			break;
886 	}
887 
888 	KASSERT(ifih != NULL);
889 
890 	if (--ifih->ifih_refcnt == 0) {
891 		SRPL_REMOVE_LOCKED(&ifih_rc, &ifp->if_inputs, ifih,
892 		    ifih, ifih_next);
893 
894 		refcnt_finalize(&ifih->ifih_srpcnt, "ifihrm");
895 		free(ifih, M_DEVBUF, sizeof(*ifih));
896 	}
897 }
898 
899 static void
900 if_ih_input(struct ifnet *ifp, struct mbuf *m)
901 {
902 	struct ifih *ifih;
903 	struct srp_ref sr;
904 
905 	/*
906 	 * Pass this mbuf to all input handlers of its
907 	 * interface until it is consumed.
908 	 */
909 	SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
910 		if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
911 			break;
912 	}
913 	SRPL_LEAVE(&sr);
914 
915 	if (ifih == NULL)
916 		m_freem(m);
917 }
918 
919 void
920 if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
921 {
922 	struct mbuf *m;
923 
924 	if (ml_empty(ml))
925 		return;
926 
927 	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
928 		enqueue_randomness(ml_len(ml));
929 
930 	/*
931 	 * We grab the NET_LOCK() before processing any packet to
932 	 * ensure there's no contention on the routing table lock.
933 	 *
934 	 * Without it we could race with a userland thread to insert
935 	 * a L2 entry in ip{6,}_output().  Such race would result in
936 	 * one of the threads sleeping *inside* the IP output path.
937 	 *
938 	 * Since we have a NET_LOCK() we also use it to serialize access
939 	 * to PF globals, pipex globals, unicast and multicast addresses
940 	 * lists.
941 	 */
942 	NET_RLOCK();
943 	while ((m = ml_dequeue(ml)) != NULL)
944 		if_ih_input(ifp, m);
945 	NET_RUNLOCK();
946 }
947 
948 void
949 if_vinput(struct ifnet *ifp, struct mbuf *m)
950 {
951 #if NBPFILTER > 0
952 	caddr_t if_bpf;
953 #endif
954 
955 	m->m_pkthdr.ph_ifidx = ifp->if_index;
956 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
957 
958 	counters_pkt(ifp->if_counters,
959 	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
960 
961 #if NBPFILTER > 0
962 	if_bpf = ifp->if_bpf;
963 	if (if_bpf) {
964 		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT)) {
965 			m_freem(m);
966 			return;
967 		}
968 	}
969 #endif
970 
971 	if_ih_input(ifp, m);
972 }
973 
974 void
975 if_netisr(void *unused)
976 {
977 	int n, t = 0;
978 
979 	NET_LOCK();
980 
981 	while ((n = netisr) != 0) {
982 		/* Like sched_pause() but with a rwlock dance. */
983 		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
984 			NET_UNLOCK();
985 			yield();
986 			NET_LOCK();
987 		}
988 
989 		atomic_clearbits_int(&netisr, n);
990 
991 #if NETHER > 0
992 		if (n & (1 << NETISR_ARP)) {
993 			KERNEL_LOCK();
994 			arpintr();
995 			KERNEL_UNLOCK();
996 		}
997 #endif
998 		if (n & (1 << NETISR_IP))
999 			ipintr();
1000 #ifdef INET6
1001 		if (n & (1 << NETISR_IPV6))
1002 			ip6intr();
1003 #endif
1004 #if NPPP > 0
1005 		if (n & (1 << NETISR_PPP)) {
1006 			KERNEL_LOCK();
1007 			pppintr();
1008 			KERNEL_UNLOCK();
1009 		}
1010 #endif
1011 #if NBRIDGE > 0
1012 		if (n & (1 << NETISR_BRIDGE))
1013 			bridgeintr();
1014 #endif
1015 #if NSWITCH > 0
1016 		if (n & (1 << NETISR_SWITCH)) {
1017 			KERNEL_LOCK();
1018 			switchintr();
1019 			KERNEL_UNLOCK();
1020 		}
1021 #endif
1022 #if NPPPOE > 0
1023 		if (n & (1 << NETISR_PPPOE)) {
1024 			KERNEL_LOCK();
1025 			pppoeintr();
1026 			KERNEL_UNLOCK();
1027 		}
1028 #endif
1029 #ifdef PIPEX
1030 		if (n & (1 << NETISR_PIPEX)) {
1031 			KERNEL_LOCK();
1032 			pipexintr();
1033 			KERNEL_UNLOCK();
1034 		}
1035 #endif
1036 		t |= n;
1037 	}
1038 
1039 #if NPFSYNC > 0
1040 	if (t & (1 << NETISR_PFSYNC)) {
1041 		KERNEL_LOCK();
1042 		pfsyncintr();
1043 		KERNEL_UNLOCK();
1044 	}
1045 #endif
1046 
1047 	NET_UNLOCK();
1048 }
1049 
1050 void
1051 if_deactivate(struct ifnet *ifp)
1052 {
1053 	NET_LOCK();
1054 	/*
1055 	 * Call detach hooks from head to tail.  To make sure detach
1056 	 * hooks are executed in the reverse order they were added, all
1057 	 * the hooks have to be added to the head!
1058 	 */
1059 	dohooks(ifp->if_detachhooks, HOOK_REMOVE | HOOK_FREE);
1060 
1061 	NET_UNLOCK();
1062 }
1063 
1064 /*
1065  * Detach an interface from everything in the kernel.  Also deallocate
1066  * private resources.
1067  */
1068 void
1069 if_detach(struct ifnet *ifp)
1070 {
1071 	struct ifaddr *ifa;
1072 	struct ifg_list *ifg;
1073 	struct domain *dp;
1074 	int i, s;
1075 
1076 	/* Undo pseudo-driver changes. */
1077 	if_deactivate(ifp);
1078 
1079 	ifq_clr_oactive(&ifp->if_snd);
1080 
1081 	/* Other CPUs must not have a reference before we start destroying. */
1082 	if_idxmap_remove(ifp);
1083 
1084 #if NBPFILTER > 0
1085 	bpfdetach(ifp);
1086 #endif
1087 
1088 	NET_LOCK();
1089 	s = splnet();
1090 	ifp->if_qstart = if_detached_qstart;
1091 	ifp->if_ioctl = if_detached_ioctl;
1092 	ifp->if_watchdog = NULL;
1093 
1094 	/* Remove the watchdog timeout & task */
1095 	timeout_del(&ifp->if_slowtimo);
1096 	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1097 
1098 	/* Remove the link state task */
1099 	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1100 
1101 	rti_delete(ifp);
1102 #if NETHER > 0 && defined(NFSCLIENT)
1103 	if (ifp->if_index == revarp_ifidx)
1104 		revarp_ifidx = 0;
1105 #endif
1106 #ifdef MROUTING
1107 	vif_delete(ifp);
1108 #endif
1109 	in_ifdetach(ifp);
1110 #ifdef INET6
1111 	in6_ifdetach(ifp);
1112 #endif
1113 #if NPF > 0
1114 	pfi_detach_ifnet(ifp);
1115 #endif
1116 
1117 	/* Remove the interface from the list of all interfaces.  */
1118 	TAILQ_REMOVE(&ifnet, ifp, if_list);
1119 
1120 	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
1121 		if_delgroup(ifp, ifg->ifgl_group->ifg_group);
1122 
1123 	if_free_sadl(ifp);
1124 
1125 	/* We should not have any address left at this point. */
1126 	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
1127 #ifdef DIAGNOSTIC
1128 		printf("%s: address list non empty\n", ifp->if_xname);
1129 #endif
1130 		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
1131 			ifa_del(ifp, ifa);
1132 			ifa->ifa_ifp = NULL;
1133 			ifafree(ifa);
1134 		}
1135 	}
1136 
1137 	free(ifp->if_addrhooks, M_TEMP, 0);
1138 	free(ifp->if_linkstatehooks, M_TEMP, 0);
1139 	free(ifp->if_detachhooks, M_TEMP, 0);
1140 
1141 	for (i = 0; (dp = domains[i]) != NULL; i++) {
1142 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
1143 			(*dp->dom_ifdetach)(ifp,
1144 			    ifp->if_afdata[dp->dom_family]);
1145 	}
1146 
1147 	/* Announce that the interface is gone. */
1148 	rtm_ifannounce(ifp, IFAN_DEPARTURE);
1149 	splx(s);
1150 	NET_UNLOCK();
1151 
1152 	if (ifp->if_counters != NULL)
1153 		if_counters_free(ifp);
1154 
1155 	for (i = 0; i < ifp->if_nifqs; i++)
1156 		ifq_destroy(ifp->if_ifqs[i]);
1157 	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
1158 		for (i = 1; i < ifp->if_nifqs; i++) {
1159 			free(ifp->if_ifqs[i], M_DEVBUF,
1160 			    sizeof(struct ifqueue));
1161 		}
1162 		free(ifp->if_ifqs, M_DEVBUF,
1163 		    sizeof(struct ifqueue *) * ifp->if_nifqs);
1164 	}
1165 
1166 	for (i = 0; i < ifp->if_niqs; i++)
1167 		ifiq_destroy(ifp->if_iqs[i]);
1168 	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
1169 		for (i = 1; i < ifp->if_niqs; i++) {
1170 			free(ifp->if_iqs[i], M_DEVBUF,
1171 			    sizeof(struct ifiqueue));
1172 		}
1173 		free(ifp->if_iqs, M_DEVBUF,
1174 		    sizeof(struct ifiqueue *) * ifp->if_niqs);
1175 	}
1176 }
1177 
1178 /*
1179  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1180  */
1181 int
1182 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1183 {
1184 	struct ifnet *ifp;
1185 	int connected = 0;
1186 
1187 	ifp = if_get(ifidx);
1188 	if (ifp == NULL)
1189 		return (0);
1190 
1191 	if (ifp0->if_index == ifp->if_index)
1192 		connected = 1;
1193 
1194 #if NBRIDGE > 0
1195 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1196 		connected = 1;
1197 #endif
1198 #if NCARP > 0
1199 	if ((ifp0->if_type == IFT_CARP && ifp0->if_carpdev == ifp) ||
1200 	    (ifp->if_type == IFT_CARP && ifp->if_carpdev == ifp0))
1201 		connected = 1;
1202 #endif
1203 
1204 	if_put(ifp);
1205 	return (connected);
1206 }
1207 
1208 /*
1209  * Create a clone network interface.
1210  */
1211 int
1212 if_clone_create(const char *name, int rdomain)
1213 {
1214 	struct if_clone *ifc;
1215 	struct ifnet *ifp;
1216 	int unit, ret;
1217 
1218 	ifc = if_clone_lookup(name, &unit);
1219 	if (ifc == NULL)
1220 		return (EINVAL);
1221 
1222 	if (ifunit(name) != NULL)
1223 		return (EEXIST);
1224 
1225 	ret = (*ifc->ifc_create)(ifc, unit);
1226 
1227 	if (ret != 0 || (ifp = ifunit(name)) == NULL)
1228 		return (ret);
1229 
1230 	NET_LOCK();
1231 	if_addgroup(ifp, ifc->ifc_name);
1232 	if (rdomain != 0)
1233 		if_setrdomain(ifp, rdomain);
1234 	NET_UNLOCK();
1235 
1236 	return (ret);
1237 }
1238 
1239 /*
1240  * Destroy a clone network interface.
1241  */
1242 int
1243 if_clone_destroy(const char *name)
1244 {
1245 	struct if_clone *ifc;
1246 	struct ifnet *ifp;
1247 	int ret;
1248 
1249 	ifc = if_clone_lookup(name, NULL);
1250 	if (ifc == NULL)
1251 		return (EINVAL);
1252 
1253 	ifp = ifunit(name);
1254 	if (ifp == NULL)
1255 		return (ENXIO);
1256 
1257 	if (ifc->ifc_destroy == NULL)
1258 		return (EOPNOTSUPP);
1259 
1260 	NET_LOCK();
1261 	if (ifp->if_flags & IFF_UP) {
1262 		int s;
1263 		s = splnet();
1264 		if_down(ifp);
1265 		splx(s);
1266 	}
1267 	NET_UNLOCK();
1268 	ret = (*ifc->ifc_destroy)(ifp);
1269 
1270 	return (ret);
1271 }
1272 
1273 /*
1274  * Look up a network interface cloner.
1275  */
1276 struct if_clone *
1277 if_clone_lookup(const char *name, int *unitp)
1278 {
1279 	struct if_clone *ifc;
1280 	const char *cp;
1281 	int unit;
1282 
1283 	/* separate interface name from unit */
1284 	for (cp = name;
1285 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1286 	    cp++)
1287 		continue;
1288 
1289 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1290 		return (NULL);	/* No name or unit number */
1291 
1292 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1293 		return (NULL);	/* unit number 0 padded */
1294 
1295 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1296 		if (strlen(ifc->ifc_name) == cp - name &&
1297 		    !strncmp(name, ifc->ifc_name, cp - name))
1298 			break;
1299 	}
1300 
1301 	if (ifc == NULL)
1302 		return (NULL);
1303 
1304 	unit = 0;
1305 	while (cp - name < IFNAMSIZ && *cp) {
1306 		if (*cp < '0' || *cp > '9' ||
1307 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1308 			/* Bogus unit number. */
1309 			return (NULL);
1310 		}
1311 		unit = (unit * 10) + (*cp++ - '0');
1312 	}
1313 
1314 	if (unitp != NULL)
1315 		*unitp = unit;
1316 	return (ifc);
1317 }
1318 
1319 /*
1320  * Register a network interface cloner.
1321  */
1322 void
1323 if_clone_attach(struct if_clone *ifc)
1324 {
1325 	/*
1326 	 * we are called at kernel boot by main(), when pseudo devices are
1327 	 * being attached. The main() is the only guy which may alter the
1328 	 * if_cloners. While system is running and main() is done with
1329 	 * initialization, the if_cloners becomes immutable.
1330 	 */
1331 	KASSERT(pdevinit_done == 0);
1332 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1333 	if_cloners_count++;
1334 }
1335 
1336 /*
1337  * Provide list of interface cloners to userspace.
1338  */
1339 int
1340 if_clone_list(struct if_clonereq *ifcr)
1341 {
1342 	char outbuf[IFNAMSIZ], *dst;
1343 	struct if_clone *ifc;
1344 	int count, error = 0;
1345 
1346 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1347 		/* Just asking how many there are. */
1348 		ifcr->ifcr_total = if_cloners_count;
1349 		return (0);
1350 	}
1351 
1352 	if (ifcr->ifcr_count < 0)
1353 		return (EINVAL);
1354 
1355 	ifcr->ifcr_total = if_cloners_count;
1356 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1357 
1358 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1359 		if (count == 0)
1360 			break;
1361 		bzero(outbuf, sizeof outbuf);
1362 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1363 		error = copyout(outbuf, dst, IFNAMSIZ);
1364 		if (error)
1365 			break;
1366 		count--;
1367 		dst += IFNAMSIZ;
1368 	}
1369 
1370 	return (error);
1371 }
1372 
1373 /*
1374  * set queue congestion marker
1375  */
1376 void
1377 if_congestion(void)
1378 {
1379 	extern int ticks;
1380 
1381 	ifq_congestion = ticks;
1382 }
1383 
1384 int
1385 if_congested(void)
1386 {
1387 	extern int ticks;
1388 	int diff;
1389 
1390 	diff = ticks - ifq_congestion;
1391 	if (diff < 0) {
1392 		ifq_congestion = ticks - hz;
1393 		return (0);
1394 	}
1395 
1396 	return (diff <= (hz / 100));
1397 }
1398 
/*
 * True when the first (a1)->sa_len bytes of the two sockaddrs are
 * identical.  Note that ``a1'' is evaluated twice.
 */
#define	equal(a1, a2)						\
	(bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
1402 
1403 /*
1404  * Locate an interface based on a complete address.
1405  */
1406 struct ifaddr *
1407 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1408 {
1409 	struct ifnet *ifp;
1410 	struct ifaddr *ifa;
1411 	u_int rdomain;
1412 
1413 	rdomain = rtable_l2(rtableid);
1414 	KERNEL_LOCK();
1415 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1416 		if (ifp->if_rdomain != rdomain)
1417 			continue;
1418 
1419 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1420 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1421 				continue;
1422 
1423 			if (equal(addr, ifa->ifa_addr)) {
1424 				KERNEL_UNLOCK();
1425 				return (ifa);
1426 			}
1427 		}
1428 	}
1429 	KERNEL_UNLOCK();
1430 	return (NULL);
1431 }
1432 
1433 /*
1434  * Locate the point to point interface with a given destination address.
1435  */
1436 struct ifaddr *
1437 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1438 {
1439 	struct ifnet *ifp;
1440 	struct ifaddr *ifa;
1441 
1442 	rdomain = rtable_l2(rdomain);
1443 	KERNEL_LOCK();
1444 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1445 		if (ifp->if_rdomain != rdomain)
1446 			continue;
1447 		if (ifp->if_flags & IFF_POINTOPOINT) {
1448 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1449 				if (ifa->ifa_addr->sa_family !=
1450 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1451 					continue;
1452 				if (equal(addr, ifa->ifa_dstaddr)) {
1453 					KERNEL_UNLOCK();
1454 					return (ifa);
1455 				}
1456 			}
1457 		}
1458 	}
1459 	KERNEL_UNLOCK();
1460 	return (NULL);
1461 }
1462 
1463 /*
1464  * Find an interface address specific to an interface best matching
1465  * a given address.
1466  */
1467 struct ifaddr *
1468 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1469 {
1470 	struct ifaddr *ifa;
1471 	char *cp, *cp2, *cp3;
1472 	char *cplim;
1473 	struct ifaddr *ifa_maybe = NULL;
1474 	u_int af = addr->sa_family;
1475 
1476 	if (af >= AF_MAX)
1477 		return (NULL);
1478 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1479 		if (ifa->ifa_addr->sa_family != af)
1480 			continue;
1481 		if (ifa_maybe == NULL)
1482 			ifa_maybe = ifa;
1483 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1484 			if (equal(addr, ifa->ifa_addr) ||
1485 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1486 				return (ifa);
1487 			continue;
1488 		}
1489 		cp = addr->sa_data;
1490 		cp2 = ifa->ifa_addr->sa_data;
1491 		cp3 = ifa->ifa_netmask->sa_data;
1492 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1493 		for (; cp3 < cplim; cp3++)
1494 			if ((*cp++ ^ *cp2++) & *cp3)
1495 				break;
1496 		if (cp3 == cplim)
1497 			return (ifa);
1498 	}
1499 	return (ifa_maybe);
1500 }
1501 
/*
 * Route request handler that does nothing; all arguments are ignored.
 */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
	/* nothing to do */
}
1506 
1507 /*
1508  * Default action when installing a local route on a point-to-point
1509  * interface.
1510  */
1511 void
1512 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1513 {
1514 	struct ifnet *lo0ifp;
1515 	struct ifaddr *ifa, *lo0ifa;
1516 
1517 	switch (req) {
1518 	case RTM_ADD:
1519 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1520 			break;
1521 
1522 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1523 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1524 			    rt_key(rt)->sa_len) == 0)
1525 				break;
1526 		}
1527 
1528 		if (ifa == NULL)
1529 			break;
1530 
1531 		KASSERT(ifa == rt->rt_ifa);
1532 
1533 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1534 		KASSERT(lo0ifp != NULL);
1535 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1536 			if (lo0ifa->ifa_addr->sa_family ==
1537 			    ifa->ifa_addr->sa_family)
1538 				break;
1539 		}
1540 		if_put(lo0ifp);
1541 
1542 		if (lo0ifa == NULL)
1543 			break;
1544 
1545 		rt->rt_flags &= ~RTF_LLINFO;
1546 		break;
1547 	case RTM_DELETE:
1548 	case RTM_RESOLVE:
1549 	default:
1550 		break;
1551 	}
1552 }
1553 
1554 
1555 /*
1556  * Bring down all interfaces
1557  */
1558 void
1559 if_downall(void)
1560 {
1561 	struct ifreq ifrq;	/* XXX only partly built */
1562 	struct ifnet *ifp;
1563 
1564 	NET_LOCK();
1565 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1566 		if ((ifp->if_flags & IFF_UP) == 0)
1567 			continue;
1568 		if_down(ifp);
1569 		ifrq.ifr_flags = ifp->if_flags;
1570 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1571 	}
1572 	NET_UNLOCK();
1573 }
1574 
1575 /*
1576  * Mark an interface down and notify protocols of
1577  * the transition.
1578  */
1579 void
1580 if_down(struct ifnet *ifp)
1581 {
1582 	NET_ASSERT_LOCKED();
1583 
1584 	ifp->if_flags &= ~IFF_UP;
1585 	getmicrotime(&ifp->if_lastchange);
1586 	IFQ_PURGE(&ifp->if_snd);
1587 
1588 	if_linkstate(ifp);
1589 }
1590 
1591 /*
1592  * Mark an interface up and notify protocols of
1593  * the transition.
1594  */
1595 void
1596 if_up(struct ifnet *ifp)
1597 {
1598 	NET_ASSERT_LOCKED();
1599 
1600 	ifp->if_flags |= IFF_UP;
1601 	getmicrotime(&ifp->if_lastchange);
1602 
1603 #ifdef INET6
1604 	/* Userland expects the kernel to set ::1 on default lo(4). */
1605 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1606 		in6_ifattach(ifp);
1607 #endif
1608 
1609 	if_linkstate(ifp);
1610 }
1611 
/*
 * Task wrapper around if_linkstate().
 *
 * ``xifidx'' carries an interface index.  If that interface still
 * exists, if_linkstate() is run for it with both the kernel lock and the
 * net lock held.
 */
void
if_linkstate_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	if ((ifp = if_get(ifidx)) != NULL) {
		if_linkstate(ifp);
		if_put(ifp);
	}

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1633 
1634 void
1635 if_linkstate(struct ifnet *ifp)
1636 {
1637 	NET_ASSERT_LOCKED();
1638 
1639 	rtm_ifchg(ifp);
1640 	rt_if_track(ifp);
1641 	dohooks(ifp->if_linkstatehooks, 0);
1642 }
1643 
1644 /*
1645  * Schedule a link state change task.
1646  */
1647 void
1648 if_link_state_change(struct ifnet *ifp)
1649 {
1650 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1651 }
1652 
1653 /*
1654  * Handle interface watchdog timer routine.  Called
1655  * from softclock, we decrement timer (if set) and
1656  * call the appropriate interface routine on expiration.
1657  */
1658 void
1659 if_slowtimo(void *arg)
1660 {
1661 	struct ifnet *ifp = arg;
1662 	int s = splnet();
1663 
1664 	if (ifp->if_watchdog) {
1665 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1666 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1667 		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
1668 	}
1669 	splx(s);
1670 }
1671 
1672 void
1673 if_watchdog_task(void *xifidx)
1674 {
1675 	unsigned int ifidx = (unsigned long)xifidx;
1676 	struct ifnet *ifp;
1677 	int s;
1678 
1679 	ifp = if_get(ifidx);
1680 	if (ifp == NULL)
1681 		return;
1682 
1683 	KERNEL_LOCK();
1684 	s = splnet();
1685 	if (ifp->if_watchdog)
1686 		(*ifp->if_watchdog)(ifp);
1687 	splx(s);
1688 	KERNEL_UNLOCK();
1689 
1690 	if_put(ifp);
1691 }
1692 
1693 /*
1694  * Map interface name to interface structure pointer.
1695  */
1696 struct ifnet *
1697 ifunit(const char *name)
1698 {
1699 	struct ifnet *ifp;
1700 
1701 	KERNEL_ASSERT_LOCKED();
1702 
1703 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1704 		if (strcmp(ifp->if_xname, name) == 0)
1705 			return (ifp);
1706 	}
1707 	return (NULL);
1708 }
1709 
1710 /*
1711  * Map interface index to interface structure pointer.
1712  */
1713 struct ifnet *
1714 if_get(unsigned int index)
1715 {
1716 	struct srp_ref sr;
1717 	struct if_map *if_map;
1718 	struct srp *map;
1719 	struct ifnet *ifp = NULL;
1720 
1721 	if_map = srp_enter(&sr, &if_idxmap.map);
1722 	if (index < if_map->limit) {
1723 		map = (struct srp *)(if_map + 1);
1724 
1725 		ifp = srp_follow(&sr, &map[index]);
1726 		if (ifp != NULL) {
1727 			KASSERT(ifp->if_index == index);
1728 			if_ref(ifp);
1729 		}
1730 	}
1731 	srp_leave(&sr);
1732 
1733 	return (ifp);
1734 }
1735 
1736 struct ifnet *
1737 if_ref(struct ifnet *ifp)
1738 {
1739 	refcnt_take(&ifp->if_refcnt);
1740 
1741 	return (ifp);
1742 }
1743 
1744 void
1745 if_put(struct ifnet *ifp)
1746 {
1747 	if (ifp == NULL)
1748 		return;
1749 
1750 	refcnt_rele_wake(&ifp->if_refcnt);
1751 }
1752 
1753 int
1754 if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
1755 {
1756 	if (ifp->if_sadl == NULL)
1757 		return (EINVAL);
1758 
1759 	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
1760 	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);
1761 
1762 	return (0);
1763 }
1764 
1765 int
1766 if_createrdomain(int rdomain, struct ifnet *ifp)
1767 {
1768 	int error;
1769 	struct ifnet *loifp;
1770 	char loifname[IFNAMSIZ];
1771 	unsigned int unit = rdomain;
1772 
1773 	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
1774 		return (error);
1775 	if (!rtable_empty(rdomain))
1776 		return (EEXIST);
1777 
1778 	/* Create rdomain including its loopback if with unit == rdomain */
1779 	snprintf(loifname, sizeof(loifname), "lo%u", unit);
1780 	error = if_clone_create(loifname, 0);
1781 	if ((loifp = ifunit(loifname)) == NULL)
1782 		return (ENXIO);
1783 	if (error && (ifp != loifp || error != EEXIST))
1784 		return (error);
1785 
1786 	rtable_l2set(rdomain, rdomain, loifp->if_index);
1787 	loifp->if_rdomain = rdomain;
1788 
1789 	return (0);
1790 }
1791 
1792 int
1793 if_setrdomain(struct ifnet *ifp, int rdomain)
1794 {
1795 	struct ifreq ifr;
1796 	int error, up = 0, s;
1797 
1798 	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
1799 		return (EINVAL);
1800 
1801 	if (rdomain != ifp->if_rdomain &&
1802 	    (ifp->if_flags & IFF_LOOPBACK) &&
1803 	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
1804 		return (EPERM);
1805 
1806 	if (!rtable_exists(rdomain))
1807 		return (ESRCH);
1808 
1809 	/* make sure that the routing table is a real rdomain */
1810 	if (rdomain != rtable_l2(rdomain))
1811 		return (EINVAL);
1812 
1813 	if (rdomain != ifp->if_rdomain) {
1814 		s = splnet();
1815 		/*
1816 		 * We are tearing down the world.
1817 		 * Take down the IF so:
1818 		 * 1. everything that cares gets a message
1819 		 * 2. the automagic IPv6 bits are recreated
1820 		 */
1821 		if (ifp->if_flags & IFF_UP) {
1822 			up = 1;
1823 			if_down(ifp);
1824 		}
1825 		rti_delete(ifp);
1826 #ifdef MROUTING
1827 		vif_delete(ifp);
1828 #endif
1829 		in_ifdetach(ifp);
1830 #ifdef INET6
1831 		in6_ifdetach(ifp);
1832 #endif
1833 		splx(s);
1834 	}
1835 
1836 	/* Let devices like enc(4) or mpe(4) know about the change */
1837 	ifr.ifr_rdomainid = rdomain;
1838 	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
1839 	    (caddr_t)&ifr)) != ENOTTY)
1840 		return (error);
1841 	error = 0;
1842 
1843 	/* Add interface to the specified rdomain */
1844 	ifp->if_rdomain = rdomain;
1845 
1846 	/* If we took down the IF, bring it back */
1847 	if (up) {
1848 		s = splnet();
1849 		if_up(ifp);
1850 		splx(s);
1851 	}
1852 
1853 	return (0);
1854 }
1855 
1856 /*
1857  * Interface ioctls.
1858  */
1859 int
1860 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1861 {
1862 	struct ifnet *ifp;
1863 	struct ifreq *ifr = (struct ifreq *)data;
1864 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1865 	struct if_afreq *ifar = (struct if_afreq *)data;
1866 	char ifdescrbuf[IFDESCRSIZE];
1867 	char ifrtlabelbuf[RTLABEL_LEN];
1868 	int s, error = 0, oif_xflags;
1869 	size_t bytesdone;
1870 	unsigned short oif_flags;
1871 
1872 	switch (cmd) {
1873 	case SIOCIFCREATE:
1874 		if ((error = suser(p)) != 0)
1875 			return (error);
1876 		error = if_clone_create(ifr->ifr_name, 0);
1877 		return (error);
1878 	case SIOCIFDESTROY:
1879 		if ((error = suser(p)) != 0)
1880 			return (error);
1881 		error = if_clone_destroy(ifr->ifr_name);
1882 		return (error);
1883 	case SIOCSIFGATTR:
1884 		if ((error = suser(p)) != 0)
1885 			return (error);
1886 		NET_LOCK();
1887 		error = if_setgroupattribs(data);
1888 		NET_UNLOCK();
1889 		return (error);
1890 	case SIOCGIFCONF:
1891 	case SIOCIFGCLONERS:
1892 	case SIOCGIFGMEMB:
1893 	case SIOCGIFGATTR:
1894 	case SIOCGIFGLIST:
1895 	case SIOCGIFFLAGS:
1896 	case SIOCGIFXFLAGS:
1897 	case SIOCGIFMETRIC:
1898 	case SIOCGIFMTU:
1899 	case SIOCGIFHARDMTU:
1900 	case SIOCGIFDATA:
1901 	case SIOCGIFDESCR:
1902 	case SIOCGIFRTLABEL:
1903 	case SIOCGIFPRIORITY:
1904 	case SIOCGIFRDOMAIN:
1905 	case SIOCGIFGROUP:
1906 	case SIOCGIFLLPRIO:
1907 		return (ifioctl_get(cmd, data));
1908 	}
1909 
1910 	ifp = ifunit(ifr->ifr_name);
1911 	if (ifp == NULL)
1912 		return (ENXIO);
1913 	oif_flags = ifp->if_flags;
1914 	oif_xflags = ifp->if_xflags;
1915 
1916 	switch (cmd) {
1917 	case SIOCIFAFATTACH:
1918 	case SIOCIFAFDETACH:
1919 		if ((error = suser(p)) != 0)
1920 			break;
1921 		NET_LOCK();
1922 		switch (ifar->ifar_af) {
1923 		case AF_INET:
1924 			/* attach is a noop for AF_INET */
1925 			if (cmd == SIOCIFAFDETACH)
1926 				in_ifdetach(ifp);
1927 			break;
1928 #ifdef INET6
1929 		case AF_INET6:
1930 			if (cmd == SIOCIFAFATTACH)
1931 				error = in6_ifattach(ifp);
1932 			else
1933 				in6_ifdetach(ifp);
1934 			break;
1935 #endif /* INET6 */
1936 		default:
1937 			error = EAFNOSUPPORT;
1938 		}
1939 		NET_UNLOCK();
1940 		break;
1941 
1942 	case SIOCSIFFLAGS:
1943 		if ((error = suser(p)) != 0)
1944 			break;
1945 
1946 		NET_LOCK();
1947 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1948 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1949 
1950 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1951 		if (error != 0) {
1952 			ifp->if_flags = oif_flags;
1953 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
1954 			s = splnet();
1955 			if (ISSET(ifp->if_flags, IFF_UP))
1956 				if_up(ifp);
1957 			else
1958 				if_down(ifp);
1959 			splx(s);
1960 		}
1961 		NET_UNLOCK();
1962 		break;
1963 
1964 	case SIOCSIFXFLAGS:
1965 		if ((error = suser(p)) != 0)
1966 			break;
1967 
1968 		NET_LOCK();
1969 #ifdef INET6
1970 		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
1971 			error = in6_ifattach(ifp);
1972 			if (error != 0) {
1973 				NET_UNLOCK();
1974 				break;
1975 			}
1976 		}
1977 
1978 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1979 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1980 			ifp->if_xflags |= IFXF_INET6_NOSOII;
1981 			in6_soiiupdate(ifp);
1982 		}
1983 
1984 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1985 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1986 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
1987 			in6_soiiupdate(ifp);
1988 		}
1989 
1990 #endif	/* INET6 */
1991 
1992 #ifdef MPLS
1993 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
1994 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
1995 			s = splnet();
1996 			ifp->if_xflags |= IFXF_MPLS;
1997 			ifp->if_ll_output = ifp->if_output;
1998 			ifp->if_output = mpls_output;
1999 			splx(s);
2000 		}
2001 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
2002 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
2003 			s = splnet();
2004 			ifp->if_xflags &= ~IFXF_MPLS;
2005 			ifp->if_output = ifp->if_ll_output;
2006 			ifp->if_ll_output = NULL;
2007 			splx(s);
2008 		}
2009 #endif	/* MPLS */
2010 
2011 #ifndef SMALL_KERNEL
2012 		if (ifp->if_capabilities & IFCAP_WOL) {
2013 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
2014 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
2015 				s = splnet();
2016 				ifp->if_xflags |= IFXF_WOL;
2017 				error = ifp->if_wol(ifp, 1);
2018 				splx(s);
2019 			}
2020 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
2021 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
2022 				s = splnet();
2023 				ifp->if_xflags &= ~IFXF_WOL;
2024 				error = ifp->if_wol(ifp, 0);
2025 				splx(s);
2026 			}
2027 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
2028 			ifr->ifr_flags &= ~IFXF_WOL;
2029 			error = ENOTSUP;
2030 		}
2031 #endif
2032 
2033 		if (error == 0)
2034 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
2035 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
2036 		NET_UNLOCK();
2037 		break;
2038 
2039 	case SIOCSIFMETRIC:
2040 		if ((error = suser(p)) != 0)
2041 			break;
2042 		NET_LOCK();
2043 		ifp->if_metric = ifr->ifr_metric;
2044 		NET_UNLOCK();
2045 		break;
2046 
2047 	case SIOCSIFMTU:
2048 		if ((error = suser(p)) != 0)
2049 			break;
2050 		NET_LOCK();
2051 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2052 		NET_UNLOCK();
2053 		if (!error)
2054 			rtm_ifchg(ifp);
2055 		break;
2056 
2057 	case SIOCSIFDESCR:
2058 		if ((error = suser(p)) != 0)
2059 			break;
2060 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2061 		    IFDESCRSIZE, &bytesdone);
2062 		if (error == 0) {
2063 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2064 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2065 		}
2066 		break;
2067 
2068 	case SIOCSIFRTLABEL:
2069 		if ((error = suser(p)) != 0)
2070 			break;
2071 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2072 		    RTLABEL_LEN, &bytesdone);
2073 		if (error == 0) {
2074 			rtlabel_unref(ifp->if_rtlabelid);
2075 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2076 		}
2077 		break;
2078 
2079 	case SIOCSIFPRIORITY:
2080 		if ((error = suser(p)) != 0)
2081 			break;
2082 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2083 			error = EINVAL;
2084 			break;
2085 		}
2086 		ifp->if_priority = ifr->ifr_metric;
2087 		break;
2088 
2089 	case SIOCSIFRDOMAIN:
2090 		if ((error = suser(p)) != 0)
2091 			break;
2092 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2093 		if (!error || error == EEXIST) {
2094 			NET_LOCK();
2095 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2096 			NET_UNLOCK();
2097 		}
2098 		break;
2099 
2100 	case SIOCAIFGROUP:
2101 		if ((error = suser(p)))
2102 			break;
2103 		NET_LOCK();
2104 		error = if_addgroup(ifp, ifgr->ifgr_group);
2105 		if (error == 0) {
2106 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2107 			if (error == ENOTTY)
2108 				error = 0;
2109 		}
2110 		NET_UNLOCK();
2111 		break;
2112 
2113 	case SIOCDIFGROUP:
2114 		if ((error = suser(p)))
2115 			break;
2116 		NET_LOCK();
2117 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2118 		if (error == ENOTTY)
2119 			error = 0;
2120 		if (error == 0)
2121 			error = if_delgroup(ifp, ifgr->ifgr_group);
2122 		NET_UNLOCK();
2123 		break;
2124 
2125 	case SIOCSIFLLADDR:
2126 		if ((error = suser(p)))
2127 			break;
2128 		if ((ifp->if_sadl == NULL) ||
2129 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2130 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2131 			error = EINVAL;
2132 			break;
2133 		}
2134 		NET_LOCK();
2135 		switch (ifp->if_type) {
2136 		case IFT_ETHER:
2137 		case IFT_CARP:
2138 		case IFT_XETHER:
2139 		case IFT_ISO88025:
2140 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2141 			if (error == ENOTTY)
2142 				error = 0;
2143 			if (error == 0)
2144 				error = if_setlladdr(ifp,
2145 				    ifr->ifr_addr.sa_data);
2146 			break;
2147 		default:
2148 			error = ENODEV;
2149 		}
2150 
2151 		if (error == 0)
2152 			ifnewlladdr(ifp);
2153 		NET_UNLOCK();
2154 		break;
2155 
2156 	case SIOCSIFLLPRIO:
2157 		if ((error = suser(p)))
2158 			break;
2159 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2160 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2161 			error = EINVAL;
2162 			break;
2163 		}
2164 		NET_LOCK();
2165 		ifp->if_llprio = ifr->ifr_llprio;
2166 		NET_UNLOCK();
2167 		break;
2168 
2169 	case SIOCGIFSFFPAGE:
2170 		error = suser(p);
2171 		if (error != 0)
2172 			break;
2173 
2174 		error = if_sffpage_check(data);
2175 		if (error != 0)
2176 			break;
2177 
2178 		/* don't take NET_LOCK because i2c reads take a long time */
2179 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2180 		break;
2181 
2182 	case SIOCSETKALIVE:
2183 	case SIOCDIFPHYADDR:
2184 	case SIOCSLIFPHYADDR:
2185 	case SIOCSLIFPHYRTABLE:
2186 	case SIOCSLIFPHYTTL:
2187 	case SIOCSLIFPHYDF:
2188 	case SIOCSLIFPHYECN:
2189 	case SIOCADDMULTI:
2190 	case SIOCDELMULTI:
2191 	case SIOCSIFMEDIA:
2192 	case SIOCSVNETID:
2193 	case SIOCSVNETFLOWID:
2194 	case SIOCSTXHPRIO:
2195 	case SIOCSRXHPRIO:
2196 	case SIOCSIFPAIR:
2197 	case SIOCSIFPARENT:
2198 	case SIOCDIFPARENT:
2199 	case SIOCSETMPWCFG:
2200 	case SIOCSETLABEL:
2201 	case SIOCDELLABEL:
2202 	case SIOCSPWE3CTRLWORD:
2203 	case SIOCSPWE3FAT:
2204 	case SIOCSPWE3NEIGHBOR:
2205 	case SIOCDPWE3NEIGHBOR:
2206 		if ((error = suser(p)) != 0)
2207 			break;
2208 		/* FALLTHROUGH */
2209 	default:
2210 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
2211 			(struct mbuf *) cmd, (struct mbuf *) data,
2212 			(struct mbuf *) ifp, p));
2213 		if (error == EOPNOTSUPP) {
2214 			NET_LOCK();
2215 			error = ((*ifp->if_ioctl)(ifp, cmd, data));
2216 			NET_UNLOCK();
2217 		}
2218 		break;
2219 	}
2220 
2221 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
2222 		rtm_ifchg(ifp);
2223 
2224 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2225 		getmicrotime(&ifp->if_lastchange);
2226 
2227 	return (error);
2228 }
2229 
2230 int
2231 ifioctl_get(u_long cmd, caddr_t data)
2232 {
2233 	struct ifnet *ifp;
2234 	struct ifreq *ifr = (struct ifreq *)data;
2235 	char ifdescrbuf[IFDESCRSIZE];
2236 	char ifrtlabelbuf[RTLABEL_LEN];
2237 	int error = 0;
2238 	size_t bytesdone;
2239 	const char *label;
2240 
2241 	switch(cmd) {
2242 	case SIOCGIFCONF:
2243 		NET_RLOCK();
2244 		error = ifconf(data);
2245 		NET_RUNLOCK();
2246 		return (error);
2247 	case SIOCIFGCLONERS:
2248 		error = if_clone_list((struct if_clonereq *)data);
2249 		return (error);
2250 	case SIOCGIFGMEMB:
2251 		NET_RLOCK();
2252 		error = if_getgroupmembers(data);
2253 		NET_RUNLOCK();
2254 		return (error);
2255 	case SIOCGIFGATTR:
2256 		NET_RLOCK();
2257 		error = if_getgroupattribs(data);
2258 		NET_RUNLOCK();
2259 		return (error);
2260 	case SIOCGIFGLIST:
2261 		NET_RLOCK();
2262 		error = if_getgrouplist(data);
2263 		NET_RUNLOCK();
2264 		return (error);
2265 	}
2266 
2267 	ifp = ifunit(ifr->ifr_name);
2268 	if (ifp == NULL)
2269 		return (ENXIO);
2270 
2271 	NET_RLOCK();
2272 
2273 	switch(cmd) {
2274 	case SIOCGIFFLAGS:
2275 		ifr->ifr_flags = ifp->if_flags;
2276 		if (ifq_is_oactive(&ifp->if_snd))
2277 			ifr->ifr_flags |= IFF_OACTIVE;
2278 		break;
2279 
2280 	case SIOCGIFXFLAGS:
2281 		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
2282 		break;
2283 
2284 	case SIOCGIFMETRIC:
2285 		ifr->ifr_metric = ifp->if_metric;
2286 		break;
2287 
2288 	case SIOCGIFMTU:
2289 		ifr->ifr_mtu = ifp->if_mtu;
2290 		break;
2291 
2292 	case SIOCGIFHARDMTU:
2293 		ifr->ifr_hardmtu = ifp->if_hardmtu;
2294 		break;
2295 
2296 	case SIOCGIFDATA: {
2297 		struct if_data ifdata;
2298 		if_getdata(ifp, &ifdata);
2299 		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
2300 		break;
2301 	}
2302 
2303 	case SIOCGIFDESCR:
2304 		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
2305 		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
2306 		    &bytesdone);
2307 		break;
2308 
2309 	case SIOCGIFRTLABEL:
2310 		if (ifp->if_rtlabelid &&
2311 		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
2312 			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
2313 			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
2314 			    RTLABEL_LEN, &bytesdone);
2315 		} else
2316 			error = ENOENT;
2317 		break;
2318 
2319 	case SIOCGIFPRIORITY:
2320 		ifr->ifr_metric = ifp->if_priority;
2321 		break;
2322 
2323 	case SIOCGIFRDOMAIN:
2324 		ifr->ifr_rdomainid = ifp->if_rdomain;
2325 		break;
2326 
2327 	case SIOCGIFGROUP:
2328 		error = if_getgroup(data, ifp);
2329 		break;
2330 
2331 	case SIOCGIFLLPRIO:
2332 		ifr->ifr_llprio = ifp->if_llprio;
2333 		break;
2334 
2335 	default:
2336 		panic("invalid ioctl %lu", cmd);
2337 	}
2338 
2339 	NET_RUNLOCK();
2340 
2341 	return (error);
2342 }
2343 
2344 static int
2345 if_sffpage_check(const caddr_t data)
2346 {
2347 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2348 
2349 	switch (sff->sff_addr) {
2350 	case IFSFF_ADDR_EEPROM:
2351 	case IFSFF_ADDR_DDM:
2352 		break;
2353 	default:
2354 		return (EINVAL);
2355 	}
2356 
2357 	return (0);
2358 }
2359 
2360 int
2361 if_txhprio_l2_check(int hdrprio)
2362 {
2363 	switch (hdrprio) {
2364 	case IF_HDRPRIO_PACKET:
2365 		return (0);
2366 	default:
2367 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2368 			return (0);
2369 		break;
2370 	}
2371 
2372 	return (EINVAL);
2373 }
2374 
2375 int
2376 if_txhprio_l3_check(int hdrprio)
2377 {
2378 	switch (hdrprio) {
2379 	case IF_HDRPRIO_PACKET:
2380 	case IF_HDRPRIO_PAYLOAD:
2381 		return (0);
2382 	default:
2383 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2384 			return (0);
2385 		break;
2386 	}
2387 
2388 	return (EINVAL);
2389 }
2390 
2391 int
2392 if_rxhprio_l2_check(int hdrprio)
2393 {
2394 	switch (hdrprio) {
2395 	case IF_HDRPRIO_PACKET:
2396 	case IF_HDRPRIO_OUTER:
2397 		return (0);
2398 	default:
2399 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2400 			return (0);
2401 		break;
2402 	}
2403 
2404 	return (EINVAL);
2405 }
2406 
2407 int
2408 if_rxhprio_l3_check(int hdrprio)
2409 {
2410 	switch (hdrprio) {
2411 	case IF_HDRPRIO_PACKET:
2412 	case IF_HDRPRIO_PAYLOAD:
2413 	case IF_HDRPRIO_OUTER:
2414 		return (0);
2415 	default:
2416 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2417 			return (0);
2418 		break;
2419 	}
2420 
2421 	return (EINVAL);
2422 }
2423 
2424 /*
2425  * Return interface configuration
2426  * of system.  List may be used
2427  * in later ioctl's (above) to get
2428  * other information.
2429  */
2430 int
2431 ifconf(caddr_t data)
2432 {
2433 	struct ifconf *ifc = (struct ifconf *)data;
2434 	struct ifnet *ifp;
2435 	struct ifaddr *ifa;
2436 	struct ifreq ifr, *ifrp;
2437 	int space = ifc->ifc_len, error = 0;
2438 
2439 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2440 	if (space == 0) {
2441 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2442 			struct sockaddr *sa;
2443 
2444 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2445 				space += sizeof (ifr);
2446 			else
2447 				TAILQ_FOREACH(ifa,
2448 				    &ifp->if_addrlist, ifa_list) {
2449 					sa = ifa->ifa_addr;
2450 					if (sa->sa_len > sizeof(*sa))
2451 						space += sa->sa_len -
2452 						    sizeof(*sa);
2453 					space += sizeof(ifr);
2454 				}
2455 		}
2456 		ifc->ifc_len = space;
2457 		return (0);
2458 	}
2459 
2460 	ifrp = ifc->ifc_req;
2461 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2462 		if (space < sizeof(ifr))
2463 			break;
2464 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2465 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2466 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2467 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2468 			    sizeof(ifr));
2469 			if (error)
2470 				break;
2471 			space -= sizeof (ifr), ifrp++;
2472 		} else
2473 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2474 				struct sockaddr *sa = ifa->ifa_addr;
2475 
2476 				if (space < sizeof(ifr))
2477 					break;
2478 				if (sa->sa_len <= sizeof(*sa)) {
2479 					ifr.ifr_addr = *sa;
2480 					error = copyout((caddr_t)&ifr,
2481 					    (caddr_t)ifrp, sizeof (ifr));
2482 					ifrp++;
2483 				} else {
2484 					space -= sa->sa_len - sizeof(*sa);
2485 					if (space < sizeof (ifr))
2486 						break;
2487 					error = copyout((caddr_t)&ifr,
2488 					    (caddr_t)ifrp,
2489 					    sizeof(ifr.ifr_name));
2490 					if (error == 0)
2491 						error = copyout((caddr_t)sa,
2492 						    (caddr_t)&ifrp->ifr_addr,
2493 						    sa->sa_len);
2494 					ifrp = (struct ifreq *)(sa->sa_len +
2495 					    (caddr_t)&ifrp->ifr_addr);
2496 				}
2497 				if (error)
2498 					break;
2499 				space -= sizeof (ifr);
2500 			}
2501 	}
2502 	ifc->ifc_len -= space;
2503 	return (error);
2504 }
2505 
2506 void
2507 if_counters_alloc(struct ifnet *ifp)
2508 {
2509 	KASSERT(ifp->if_counters == NULL);
2510 
2511 	ifp->if_counters = counters_alloc(ifc_ncounters);
2512 }
2513 
2514 void
2515 if_counters_free(struct ifnet *ifp)
2516 {
2517 	KASSERT(ifp->if_counters != NULL);
2518 
2519 	counters_free(ifp->if_counters, ifc_ncounters);
2520 	ifp->if_counters = NULL;
2521 }
2522 
2523 void
2524 if_getdata(struct ifnet *ifp, struct if_data *data)
2525 {
2526 	unsigned int i;
2527 
2528 	*data = ifp->if_data;
2529 
2530 	if (ifp->if_counters != NULL) {
2531 		uint64_t counters[ifc_ncounters];
2532 
2533 		counters_read(ifp->if_counters, counters, nitems(counters));
2534 
2535 		data->ifi_ipackets += counters[ifc_ipackets];
2536 		data->ifi_ierrors += counters[ifc_ierrors];
2537 		data->ifi_opackets += counters[ifc_opackets];
2538 		data->ifi_oerrors += counters[ifc_oerrors];
2539 		data->ifi_collisions += counters[ifc_collisions];
2540 		data->ifi_ibytes += counters[ifc_ibytes];
2541 		data->ifi_obytes += counters[ifc_obytes];
2542 		data->ifi_imcasts += counters[ifc_imcasts];
2543 		data->ifi_omcasts += counters[ifc_omcasts];
2544 		data->ifi_iqdrops += counters[ifc_iqdrops];
2545 		data->ifi_oqdrops += counters[ifc_oqdrops];
2546 		data->ifi_noproto += counters[ifc_noproto];
2547 	}
2548 
2549 	for (i = 0; i < ifp->if_nifqs; i++) {
2550 		struct ifqueue *ifq = ifp->if_ifqs[i];
2551 
2552 		ifq_add_data(ifq, data);
2553 	}
2554 
2555 	for (i = 0; i < ifp->if_niqs; i++) {
2556 		struct ifiqueue *ifiq = ifp->if_iqs[i];
2557 
2558 		ifiq_add_data(ifiq, data);
2559 	}
2560 }
2561 
/*
 * Dummy functions replaced in ifnet during detach (if protocols decide to
 * fiddle with the if during detach).
 */
/* Dummy start routine: throws away whatever is queued on ifq. */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2571 
2572 int
2573 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2574 {
2575 	return ENODEV;
2576 }
2577 
2578 /*
2579  * Create interface group without members
2580  */
2581 struct ifg_group *
2582 if_creategroup(const char *groupname)
2583 {
2584 	struct ifg_group	*ifg;
2585 
2586 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2587 		return (NULL);
2588 
2589 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2590 	ifg->ifg_refcnt = 0;
2591 	ifg->ifg_carp_demoted = 0;
2592 	TAILQ_INIT(&ifg->ifg_members);
2593 #if NPF > 0
2594 	pfi_attach_ifgroup(ifg);
2595 #endif
2596 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2597 
2598 	return (ifg);
2599 }
2600 
2601 /*
2602  * Add a group to an interface
2603  */
2604 int
2605 if_addgroup(struct ifnet *ifp, const char *groupname)
2606 {
2607 	struct ifg_list		*ifgl;
2608 	struct ifg_group	*ifg = NULL;
2609 	struct ifg_member	*ifgm;
2610 
2611 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2612 	    groupname[strlen(groupname) - 1] <= '9')
2613 		return (EINVAL);
2614 
2615 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2616 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2617 			return (EEXIST);
2618 
2619 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2620 		return (ENOMEM);
2621 
2622 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2623 		free(ifgl, M_TEMP, sizeof(*ifgl));
2624 		return (ENOMEM);
2625 	}
2626 
2627 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2628 		if (!strcmp(ifg->ifg_group, groupname))
2629 			break;
2630 
2631 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
2632 		free(ifgl, M_TEMP, sizeof(*ifgl));
2633 		free(ifgm, M_TEMP, sizeof(*ifgm));
2634 		return (ENOMEM);
2635 	}
2636 
2637 	ifg->ifg_refcnt++;
2638 	ifgl->ifgl_group = ifg;
2639 	ifgm->ifgm_ifp = ifp;
2640 
2641 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2642 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2643 
2644 #if NPF > 0
2645 	pfi_group_addmember(groupname, ifp);
2646 #endif
2647 
2648 	return (0);
2649 }
2650 
2651 /*
2652  * Remove a group from an interface
2653  */
2654 int
2655 if_delgroup(struct ifnet *ifp, const char *groupname)
2656 {
2657 	struct ifg_list		*ifgl;
2658 	struct ifg_member	*ifgm;
2659 
2660 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2661 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2662 			break;
2663 	if (ifgl == NULL)
2664 		return (ENOENT);
2665 
2666 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2667 
2668 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2669 		if (ifgm->ifgm_ifp == ifp)
2670 			break;
2671 
2672 	if (ifgm != NULL) {
2673 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2674 		free(ifgm, M_TEMP, sizeof(*ifgm));
2675 	}
2676 
2677 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2678 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2679 #if NPF > 0
2680 		pfi_detach_ifgroup(ifgl->ifgl_group);
2681 #endif
2682 		free(ifgl->ifgl_group, M_TEMP, 0);
2683 	}
2684 
2685 	free(ifgl, M_TEMP, sizeof(*ifgl));
2686 
2687 #if NPF > 0
2688 	pfi_group_change(groupname);
2689 #endif
2690 
2691 	return (0);
2692 }
2693 
2694 /*
2695  * Stores all groups from an interface in memory pointed
2696  * to by data
2697  */
2698 int
2699 if_getgroup(caddr_t data, struct ifnet *ifp)
2700 {
2701 	int			 len, error;
2702 	struct ifg_list		*ifgl;
2703 	struct ifg_req		 ifgrq, *ifgp;
2704 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2705 
2706 	if (ifgr->ifgr_len == 0) {
2707 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2708 			ifgr->ifgr_len += sizeof(struct ifg_req);
2709 		return (0);
2710 	}
2711 
2712 	len = ifgr->ifgr_len;
2713 	ifgp = ifgr->ifgr_groups;
2714 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2715 		if (len < sizeof(ifgrq))
2716 			return (EINVAL);
2717 		bzero(&ifgrq, sizeof ifgrq);
2718 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2719 		    sizeof(ifgrq.ifgrq_group));
2720 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2721 		    sizeof(struct ifg_req))))
2722 			return (error);
2723 		len -= sizeof(ifgrq);
2724 		ifgp++;
2725 	}
2726 
2727 	return (0);
2728 }
2729 
2730 /*
2731  * Stores all members of a group in memory pointed to by data
2732  */
2733 int
2734 if_getgroupmembers(caddr_t data)
2735 {
2736 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2737 	struct ifg_group	*ifg;
2738 	struct ifg_member	*ifgm;
2739 	struct ifg_req		 ifgrq, *ifgp;
2740 	int			 len, error;
2741 
2742 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2743 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2744 			break;
2745 	if (ifg == NULL)
2746 		return (ENOENT);
2747 
2748 	if (ifgr->ifgr_len == 0) {
2749 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2750 			ifgr->ifgr_len += sizeof(ifgrq);
2751 		return (0);
2752 	}
2753 
2754 	len = ifgr->ifgr_len;
2755 	ifgp = ifgr->ifgr_groups;
2756 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2757 		if (len < sizeof(ifgrq))
2758 			return (EINVAL);
2759 		bzero(&ifgrq, sizeof ifgrq);
2760 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2761 		    sizeof(ifgrq.ifgrq_member));
2762 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2763 		    sizeof(struct ifg_req))))
2764 			return (error);
2765 		len -= sizeof(ifgrq);
2766 		ifgp++;
2767 	}
2768 
2769 	return (0);
2770 }
2771 
2772 int
2773 if_getgroupattribs(caddr_t data)
2774 {
2775 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2776 	struct ifg_group	*ifg;
2777 
2778 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2779 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2780 			break;
2781 	if (ifg == NULL)
2782 		return (ENOENT);
2783 
2784 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2785 
2786 	return (0);
2787 }
2788 
2789 int
2790 if_setgroupattribs(caddr_t data)
2791 {
2792 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2793 	struct ifg_group	*ifg;
2794 	struct ifg_member	*ifgm;
2795 	int			 demote;
2796 
2797 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2798 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2799 			break;
2800 	if (ifg == NULL)
2801 		return (ENOENT);
2802 
2803 	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
2804 	if (demote + ifg->ifg_carp_demoted > 0xff ||
2805 	    demote + ifg->ifg_carp_demoted < 0)
2806 		return (EINVAL);
2807 
2808 	ifg->ifg_carp_demoted += demote;
2809 
2810 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2811 		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);
2812 
2813 	return (0);
2814 }
2815 
2816 /*
2817  * Stores all groups in memory pointed to by data
2818  */
2819 int
2820 if_getgrouplist(caddr_t data)
2821 {
2822 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2823 	struct ifg_group	*ifg;
2824 	struct ifg_req		 ifgrq, *ifgp;
2825 	int			 len, error;
2826 
2827 	if (ifgr->ifgr_len == 0) {
2828 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2829 			ifgr->ifgr_len += sizeof(ifgrq);
2830 		return (0);
2831 	}
2832 
2833 	len = ifgr->ifgr_len;
2834 	ifgp = ifgr->ifgr_groups;
2835 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2836 		if (len < sizeof(ifgrq))
2837 			return (EINVAL);
2838 		bzero(&ifgrq, sizeof ifgrq);
2839 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2840                     sizeof(ifgrq.ifgrq_group));
2841 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2842                     sizeof(struct ifg_req))))
2843 			return (error);
2844 		len -= sizeof(ifgrq);
2845 		ifgp++;
2846 	}
2847 
2848 	return (0);
2849 }
2850 
2851 void
2852 if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
2853 {
2854 	switch (dst->sa_family) {
2855 	case AF_INET:
2856 		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
2857 		    mask && (mask->sa_len == 0 ||
2858 		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
2859 			if_group_egress_build();
2860 		break;
2861 #ifdef INET6
2862 	case AF_INET6:
2863 		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
2864 		    &in6addr_any) && mask && (mask->sa_len == 0 ||
2865 		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
2866 		    &in6addr_any)))
2867 			if_group_egress_build();
2868 		break;
2869 #endif
2870 	}
2871 }
2872 
2873 int
2874 if_group_egress_build(void)
2875 {
2876 	struct ifnet		*ifp;
2877 	struct ifg_group	*ifg;
2878 	struct ifg_member	*ifgm, *next;
2879 	struct sockaddr_in	 sa_in;
2880 #ifdef INET6
2881 	struct sockaddr_in6	 sa_in6;
2882 #endif
2883 	struct rtentry		*rt;
2884 
2885 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2886 		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
2887 			break;
2888 
2889 	if (ifg != NULL)
2890 		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
2891 			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);
2892 
2893 	bzero(&sa_in, sizeof(sa_in));
2894 	sa_in.sin_len = sizeof(sa_in);
2895 	sa_in.sin_family = AF_INET;
2896 	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
2897 	while (rt != NULL) {
2898 		ifp = if_get(rt->rt_ifidx);
2899 		if (ifp != NULL) {
2900 			if_addgroup(ifp, IFG_EGRESS);
2901 			if_put(ifp);
2902 		}
2903 		rt = rtable_iterate(rt);
2904 	}
2905 
2906 #ifdef INET6
2907 	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
2908 	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
2909 	    RTP_ANY);
2910 	while (rt != NULL) {
2911 		ifp = if_get(rt->rt_ifidx);
2912 		if (ifp != NULL) {
2913 			if_addgroup(ifp, IFG_EGRESS);
2914 			if_put(ifp);
2915 		}
2916 		rt = rtable_iterate(rt);
2917 	}
2918 #endif /* INET6 */
2919 
2920 	return (0);
2921 }
2922 
2923 /*
2924  * Set/clear promiscuous mode on interface ifp based on the truth value
2925  * of pswitch.  The calls are reference counted so that only the first
2926  * "on" request actually has an effect, as does the final "off" request.
2927  * Results are undefined if the "off" and "on" requests are not matched.
2928  */
2929 int
2930 ifpromisc(struct ifnet *ifp, int pswitch)
2931 {
2932 	struct ifreq ifr;
2933 	unsigned short oif_flags;
2934 	int oif_pcount, error;
2935 
2936 	oif_flags = ifp->if_flags;
2937 	oif_pcount = ifp->if_pcount;
2938 	if (pswitch) {
2939 		if (ifp->if_pcount++ != 0)
2940 			return (0);
2941 		ifp->if_flags |= IFF_PROMISC;
2942 	} else {
2943 		if (--ifp->if_pcount > 0)
2944 			return (0);
2945 		ifp->if_flags &= ~IFF_PROMISC;
2946 	}
2947 
2948 	if ((ifp->if_flags & IFF_UP) == 0)
2949 		return (0);
2950 
2951 	memset(&ifr, 0, sizeof(ifr));
2952 	ifr.ifr_flags = ifp->if_flags;
2953 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
2954 	if (error) {
2955 		ifp->if_flags = oif_flags;
2956 		ifp->if_pcount = oif_pcount;
2957 	}
2958 
2959 	return (error);
2960 }
2961 
/* Append ifa to the tail of the address list of ifp. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2967 
/* Unlink ifa from the address list of ifp; ifa itself is not freed here. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2973 
2974 void
2975 ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
2976 {
2977 	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
2978 		panic("ifa_update_broadaddr does not support dynamic length");
2979 	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
2980 }
2981 
#ifdef DDB
/*
 * debug function, can be called from ddb>
 *
 * Prints one line per address of every interface, in the form
 * "<address> on <interface>".  Only AF_INET (and AF_INET6 when compiled
 * in) addresses are formatted; for any other family the line starts
 * with " on".
 */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	char buf[INET6_ADDRSTRLEN];

	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			struct sockaddr *sa = ifa->ifa_addr;

			switch (sa->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(sa)->sin_addr,
				    buf, sizeof(buf)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(sa))->sin6_addr,
				    buf, sizeof(buf)));
				break;
#endif
			default:
				break;
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
3013 
3014 void
3015 ifnewlladdr(struct ifnet *ifp)
3016 {
3017 #ifdef INET6
3018 	struct ifaddr *ifa;
3019 #endif
3020 	struct ifreq ifrq;
3021 	short up;
3022 	int s;
3023 
3024 	s = splnet();
3025 	up = ifp->if_flags & IFF_UP;
3026 
3027 	if (up) {
3028 		/* go down for a moment... */
3029 		ifp->if_flags &= ~IFF_UP;
3030 		ifrq.ifr_flags = ifp->if_flags;
3031 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3032 	}
3033 
3034 	ifp->if_flags |= IFF_UP;
3035 	ifrq.ifr_flags = ifp->if_flags;
3036 	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3037 
3038 #ifdef INET6
3039 	/*
3040 	 * Update the link-local address.  Don't do it if we're
3041 	 * a router to avoid confusing hosts on the network.
3042 	 */
3043 	if (!ip6_forwarding) {
3044 		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
3045 		if (ifa) {
3046 			in6_purgeaddr(ifa);
3047 			dohooks(ifp->if_addrhooks, 0);
3048 			in6_ifattach(ifp);
3049 		}
3050 	}
3051 #endif
3052 	if (!up) {
3053 		/* go back down */
3054 		ifp->if_flags &= ~IFF_UP;
3055 		ifrq.ifr_flags = ifp->if_flags;
3056 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3057 	}
3058 	splx(s);
3059 }
3060 
3061 int net_ticks;
3062 u_int net_livelocks;
3063 
3064 void
3065 net_tick(void *null)
3066 {
3067 	extern int ticks;
3068 
3069 	if (ticks - net_ticks > 1)
3070 		net_livelocks++;
3071 
3072 	net_ticks = ticks;
3073 
3074 	timeout_add(&net_tick_to, 1);
3075 }
3076 
3077 int
3078 net_livelocked(void)
3079 {
3080 	extern int ticks;
3081 
3082 	return (ticks - net_ticks > 1);
3083 }
3084 
3085 void
3086 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3087 {
3088 	extern int ticks;
3089 
3090 	memset(rxr, 0, sizeof(*rxr));
3091 
3092 	rxr->rxr_adjusted = ticks;
3093 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3094 	rxr->rxr_hwm = hwm;
3095 }
3096 
3097 static inline void
3098 if_rxr_adjust_cwm(struct if_rxring *rxr)
3099 {
3100 	extern int ticks;
3101 
3102 	if (net_livelocked()) {
3103 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3104 			rxr->rxr_cwm--;
3105 		else
3106 			return;
3107 	} else if (rxr->rxr_alive >= rxr->rxr_lwm)
3108 		return;
3109 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3110 		rxr->rxr_cwm++;
3111 
3112 	rxr->rxr_adjusted = ticks;
3113 }
3114 
3115 void
3116 if_rxr_livelocked(struct if_rxring *rxr)
3117 {
3118 	extern int ticks;
3119 
3120 	if (ticks - rxr->rxr_adjusted >= 1) {
3121 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3122 			rxr->rxr_cwm--;
3123 
3124 		rxr->rxr_adjusted = ticks;
3125 	}
3126 }
3127 
3128 u_int
3129 if_rxr_get(struct if_rxring *rxr, u_int max)
3130 {
3131 	extern int ticks;
3132 	u_int diff;
3133 
3134 	if (ticks - rxr->rxr_adjusted >= 1) {
3135 		/* we're free to try for an adjustment */
3136 		if_rxr_adjust_cwm(rxr);
3137 	}
3138 
3139 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3140 		return (0);
3141 
3142 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3143 	rxr->rxr_alive += diff;
3144 
3145 	return (diff);
3146 }
3147 
3148 int
3149 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
3150 {
3151 	struct if_rxrinfo kifri;
3152 	int error;
3153 	u_int n;
3154 
3155 	error = copyin(uifri, &kifri, sizeof(kifri));
3156 	if (error)
3157 		return (error);
3158 
3159 	n = min(t, kifri.ifri_total);
3160 	kifri.ifri_total = t;
3161 
3162 	if (n > 0) {
3163 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
3164 		if (error)
3165 			return (error);
3166 	}
3167 
3168 	return (copyout(&kifri, uifri, sizeof(kifri)));
3169 }
3170 
3171 int
3172 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3173     struct if_rxring *rxr)
3174 {
3175 	struct if_rxring_info ifr;
3176 
3177 	memset(&ifr, 0, sizeof(ifr));
3178 
3179 	if (name != NULL)
3180 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3181 
3182 	ifr.ifr_size = size;
3183 	ifr.ifr_info = *rxr;
3184 
3185 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3186 }
3187 
3188 /*
3189  * Network stack input queues.
3190  */
3191 
3192 void
3193 niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
3194 {
3195 	mq_init(&niq->ni_q, maxlen, IPL_NET);
3196 	niq->ni_isr = isr;
3197 }
3198 
3199 int
3200 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3201 {
3202 	int rv;
3203 
3204 	rv = mq_enqueue(&niq->ni_q, m);
3205 	if (rv == 0)
3206 		schednetisr(niq->ni_isr);
3207 	else
3208 		if_congestion();
3209 
3210 	return (rv);
3211 }
3212 
3213 int
3214 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3215 {
3216 	int rv;
3217 
3218 	rv = mq_enlist(&niq->ni_q, ml);
3219 	if (rv == 0)
3220 		schednetisr(niq->ni_isr);
3221 	else
3222 		if_congestion();
3223 
3224 	return (rv);
3225 }
3226 
/*
 * Panic with a message naming the address family af.
 * Declared __dead: this function does not return.
 */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3232 
3233 /*
3234  * XXXSMP This tunable is here to work around the fact that IPsec
3235  * globals aren't ready to be accessed by multiple threads in
3236  * parallel.
3237  */
3238 int		 nettaskqs = NET_TASKQ;
3239 
3240 struct taskq *
3241 net_tq(unsigned int ifindex)
3242 {
3243 	struct taskq *t = NULL;
3244 
3245 	t = nettqmp[ifindex % nettaskqs];
3246 
3247 	return (t);
3248 }
3249