xref: /openbsd-src/sys/net/if.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: if.c,v 1.581 2019/04/28 22:15:57 mpi Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "trunk.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/percpu.h>
88 #include <sys/proc.h>
89 
90 #include <dev/rndvar.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include <netinet/in.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/igmp.h>
101 #ifdef MROUTING
102 #include <netinet/ip_mroute.h>
103 #endif
104 
105 #ifdef INET6
106 #include <netinet6/in6_var.h>
107 #include <netinet6/in6_ifattach.h>
108 #include <netinet6/nd6.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #endif
112 
113 #ifdef MPLS
114 #include <netmpls/mpls.h>
115 #endif
116 
117 #if NBPFILTER > 0
118 #include <net/bpf.h>
119 #endif
120 
121 #if NBRIDGE > 0
122 #include <net/if_bridge.h>
123 #endif
124 
125 #if NCARP > 0
126 #include <netinet/ip_carp.h>
127 #endif
128 
129 #if NPF > 0
130 #include <net/pfvar.h>
131 #endif
132 
133 #include <sys/device.h>
134 
135 void	if_attachsetup(struct ifnet *);
136 void	if_attachdomain(struct ifnet *);
137 void	if_attach_common(struct ifnet *);
138 int	if_createrdomain(int, struct ifnet *);
139 int	if_setrdomain(struct ifnet *, int);
140 void	if_slowtimo(void *);
141 
142 void	if_detached_qstart(struct ifqueue *);
143 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
144 
145 int	ifioctl_get(u_long, caddr_t);
146 int	ifconf(caddr_t);
147 static int
148 	if_sffpage_check(const caddr_t);
149 
150 int	if_getgroup(caddr_t, struct ifnet *);
151 int	if_getgroupmembers(caddr_t);
152 int	if_getgroupattribs(caddr_t);
153 int	if_setgroupattribs(caddr_t);
154 int	if_getgrouplist(caddr_t);
155 
156 void	if_linkstate(struct ifnet *);
157 void	if_linkstate_task(void *);
158 
159 int	if_clone_list(struct if_clonereq *);
160 struct if_clone	*if_clone_lookup(const char *, int *);
161 
162 int	if_group_egress_build(void);
163 
164 void	if_watchdog_task(void *);
165 
166 void	if_netisr(void *);
167 
168 #ifdef DDB
169 void	ifa_print_all(void);
170 #endif
171 
172 void	if_qstart_compat(struct ifqueue *);
173 
174 /*
175  * interface index map
176  *
177  * the kernel maintains a mapping of interface indexes to struct ifnet
178  * pointers.
179  *
180  * the map is an array of struct ifnet pointers prefixed by an if_map
181  * structure. the if_map structure stores the length of its array.
182  *
183  * as interfaces are attached to the system, the map is grown on demand
184  * up to USHRT_MAX entries.
185  *
186  * interface index 0 is reserved and represents no interface. this
187  * supports the use of the interface index as the scope for IPv6 link
188  * local addresses, where scope 0 means no scope has been specified.
189  * it also supports the use of interface index as the unique identifier
190  * for network interfaces in SNMP applications as per RFC2863. therefore
191  * if_get(0) returns NULL.
192  */
193 
194 void if_ifp_dtor(void *, void *);
195 void if_map_dtor(void *, void *);
196 struct ifnet *if_ref(struct ifnet *);
197 
198 /*
199  * struct if_map
200  *
201  * bounded array of ifnet srp pointers used to fetch references of live
202  * interfaces with if_get().
203  */
204 
205 struct if_map {
206 	unsigned long		 limit;	/* number of srp slots that follow */
207 	/* followed by limit ifnet srp pointers */
208 };
209 
210 /*
211  * struct if_idxmap
212  *
213  * infrastructure to manage updates and accesses to the current if_map.
214  */
215 
216 struct if_idxmap {
217 	unsigned int		 serial;	/* next candidate index (never 0) */
218 	unsigned int		 count;		/* interfaces currently attached */
219 	struct srp		 map;		/* srp pointer to the live if_map */
220 };
221 
222 void	if_idxmap_init(unsigned int);
223 void	if_idxmap_insert(struct ifnet *);
224 void	if_idxmap_remove(struct ifnet *);
225 
/* all interface groups known to the system */
226 TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
227 
/* registered clonable interface types and their count */
228 LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
229 int if_cloners_count;
230 
/* periodic timeout; presumably feeds net_livelocked() — see net_tick() */
231 struct timeout net_tick_to;
232 void	net_tick(void *);
233 int	net_livelocked(void);
234 int	ifq_congestion;
235 
/* bitmask of pending NETISR_* work consumed by if_netisr() */
236 int		 netisr;
237 
238 #define	NET_TASKQ	1
239 struct taskq	*nettqmp[NET_TASKQ];
240 
241 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
242 
243 /*
244  * Serialize socket operations to ensure no new sleeping points
245  * are introduced in IP output paths.
246  */
247 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
248 
249 /*
250  * Network interface utility routines.
251  */
/*
 * Bootstrap the file's network plumbing: the interface index map,
 * the softnet task queue(s) and the periodic net_tick timeout.
 * Called once at boot before any interface attaches.
 */
252 void
253 ifinit(void)
254 {
255 	unsigned int	i;
256 
257 	/*
258 	 * most machines boot with 4 or 5 interfaces, so size the initial map
259 	 * to accommodate this
260 	 */
261 	if_idxmap_init(8);
262 
263 	timeout_set(&net_tick_to, net_tick, &net_tick_to);
264 
265 	for (i = 0; i < NET_TASKQ; i++) {
266 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
267 		if (nettqmp[i] == NULL)
268 			panic("unable to create network taskq %d", i);
269 	}
270 
	/* prime the timeout; net_tick() is expected to reschedule itself */
271 	net_tick(&net_tick_to);
272 }
273 
/*
 * the global index map state.  serial starts at 0 here but
 * if_idxmap_init() bumps it to 1 so index 0 is never handed out.
 */
274 static struct if_idxmap if_idxmap = {
275 	0,
276 	0,
277 	SRP_INITIALIZER()
278 };
279 
/* srp garbage collectors: per-slot ifnet refs and whole retired maps */
280 struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
281 struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);
282 
/* list of all attached interfaces */
283 struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
284 
/*
 * Allocate and publish the initial interface index map with room for
 * ``limit'' entries.  Runs once from ifinit(), before interrupts can
 * race with it.
 */
285 void
286 if_idxmap_init(unsigned int limit)
287 {
288 	struct if_map *if_map;
289 	struct srp *map;
290 	unsigned int i;
291 
292 	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
293 
	/* the srp slot array lives immediately after the header */
294 	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
295 	    M_IFADDR, M_WAITOK);
296 
297 	if_map->limit = limit;
298 	map = (struct srp *)(if_map + 1);
299 	for (i = 0; i < limit; i++)
300 		srp_init(&map[i]);
301 
302 	/* this is called early so there's nothing to race with */
303 	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
304 }
305 
/*
 * Allocate an interface index for ifp and publish the pointer in the
 * index map, doubling the map when the candidate index falls outside
 * it.  The map keeps its own reference on ifp (via if_ref()); the
 * kernel lock serialises all modifications.
 */
306 void
307 if_idxmap_insert(struct ifnet *ifp)
308 {
309 	struct if_map *if_map;
310 	struct srp *map;
311 	unsigned int index, i;
312 
313 	refcnt_init(&ifp->if_refcnt);
314 
315 	/* the kernel lock guarantees serialised modifications to if_idxmap */
316 	KERNEL_ASSERT_LOCKED();
317 
318 	if (++if_idxmap.count > USHRT_MAX)
319 		panic("too many interfaces");
320 
321 	if_map = srp_get_locked(&if_idxmap.map);
322 	map = (struct srp *)(if_map + 1);
323 
	/* indexes are 16 bit; serial wraps around through the mask */
324 	index = if_idxmap.serial++ & USHRT_MAX;
325 
326 	if (index >= if_map->limit) {
327 		struct if_map *nif_map;
328 		struct srp *nmap;
329 		unsigned int nlimit;
330 		struct ifnet *nifp;
331 
332 		nlimit = if_map->limit * 2;
333 		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
334 		    M_IFADDR, M_WAITOK);
335 		nmap = (struct srp *)(nif_map + 1);
336 
		/* copy live entries across, taking a new ref for each */
337 		nif_map->limit = nlimit;
338 		for (i = 0; i < if_map->limit; i++) {
339 			srp_init(&nmap[i]);
340 			nifp = srp_get_locked(&map[i]);
341 			if (nifp != NULL) {
342 				srp_update_locked(&if_ifp_gc, &nmap[i],
343 				    if_ref(nifp));
344 			}
345 		}
346 
		/* i == old limit here; initialise the freshly added slots */
347 		while (i < nlimit) {
348 			srp_init(&nmap[i]);
349 			i++;
350 		}
351 
		/* retire the old map; if_map_gc frees it once readers drain */
352 		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
353 		if_map = nif_map;
354 		map = nmap;
355 	}
356 
357 	/* pick the next free index */
358 	for (i = 0; i < USHRT_MAX; i++) {
359 		if (index != 0 && srp_get_locked(&map[index]) == NULL)
360 			break;
361 
362 		index = if_idxmap.serial++ & USHRT_MAX;
363 	}
364 
365 	/* commit */
366 	ifp->if_index = index;
367 	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
368 }
369 
/*
 * Unpublish ifp from the index map and wait until every other CPU has
 * dropped its reference, so the caller may safely start tearing the
 * interface down.  Must be called with the kernel lock held.
 */
370 void
371 if_idxmap_remove(struct ifnet *ifp)
372 {
373 	struct if_map *if_map;
374 	struct srp *map;
375 	unsigned int index;
376 
377 	index = ifp->if_index;
378 
379 	/* the kernel lock guarantees serialised modifications to if_idxmap */
380 	KERNEL_ASSERT_LOCKED();
381 
382 	if_map = srp_get_locked(&if_idxmap.map);
383 	KASSERT(index < if_map->limit);
384 
385 	map = (struct srp *)(if_map + 1);
386 	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));
387 
	/* clearing the slot makes if_get(index) return NULL from now on */
388 	srp_update_locked(&if_ifp_gc, &map[index], NULL);
389 	if_idxmap.count--;
390 	/* end of if_idxmap modifications */
391 
392 	/* sleep until the last reference is released */
393 	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
394 }
395 
/*
 * srp_gc destructor for index map slots: release the reference the
 * map held on the interface.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put((struct ifnet *)ifp);
}
401 
/*
 * srp_gc destructor for retired if_map arrays: drop every interface
 * reference the map still holds, then free the map itself.
 */
402 void
403 if_map_dtor(void *null, void *m)
404 {
405 	struct if_map *if_map = m;
406 	struct srp *map = (struct srp *)(if_map + 1);
407 	unsigned int i;
408 
409 	/*
410 	 * dont need to serialize the use of update_locked since this is
411 	 * the last reference to this map. there's nothing to race against.
412 	 */
413 	for (i = 0; i < if_map->limit; i++)
414 		srp_update_locked(&if_ifp_gc, &map[i], NULL);
415 
416 	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
417 }
418 
419 /*
420  * Attach an interface to the
421  * list of "active" interfaces.
422  */
/*
 * Second half of interface attachment, run once ifp is on the ifnet
 * list: group membership, per-domain data, watchdog timeout, index
 * allocation, deferred tasks and the routing socket announcement.
 * Requires the net lock.
 */
423 void
424 if_attachsetup(struct ifnet *ifp)
425 {
426 	unsigned long ifidx;
427 
428 	NET_ASSERT_LOCKED();
429 
430 	TAILQ_INIT(&ifp->if_groups);
431 
432 	if_addgroup(ifp, IFG_ALL);
433 
434 	if_attachdomain(ifp);
435 #if NPF > 0
436 	pfi_attach_ifnet(ifp);
437 #endif
438 
439 	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
440 	if_slowtimo(ifp);
441 
442 	if_idxmap_insert(ifp);
443 	KASSERT(if_get(0) == NULL);
444 
445 	ifidx = ifp->if_index;
446 
	/*
	 * the tasks carry the index, not the pointer, so a stale task
	 * that runs after detach just fails its if_get() lookup.
	 */
447 	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
448 	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);
449 
450 	/* Announce the interface. */
451 	rtm_ifannounce(ifp, IFAN_ARRIVAL);
452 }
453 
454 /*
455  * Allocate the link level name for the specified interface.  This
456  * is an attachment helper.  It must be called after ifp->if_addrlen
457  * is initialized, which may not be the case when if_attach() is
458  * called.
459  */
460 void
461 if_alloc_sadl(struct ifnet *ifp)
462 {
463 	unsigned int socksize;
464 	int namelen, masklen;
465 	struct sockaddr_dl *sdl;
466 
467 	/*
468 	 * If the interface already has a link name, release it
469 	 * now.  This is useful for interfaces that can change
470 	 * link types, and thus switch link names often.
471 	 */
472 	if (ifp->if_sadl != NULL)
473 		if_free_sadl(ifp);
474 
	/* size the sockaddr to hold both the name and the link address */
475 	namelen = strlen(ifp->if_xname);
476 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
477 	socksize = masklen + ifp->if_addrlen;
478 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
479 	if (socksize < sizeof(*sdl))
480 		socksize = sizeof(*sdl);
481 	socksize = ROUNDUP(socksize);
482 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
483 	sdl->sdl_len = socksize;
484 	sdl->sdl_family = AF_LINK;
485 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
486 	sdl->sdl_nlen = namelen;
487 	sdl->sdl_alen = ifp->if_addrlen;
488 	sdl->sdl_index = ifp->if_index;
489 	sdl->sdl_type = ifp->if_type;
490 	ifp->if_sadl = sdl;
491 }
492 
493 /*
494  * Free the link level name for the specified interface.  This is
495  * a detach helper.  This is called from if_detach() or from
496  * link layer type specific detach functions.
497  */
498 void
499 if_free_sadl(struct ifnet *ifp)
500 {
	/* size 0: let the allocator look the allocation size up itself */
501 	free(ifp->if_sadl, M_IFADDR, 0);
502 	ifp->if_sadl = NULL;
503 }
504 
/*
 * Give every protocol domain a chance to hang per-interface state off
 * ifp->if_afdata via its dom_ifattach hook.
 */
505 void
506 if_attachdomain(struct ifnet *ifp)
507 {
508 	struct domain *dp;
509 	int i, s;
510 
511 	s = splnet();
512 
513 	/* address family dependent data region */
514 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
515 	for (i = 0; (dp = domains[i]) != NULL; i++) {
516 		if (dp->dom_ifattach)
517 			ifp->if_afdata[dp->dom_family] =
518 			    (*dp->dom_ifattach)(ifp);
519 	}
520 
521 	splx(s);
522 }
523 
/* like if_attach() but insert at the head of the interface list */
524 void
525 if_attachhead(struct ifnet *ifp)
526 {
527 	if_attach_common(ifp);
528 	NET_LOCK();
529 	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
530 	if_attachsetup(ifp);
531 	NET_UNLOCK();
532 }
533 
/* attach ifp to the system: common init, list insertion, final setup */
534 void
535 if_attach(struct ifnet *ifp)
536 {
537 	if_attach_common(ifp);
538 	NET_LOCK();
539 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
540 	if_attachsetup(ifp);
541 	NET_UNLOCK();
542 }
543 
544 void
545 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
546 {
547 	struct ifqueue **map;
548 	struct ifqueue *ifq;
549 	int i;
550 
551 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
552 	KASSERT(nqs != 0);
553 
554 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
555 
556 	ifp->if_snd.ifq_softc = NULL;
557 	map[0] = &ifp->if_snd;
558 
559 	for (i = 1; i < nqs; i++) {
560 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
561 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
562 		ifq_init(ifq, ifp, i);
563 		map[i] = ifq;
564 	}
565 
566 	ifp->if_ifqs = map;
567 	ifp->if_nifqs = nqs;
568 }
569 
/*
 * Give the interface niqs receive queues.  if_rcv stays queue 0; the
 * remaining ifiqueues are allocated and published via if_iqs.
 */
570 void
571 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
572 {
573 	struct ifiqueue **map;
574 	struct ifiqueue *ifiq;
575 	unsigned int i;
576 
577 	KASSERT(niqs != 0);
578 
579 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
580 
581 	ifp->if_rcv.ifiq_softc = NULL;
582 	map[0] = &ifp->if_rcv;
583 
584 	for (i = 1; i < niqs; i++) {
585 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
586 		ifiq_init(ifiq, ifp, i);
587 		map[i] = ifiq;
588 	}
589 
590 	ifp->if_iqs = map;
591 	ifp->if_niqs = niqs;
592 }
593 
/*
 * Driver-independent part of interface attachment that needs no
 * locks: address lists, default single tx/rx queue, hook lists and
 * fallback function pointers.
 */
594 void
595 if_attach_common(struct ifnet *ifp)
596 {
597 	KASSERT(ifp->if_ioctl != NULL);
598 
599 	TAILQ_INIT(&ifp->if_addrlist);
600 	TAILQ_INIT(&ifp->if_maddrlist);
601 
	/*
	 * non-MPSAFE drivers provide if_start; bridge them to the
	 * ifqueue world through if_qstart_compat.
	 */
602 	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
603 		KASSERTMSG(ifp->if_qstart == NULL,
604 		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
605 		ifp->if_qstart = if_qstart_compat;
606 	} else {
607 		KASSERTMSG(ifp->if_start == NULL,
608 		    "%s: if_start set with MPSAFE set", ifp->if_xname);
609 		KASSERTMSG(ifp->if_qstart != NULL,
610 		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
611 	}
612 
613 	ifq_init(&ifp->if_snd, ifp, 0);
614 
	/* single tx queue until if_attach_queues() is called */
615 	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
616 	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
617 	ifp->if_nifqs = 1;
618 	if (ifp->if_txmit == 0)
619 		ifp->if_txmit = IF_TXMIT_DEFAULT;
620 
621 	ifiq_init(&ifp->if_rcv, ifp, 0);
622 
	/* single rx queue until if_attach_iqueues() is called */
623 	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
624 	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
625 	ifp->if_niqs = 1;
626 
627 	ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
628 	    M_TEMP, M_WAITOK);
629 	TAILQ_INIT(ifp->if_addrhooks);
630 	ifp->if_linkstatehooks = malloc(sizeof(*ifp->if_linkstatehooks),
631 	    M_TEMP, M_WAITOK);
632 	TAILQ_INIT(ifp->if_linkstatehooks);
633 	ifp->if_detachhooks = malloc(sizeof(*ifp->if_detachhooks),
634 	    M_TEMP, M_WAITOK);
635 	TAILQ_INIT(ifp->if_detachhooks);
636 
637 	if (ifp->if_rtrequest == NULL)
638 		ifp->if_rtrequest = if_rtrequest_dummy;
639 	if (ifp->if_enqueue == NULL)
640 		ifp->if_enqueue = if_enqueue_ifq;
641 	ifp->if_llprio = IFQ_DEFPRIO;
642 
643 	SRPL_INIT(&ifp->if_inputs);
644 }
645 
646 void
647 if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
648 {
649 	/*
650 	 * only switch the ifq_ops on the first ifq on an interface.
651 	 *
652 	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
653 	 * works on a single ifq. because the code uses the ifq_ops
654 	 * on the first ifq (if_snd) to select a queue for an mbuf,
655 	 * by switching only the first one we change both the algorithm
656 	 * and force the routing of all new packets to it.
657 	 */
658 	ifq_attach(&ifp->if_snd, newops, args);
659 }
660 
/* legacy entry point: only valid for non-MPSAFE (compat) interfaces */
661 void
662 if_start(struct ifnet *ifp)
663 {
664 	KASSERT(ifp->if_qstart == if_qstart_compat);
665 	if_qstart_compat(&ifp->if_snd);
666 }
667 void
668 if_qstart_compat(struct ifqueue *ifq)
669 {
670 	struct ifnet *ifp = ifq->ifq_if;
671 	int s;
672 
673 	/*
674 	 * the stack assumes that an interface can have multiple
675 	 * transmit rings, but a lot of drivers are still written
676 	 * so that interfaces and send rings have a 1:1 mapping.
677 	 * this provides compatibility between the stack and the older
678 	 * drivers by translating from the only queue they have
679 	 * (ifp->if_snd) back to the interface and calling if_start.
680 	 */
681 
	/* legacy drivers expect the kernel lock and splnet protection */
682 	KERNEL_LOCK();
683 	s = splnet();
684 	(*ifp->if_start)(ifp);
685 	splx(s);
686 	KERNEL_UNLOCK();
687 }
688 
/*
 * Hand an outgoing packet to the interface, detouring through pf's
 * delay queue or the bridge when applicable.  Returns 0 or an errno;
 * the mbuf is always consumed.
 */
689 int
690 if_enqueue(struct ifnet *ifp, struct mbuf *m)
691 {
692 #if NPF > 0
693 	if (m->m_pkthdr.pf.delay > 0)
694 		return (pf_delay_pkt(m, ifp->if_index));
695 #endif
696 
697 #if NBRIDGE > 0
	/* M_PROTO1 marks packets the bridge already processed */
698 	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
699 		int error;
700 
701 		error = bridge_enqueue(ifp, m);
702 		return (error);
703 	}
704 #endif
705 
706 #if NPF > 0
707 	pf_pkt_addr_changed(m);
708 #endif	/* NPF > 0 */
709 
710 	return ((*ifp->if_enqueue)(ifp, m));
711 }
712 
/*
 * Default if_enqueue backend: pick a transmit queue, enqueue the mbuf
 * and kick the queue.  Returns 0 or the errno from ifq_enqueue().
 */
713 int
714 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
715 {
716 	struct ifqueue *ifq = &ifp->if_snd;
717 	int error;
718 
719 	if (ifp->if_nifqs > 1) {
720 		unsigned int idx;
721 
722 		/*
723 		 * use the operations on the first ifq to pick which of
724 		 * the array gets this mbuf.
725 		 */
726 
727 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
728 		ifq = ifp->if_ifqs[idx];
729 	}
730 
731 	error = ifq_enqueue(ifq, m);
732 	if (error)
733 		return (error);
734 
735 	ifq_start(ifq);
736 
737 	return (0);
738 }
739 
/* feed a list of received packets into the interface's first rx queue */
740 void
741 if_input(struct ifnet *ifp, struct mbuf_list *ml)
742 {
743 	ifiq_input(&ifp->if_rcv, ml);
744 }
745 
/*
 * Loop a locally destined packet straight back into the protocol
 * input path for address family ``af''.  Updates both output and
 * input statistics since the packet logically leaves and re-enters
 * the interface.  Consumes m; returns 0 or EAFNOSUPPORT.
 */
746 int
747 if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
748 {
749 #if NBPFILTER > 0
750 	/*
751 	 * Only send packets to bpf if they are destined to local
752 	 * addresses.
753 	 *
754 	 * if_input_local() is also called for SIMPLEX interfaces to
755 	 * duplicate packets for local use.  But don't dup them to bpf.
756 	 */
757 	if (ifp->if_flags & IFF_LOOPBACK) {
758 		caddr_t if_bpf = ifp->if_bpf;
759 
760 		if (if_bpf)
761 			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
762 	}
763 #endif
764 	m_resethdr(m);
765 	m->m_flags |= M_LOOP;
766 	m->m_pkthdr.ph_ifidx = ifp->if_index;
767 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
768 
769 	ifp->if_opackets++;
770 	ifp->if_obytes += m->m_pkthdr.len;
771 
772 	ifp->if_ipackets++;
773 	ifp->if_ibytes += m->m_pkthdr.len;
774 
775 	switch (af) {
776 	case AF_INET:
777 		ipv4_input(ifp, m);
778 		break;
779 #ifdef INET6
780 	case AF_INET6:
781 		ipv6_input(ifp, m);
782 		break;
783 #endif /* INET6 */
784 #ifdef MPLS
785 	case AF_MPLS:
786 		mpls_input(ifp, m);
787 		break;
788 #endif /* MPLS */
789 	default:
790 		printf("%s: can't handle af%d\n", ifp->if_xname, af);
791 		m_freem(m);
792 		return (EAFNOSUPPORT);
793 	}
794 
795 	return (0);
796 }
797 
/*
 * Queue a locally generated packet on one of the interface's receive
 * queues, spreading by flow id when the mbuf carries a valid one.
 * Consumes m; returns 0 or ENOBUFS.
 */
798 int
799 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
800 {
801 	struct ifiqueue *ifiq;
802 	unsigned int flow = 0;
803 
804 	m->m_pkthdr.ph_family = af;
805 	m->m_pkthdr.ph_ifidx = ifp->if_index;
806 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
807 
808 	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
809 		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;
810 
811 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
812 
813 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
814 }
815 
/*
 * registered per-interface input handler; elements live on the
 * SRP list ifp->if_inputs and are refcounted both by registration
 * (ifih_refcnt, kernel lock) and by SRP readers (ifih_srpcnt).
 */
816 struct ifih {
817 	SRPL_ENTRY(ifih)	  ifih_next;
818 	int			(*ifih_input)(struct ifnet *, struct mbuf *,
819 				      void *);
820 	void			 *ifih_cookie;	/* opaque arg for ifih_input */
821 	int			  ifih_refcnt;	/* registrations of this pair */
822 	struct refcnt		  ifih_srpcnt;	/* live SRP references */
823 };
824 
825 void	if_ih_ref(void *, void *);
826 void	if_ih_unref(void *, void *);
827 
828 struct srpl_rc ifih_rc = SRPL_RC_INITIALIZER(if_ih_ref, if_ih_unref, NULL);
829 
/*
 * Register an input handler on ifp.  A handler that is already
 * registered with the same (input, cookie) pair just gains a
 * reference; otherwise a new element is put at the head of the list.
 */
830 void
831 if_ih_insert(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
832     void *), void *cookie)
833 {
834 	struct ifih *ifih;
835 
836 	/* the kernel lock guarantees serialised modifications to if_inputs */
837 	KERNEL_ASSERT_LOCKED();
838 
839 	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
840 		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie) {
841 			ifih->ifih_refcnt++;
842 			break;
843 		}
844 	}
845 
846 	if (ifih == NULL) {
847 		ifih = malloc(sizeof(*ifih), M_DEVBUF, M_WAITOK);
848 
849 		ifih->ifih_input = input;
850 		ifih->ifih_cookie = cookie;
851 		ifih->ifih_refcnt = 1;
852 		refcnt_init(&ifih->ifih_srpcnt);
853 		SRPL_INSERT_HEAD_LOCKED(&ifih_rc, &ifp->if_inputs,
854 		    ifih, ifih_next);
855 	}
856 }
857 
858 void
859 if_ih_ref(void *null, void *i)
860 {
861 	struct ifih *ifih = i;
862 
863 	refcnt_take(&ifih->ifih_srpcnt);
864 }
865 
866 void
867 if_ih_unref(void *null, void *i)
868 {
869 	struct ifih *ifih = i;
870 
871 	refcnt_rele_wake(&ifih->ifih_srpcnt);
872 }
873 
/*
 * Drop one registration of an input handler; when the last one goes
 * the element is unlinked and freed after every SRP reader is done.
 * The (input, cookie) pair must be registered (KASSERT otherwise).
 */
874 void
875 if_ih_remove(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
876     void *), void *cookie)
877 {
878 	struct ifih *ifih;
879 
880 	/* the kernel lock guarantees serialised modifications to if_inputs */
881 	KERNEL_ASSERT_LOCKED();
882 
883 	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
884 		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie)
885 			break;
886 	}
887 
888 	KASSERT(ifih != NULL);
889 
890 	if (--ifih->ifih_refcnt == 0) {
891 		SRPL_REMOVE_LOCKED(&ifih_rc, &ifp->if_inputs, ifih,
892 		    ifih, ifih_next);
893 
		/* wait for SRP readers before freeing */
894 		refcnt_finalize(&ifih->ifih_srpcnt, "ifihrm");
895 		free(ifih, M_DEVBUF, sizeof(*ifih));
896 	}
897 }
898 
/*
 * Offer one received mbuf to the interface's input handlers in list
 * order; a handler returning non-zero has consumed it.  If nobody
 * takes the packet it is freed here.
 */
899 static void
900 if_ih_input(struct ifnet *ifp, struct mbuf *m)
901 {
902 	struct ifih *ifih;
903 	struct srp_ref sr;
904 
905 	/*
906 	 * Pass this mbuf to all input handlers of its
907 	 * interface until it is consumed.
908 	 */
909 	SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
910 		if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
911 			break;
912 	}
913 	SRPL_LEAVE(&sr);
914 
	/* ifih == NULL means the loop ran off the end: nobody took it */
915 	if (ifih == NULL)
916 		m_freem(m);
917 }
918 
/*
 * Drain a list of received packets into the input handlers under the
 * shared net lock.  Packet arrival on physical interfaces also feeds
 * the random pool.
 */
919 void
920 if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
921 {
922 	struct mbuf *m;
923 
924 	if (ml_empty(ml))
925 		return;
926 
	/* cloned (virtual) interfaces add no real-world entropy */
927 	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
928 		enqueue_randomness(ml_len(ml));
929 
930 	/*
931 	 * We grab the NET_LOCK() before processing any packet to
932 	 * ensure there's no contention on the routing table lock.
933 	 *
934 	 * Without it we could race with a userland thread to insert
935 	 * a L2 entry in ip{6,}_output().  Such race would result in
936 	 * one of the threads sleeping *inside* the IP output path.
937 	 *
938 	 * Since we have a NET_LOCK() we also use it to serialize access
939 	 * to PF globals, pipex globals, unicast and multicast addresses
940 	 * lists.
941 	 */
942 	NET_RLOCK();
943 	while ((m = ml_dequeue(ml)) != NULL)
944 		if_ih_input(ifp, m);
945 	NET_RUNLOCK();
946 }
947 
948 void
949 if_vinput(struct ifnet *ifp, struct mbuf *m)
950 {
951 #if NBPFILTER > 0
952 	caddr_t if_bpf;
953 #endif
954 
955 	m->m_pkthdr.ph_ifidx = ifp->if_index;
956 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
957 
958 	counters_pkt(ifp->if_counters,
959 	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
960 
961 #if NBPFILTER > 0
962 	if_bpf = ifp->if_bpf;
963 	if (if_bpf) {
964 		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT)) {
965 			m_freem(m);
966 			return;
967 		}
968 	}
969 #endif
970 
971 	if_ih_input(ifp, m);
972 }
973 
/*
 * Softnet task: drain the pending NETISR_* work bits under the net
 * lock, taking the kernel lock around legacy (non-MPSAFE) protocol
 * handlers.  PFSYNC runs once after the loop, on the accumulated
 * bits in t.
 */
974 void
975 if_netisr(void *unused)
976 {
977 	int n, t = 0;
978 
979 	NET_LOCK();
980 
981 	while ((n = netisr) != 0) {
982 		/* Like sched_pause() but with a rwlock dance. */
983 		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
984 			NET_UNLOCK();
985 			yield();
986 			NET_LOCK();
987 		}
988 
		/* claim exactly the bits we saw; new ones restart the loop */
989 		atomic_clearbits_int(&netisr, n);
990 
991 #if NETHER > 0
992 		if (n & (1 << NETISR_ARP)) {
993 			KERNEL_LOCK();
994 			arpintr();
995 			KERNEL_UNLOCK();
996 		}
997 #endif
998 		if (n & (1 << NETISR_IP))
999 			ipintr();
1000 #ifdef INET6
1001 		if (n & (1 << NETISR_IPV6))
1002 			ip6intr();
1003 #endif
1004 #if NPPP > 0
1005 		if (n & (1 << NETISR_PPP)) {
1006 			KERNEL_LOCK();
1007 			pppintr();
1008 			KERNEL_UNLOCK();
1009 		}
1010 #endif
1011 #if NBRIDGE > 0
1012 		if (n & (1 << NETISR_BRIDGE))
1013 			bridgeintr();
1014 #endif
1015 #if NSWITCH > 0
1016 		if (n & (1 << NETISR_SWITCH)) {
1017 			KERNEL_LOCK();
1018 			switchintr();
1019 			KERNEL_UNLOCK();
1020 		}
1021 #endif
1022 #if NPPPOE > 0
1023 		if (n & (1 << NETISR_PPPOE)) {
1024 			KERNEL_LOCK();
1025 			pppoeintr();
1026 			KERNEL_UNLOCK();
1027 		}
1028 #endif
1029 #ifdef PIPEX
1030 		if (n & (1 << NETISR_PIPEX)) {
1031 			KERNEL_LOCK();
1032 			pipexintr();
1033 			KERNEL_UNLOCK();
1034 		}
1035 #endif
1036 		t |= n;
1037 	}
1038 
1039 #if NPFSYNC > 0
1040 	if (t & (1 << NETISR_PFSYNC)) {
1041 		KERNEL_LOCK();
1042 		pfsyncintr();
1043 		KERNEL_UNLOCK();
1044 	}
1045 #endif
1046 
1047 	NET_UNLOCK();
1048 }
1049 
/*
 * Run (and discard) the interface's detach hooks so pseudo-drivers
 * stacked on top of it can undo their changes before if_detach().
 */
1050 void
1051 if_deactivate(struct ifnet *ifp)
1052 {
1053 	NET_LOCK();
1054 	/*
1055 	 * Call detach hooks from head to tail.  To make sure detach
1056 	 * hooks are executed in the reverse order they were added, all
1057 	 * the hooks have to be added to the head!
1058 	 */
1059 	dohooks(ifp->if_detachhooks, HOOK_REMOVE | HOOK_FREE);
1060 
1061 	NET_UNLOCK();
1062 }
1063 
1064 /*
1065  * Detach an interface from everything in the kernel.  Also deallocate
1066  * private resources.
1067  */
1068 void
1069 if_detach(struct ifnet *ifp)
1070 {
1071 	struct ifaddr *ifa;
1072 	struct ifg_list *ifg;
1073 	struct domain *dp;
1074 	int i, s;
1075 
1076 	/* Undo pseudo-driver changes. */
1077 	if_deactivate(ifp);
1078 
1079 	ifq_clr_oactive(&ifp->if_snd);
1080 
1081 	/* Other CPUs must not have a reference before we start destroying. */
1082 	if_idxmap_remove(ifp);
1083 
1084 #if NBPFILTER > 0
1085 	bpfdetach(ifp);
1086 #endif
1087 
1088 	NET_LOCK();
1089 	s = splnet();
	/* point the driver hooks at stubs that reject further work */
1090 	ifp->if_qstart = if_detached_qstart;
1091 	ifp->if_ioctl = if_detached_ioctl;
1092 	ifp->if_watchdog = NULL;
1093 
1094 	/* Remove the watchdog timeout & task */
1095 	timeout_del(&ifp->if_slowtimo);
1096 	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1097 
1098 	/* Remove the link state task */
1099 	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1100 
1101 	rti_delete(ifp);
1102 #if NETHER > 0 && defined(NFSCLIENT)
1103 	if (ifp->if_index == revarp_ifidx)
1104 		revarp_ifidx = 0;
1105 #endif
1106 #ifdef MROUTING
1107 	vif_delete(ifp);
1108 #endif
1109 	in_ifdetach(ifp);
1110 #ifdef INET6
1111 	in6_ifdetach(ifp);
1112 #endif
1113 #if NPF > 0
1114 	pfi_detach_ifnet(ifp);
1115 #endif
1116 
1117 	/* Remove the interface from the list of all interfaces.  */
1118 	TAILQ_REMOVE(&ifnet, ifp, if_list);
1119 
1120 	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
1121 		if_delgroup(ifp, ifg->ifgl_group->ifg_group);
1122 
1123 	if_free_sadl(ifp);
1124 
1125 	/* We should not have any address left at this point. */
1126 	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
1127 #ifdef DIAGNOSTIC
1128 		printf("%s: address list non empty\n", ifp->if_xname);
1129 #endif
1130 		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
1131 			ifa_del(ifp, ifa);
1132 			ifa->ifa_ifp = NULL;
1133 			ifafree(ifa);
1134 		}
1135 	}
1136 
1137 	free(ifp->if_addrhooks, M_TEMP, 0);
1138 	free(ifp->if_linkstatehooks, M_TEMP, 0);
1139 	free(ifp->if_detachhooks, M_TEMP, 0);
1140 
	/* let each protocol domain tear down its per-interface data */
1141 	for (i = 0; (dp = domains[i]) != NULL; i++) {
1142 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
1143 			(*dp->dom_ifdetach)(ifp,
1144 			    ifp->if_afdata[dp->dom_family]);
1145 	}
1146 
1147 	/* Announce that the interface is gone. */
1148 	rtm_ifannounce(ifp, IFAN_DEPARTURE);
1149 	splx(s);
1150 	NET_UNLOCK();
1151 
1152 	if (ifp->if_counters != NULL)
1153 		if_counters_free(ifp);
1154 
	/* queue 0 is embedded in ifp; only extra queues were malloc'd */
1155 	for (i = 0; i < ifp->if_nifqs; i++)
1156 		ifq_destroy(ifp->if_ifqs[i]);
1157 	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
1158 		for (i = 1; i < ifp->if_nifqs; i++) {
1159 			free(ifp->if_ifqs[i], M_DEVBUF,
1160 			    sizeof(struct ifqueue));
1161 		}
1162 		free(ifp->if_ifqs, M_DEVBUF,
1163 		    sizeof(struct ifqueue *) * ifp->if_nifqs);
1164 	}
1165 
1166 	for (i = 0; i < ifp->if_niqs; i++)
1167 		ifiq_destroy(ifp->if_iqs[i]);
1168 	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
1169 		for (i = 1; i < ifp->if_niqs; i++) {
1170 			free(ifp->if_iqs[i], M_DEVBUF,
1171 			    sizeof(struct ifiqueue));
1172 		}
1173 		free(ifp->if_iqs, M_DEVBUF,
1174 		    sizeof(struct ifiqueue *) * ifp->if_niqs);
1175 	}
1176 }
1177 
1178 /*
1179  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1180  */
1181 int
1182 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1183 {
1184 	struct ifnet *ifp;
1185 	int connected = 0;
1186 
1187 	ifp = if_get(ifidx);
1188 	if (ifp == NULL)
1189 		return (0);
1190 
1191 	if (ifp0->if_index == ifp->if_index)
1192 		connected = 1;
1193 
1194 #if NBRIDGE > 0
1195 	if (ifp0->if_bridgeidx == ifp->if_bridgeidx)
1196 		connected = 1;
1197 #endif
1198 #if NCARP > 0
1199 	if ((ifp0->if_type == IFT_CARP && ifp0->if_carpdev == ifp) ||
1200 	    (ifp->if_type == IFT_CARP && ifp->if_carpdev == ifp0))
1201 		connected = 1;
1202 #endif
1203 
1204 	if_put(ifp);
1205 	return (connected);
1206 }
1207 
1208 /*
1209  * Create a clone network interface.
1210  */
1211 int
1212 if_clone_create(const char *name, int rdomain)
1213 {
1214 	struct if_clone *ifc;
1215 	struct ifnet *ifp;
1216 	int unit, ret;
1217 
1218 	NET_ASSERT_LOCKED();
1219 
1220 	ifc = if_clone_lookup(name, &unit);
1221 	if (ifc == NULL)
1222 		return (EINVAL);
1223 
1224 	if (ifunit(name) != NULL)
1225 		return (EEXIST);
1226 
1227 	/* XXXSMP breaks atomicity */
1228 	NET_UNLOCK();
1229 	ret = (*ifc->ifc_create)(ifc, unit);
1230 	NET_LOCK();
1231 
1232 	if (ret != 0 || (ifp = ifunit(name)) == NULL)
1233 		return (ret);
1234 
1235 	if_addgroup(ifp, ifc->ifc_name);
1236 	if (rdomain != 0)
1237 		if_setrdomain(ifp, rdomain);
1238 
1239 	return (ret);
1240 }
1241 
1242 /*
1243  * Destroy a clone network interface.
1244  */
1245 int
1246 if_clone_destroy(const char *name)
1247 {
1248 	struct if_clone *ifc;
1249 	struct ifnet *ifp;
1250 	int ret;
1251 
1252 	NET_ASSERT_LOCKED();
1253 
1254 	ifc = if_clone_lookup(name, NULL);
1255 	if (ifc == NULL)
1256 		return (EINVAL);
1257 
1258 	ifp = ifunit(name);
1259 	if (ifp == NULL)
1260 		return (ENXIO);
1261 
1262 	if (ifc->ifc_destroy == NULL)
1263 		return (EOPNOTSUPP);
1264 
1265 	if (ifp->if_flags & IFF_UP) {
1266 		int s;
1267 		s = splnet();
1268 		if_down(ifp);
1269 		splx(s);
1270 	}
1271 
1272 	/* XXXSMP breaks atomicity */
1273 	NET_UNLOCK();
1274 	ret = (*ifc->ifc_destroy)(ifp);
1275 	NET_LOCK();
1276 
1277 	return (ret);
1278 }
1279 
1280 /*
1281  * Look up a network interface cloner.
1282  */
1283 struct if_clone *
1284 if_clone_lookup(const char *name, int *unitp)
1285 {
1286 	struct if_clone *ifc;
1287 	const char *cp;
1288 	int unit;
1289 
1290 	/* separate interface name from unit */
1291 	for (cp = name;
1292 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1293 	    cp++)
1294 		continue;
1295 
1296 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1297 		return (NULL);	/* No name or unit number */
1298 
1299 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1300 		return (NULL);	/* unit number 0 padded */
1301 
1302 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1303 		if (strlen(ifc->ifc_name) == cp - name &&
1304 		    !strncmp(name, ifc->ifc_name, cp - name))
1305 			break;
1306 	}
1307 
1308 	if (ifc == NULL)
1309 		return (NULL);
1310 
1311 	unit = 0;
1312 	while (cp - name < IFNAMSIZ && *cp) {
1313 		if (*cp < '0' || *cp > '9' ||
1314 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1315 			/* Bogus unit number. */
1316 			return (NULL);
1317 		}
1318 		unit = (unit * 10) + (*cp++ - '0');
1319 	}
1320 
1321 	if (unitp != NULL)
1322 		*unitp = unit;
1323 	return (ifc);
1324 }
1325 
1326 /*
1327  * Register a network interface cloner.
1328  */
1329 void
1330 if_clone_attach(struct if_clone *ifc)
1331 {
1332 	/*
1333 	 * we are called at kernel boot by main(), when pseudo devices are
1334 	 * being attached. The main() is the only guy which may alter the
1335 	 * if_cloners. While system is running and main() is done with
1336 	 * initialization, the if_cloners becomes immutable.
1337 	 */
1338 	KASSERT(pdevinit_done == 0);
1339 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1340 	if_cloners_count++;
1341 }
1342 
1343 /*
1344  * Provide list of interface cloners to userspace.
1345  */
1346 int
1347 if_clone_list(struct if_clonereq *ifcr)
1348 {
1349 	char outbuf[IFNAMSIZ], *dst;
1350 	struct if_clone *ifc;
1351 	int count, error = 0;
1352 
1353 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1354 		/* Just asking how many there are. */
1355 		ifcr->ifcr_total = if_cloners_count;
1356 		return (0);
1357 	}
1358 
1359 	if (ifcr->ifcr_count < 0)
1360 		return (EINVAL);
1361 
1362 	ifcr->ifcr_total = if_cloners_count;
1363 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1364 
1365 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1366 		if (count == 0)
1367 			break;
1368 		bzero(outbuf, sizeof outbuf);
1369 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1370 		error = copyout(outbuf, dst, IFNAMSIZ);
1371 		if (error)
1372 			break;
1373 		count--;
1374 		dst += IFNAMSIZ;
1375 	}
1376 
1377 	return (error);
1378 }
1379 
1380 /*
1381  * set queue congestion marker
1382  */
1383 void
1384 if_congestion(void)
1385 {
1386 	extern int ticks;
1387 
1388 	ifq_congestion = ticks;
1389 }
1390 
1391 int
1392 if_congested(void)
1393 {
1394 	extern int ticks;
1395 	int diff;
1396 
1397 	diff = ticks - ifq_congestion;
1398 	if (diff < 0) {
1399 		ifq_congestion = ticks - hz;
1400 		return (0);
1401 	}
1402 
1403 	return (diff <= (hz / 100));
1404 }
1405 
/*
 * Byte-compare the first (a1)->sa_len bytes of two sockaddrs.
 * NOTE: both arguments are evaluated more than once; only pass
 * side-effect-free expressions.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1409 
1410 /*
1411  * Locate an interface based on a complete address.
1412  */
1413 struct ifaddr *
1414 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1415 {
1416 	struct ifnet *ifp;
1417 	struct ifaddr *ifa;
1418 	u_int rdomain;
1419 
1420 	rdomain = rtable_l2(rtableid);
1421 	KERNEL_LOCK();
1422 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1423 		if (ifp->if_rdomain != rdomain)
1424 			continue;
1425 
1426 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1427 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1428 				continue;
1429 
1430 			if (equal(addr, ifa->ifa_addr)) {
1431 				KERNEL_UNLOCK();
1432 				return (ifa);
1433 			}
1434 		}
1435 	}
1436 	KERNEL_UNLOCK();
1437 	return (NULL);
1438 }
1439 
1440 /*
1441  * Locate the point to point interface with a given destination address.
1442  */
1443 struct ifaddr *
1444 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1445 {
1446 	struct ifnet *ifp;
1447 	struct ifaddr *ifa;
1448 
1449 	rdomain = rtable_l2(rdomain);
1450 	KERNEL_LOCK();
1451 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1452 		if (ifp->if_rdomain != rdomain)
1453 			continue;
1454 		if (ifp->if_flags & IFF_POINTOPOINT) {
1455 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1456 				if (ifa->ifa_addr->sa_family !=
1457 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1458 					continue;
1459 				if (equal(addr, ifa->ifa_dstaddr)) {
1460 					KERNEL_UNLOCK();
1461 					return (ifa);
1462 				}
1463 			}
1464 		}
1465 	}
1466 	KERNEL_UNLOCK();
1467 	return (NULL);
1468 }
1469 
1470 /*
1471  * Find an interface address specific to an interface best matching
1472  * a given address.
1473  */
1474 struct ifaddr *
1475 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1476 {
1477 	struct ifaddr *ifa;
1478 	char *cp, *cp2, *cp3;
1479 	char *cplim;
1480 	struct ifaddr *ifa_maybe = NULL;
1481 	u_int af = addr->sa_family;
1482 
1483 	if (af >= AF_MAX)
1484 		return (NULL);
1485 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1486 		if (ifa->ifa_addr->sa_family != af)
1487 			continue;
1488 		if (ifa_maybe == NULL)
1489 			ifa_maybe = ifa;
1490 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1491 			if (equal(addr, ifa->ifa_addr) ||
1492 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1493 				return (ifa);
1494 			continue;
1495 		}
1496 		cp = addr->sa_data;
1497 		cp2 = ifa->ifa_addr->sa_data;
1498 		cp3 = ifa->ifa_netmask->sa_data;
1499 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1500 		for (; cp3 < cplim; cp3++)
1501 			if ((*cp++ ^ *cp2++) & *cp3)
1502 				break;
1503 		if (cp3 == cplim)
1504 			return (ifa);
1505 	}
1506 	return (ifa_maybe);
1507 }
1508 
/* No-op if_rtrequest handler for interfaces without special needs. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1513 
1514 /*
1515  * Default action when installing a local route on a point-to-point
1516  * interface.
1517  */
1518 void
1519 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1520 {
1521 	struct ifnet *lo0ifp;
1522 	struct ifaddr *ifa, *lo0ifa;
1523 
1524 	switch (req) {
1525 	case RTM_ADD:
1526 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1527 			break;
1528 
1529 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1530 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1531 			    rt_key(rt)->sa_len) == 0)
1532 				break;
1533 		}
1534 
1535 		if (ifa == NULL)
1536 			break;
1537 
1538 		KASSERT(ifa == rt->rt_ifa);
1539 
1540 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1541 		KASSERT(lo0ifp != NULL);
1542 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1543 			if (lo0ifa->ifa_addr->sa_family ==
1544 			    ifa->ifa_addr->sa_family)
1545 				break;
1546 		}
1547 		if_put(lo0ifp);
1548 
1549 		if (lo0ifa == NULL)
1550 			break;
1551 
1552 		rt->rt_flags &= ~RTF_LLINFO;
1553 		break;
1554 	case RTM_DELETE:
1555 	case RTM_RESOLVE:
1556 	default:
1557 		break;
1558 	}
1559 }
1560 
1561 
1562 /*
1563  * Bring down all interfaces
1564  */
1565 void
1566 if_downall(void)
1567 {
1568 	struct ifreq ifrq;	/* XXX only partly built */
1569 	struct ifnet *ifp;
1570 
1571 	NET_LOCK();
1572 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1573 		if ((ifp->if_flags & IFF_UP) == 0)
1574 			continue;
1575 		if_down(ifp);
1576 		ifrq.ifr_flags = ifp->if_flags;
1577 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1578 	}
1579 	NET_UNLOCK();
1580 }
1581 
1582 /*
1583  * Mark an interface down and notify protocols of
1584  * the transition.
1585  */
1586 void
1587 if_down(struct ifnet *ifp)
1588 {
1589 	NET_ASSERT_LOCKED();
1590 
1591 	ifp->if_flags &= ~IFF_UP;
1592 	getmicrotime(&ifp->if_lastchange);
1593 	IFQ_PURGE(&ifp->if_snd);
1594 
1595 	if_linkstate(ifp);
1596 }
1597 
1598 /*
1599  * Mark an interface up and notify protocols of
1600  * the transition.
1601  */
1602 void
1603 if_up(struct ifnet *ifp)
1604 {
1605 	NET_ASSERT_LOCKED();
1606 
1607 	ifp->if_flags |= IFF_UP;
1608 	getmicrotime(&ifp->if_lastchange);
1609 
1610 #ifdef INET6
1611 	/* Userland expects the kernel to set ::1 on default lo(4). */
1612 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1613 		in6_ifattach(ifp);
1614 #endif
1615 
1616 	if_linkstate(ifp);
1617 }
1618 
1619 /*
1620  * Notify userland, the routing table and hooks owner of
1621  * a link-state transition.
1622  */
1623 void
1624 if_linkstate_task(void *xifidx)
1625 {
1626 	unsigned int ifidx = (unsigned long)xifidx;
1627 	struct ifnet *ifp;
1628 
1629 	KERNEL_LOCK();
1630 	NET_LOCK();
1631 
1632 	ifp = if_get(ifidx);
1633 	if (ifp != NULL)
1634 		if_linkstate(ifp);
1635 	if_put(ifp);
1636 
1637 	NET_UNLOCK();
1638 	KERNEL_UNLOCK();
1639 }
1640 
/*
 * Notify userland (routing socket), the routing table and the
 * link-state hook owners of a link-state transition.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	rtm_ifchg(ifp);
	rt_if_track(ifp);
	dohooks(ifp->if_linkstatehooks, 0);
}
1650 
1651 /*
1652  * Schedule a link state change task.
1653  */
1654 void
1655 if_link_state_change(struct ifnet *ifp)
1656 {
1657 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1658 }
1659 
1660 /*
1661  * Handle interface watchdog timer routine.  Called
1662  * from softclock, we decrement timer (if set) and
1663  * call the appropriate interface routine on expiration.
1664  */
1665 void
1666 if_slowtimo(void *arg)
1667 {
1668 	struct ifnet *ifp = arg;
1669 	int s = splnet();
1670 
1671 	if (ifp->if_watchdog) {
1672 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1673 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1674 		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
1675 	}
1676 	splx(s);
1677 }
1678 
/*
 * Deferred task that invokes a driver's watchdog routine for the
 * interface whose index was stashed in the task argument.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1699 
1700 /*
1701  * Map interface name to interface structure pointer.
1702  */
1703 struct ifnet *
1704 ifunit(const char *name)
1705 {
1706 	struct ifnet *ifp;
1707 
1708 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1709 		if (strcmp(ifp->if_xname, name) == 0)
1710 			return (ifp);
1711 	}
1712 	return (NULL);
1713 }
1714 
1715 /*
1716  * Map interface index to interface structure pointer.
1717  */
1718 struct ifnet *
1719 if_get(unsigned int index)
1720 {
1721 	struct srp_ref sr;
1722 	struct if_map *if_map;
1723 	struct srp *map;
1724 	struct ifnet *ifp = NULL;
1725 
1726 	if_map = srp_enter(&sr, &if_idxmap.map);
1727 	if (index < if_map->limit) {
1728 		map = (struct srp *)(if_map + 1);
1729 
1730 		ifp = srp_follow(&sr, &map[index]);
1731 		if (ifp != NULL) {
1732 			KASSERT(ifp->if_index == index);
1733 			if_ref(ifp);
1734 		}
1735 	}
1736 	srp_leave(&sr);
1737 
1738 	return (ifp);
1739 }
1740 
/* Take an additional reference on ``ifp'' and return it for chaining. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1748 
/*
 * Release a reference obtained via if_get()/if_ref().
 * NULL is accepted as a convenience no-op.
 */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1757 
/*
 * Install a new link-layer (MAC) address on ``ifp''.
 * NOTE(review): the cast assumes ifp is embedded in a struct arpcom,
 * i.e. an Ethernet-like interface — callers must guarantee this.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1769 
/*
 * Create routing domain ``rdomain'' (and its routing table if needed)
 * together with its loopback interface lo<rdomain>.  ``ifp'' is the
 * interface being configured; if it is itself that loopback, an
 * EEXIST from the clone step is tolerated.  Returns 0 or an errno.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	/* Refuse to hijack a table that already has routes. */
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	/* Bind the new table to the rdomain via its loopback. */
	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1796 
/*
 * Move ``ifp'' into routing domain ``rdomain''.
 *
 * The target table must exist and be a real rdomain (its own L2
 * table).  A loopback interface that anchors its current rdomain may
 * not be moved.  On an actual change the interface is torn down
 * (addresses, multicast, routes) and brought back up afterwards.
 * Returns 0 or an errno.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* The loopback anchoring an rdomain must stay in it. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1860 
1861 /*
1862  * Interface ioctls.
1863  */
1864 int
1865 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1866 {
1867 	struct ifnet *ifp;
1868 	struct ifreq *ifr = (struct ifreq *)data;
1869 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1870 	struct if_afreq *ifar = (struct if_afreq *)data;
1871 	char ifdescrbuf[IFDESCRSIZE];
1872 	char ifrtlabelbuf[RTLABEL_LEN];
1873 	int s, error = 0, oif_xflags;
1874 	size_t bytesdone;
1875 	unsigned short oif_flags;
1876 
1877 	switch (cmd) {
1878 	case SIOCIFCREATE:
1879 		if ((error = suser(p)) != 0)
1880 			return (error);
1881 		NET_LOCK();
1882 		error = if_clone_create(ifr->ifr_name, 0);
1883 		NET_UNLOCK();
1884 		return (error);
1885 	case SIOCIFDESTROY:
1886 		if ((error = suser(p)) != 0)
1887 			return (error);
1888 		NET_LOCK();
1889 		error = if_clone_destroy(ifr->ifr_name);
1890 		NET_UNLOCK();
1891 		return (error);
1892 	case SIOCSIFGATTR:
1893 		if ((error = suser(p)) != 0)
1894 			return (error);
1895 		NET_LOCK();
1896 		error = if_setgroupattribs(data);
1897 		NET_UNLOCK();
1898 		return (error);
1899 	case SIOCGIFCONF:
1900 	case SIOCIFGCLONERS:
1901 	case SIOCGIFGMEMB:
1902 	case SIOCGIFGATTR:
1903 	case SIOCGIFGLIST:
1904 	case SIOCGIFFLAGS:
1905 	case SIOCGIFXFLAGS:
1906 	case SIOCGIFMETRIC:
1907 	case SIOCGIFMTU:
1908 	case SIOCGIFHARDMTU:
1909 	case SIOCGIFDATA:
1910 	case SIOCGIFDESCR:
1911 	case SIOCGIFRTLABEL:
1912 	case SIOCGIFPRIORITY:
1913 	case SIOCGIFRDOMAIN:
1914 	case SIOCGIFGROUP:
1915 	case SIOCGIFLLPRIO:
1916 		return (ifioctl_get(cmd, data));
1917 	}
1918 
1919 	ifp = ifunit(ifr->ifr_name);
1920 	if (ifp == NULL)
1921 		return (ENXIO);
1922 	oif_flags = ifp->if_flags;
1923 	oif_xflags = ifp->if_xflags;
1924 
1925 	switch (cmd) {
1926 	case SIOCIFAFATTACH:
1927 	case SIOCIFAFDETACH:
1928 		if ((error = suser(p)) != 0)
1929 			break;
1930 		NET_LOCK();
1931 		switch (ifar->ifar_af) {
1932 		case AF_INET:
1933 			/* attach is a noop for AF_INET */
1934 			if (cmd == SIOCIFAFDETACH)
1935 				in_ifdetach(ifp);
1936 			break;
1937 #ifdef INET6
1938 		case AF_INET6:
1939 			if (cmd == SIOCIFAFATTACH)
1940 				error = in6_ifattach(ifp);
1941 			else
1942 				in6_ifdetach(ifp);
1943 			break;
1944 #endif /* INET6 */
1945 		default:
1946 			error = EAFNOSUPPORT;
1947 		}
1948 		NET_UNLOCK();
1949 		break;
1950 
1951 	case SIOCSIFFLAGS:
1952 		if ((error = suser(p)) != 0)
1953 			break;
1954 
1955 		NET_LOCK();
1956 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1957 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1958 
1959 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1960 		if (error != 0) {
1961 			ifp->if_flags = oif_flags;
1962 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
1963 			s = splnet();
1964 			if (ISSET(ifp->if_flags, IFF_UP))
1965 				if_up(ifp);
1966 			else
1967 				if_down(ifp);
1968 			splx(s);
1969 		}
1970 		NET_UNLOCK();
1971 		break;
1972 
1973 	case SIOCSIFXFLAGS:
1974 		if ((error = suser(p)) != 0)
1975 			break;
1976 
1977 		NET_LOCK();
1978 #ifdef INET6
1979 		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
1980 			error = in6_ifattach(ifp);
1981 			if (error != 0) {
1982 				NET_UNLOCK();
1983 				break;
1984 			}
1985 		}
1986 
1987 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1988 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1989 			ifp->if_xflags |= IFXF_INET6_NOSOII;
1990 			in6_soiiupdate(ifp);
1991 		}
1992 
1993 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1994 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1995 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
1996 			in6_soiiupdate(ifp);
1997 		}
1998 
1999 #endif	/* INET6 */
2000 
2001 #ifdef MPLS
2002 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
2003 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
2004 			s = splnet();
2005 			ifp->if_xflags |= IFXF_MPLS;
2006 			ifp->if_ll_output = ifp->if_output;
2007 			ifp->if_output = mpls_output;
2008 			splx(s);
2009 		}
2010 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
2011 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
2012 			s = splnet();
2013 			ifp->if_xflags &= ~IFXF_MPLS;
2014 			ifp->if_output = ifp->if_ll_output;
2015 			ifp->if_ll_output = NULL;
2016 			splx(s);
2017 		}
2018 #endif	/* MPLS */
2019 
2020 #ifndef SMALL_KERNEL
2021 		if (ifp->if_capabilities & IFCAP_WOL) {
2022 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
2023 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
2024 				s = splnet();
2025 				ifp->if_xflags |= IFXF_WOL;
2026 				error = ifp->if_wol(ifp, 1);
2027 				splx(s);
2028 			}
2029 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
2030 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
2031 				s = splnet();
2032 				ifp->if_xflags &= ~IFXF_WOL;
2033 				error = ifp->if_wol(ifp, 0);
2034 				splx(s);
2035 			}
2036 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
2037 			ifr->ifr_flags &= ~IFXF_WOL;
2038 			error = ENOTSUP;
2039 		}
2040 #endif
2041 
2042 		if (error == 0)
2043 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
2044 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
2045 		NET_UNLOCK();
2046 		break;
2047 
2048 	case SIOCSIFMETRIC:
2049 		if ((error = suser(p)) != 0)
2050 			break;
2051 		NET_LOCK();
2052 		ifp->if_metric = ifr->ifr_metric;
2053 		NET_UNLOCK();
2054 		break;
2055 
2056 	case SIOCSIFMTU:
2057 		if ((error = suser(p)) != 0)
2058 			break;
2059 		NET_LOCK();
2060 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2061 		NET_UNLOCK();
2062 		if (!error)
2063 			rtm_ifchg(ifp);
2064 		break;
2065 
2066 	case SIOCSIFDESCR:
2067 		if ((error = suser(p)) != 0)
2068 			break;
2069 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2070 		    IFDESCRSIZE, &bytesdone);
2071 		if (error == 0) {
2072 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2073 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2074 		}
2075 		break;
2076 
2077 	case SIOCSIFRTLABEL:
2078 		if ((error = suser(p)) != 0)
2079 			break;
2080 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2081 		    RTLABEL_LEN, &bytesdone);
2082 		if (error == 0) {
2083 			rtlabel_unref(ifp->if_rtlabelid);
2084 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2085 		}
2086 		break;
2087 
2088 	case SIOCSIFPRIORITY:
2089 		if ((error = suser(p)) != 0)
2090 			break;
2091 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2092 			error = EINVAL;
2093 			break;
2094 		}
2095 		ifp->if_priority = ifr->ifr_metric;
2096 		break;
2097 
2098 	case SIOCSIFRDOMAIN:
2099 		if ((error = suser(p)) != 0)
2100 			break;
2101 		NET_LOCK();
2102 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2103 		if (!error || error == EEXIST)
2104 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2105 		NET_UNLOCK();
2106 		break;
2107 
2108 	case SIOCAIFGROUP:
2109 		if ((error = suser(p)))
2110 			break;
2111 		NET_LOCK();
2112 		error = if_addgroup(ifp, ifgr->ifgr_group);
2113 		if (error == 0) {
2114 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2115 			if (error == ENOTTY)
2116 				error = 0;
2117 		}
2118 		NET_UNLOCK();
2119 		break;
2120 
2121 	case SIOCDIFGROUP:
2122 		if ((error = suser(p)))
2123 			break;
2124 		NET_LOCK();
2125 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2126 		if (error == ENOTTY)
2127 			error = 0;
2128 		if (error == 0)
2129 			error = if_delgroup(ifp, ifgr->ifgr_group);
2130 		NET_UNLOCK();
2131 		break;
2132 
2133 	case SIOCSIFLLADDR:
2134 		if ((error = suser(p)))
2135 			break;
2136 		if ((ifp->if_sadl == NULL) ||
2137 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2138 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2139 			error = EINVAL;
2140 			break;
2141 		}
2142 		NET_LOCK();
2143 		switch (ifp->if_type) {
2144 		case IFT_ETHER:
2145 		case IFT_CARP:
2146 		case IFT_XETHER:
2147 		case IFT_ISO88025:
2148 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2149 			if (error == ENOTTY)
2150 				error = 0;
2151 			if (error == 0)
2152 				error = if_setlladdr(ifp,
2153 				    ifr->ifr_addr.sa_data);
2154 			break;
2155 		default:
2156 			error = ENODEV;
2157 		}
2158 
2159 		if (error == 0)
2160 			ifnewlladdr(ifp);
2161 		NET_UNLOCK();
2162 		break;
2163 
2164 	case SIOCSIFLLPRIO:
2165 		if ((error = suser(p)))
2166 			break;
2167 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2168 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2169 			error = EINVAL;
2170 			break;
2171 		}
2172 		NET_LOCK();
2173 		ifp->if_llprio = ifr->ifr_llprio;
2174 		NET_UNLOCK();
2175 		break;
2176 
2177 	case SIOCGIFSFFPAGE:
2178 		error = suser(p);
2179 		if (error != 0)
2180 			break;
2181 
2182 		error = if_sffpage_check(data);
2183 		if (error != 0)
2184 			break;
2185 
2186 		/* don't take NET_LOCK because i2c reads take a long time */
2187 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2188 		break;
2189 
2190 	case SIOCSETKALIVE:
2191 	case SIOCDIFPHYADDR:
2192 	case SIOCSLIFPHYADDR:
2193 	case SIOCSLIFPHYRTABLE:
2194 	case SIOCSLIFPHYTTL:
2195 	case SIOCSLIFPHYDF:
2196 	case SIOCSLIFPHYECN:
2197 	case SIOCADDMULTI:
2198 	case SIOCDELMULTI:
2199 	case SIOCSIFMEDIA:
2200 	case SIOCSVNETID:
2201 	case SIOCSVNETFLOWID:
2202 	case SIOCSTXHPRIO:
2203 	case SIOCSRXHPRIO:
2204 	case SIOCSIFPAIR:
2205 	case SIOCSIFPARENT:
2206 	case SIOCDIFPARENT:
2207 	case SIOCSETMPWCFG:
2208 	case SIOCSETLABEL:
2209 	case SIOCDELLABEL:
2210 	case SIOCSPWE3CTRLWORD:
2211 	case SIOCSPWE3FAT:
2212 	case SIOCSPWE3NEIGHBOR:
2213 	case SIOCDPWE3NEIGHBOR:
2214 		if ((error = suser(p)) != 0)
2215 			break;
2216 		/* FALLTHROUGH */
2217 	default:
2218 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
2219 			(struct mbuf *) cmd, (struct mbuf *) data,
2220 			(struct mbuf *) ifp, p));
2221 		if (error == EOPNOTSUPP) {
2222 			NET_LOCK();
2223 			error = ((*ifp->if_ioctl)(ifp, cmd, data));
2224 			NET_UNLOCK();
2225 		}
2226 		break;
2227 	}
2228 
2229 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
2230 		rtm_ifchg(ifp);
2231 
2232 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2233 		getmicrotime(&ifp->if_lastchange);
2234 
2235 	return (error);
2236 }
2237 
/*
 * Handle the read-only interface ioctls under the shared net lock.
 * Global queries (ifconf, cloners, groups) are served first; all
 * remaining commands operate on the interface named in ifr_name.
 * Must only be reached with commands listed in ifioctl()'s read-only
 * dispatch table, otherwise the final default case panics.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK();
		error = ifconf(data);
		NET_RUNLOCK();
		return (error);
	case SIOCIFGCLONERS:
		/* Cloner list is immutable after boot; no lock needed. */
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK();
		error = if_getgroupmembers(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK();
		error = if_getgroupattribs(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK();
		error = if_getgrouplist(data);
		NET_RUNLOCK();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* Synthesize IFF_OACTIVE from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Kernel-internal xflags are hidden from userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK();

	return (error);
}
2351 
2352 static int
2353 if_sffpage_check(const caddr_t data)
2354 {
2355 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2356 
2357 	switch (sff->sff_addr) {
2358 	case IFSFF_ADDR_EEPROM:
2359 	case IFSFF_ADDR_DDM:
2360 		break;
2361 	default:
2362 		return (EINVAL);
2363 	}
2364 
2365 	return (0);
2366 }
2367 
2368 int
2369 if_txhprio_l2_check(int hdrprio)
2370 {
2371 	switch (hdrprio) {
2372 	case IF_HDRPRIO_PACKET:
2373 		return (0);
2374 	default:
2375 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2376 			return (0);
2377 		break;
2378 	}
2379 
2380 	return (EINVAL);
2381 }
2382 
2383 int
2384 if_txhprio_l3_check(int hdrprio)
2385 {
2386 	switch (hdrprio) {
2387 	case IF_HDRPRIO_PACKET:
2388 	case IF_HDRPRIO_PAYLOAD:
2389 		return (0);
2390 	default:
2391 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2392 			return (0);
2393 		break;
2394 	}
2395 
2396 	return (EINVAL);
2397 }
2398 
2399 int
2400 if_rxhprio_l2_check(int hdrprio)
2401 {
2402 	switch (hdrprio) {
2403 	case IF_HDRPRIO_PACKET:
2404 	case IF_HDRPRIO_OUTER:
2405 		return (0);
2406 	default:
2407 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2408 			return (0);
2409 		break;
2410 	}
2411 
2412 	return (EINVAL);
2413 }
2414 
2415 int
2416 if_rxhprio_l3_check(int hdrprio)
2417 {
2418 	switch (hdrprio) {
2419 	case IF_HDRPRIO_PACKET:
2420 	case IF_HDRPRIO_PAYLOAD:
2421 	case IF_HDRPRIO_OUTER:
2422 		return (0);
2423 	default:
2424 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2425 			return (0);
2426 		break;
2427 	}
2428 
2429 	return (EINVAL);
2430 }
2431 
2432 /*
2433  * Return interface configuration
2434  * of system.  List may be used
2435  * in later ioctl's (above) to get
2436  * other information.
2437  */
2438 int
2439 ifconf(caddr_t data)
2440 {
2441 	struct ifconf *ifc = (struct ifconf *)data;
2442 	struct ifnet *ifp;
2443 	struct ifaddr *ifa;
2444 	struct ifreq ifr, *ifrp;
2445 	int space = ifc->ifc_len, error = 0;
2446 
2447 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2448 	if (space == 0) {
2449 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2450 			struct sockaddr *sa;
2451 
2452 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2453 				space += sizeof (ifr);
2454 			else
2455 				TAILQ_FOREACH(ifa,
2456 				    &ifp->if_addrlist, ifa_list) {
2457 					sa = ifa->ifa_addr;
2458 					if (sa->sa_len > sizeof(*sa))
2459 						space += sa->sa_len -
2460 						    sizeof(*sa);
2461 					space += sizeof(ifr);
2462 				}
2463 		}
2464 		ifc->ifc_len = space;
2465 		return (0);
2466 	}
2467 
2468 	ifrp = ifc->ifc_req;
2469 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2470 		if (space < sizeof(ifr))
2471 			break;
2472 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2473 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2474 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2475 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2476 			    sizeof(ifr));
2477 			if (error)
2478 				break;
2479 			space -= sizeof (ifr), ifrp++;
2480 		} else
2481 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2482 				struct sockaddr *sa = ifa->ifa_addr;
2483 
2484 				if (space < sizeof(ifr))
2485 					break;
2486 				if (sa->sa_len <= sizeof(*sa)) {
2487 					ifr.ifr_addr = *sa;
2488 					error = copyout((caddr_t)&ifr,
2489 					    (caddr_t)ifrp, sizeof (ifr));
2490 					ifrp++;
2491 				} else {
2492 					space -= sa->sa_len - sizeof(*sa);
2493 					if (space < sizeof (ifr))
2494 						break;
2495 					error = copyout((caddr_t)&ifr,
2496 					    (caddr_t)ifrp,
2497 					    sizeof(ifr.ifr_name));
2498 					if (error == 0)
2499 						error = copyout((caddr_t)sa,
2500 						    (caddr_t)&ifrp->ifr_addr,
2501 						    sa->sa_len);
2502 					ifrp = (struct ifreq *)(sa->sa_len +
2503 					    (caddr_t)&ifrp->ifr_addr);
2504 				}
2505 				if (error)
2506 					break;
2507 				space -= sizeof (ifr);
2508 			}
2509 	}
2510 	ifc->ifc_len -= space;
2511 	return (error);
2512 }
2513 
2514 void
2515 if_counters_alloc(struct ifnet *ifp)
2516 {
2517 	KASSERT(ifp->if_counters == NULL);
2518 
2519 	ifp->if_counters = counters_alloc(ifc_ncounters);
2520 }
2521 
2522 void
2523 if_counters_free(struct ifnet *ifp)
2524 {
2525 	KASSERT(ifp->if_counters != NULL);
2526 
2527 	counters_free(ifp->if_counters, ifc_ncounters);
2528 	ifp->if_counters = NULL;
2529 }
2530 
2531 void
2532 if_getdata(struct ifnet *ifp, struct if_data *data)
2533 {
2534 	unsigned int i;
2535 
2536 	*data = ifp->if_data;
2537 
2538 	if (ifp->if_counters != NULL) {
2539 		uint64_t counters[ifc_ncounters];
2540 
2541 		counters_read(ifp->if_counters, counters, nitems(counters));
2542 
2543 		data->ifi_ipackets += counters[ifc_ipackets];
2544 		data->ifi_ierrors += counters[ifc_ierrors];
2545 		data->ifi_opackets += counters[ifc_opackets];
2546 		data->ifi_oerrors += counters[ifc_oerrors];
2547 		data->ifi_collisions += counters[ifc_collisions];
2548 		data->ifi_ibytes += counters[ifc_ibytes];
2549 		data->ifi_obytes += counters[ifc_obytes];
2550 		data->ifi_imcasts += counters[ifc_imcasts];
2551 		data->ifi_omcasts += counters[ifc_omcasts];
2552 		data->ifi_iqdrops += counters[ifc_iqdrops];
2553 		data->ifi_oqdrops += counters[ifc_oqdrops];
2554 		data->ifi_noproto += counters[ifc_noproto];
2555 	}
2556 
2557 	for (i = 0; i < ifp->if_nifqs; i++) {
2558 		struct ifqueue *ifq = ifp->if_ifqs[i];
2559 
2560 		ifq_add_data(ifq, data);
2561 	}
2562 
2563 	for (i = 0; i < ifp->if_niqs; i++) {
2564 		struct ifiqueue *ifiq = ifp->if_iqs[i];
2565 
2566 		ifiq_add_data(ifiq, data);
2567 	}
2568 }
2569 
2570 /*
2571  * Dummy functions replaced in ifnet during detach (if protocols decide to
2572  * fiddle with the if during detach.
2573  */
void
if_detached_qstart(struct ifqueue *ifq)
{
	/* The interface is gone: drop everything still queued. */
	ifq_purge(ifq);
}
2579 
int
if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
{
	/* All ioctls fail once the interface has been detached. */
	return ENODEV;
}
2585 
2586 /*
2587  * Create interface group without members
2588  */
2589 struct ifg_group *
2590 if_creategroup(const char *groupname)
2591 {
2592 	struct ifg_group	*ifg;
2593 
2594 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2595 		return (NULL);
2596 
2597 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2598 	ifg->ifg_refcnt = 0;
2599 	ifg->ifg_carp_demoted = 0;
2600 	TAILQ_INIT(&ifg->ifg_members);
2601 #if NPF > 0
2602 	pfi_attach_ifgroup(ifg);
2603 #endif
2604 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2605 
2606 	return (ifg);
2607 }
2608 
2609 /*
2610  * Add a group to an interface
2611  */
2612 int
2613 if_addgroup(struct ifnet *ifp, const char *groupname)
2614 {
2615 	struct ifg_list		*ifgl;
2616 	struct ifg_group	*ifg = NULL;
2617 	struct ifg_member	*ifgm;
2618 
2619 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2620 	    groupname[strlen(groupname) - 1] <= '9')
2621 		return (EINVAL);
2622 
2623 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2624 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2625 			return (EEXIST);
2626 
2627 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2628 		return (ENOMEM);
2629 
2630 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2631 		free(ifgl, M_TEMP, sizeof(*ifgl));
2632 		return (ENOMEM);
2633 	}
2634 
2635 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2636 		if (!strcmp(ifg->ifg_group, groupname))
2637 			break;
2638 
2639 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
2640 		free(ifgl, M_TEMP, sizeof(*ifgl));
2641 		free(ifgm, M_TEMP, sizeof(*ifgm));
2642 		return (ENOMEM);
2643 	}
2644 
2645 	ifg->ifg_refcnt++;
2646 	ifgl->ifgl_group = ifg;
2647 	ifgm->ifgm_ifp = ifp;
2648 
2649 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2650 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2651 
2652 #if NPF > 0
2653 	pfi_group_addmember(groupname, ifp);
2654 #endif
2655 
2656 	return (0);
2657 }
2658 
2659 /*
2660  * Remove a group from an interface
2661  */
2662 int
2663 if_delgroup(struct ifnet *ifp, const char *groupname)
2664 {
2665 	struct ifg_list		*ifgl;
2666 	struct ifg_member	*ifgm;
2667 
2668 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2669 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2670 			break;
2671 	if (ifgl == NULL)
2672 		return (ENOENT);
2673 
2674 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2675 
2676 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2677 		if (ifgm->ifgm_ifp == ifp)
2678 			break;
2679 
2680 	if (ifgm != NULL) {
2681 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2682 		free(ifgm, M_TEMP, sizeof(*ifgm));
2683 	}
2684 
2685 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2686 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2687 #if NPF > 0
2688 		pfi_detach_ifgroup(ifgl->ifgl_group);
2689 #endif
2690 		free(ifgl->ifgl_group, M_TEMP, 0);
2691 	}
2692 
2693 	free(ifgl, M_TEMP, sizeof(*ifgl));
2694 
2695 #if NPF > 0
2696 	pfi_group_change(groupname);
2697 #endif
2698 
2699 	return (0);
2700 }
2701 
2702 /*
2703  * Stores all groups from an interface in memory pointed
2704  * to by data
2705  */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int			 len, error;
	struct ifg_list		*ifgl;
	struct ifg_req		 ifgrq, *ifgp;
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;

	/* Size query: report how much room the full list needs. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	/* Copy out one ifg_req per group, bounded by the user's buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2737 
2738 /*
2739  * Stores all members of a group in memory pointed to by data
2740  */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	struct ifg_req		 ifgrq, *ifgp;
	int			 len, error;

	/* Look up the requested group by name. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* Size query: report how much room the member list needs. */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	/* Copy out one ifg_req per member, bounded by the user's buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
2779 
2780 int
2781 if_getgroupattribs(caddr_t data)
2782 {
2783 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2784 	struct ifg_group	*ifg;
2785 
2786 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2787 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2788 			break;
2789 	if (ifg == NULL)
2790 		return (ENOENT);
2791 
2792 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2793 
2794 	return (0);
2795 }
2796 
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/*
	 * The request carries a signed delta; reject it if the
	 * accumulated demotion counter would leave [0, 0xff].
	 */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* Let every member interface react to the attribute change. */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2823 
2824 /*
2825  * Stores all groups in memory pointed to by data
2826  */
2827 int
2828 if_getgrouplist(caddr_t data)
2829 {
2830 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2831 	struct ifg_group	*ifg;
2832 	struct ifg_req		 ifgrq, *ifgp;
2833 	int			 len, error;
2834 
2835 	if (ifgr->ifgr_len == 0) {
2836 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2837 			ifgr->ifgr_len += sizeof(ifgrq);
2838 		return (0);
2839 	}
2840 
2841 	len = ifgr->ifgr_len;
2842 	ifgp = ifgr->ifgr_groups;
2843 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2844 		if (len < sizeof(ifgrq))
2845 			return (EINVAL);
2846 		bzero(&ifgrq, sizeof ifgrq);
2847 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2848                     sizeof(ifgrq.ifgrq_group));
2849 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2850                     sizeof(struct ifg_req))))
2851 			return (error);
2852 		len -= sizeof(ifgrq);
2853 		ifgp++;
2854 	}
2855 
2856 	return (0);
2857 }
2858 
/*
 * Called on route changes: if a default route (destination and mask
 * both "any") was involved, rebuild the "egress" interface group.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2880 
/*
 * Rebuild the "egress" group from scratch: empty it, then add every
 * interface that carries an IPv4 or IPv6 default route.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Drop all current members, if the group exists at all. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Add every interface with an IPv4 default route. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Likewise for IPv6 default routes. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2930 
2931 /*
2932  * Set/clear promiscuous mode on interface ifp based on the truth value
2933  * of pswitch.  The calls are reference counted so that only the first
2934  * "on" request actually has an effect, as does the final "off" request.
2935  * Results are undefined if the "off" and "on" requests are not matched.
2936  */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	unsigned short oif_flags;
	int oif_pcount, error;

	/* Snapshot state so we can roll back if the driver refuses. */
	oif_flags = ifp->if_flags;
	oif_pcount = ifp->if_pcount;
	if (pswitch) {
		/* Only the first reference actually turns the mode on. */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
	} else {
		/* Only the last reference turns it back off. */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
	}

	/* Interface is down: the flag change takes effect when it comes up. */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = ifp->if_flags;
	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
	if (error) {
		/* Driver rejected the change: restore flags and refcount. */
		ifp->if_flags = oif_flags;
		ifp->if_pcount = oif_pcount;
	}

	return (error);
}
2969 
2970 void
2971 ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
2972 {
2973 	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
2974 }
2975 
2976 void
2977 ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
2978 {
2979 	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
2980 }
2981 
/*
 * Overwrite an address's broadcast address in place; only sockaddrs
 * of identical length are supported.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
2989 
2990 #ifdef DDB
2991 /* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* Print every INET/INET6 address of every interface. */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
3020 #endif /* DDB */
3021 
/*
 * Called after an interface's link-layer address has changed: push
 * the flags to the driver with the interface up (presumably so the
 * driver reprograms the hardware address -- driver-dependent), and
 * regenerate the IPv6 link-local address by purge + reattach.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* ...then come up so the driver sees the new state. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			dohooks(ifp->if_addrhooks, 0);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3068 
int net_ticks;		/* tick count as of the last net_tick() run */
u_int net_livelocks;	/* times net_tick() ran more than one tick late */
3071 
/*
 * Periodic timeout; if we ran more than one tick late, the system is
 * considered livelocked.  Records the current tick and rearms itself.
 */
void
net_tick(void *null)
{
	extern int ticks;

	if (ticks - net_ticks > 1)
		net_livelocks++;

	net_ticks = ticks;

	timeout_add(&net_tick_to, 1);
}
3084 
3085 int
3086 net_livelocked(void)
3087 {
3088 	extern int ticks;
3089 
3090 	return (ticks - net_ticks > 1);
3091 }
3092 
3093 void
3094 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3095 {
3096 	extern int ticks;
3097 
3098 	memset(rxr, 0, sizeof(*rxr));
3099 
3100 	rxr->rxr_adjusted = ticks;
3101 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3102 	rxr->rxr_hwm = hwm;
3103 }
3104 
/*
 * Adjust the current watermark: shrink it towards the low mark while
 * the stack is livelocked, grow it towards the high mark while the
 * ring keeps running low on buffers.
 */
static inline void
if_rxr_adjust_cwm(struct if_rxring *rxr)
{
	extern int ticks;

	if (net_livelocked()) {
		/* Livelocked: back off, but never below the low mark. */
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;
		else
			return;
	} else if (rxr->rxr_alive >= rxr->rxr_lwm)
		return;
	else if (rxr->rxr_cwm < rxr->rxr_hwm)
		rxr->rxr_cwm++;

	rxr->rxr_adjusted = ticks;
}
3122 
3123 void
3124 if_rxr_livelocked(struct if_rxring *rxr)
3125 {
3126 	extern int ticks;
3127 
3128 	if (ticks - rxr->rxr_adjusted >= 1) {
3129 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3130 			rxr->rxr_cwm--;
3131 
3132 		rxr->rxr_adjusted = ticks;
3133 	}
3134 }
3135 
/*
 * Grant the caller up to max rx slots, bounded by the current
 * watermark.  Returns the number of slots granted (possibly 0).
 */
u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	/* Ring already holds at least the current watermark. */
	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;

	return (diff);
}
3155 
/*
 * Copy up to t rx ring descriptions from e out to the user's
 * if_rxrinfo, clamped to the space the user provided; the total
 * number available is always reported back in ifri_total.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* Never copy out more entries than the user has room for. */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3178 
3179 int
3180 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3181     struct if_rxring *rxr)
3182 {
3183 	struct if_rxring_info ifr;
3184 
3185 	memset(&ifr, 0, sizeof(ifr));
3186 
3187 	if (name != NULL)
3188 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3189 
3190 	ifr.ifr_size = size;
3191 	ifr.ifr_info = *rxr;
3192 
3193 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3194 }
3195 
3196 /*
3197  * Network stack input queues.
3198  */
3199 
3200 void
3201 niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
3202 {
3203 	mq_init(&niq->ni_q, maxlen, IPL_NET);
3204 	niq->ni_isr = isr;
3205 }
3206 
3207 int
3208 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3209 {
3210 	int rv;
3211 
3212 	rv = mq_enqueue(&niq->ni_q, m);
3213 	if (rv == 0)
3214 		schednetisr(niq->ni_isr);
3215 	else
3216 		if_congestion();
3217 
3218 	return (rv);
3219 }
3220 
3221 int
3222 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3223 {
3224 	int rv;
3225 
3226 	rv = mq_enlist(&niq->ni_q, ml);
3227 	if (rv == 0)
3228 		schednetisr(niq->ni_isr);
3229 	else
3230 		if_congestion();
3231 
3232 	return (rv);
3233 }
3234 
/*
 * Sink for address families no caller should ever pass; reaching
 * this is a bug in the caller.
 */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3240 
3241 /*
3242  * XXXSMP This tunable is here to work around the fact that IPsec
3243  * globals aren't ready to be accessed by multiple threads in
3244  * parallel.
3245  */
3246 int		 nettaskqs = NET_TASKQ;
3247 
3248 struct taskq *
3249 net_tq(unsigned int ifindex)
3250 {
3251 	struct taskq *t = NULL;
3252 
3253 	t = nettqmp[ifindex % nettaskqs];
3254 
3255 	return (t);
3256 }
3257