xref: /openbsd-src/sys/net/if.c (revision d1df930ffab53da22f3324c32bed7ac5709915e6)
1 /*	$OpenBSD: if.c,v 1.566 2018/10/01 12:38:32 mpi Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "trunk.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/proc.h>
88 
89 #include <dev/rndvar.h>
90 
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/netisr.h>
96 
97 #include <netinet/in.h>
98 #include <netinet/if_ether.h>
99 #include <netinet/igmp.h>
100 #ifdef MROUTING
101 #include <netinet/ip_mroute.h>
102 #endif
103 
104 #ifdef INET6
105 #include <netinet6/in6_var.h>
106 #include <netinet6/in6_ifattach.h>
107 #include <netinet6/nd6.h>
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 #endif
111 
112 #ifdef MPLS
113 #include <netmpls/mpls.h>
114 #endif
115 
116 #if NBPFILTER > 0
117 #include <net/bpf.h>
118 #endif
119 
120 #if NBRIDGE > 0
121 #include <net/if_bridge.h>
122 #endif
123 
124 #if NCARP > 0
125 #include <netinet/ip_carp.h>
126 #endif
127 
128 #if NPF > 0
129 #include <net/pfvar.h>
130 #endif
131 
132 #include <sys/device.h>
133 
/* attach/detach helpers */
void	if_attachsetup(struct ifnet *);
void	if_attachdomain(struct ifnet *);
void	if_attach_common(struct ifnet *);
int	if_createrdomain(int, struct ifnet *);
int	if_setrdomain(struct ifnet *, int);
void	if_slowtimo(void *);

/* stubs installed on a detached interface */
void	if_detached_qstart(struct ifqueue *);
int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);

int	ifioctl_get(u_long, caddr_t);
int	ifconf(caddr_t);

/* interface group ioctl backends */
int	if_getgroup(caddr_t, struct ifnet *);
int	if_getgroupmembers(caddr_t);
int	if_getgroupattribs(caddr_t);
int	if_setgroupattribs(caddr_t);
int	if_getgrouplist(caddr_t);

void	if_linkstate(struct ifnet *);
void	if_linkstate_task(void *);

int	if_clone_list(struct if_clonereq *);
struct if_clone	*if_clone_lookup(const char *, int *);

int	if_group_egress_build(void);

void	if_watchdog_task(void *);

void	if_netisr(void *);

#ifdef DDB
void	ifa_print_all(void);
#endif

void	if_qstart_compat(struct ifqueue *);
170 
171 /*
172  * interface index map
173  *
174  * the kernel maintains a mapping of interface indexes to struct ifnet
175  * pointers.
176  *
177  * the map is an array of struct ifnet pointers prefixed by an if_map
178  * structure. the if_map structure stores the length of its array.
179  *
180  * as interfaces are attached to the system, the map is grown on demand
181  * up to USHRT_MAX entries.
182  *
183  * interface index 0 is reserved and represents no interface. this
184  * supports the use of the interface index as the scope for IPv6 link
185  * local addresses, where scope 0 means no scope has been specified.
186  * it also supports the use of interface index as the unique identifier
187  * for network interfaces in SNMP applications as per RFC2863. therefore
188  * if_get(0) returns NULL.
189  */
190 
void if_ifp_dtor(void *, void *);
void if_map_dtor(void *, void *);
struct ifnet *if_ref(struct ifnet *);

/*
 * struct if_map
 *
 * bounded array of ifnet srp pointers used to fetch references of live
 * interfaces with if_get().
 */

struct if_map {
	unsigned long		 limit;	/* number of srp slots that follow */
	/* followed by limit ifnet srp pointers */
};

/*
 * struct if_idxmap
 *
 * infrastructure to manage updates and accesses to the current if_map.
 */

struct if_idxmap {
	unsigned int		 serial;	/* next candidate index */
	unsigned int		 count;		/* interfaces currently mapped */
	struct srp		 map;		/* points at the live if_map */
};

void	if_idxmap_init(unsigned int);
void	if_idxmap_insert(struct ifnet *);
void	if_idxmap_remove(struct ifnet *);
222 
/* list of all interface groups in the system */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/* registered interface cloners (e.g. "vlan", "bridge") */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;

/* periodic livelock-detection tick */
struct timeout net_tick_to;
void	net_tick(void *);
int	net_livelocked(void);
int	ifq_congestion;

/* bitmask of pending legacy software interrupts, serviced by if_netisr() */
int		 netisr;

#define	NET_TASKQ	1
struct taskq	*nettqmp[NET_TASKQ];

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
245 
246 /*
247  * Network interface utility routines.
248  */
249 void
250 ifinit(void)
251 {
252 	unsigned int	i;
253 
254 	/*
255 	 * most machines boot with 4 or 5 interfaces, so size the initial map
256 	 * to accomodate this
257 	 */
258 	if_idxmap_init(8);
259 
260 	timeout_set(&net_tick_to, net_tick, &net_tick_to);
261 
262 	for (i = 0; i < NET_TASKQ; i++) {
263 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
264 		if (nettqmp[i] == NULL)
265 			panic("unable to create network taskq %d", i);
266 	}
267 
268 	net_tick(&net_tick_to);
269 }
270 
/* the global interface index map; initialised for real by if_idxmap_init() */
static struct if_idxmap if_idxmap = {
	0,	/* serial */
	0,	/* count */
	SRP_INITIALIZER()
};

/* garbage collectors for ifnet references and retired maps */
struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of all attached interfaces */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
281 
282 void
283 if_idxmap_init(unsigned int limit)
284 {
285 	struct if_map *if_map;
286 	struct srp *map;
287 	unsigned int i;
288 
289 	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
290 
291 	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
292 	    M_IFADDR, M_WAITOK);
293 
294 	if_map->limit = limit;
295 	map = (struct srp *)(if_map + 1);
296 	for (i = 0; i < limit; i++)
297 		srp_init(&map[i]);
298 
299 	/* this is called early so there's nothing to race with */
300 	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
301 }
302 
/*
 * Allocate an interface index for ifp and publish the ifnet pointer in
 * the global index map so if_get() can find it.  Doubles the map when
 * the candidate index falls outside the current array.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	/* the map keeps one reference per published interface */
	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* indexes are 16 bits wide, so serial wraps at USHRT_MAX */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		/* grow by doubling; copy live entries across */
		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* each copied slot takes its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		/* initialise the newly added tail of the array */
		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* publishing the new map retires the old one via if_map_gc */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index; 0 stays reserved */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
366 
/*
 * Withdraw ifp from the interface index map and sleep until the last
 * reference handed out via if_get() has been released.  After this
 * returns no other CPU can reach ifp through the map.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* clearing the slot drops the map's reference via if_ifp_gc */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
392 
/*
 * srp_gc callback: drop the reference a map slot held on an interface.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
398 
/*
 * srp_gc callback: destroy a retired if_map once its last SRP reference
 * is gone, releasing any interface references it still holds.
 */
void
if_map_dtor(void *null, void *m)
{
	struct if_map *if_map = m;
	struct srp *map = (struct srp *)(if_map + 1);
	unsigned int i;

	/*
	 * dont need to serialize the use of update_locked since this is
	 * the last reference to this map. there's nothing to race against.
	 */
	for (i = 0; i < if_map->limit; i++)
		srp_update_locked(&if_ifp_gc, &map[i], NULL);

	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
}
415 
416 /*
417  * Attach an interface to the
418  * list of "active" interfaces.
419  */
/*
 * Second stage of interface attachment, after the interface has been
 * inserted on the global list: group membership, per-domain data, the
 * slow timer, the index map entry and the routing socket announcement.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	/* every interface belongs to the "all" group */
	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	/* arm the watchdog timer; if_slowtimo() reschedules itself */
	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	/* the tasks carry the interface index, cast to a pointer */
	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
450 
451 /*
452  * Allocate the link level name for the specified interface.  This
453  * is an attachment helper.  It must be called after ifp->if_addrlen
454  * is initialized, which may not be the case when if_attach() is
455  * called.
456  */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if (ifp->if_sadl != NULL)
		if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	/* round the size up to the next multiple of sizeof(long) */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	/* sdl_len records the allocation size for the eventual free */
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	/* sdl_data carries the name; the link address follows it */
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
489 
490 /*
491  * Free the link level name for the specified interface.  This is
492  * a detach helper.  This is called from if_detach() or from
493  * link layer type specific detach functions.
494  */
495 void
496 if_free_sadl(struct ifnet *ifp)
497 {
498 	free(ifp->if_sadl, M_IFADDR, 0);
499 	ifp->if_sadl = NULL;
500 }
501 
/*
 * Let every protocol domain attach its per-interface state, stored in
 * the if_afdata array indexed by domain family.
 */
void
if_attachdomain(struct ifnet *ifp)
{
	struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
520 
/*
 * Attach ifp at the head of the global interface list.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
530 
/*
 * Attach ifp at the tail of the global interface list.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
540 
541 void
542 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
543 {
544 	struct ifqueue **map;
545 	struct ifqueue *ifq;
546 	int i;
547 
548 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
549 	KASSERT(nqs != 0);
550 
551 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
552 
553 	ifp->if_snd.ifq_softc = NULL;
554 	map[0] = &ifp->if_snd;
555 
556 	for (i = 1; i < nqs; i++) {
557 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
558 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
559 		ifq_init(ifq, ifp, i);
560 		map[i] = ifq;
561 	}
562 
563 	ifp->if_ifqs = map;
564 	ifp->if_nifqs = nqs;
565 }
566 
/*
 * Give ifp an array of niqs receive queues.  The builtin if_rcv queue
 * becomes slot 0; the extra queues are allocated here.
 */
void
if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
{
	struct ifiqueue **map;
	struct ifiqueue *ifiq;
	unsigned int i;

	KASSERT(niqs != 0);

	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);

	ifp->if_rcv.ifiq_softc = NULL;
	map[0] = &ifp->if_rcv;

	for (i = 1; i < niqs; i++) {
		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
		ifiq_init(ifiq, ifp, i);
		map[i] = ifiq;
	}

	ifp->if_iqs = map;
	ifp->if_niqs = niqs;
}
590 
/*
 * First stage of interface attachment, run without the net lock:
 * initialise the address lists, the default send/receive queues, the
 * hook lists, and defaults the driver did not provide.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	/*
	 * non-MPSAFE drivers provide if_start; install the translation
	 * shim so the stack can always call if_qstart.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* the builtin if_snd is queue 0 of a one-entry queue map */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;

	/* likewise if_rcv is receive queue 0 */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	/* hook lists; freed again in if_detach() */
	ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_addrhooks);
	ifp->if_linkstatehooks = malloc(sizeof(*ifp->if_linkstatehooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_linkstatehooks);
	ifp->if_detachhooks = malloc(sizeof(*ifp->if_detachhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_detachhooks);

	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	ifp->if_llprio = IFQ_DEFPRIO;

	SRPL_INIT(&ifp->if_inputs);
}
638 
/*
 * Install a new set of queueing operations (e.g. hfsc) on ifp.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
653 
/*
 * Legacy transmit entry point; only valid on interfaces that use the
 * if_qstart_compat shim (i.e. non-MPSAFE drivers).
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* old drivers expect the kernel lock and splnet protection */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
681 
/*
 * Queue an outgoing packet on one of ifp's transmit queues and kick
 * that queue.  Returns 0 on success or an errno if the packet was
 * dropped or diverted with an error.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	unsigned int idx;
	struct ifqueue *ifq;
	int error;

#if NPF > 0
	/* pf may ask for this packet to be delayed before transmit */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* divert to the bridge unless the packet is flagged M_PROTO1 */
	if (ifp->if_bridgeport && (m->m_flags & M_PROTO1) == 0) {
		KERNEL_LOCK();
		error = bridge_output(ifp, m, NULL, NULL);
		KERNEL_UNLOCK();
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	/*
	 * use the operations on the first ifq to pick which of the array
	 * gets this mbuf.
	 */
	idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
	ifq = ifp->if_ifqs[idx];

	error = ifq_enqueue(ifq, m);
	if (error)
		return (error);

	ifq_start(ifq);

	return (0);
}
722 
/*
 * Feed a list of received packets into the interface's first receive
 * queue; 2048 is the backlog watermark handed to ifiq_input().
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml, 2048);
}
728 
/*
 * Loop a locally generated packet back into the stack as if it had
 * been received on ifp, updating both the output and input counters.
 * Returns 0, or EAFNOSUPPORT (and frees m) for an unhandled family.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destinated to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* rebuild the packet header as if freshly received on ifp */
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* looped packets count as both sent and received */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
780 
781 int
782 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
783 {
784 	struct ifiqueue *ifiq;
785 	unsigned int flow = 0;
786 
787 	m->m_pkthdr.ph_family = af;
788 	m->m_pkthdr.ph_ifidx = ifp->if_index;
789 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
790 
791 	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
792 		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;
793 
794 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
795 
796 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
797 }
798 
/*
 * struct ifih
 *
 * per-interface input handler, chained off ifp->if_inputs.  an entry
 * is shared when the same input/cookie pair is inserted twice.
 */
struct ifih {
	SRPL_ENTRY(ifih)	  ifih_next;
	int			(*ifih_input)(struct ifnet *, struct mbuf *,
				      void *);
	void			 *ifih_cookie;
	int			  ifih_refcnt;	/* insert/remove pairing */
	struct refcnt		  ifih_srpcnt;	/* SRP list readers */
};

void	if_ih_ref(void *, void *);
void	if_ih_unref(void *, void *);

struct srpl_rc ifih_rc = SRPL_RC_INITIALIZER(if_ih_ref, if_ih_unref, NULL);
812 
/*
 * Register an input handler on ifp.  If the same input/cookie pair is
 * already present, its reference count is bumped instead of inserting
 * a duplicate entry.
 */
void
if_ih_insert(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie) {
			ifih->ifih_refcnt++;
			break;
		}
	}

	if (ifih == NULL) {
		ifih = malloc(sizeof(*ifih), M_DEVBUF, M_WAITOK);

		ifih->ifih_input = input;
		ifih->ifih_cookie = cookie;
		ifih->ifih_refcnt = 1;
		refcnt_init(&ifih->ifih_srpcnt);
		/* newest handler goes first, so it gets the packet first */
		SRPL_INSERT_HEAD_LOCKED(&ifih_rc, &ifp->if_inputs,
		    ifih, ifih_next);
	}
}
840 
/*
 * srpl_rc callback: an SRP reader took hold of this handler entry.
 */
void
if_ih_ref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_take(&ifih->ifih_srpcnt);
}
848 
/*
 * srpl_rc callback: an SRP reader let go of this handler entry; wake
 * anyone waiting in refcnt_finalize().
 */
void
if_ih_unref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_rele_wake(&ifih->ifih_srpcnt);
}
856 
/*
 * Unregister an input handler from ifp.  The entry is only unlinked
 * and freed when the last insert for this input/cookie pair has been
 * matched by a remove; freeing waits for in-flight SRP readers.
 */
void
if_ih_remove(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie)
			break;
	}

	KASSERT(ifih != NULL);

	if (--ifih->ifih_refcnt == 0) {
		SRPL_REMOVE_LOCKED(&ifih_rc, &ifp->if_inputs, ifih,
		    ifih, ifih_next);

		/* wait for readers still traversing the SRP list */
		refcnt_finalize(&ifih->ifih_srpcnt, "ifihrm");
		free(ifih, M_DEVBUF, sizeof(*ifih));
	}
}
881 
/*
 * Run a list of received packets through ifp's chain of input
 * handlers.  A packet is offered to each handler in turn until one
 * consumes it; unconsumed packets are freed.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;
	struct ifih *ifih;
	struct srp_ref sr;
	int s;

	if (ml_empty(ml))
		return;

	/* cloned (pseudo) interfaces don't contribute real entropy */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists.
	 */
	NET_RLOCK();
	s = splnet();
	while ((m = ml_dequeue(ml)) != NULL) {
		/*
		 * Pass this mbuf to all input handlers of its
		 * interface until it is consumed.
		 */
		SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
			if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
				break;
		}
		SRPL_LEAVE(&sr);

		/* no handler claimed the packet: drop it */
		if (ifih == NULL)
			m_freem(m);
	}
	splx(s);
	NET_RUNLOCK();
}
927 
/*
 * Task servicing the legacy netisr soft interrupts.  Loops until the
 * netisr bitmask is drained, dispatching each pending protocol; the
 * ones that are not yet MP safe run under the kernel lock.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX)) {
			KERNEL_LOCK();
			pipexintr();
			KERNEL_UNLOCK();
		}
#endif
		/* remember everything serviced across iterations */
		t |= n;
	}

#if NPFSYNC > 0
	/* pfsync runs once, after the loop has drained the bitmask */
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
1003 
/*
 * Run the detach hooks registered on ifp, undoing any pseudo-driver
 * (bridge, carp, ...) state before the interface is torn down.
 */
void
if_deactivate(struct ifnet *ifp)
{
	NET_LOCK();
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */
	dohooks(ifp->if_detachhooks, HOOK_REMOVE | HOOK_FREE);

	NET_UNLOCK();
}
1017 
1018 /*
1019  * Detach an interface from everything in the kernel.  Also deallocate
1020  * private resources.
1021  */
1022 void
1023 if_detach(struct ifnet *ifp)
1024 {
1025 	struct ifaddr *ifa;
1026 	struct ifg_list *ifg;
1027 	struct domain *dp;
1028 	int i, s;
1029 
1030 	/* Undo pseudo-driver changes. */
1031 	if_deactivate(ifp);
1032 
1033 	ifq_clr_oactive(&ifp->if_snd);
1034 
1035 	/* Other CPUs must not have a reference before we start destroying. */
1036 	if_idxmap_remove(ifp);
1037 
1038 #if NBPFILTER > 0
1039 	bpfdetach(ifp);
1040 #endif
1041 
1042 	NET_LOCK();
1043 	s = splnet();
1044 	ifp->if_qstart = if_detached_qstart;
1045 	ifp->if_ioctl = if_detached_ioctl;
1046 	ifp->if_watchdog = NULL;
1047 
1048 	/* Remove the watchdog timeout & task */
1049 	timeout_del(&ifp->if_slowtimo);
1050 	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1051 
1052 	/* Remove the link state task */
1053 	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1054 
1055 	rti_delete(ifp);
1056 #if NETHER > 0 && defined(NFSCLIENT)
1057 	if (ifp->if_index == revarp_ifidx)
1058 		revarp_ifidx = 0;
1059 #endif
1060 #ifdef MROUTING
1061 	vif_delete(ifp);
1062 #endif
1063 	in_ifdetach(ifp);
1064 #ifdef INET6
1065 	in6_ifdetach(ifp);
1066 #endif
1067 #if NPF > 0
1068 	pfi_detach_ifnet(ifp);
1069 #endif
1070 
1071 	/* Remove the interface from the list of all interfaces.  */
1072 	TAILQ_REMOVE(&ifnet, ifp, if_list);
1073 
1074 	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
1075 		if_delgroup(ifp, ifg->ifgl_group->ifg_group);
1076 
1077 	if_free_sadl(ifp);
1078 
1079 	/* We should not have any address left at this point. */
1080 	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
1081 #ifdef DIAGNOSTIC
1082 		printf("%s: address list non empty\n", ifp->if_xname);
1083 #endif
1084 		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
1085 			ifa_del(ifp, ifa);
1086 			ifa->ifa_ifp = NULL;
1087 			ifafree(ifa);
1088 		}
1089 	}
1090 
1091 	free(ifp->if_addrhooks, M_TEMP, 0);
1092 	free(ifp->if_linkstatehooks, M_TEMP, 0);
1093 	free(ifp->if_detachhooks, M_TEMP, 0);
1094 
1095 	for (i = 0; (dp = domains[i]) != NULL; i++) {
1096 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
1097 			(*dp->dom_ifdetach)(ifp,
1098 			    ifp->if_afdata[dp->dom_family]);
1099 	}
1100 
1101 	/* Announce that the interface is gone. */
1102 	rtm_ifannounce(ifp, IFAN_DEPARTURE);
1103 	splx(s);
1104 	NET_UNLOCK();
1105 
1106 	for (i = 0; i < ifp->if_nifqs; i++)
1107 		ifq_destroy(ifp->if_ifqs[i]);
1108 	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
1109 		for (i = 1; i < ifp->if_nifqs; i++) {
1110 			free(ifp->if_ifqs[i], M_DEVBUF,
1111 			    sizeof(struct ifqueue));
1112 		}
1113 		free(ifp->if_ifqs, M_DEVBUF,
1114 		    sizeof(struct ifqueue *) * ifp->if_nifqs);
1115 	}
1116 
1117 	for (i = 0; i < ifp->if_niqs; i++)
1118 		ifiq_destroy(ifp->if_iqs[i]);
1119 	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
1120 		for (i = 1; i < ifp->if_niqs; i++) {
1121 			free(ifp->if_iqs[i], M_DEVBUF,
1122 			    sizeof(struct ifiqueue));
1123 		}
1124 		free(ifp->if_iqs, M_DEVBUF,
1125 		    sizeof(struct ifiqueue *) * ifp->if_niqs);
1126 	}
1127 }
1128 
1129 /*
1130  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1131  */
1132 int
1133 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1134 {
1135 	struct ifnet *ifp;
1136 	int connected = 0;
1137 
1138 	ifp = if_get(ifidx);
1139 	if (ifp == NULL)
1140 		return (0);
1141 
1142 	if (ifp0->if_index == ifp->if_index)
1143 		connected = 1;
1144 
1145 #if NBRIDGE > 0
1146 	if (SAME_BRIDGE(ifp0->if_bridgeport, ifp->if_bridgeport))
1147 		connected = 1;
1148 #endif
1149 #if NCARP > 0
1150 	if ((ifp0->if_type == IFT_CARP && ifp0->if_carpdev == ifp) ||
1151 	    (ifp->if_type == IFT_CARP && ifp->if_carpdev == ifp0))
1152 		connected = 1;
1153 #endif
1154 
1155 	if_put(ifp);
1156 	return (connected);
1157 }
1158 
1159 /*
1160  * Create a clone network interface.
1161  */
/*
 * Create the cloned interface named ``name'' (e.g. "vlan0") and move
 * it into ``rdomain'' if one was requested.  Returns 0, EINVAL for an
 * unknown cloner, EEXIST if the name is taken, or the cloner's error.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	NET_ASSERT_LOCKED();

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	if (ifunit(name) != NULL)
		return (EEXIST);

	/* XXXSMP breaks atomicity */
	NET_UNLOCK();
	ret = (*ifc->ifc_create)(ifc, unit);
	NET_LOCK();

	/* re-lookup: the lock was dropped around the create callback */
	if (ret != 0 || (ifp = ifunit(name)) == NULL)
		return (ret);

	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);

	return (ret);
}
1192 
1193 /*
1194  * Destroy a clone network interface.
1195  */
1196 int
1197 if_clone_destroy(const char *name)
1198 {
1199 	struct if_clone *ifc;
1200 	struct ifnet *ifp;
1201 	int ret;
1202 
1203 	NET_ASSERT_LOCKED();
1204 
1205 	ifc = if_clone_lookup(name, NULL);
1206 	if (ifc == NULL)
1207 		return (EINVAL);
1208 
1209 	ifp = ifunit(name);
1210 	if (ifp == NULL)
1211 		return (ENXIO);
1212 
1213 	if (ifc->ifc_destroy == NULL)
1214 		return (EOPNOTSUPP);
1215 
1216 	if (ifp->if_flags & IFF_UP) {
1217 		int s;
1218 		s = splnet();
1219 		if_down(ifp);
1220 		splx(s);
1221 	}
1222 
1223 	/* XXXSMP breaks atomicity */
1224 	NET_UNLOCK();
1225 	ret = (*ifc->ifc_destroy)(ifp);
1226 	NET_LOCK();
1227 
1228 	return (ret);
1229 }
1230 
1231 /*
1232  * Look up a network interface cloner.
1233  */
1234 struct if_clone *
1235 if_clone_lookup(const char *name, int *unitp)
1236 {
1237 	struct if_clone *ifc;
1238 	const char *cp;
1239 	int unit;
1240 
1241 	/* separate interface name from unit */
1242 	for (cp = name;
1243 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1244 	    cp++)
1245 		continue;
1246 
1247 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1248 		return (NULL);	/* No name or unit number */
1249 
1250 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1251 		return (NULL);	/* unit number 0 padded */
1252 
1253 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1254 		if (strlen(ifc->ifc_name) == cp - name &&
1255 		    !strncmp(name, ifc->ifc_name, cp - name))
1256 			break;
1257 	}
1258 
1259 	if (ifc == NULL)
1260 		return (NULL);
1261 
1262 	unit = 0;
1263 	while (cp - name < IFNAMSIZ && *cp) {
1264 		if (*cp < '0' || *cp > '9' ||
1265 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1266 			/* Bogus unit number. */
1267 			return (NULL);
1268 		}
1269 		unit = (unit * 10) + (*cp++ - '0');
1270 	}
1271 
1272 	if (unitp != NULL)
1273 		*unitp = unit;
1274 	return (ifc);
1275 }
1276 
1277 /*
1278  * Register a network interface cloner.
1279  */
1280 void
1281 if_clone_attach(struct if_clone *ifc)
1282 {
1283 	/*
1284 	 * we are called at kernel boot by main(), when pseudo devices are
1285 	 * being attached. The main() is the only guy which may alter the
1286 	 * if_cloners. While system is running and main() is done with
1287 	 * initialization, the if_cloners becomes immutable.
1288 	 */
1289 	KASSERT(pdevinit_done == 0);
1290 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1291 	if_cloners_count++;
1292 }
1293 
1294 /*
1295  * Provide list of interface cloners to userspace.
1296  */
1297 int
1298 if_clone_list(struct if_clonereq *ifcr)
1299 {
1300 	char outbuf[IFNAMSIZ], *dst;
1301 	struct if_clone *ifc;
1302 	int count, error = 0;
1303 
1304 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1305 		/* Just asking how many there are. */
1306 		ifcr->ifcr_total = if_cloners_count;
1307 		return (0);
1308 	}
1309 
1310 	if (ifcr->ifcr_count < 0)
1311 		return (EINVAL);
1312 
1313 	ifcr->ifcr_total = if_cloners_count;
1314 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1315 
1316 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1317 		if (count == 0)
1318 			break;
1319 		bzero(outbuf, sizeof outbuf);
1320 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1321 		error = copyout(outbuf, dst, IFNAMSIZ);
1322 		if (error)
1323 			break;
1324 		count--;
1325 		dst += IFNAMSIZ;
1326 	}
1327 
1328 	return (error);
1329 }
1330 
1331 /*
1332  * set queue congestion marker
1333  */
1334 void
1335 if_congestion(void)
1336 {
1337 	extern int ticks;
1338 
1339 	ifq_congestion = ticks;
1340 }
1341 
1342 int
1343 if_congested(void)
1344 {
1345 	extern int ticks;
1346 	int diff;
1347 
1348 	diff = ticks - ifq_congestion;
1349 	if (diff < 0) {
1350 		ifq_congestion = ticks - hz;
1351 		return (0);
1352 	}
1353 
1354 	return (diff <= (hz / 100));
1355 }
1356 
/*
 * Sockaddr equality: byte-compare ``a2'' against ``a1'' over
 * (a1)->sa_len bytes (which includes sa_len and sa_family).
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1360 
1361 /*
1362  * Locate an interface based on a complete address.
1363  */
1364 struct ifaddr *
1365 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1366 {
1367 	struct ifnet *ifp;
1368 	struct ifaddr *ifa;
1369 	u_int rdomain;
1370 
1371 	rdomain = rtable_l2(rtableid);
1372 	KERNEL_LOCK();
1373 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1374 		if (ifp->if_rdomain != rdomain)
1375 			continue;
1376 
1377 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1378 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1379 				continue;
1380 
1381 			if (equal(addr, ifa->ifa_addr)) {
1382 				KERNEL_UNLOCK();
1383 				return (ifa);
1384 			}
1385 		}
1386 	}
1387 	KERNEL_UNLOCK();
1388 	return (NULL);
1389 }
1390 
1391 /*
1392  * Locate the point to point interface with a given destination address.
1393  */
1394 struct ifaddr *
1395 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1396 {
1397 	struct ifnet *ifp;
1398 	struct ifaddr *ifa;
1399 
1400 	rdomain = rtable_l2(rdomain);
1401 	KERNEL_LOCK();
1402 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1403 		if (ifp->if_rdomain != rdomain)
1404 			continue;
1405 		if (ifp->if_flags & IFF_POINTOPOINT) {
1406 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1407 				if (ifa->ifa_addr->sa_family !=
1408 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1409 					continue;
1410 				if (equal(addr, ifa->ifa_dstaddr)) {
1411 					KERNEL_UNLOCK();
1412 					return (ifa);
1413 				}
1414 			}
1415 		}
1416 	}
1417 	KERNEL_UNLOCK();
1418 	return (NULL);
1419 }
1420 
1421 /*
1422  * Find an interface address specific to an interface best matching
1423  * a given address.
1424  */
1425 struct ifaddr *
1426 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1427 {
1428 	struct ifaddr *ifa;
1429 	char *cp, *cp2, *cp3;
1430 	char *cplim;
1431 	struct ifaddr *ifa_maybe = NULL;
1432 	u_int af = addr->sa_family;
1433 
1434 	if (af >= AF_MAX)
1435 		return (NULL);
1436 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1437 		if (ifa->ifa_addr->sa_family != af)
1438 			continue;
1439 		if (ifa_maybe == NULL)
1440 			ifa_maybe = ifa;
1441 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1442 			if (equal(addr, ifa->ifa_addr) ||
1443 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1444 				return (ifa);
1445 			continue;
1446 		}
1447 		cp = addr->sa_data;
1448 		cp2 = ifa->ifa_addr->sa_data;
1449 		cp3 = ifa->ifa_netmask->sa_data;
1450 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1451 		for (; cp3 < cplim; cp3++)
1452 			if ((*cp++ ^ *cp2++) & *cp3)
1453 				break;
1454 		if (cp3 == cplim)
1455 			return (ifa);
1456 	}
1457 	return (ifa_maybe);
1458 }
1459 
/*
 * No-op rt_ifa request handler for interfaces that need no special
 * per-route setup or teardown.
 */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1464 
1465 /*
1466  * Default action when installing a local route on a point-to-point
1467  * interface.
1468  */
1469 void
1470 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1471 {
1472 	struct ifnet *lo0ifp;
1473 	struct ifaddr *ifa, *lo0ifa;
1474 
1475 	switch (req) {
1476 	case RTM_ADD:
1477 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1478 			break;
1479 
1480 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1481 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1482 			    rt_key(rt)->sa_len) == 0)
1483 				break;
1484 		}
1485 
1486 		if (ifa == NULL)
1487 			break;
1488 
1489 		KASSERT(ifa == rt->rt_ifa);
1490 
1491 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1492 		KASSERT(lo0ifp != NULL);
1493 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1494 			if (lo0ifa->ifa_addr->sa_family ==
1495 			    ifa->ifa_addr->sa_family)
1496 				break;
1497 		}
1498 		if_put(lo0ifp);
1499 
1500 		if (lo0ifa == NULL)
1501 			break;
1502 
1503 		rt->rt_flags &= ~RTF_LLINFO;
1504 		break;
1505 	case RTM_DELETE:
1506 	case RTM_RESOLVE:
1507 	default:
1508 		break;
1509 	}
1510 }
1511 
1512 
1513 /*
1514  * Bring down all interfaces
1515  */
1516 void
1517 if_downall(void)
1518 {
1519 	struct ifreq ifrq;	/* XXX only partly built */
1520 	struct ifnet *ifp;
1521 
1522 	NET_LOCK();
1523 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1524 		if ((ifp->if_flags & IFF_UP) == 0)
1525 			continue;
1526 		if_down(ifp);
1527 		ifrq.ifr_flags = ifp->if_flags;
1528 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1529 	}
1530 	NET_UNLOCK();
1531 }
1532 
1533 /*
1534  * Mark an interface down and notify protocols of
1535  * the transition.
1536  */
1537 void
1538 if_down(struct ifnet *ifp)
1539 {
1540 	NET_ASSERT_LOCKED();
1541 
1542 	ifp->if_flags &= ~IFF_UP;
1543 	getmicrotime(&ifp->if_lastchange);
1544 	IFQ_PURGE(&ifp->if_snd);
1545 
1546 	if_linkstate(ifp);
1547 }
1548 
1549 /*
1550  * Mark an interface up and notify protocols of
1551  * the transition.
1552  */
1553 void
1554 if_up(struct ifnet *ifp)
1555 {
1556 	NET_ASSERT_LOCKED();
1557 
1558 	ifp->if_flags |= IFF_UP;
1559 	getmicrotime(&ifp->if_lastchange);
1560 
1561 #ifdef INET6
1562 	/* Userland expects the kernel to set ::1 on default lo(4). */
1563 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1564 		in6_ifattach(ifp);
1565 #endif
1566 
1567 	if_linkstate(ifp);
1568 }
1569 
1570 /*
1571  * Notify userland, the routing table and hooks owner of
1572  * a link-state transition.
1573  */
1574 void
1575 if_linkstate_task(void *xifidx)
1576 {
1577 	unsigned int ifidx = (unsigned long)xifidx;
1578 	struct ifnet *ifp;
1579 
1580 	KERNEL_LOCK();
1581 	NET_LOCK();
1582 
1583 	ifp = if_get(ifidx);
1584 	if (ifp != NULL)
1585 		if_linkstate(ifp);
1586 	if_put(ifp);
1587 
1588 	NET_UNLOCK();
1589 	KERNEL_UNLOCK();
1590 }
1591 
/*
 * Perform the actual link-state notification: route socket message,
 * routing table update and the registered link-state hooks.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	rtm_ifchg(ifp);
	rt_if_track(ifp);
	dohooks(ifp->if_linkstatehooks, 0);
}
1601 
1602 /*
1603  * Schedule a link state change task.
1604  */
1605 void
1606 if_link_state_change(struct ifnet *ifp)
1607 {
1608 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1609 }
1610 
1611 /*
1612  * Handle interface watchdog timer routine.  Called
1613  * from softclock, we decrement timer (if set) and
1614  * call the appropriate interface routine on expiration.
1615  */
1616 void
1617 if_slowtimo(void *arg)
1618 {
1619 	struct ifnet *ifp = arg;
1620 	int s = splnet();
1621 
1622 	if (ifp->if_watchdog) {
1623 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1624 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1625 		timeout_add(&ifp->if_slowtimo, hz / IFNET_SLOWHZ);
1626 	}
1627 	splx(s);
1628 }
1629 
/*
 * Task handler invoking the driver's watchdog routine.  ``xifidx''
 * carries the interface index; the interface may already be gone.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1650 
1651 /*
1652  * Map interface name to interface structure pointer.
1653  */
1654 struct ifnet *
1655 ifunit(const char *name)
1656 {
1657 	struct ifnet *ifp;
1658 
1659 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1660 		if (strcmp(ifp->if_xname, name) == 0)
1661 			return (ifp);
1662 	}
1663 	return (NULL);
1664 }
1665 
1666 /*
1667  * Map interface index to interface structure pointer.
1668  */
1669 struct ifnet *
1670 if_get(unsigned int index)
1671 {
1672 	struct srp_ref sr;
1673 	struct if_map *if_map;
1674 	struct srp *map;
1675 	struct ifnet *ifp = NULL;
1676 
1677 	if_map = srp_enter(&sr, &if_idxmap.map);
1678 	if (index < if_map->limit) {
1679 		map = (struct srp *)(if_map + 1);
1680 
1681 		ifp = srp_follow(&sr, &map[index]);
1682 		if (ifp != NULL) {
1683 			KASSERT(ifp->if_index == index);
1684 			if_ref(ifp);
1685 		}
1686 	}
1687 	srp_leave(&sr);
1688 
1689 	return (ifp);
1690 }
1691 
1692 struct ifnet *
1693 if_ref(struct ifnet *ifp)
1694 {
1695 	refcnt_take(&ifp->if_refcnt);
1696 
1697 	return (ifp);
1698 }
1699 
1700 void
1701 if_put(struct ifnet *ifp)
1702 {
1703 	if (ifp == NULL)
1704 		return;
1705 
1706 	refcnt_rele_wake(&ifp->if_refcnt);
1707 }
1708 
/*
 * Set the link-layer address of ``ifp'' in both the arpcom copy and
 * the sockaddr_dl stored in if_sadl.
 *
 * NOTE(review): assumes ``ifp'' is embedded in a struct arpcom and
 * that ``lladdr'' holds ETHER_ADDR_LEN bytes — callers must ensure
 * both (cf. the if_type check in the SIOCSIFLLADDR handler).
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1720 
/*
 * Create routing domain ``rdomain'' including its loopback interface
 * lo<rdomain>.  The domain's routing table is created if necessary; a
 * non-empty pre-existing table is refused with EEXIST.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	/* An already existing loopback is fine iff it is ``ifp'' itself. */
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1747 
/*
 * Move ``ifp'' into routing domain ``rdomain''.
 *
 * The target table must already exist and be an actual rdomain (its
 * own layer-2 table).  The default loopback of a domain may not be
 * moved away.  A domain change tears the interface's addresses down
 * and brings the interface back up afterwards when it was up before.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1811 
1812 /*
1813  * Interface ioctls.
1814  */
1815 int
1816 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1817 {
1818 	struct ifnet *ifp;
1819 	struct ifreq *ifr = (struct ifreq *)data;
1820 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1821 	struct if_afreq *ifar = (struct if_afreq *)data;
1822 	char ifdescrbuf[IFDESCRSIZE];
1823 	char ifrtlabelbuf[RTLABEL_LEN];
1824 	int s, error = 0, oif_xflags;
1825 	size_t bytesdone;
1826 	unsigned short oif_flags;
1827 
1828 	switch (cmd) {
1829 	case SIOCIFCREATE:
1830 		if ((error = suser(p)) != 0)
1831 			return (error);
1832 		NET_LOCK();
1833 		error = if_clone_create(ifr->ifr_name, 0);
1834 		NET_UNLOCK();
1835 		return (error);
1836 	case SIOCIFDESTROY:
1837 		if ((error = suser(p)) != 0)
1838 			return (error);
1839 		NET_LOCK();
1840 		error = if_clone_destroy(ifr->ifr_name);
1841 		NET_UNLOCK();
1842 		return (error);
1843 	case SIOCSIFGATTR:
1844 		if ((error = suser(p)) != 0)
1845 			return (error);
1846 		NET_LOCK();
1847 		error = if_setgroupattribs(data);
1848 		NET_UNLOCK();
1849 		return (error);
1850 	case SIOCGIFCONF:
1851 	case SIOCIFGCLONERS:
1852 	case SIOCGIFGMEMB:
1853 	case SIOCGIFGATTR:
1854 	case SIOCGIFGLIST:
1855 	case SIOCGIFFLAGS:
1856 	case SIOCGIFXFLAGS:
1857 	case SIOCGIFMETRIC:
1858 	case SIOCGIFMTU:
1859 	case SIOCGIFHARDMTU:
1860 	case SIOCGIFDATA:
1861 	case SIOCGIFDESCR:
1862 	case SIOCGIFRTLABEL:
1863 	case SIOCGIFPRIORITY:
1864 	case SIOCGIFRDOMAIN:
1865 	case SIOCGIFGROUP:
1866 	case SIOCGIFLLPRIO:
1867 		return (ifioctl_get(cmd, data));
1868 	}
1869 
1870 	ifp = ifunit(ifr->ifr_name);
1871 	if (ifp == NULL)
1872 		return (ENXIO);
1873 	oif_flags = ifp->if_flags;
1874 	oif_xflags = ifp->if_xflags;
1875 
1876 	switch (cmd) {
1877 	case SIOCIFAFATTACH:
1878 	case SIOCIFAFDETACH:
1879 		if ((error = suser(p)) != 0)
1880 			break;
1881 		NET_LOCK();
1882 		switch (ifar->ifar_af) {
1883 		case AF_INET:
1884 			/* attach is a noop for AF_INET */
1885 			if (cmd == SIOCIFAFDETACH)
1886 				in_ifdetach(ifp);
1887 			break;
1888 #ifdef INET6
1889 		case AF_INET6:
1890 			if (cmd == SIOCIFAFATTACH)
1891 				error = in6_ifattach(ifp);
1892 			else
1893 				in6_ifdetach(ifp);
1894 			break;
1895 #endif /* INET6 */
1896 		default:
1897 			error = EAFNOSUPPORT;
1898 		}
1899 		NET_UNLOCK();
1900 		break;
1901 
1902 	case SIOCSIFFLAGS:
1903 		if ((error = suser(p)) != 0)
1904 			break;
1905 
1906 		NET_LOCK();
1907 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1908 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1909 
1910 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1911 		if (error != 0) {
1912 			ifp->if_flags = oif_flags;
1913 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
1914 			s = splnet();
1915 			if (ISSET(ifp->if_flags, IFF_UP))
1916 				if_up(ifp);
1917 			else
1918 				if_down(ifp);
1919 			splx(s);
1920 		}
1921 		NET_UNLOCK();
1922 		break;
1923 
1924 	case SIOCSIFXFLAGS:
1925 		if ((error = suser(p)) != 0)
1926 			break;
1927 
1928 		NET_LOCK();
1929 #ifdef INET6
1930 		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
1931 			error = in6_ifattach(ifp);
1932 			if (error != 0) {
1933 				NET_UNLOCK();
1934 				break;
1935 			}
1936 		}
1937 
1938 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1939 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1940 			ifp->if_xflags |= IFXF_INET6_NOSOII;
1941 			in6_soiiupdate(ifp);
1942 		}
1943 
1944 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1945 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1946 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
1947 			in6_soiiupdate(ifp);
1948 		}
1949 
1950 #endif	/* INET6 */
1951 
1952 #ifdef MPLS
1953 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
1954 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
1955 			s = splnet();
1956 			ifp->if_xflags |= IFXF_MPLS;
1957 			ifp->if_ll_output = ifp->if_output;
1958 			ifp->if_output = mpls_output;
1959 			splx(s);
1960 		}
1961 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
1962 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
1963 			s = splnet();
1964 			ifp->if_xflags &= ~IFXF_MPLS;
1965 			ifp->if_output = ifp->if_ll_output;
1966 			ifp->if_ll_output = NULL;
1967 			splx(s);
1968 		}
1969 #endif	/* MPLS */
1970 
1971 #ifndef SMALL_KERNEL
1972 		if (ifp->if_capabilities & IFCAP_WOL) {
1973 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
1974 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
1975 				s = splnet();
1976 				ifp->if_xflags |= IFXF_WOL;
1977 				error = ifp->if_wol(ifp, 1);
1978 				splx(s);
1979 			}
1980 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
1981 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
1982 				s = splnet();
1983 				ifp->if_xflags &= ~IFXF_WOL;
1984 				error = ifp->if_wol(ifp, 0);
1985 				splx(s);
1986 			}
1987 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
1988 			ifr->ifr_flags &= ~IFXF_WOL;
1989 			error = ENOTSUP;
1990 		}
1991 #endif
1992 
1993 		if (error == 0)
1994 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
1995 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
1996 		NET_UNLOCK();
1997 		break;
1998 
1999 	case SIOCSIFMETRIC:
2000 		if ((error = suser(p)) != 0)
2001 			break;
2002 		NET_LOCK();
2003 		ifp->if_metric = ifr->ifr_metric;
2004 		NET_UNLOCK();
2005 		break;
2006 
2007 	case SIOCSIFMTU:
2008 		if ((error = suser(p)) != 0)
2009 			break;
2010 		NET_LOCK();
2011 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2012 		NET_UNLOCK();
2013 		if (!error)
2014 			rtm_ifchg(ifp);
2015 		break;
2016 
2017 	case SIOCSIFDESCR:
2018 		if ((error = suser(p)) != 0)
2019 			break;
2020 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2021 		    IFDESCRSIZE, &bytesdone);
2022 		if (error == 0) {
2023 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2024 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2025 		}
2026 		break;
2027 
2028 	case SIOCSIFRTLABEL:
2029 		if ((error = suser(p)) != 0)
2030 			break;
2031 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2032 		    RTLABEL_LEN, &bytesdone);
2033 		if (error == 0) {
2034 			rtlabel_unref(ifp->if_rtlabelid);
2035 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2036 		}
2037 		break;
2038 
2039 	case SIOCSIFPRIORITY:
2040 		if ((error = suser(p)) != 0)
2041 			break;
2042 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2043 			error = EINVAL;
2044 			break;
2045 		}
2046 		ifp->if_priority = ifr->ifr_metric;
2047 		break;
2048 
2049 	case SIOCSIFRDOMAIN:
2050 		if ((error = suser(p)) != 0)
2051 			break;
2052 		NET_LOCK();
2053 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2054 		if (!error || error == EEXIST)
2055 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2056 		NET_UNLOCK();
2057 		break;
2058 
2059 	case SIOCAIFGROUP:
2060 		if ((error = suser(p)))
2061 			break;
2062 		NET_LOCK();
2063 		error = if_addgroup(ifp, ifgr->ifgr_group);
2064 		if (error == 0) {
2065 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2066 			if (error == ENOTTY)
2067 				error = 0;
2068 		}
2069 		NET_UNLOCK();
2070 		break;
2071 
2072 	case SIOCDIFGROUP:
2073 		if ((error = suser(p)))
2074 			break;
2075 		NET_LOCK();
2076 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2077 		if (error == ENOTTY)
2078 			error = 0;
2079 		if (error == 0)
2080 			error = if_delgroup(ifp, ifgr->ifgr_group);
2081 		NET_UNLOCK();
2082 		break;
2083 
2084 	case SIOCSIFLLADDR:
2085 		if ((error = suser(p)))
2086 			break;
2087 		if ((ifp->if_sadl == NULL) ||
2088 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2089 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2090 			error = EINVAL;
2091 			break;
2092 		}
2093 		NET_LOCK();
2094 		switch (ifp->if_type) {
2095 		case IFT_ETHER:
2096 		case IFT_CARP:
2097 		case IFT_XETHER:
2098 		case IFT_ISO88025:
2099 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2100 			if (error == ENOTTY)
2101 				error = 0;
2102 			if (error == 0)
2103 				error = if_setlladdr(ifp,
2104 				    ifr->ifr_addr.sa_data);
2105 			break;
2106 		default:
2107 			error = ENODEV;
2108 		}
2109 
2110 		if (error == 0)
2111 			ifnewlladdr(ifp);
2112 		NET_UNLOCK();
2113 		break;
2114 
2115 	case SIOCSIFLLPRIO:
2116 		if ((error = suser(p)))
2117 			break;
2118 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2119 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2120 			error = EINVAL;
2121 			break;
2122 		}
2123 		NET_LOCK();
2124 		ifp->if_llprio = ifr->ifr_llprio;
2125 		NET_UNLOCK();
2126 		break;
2127 
2128 	case SIOCSETKALIVE:
2129 	case SIOCDIFPHYADDR:
2130 	case SIOCSLIFPHYADDR:
2131 	case SIOCSLIFPHYRTABLE:
2132 	case SIOCSLIFPHYTTL:
2133 	case SIOCSLIFPHYDF:
2134 	case SIOCADDMULTI:
2135 	case SIOCDELMULTI:
2136 	case SIOCSIFMEDIA:
2137 	case SIOCSVNETID:
2138 	case SIOCSVNETFLOWID:
2139 	case SIOCSIFPAIR:
2140 	case SIOCSIFPARENT:
2141 	case SIOCDIFPARENT:
2142 		if ((error = suser(p)) != 0)
2143 			break;
2144 		/* FALLTHROUGH */
2145 	default:
2146 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
2147 			(struct mbuf *) cmd, (struct mbuf *) data,
2148 			(struct mbuf *) ifp, p));
2149 		if (error == EOPNOTSUPP) {
2150 			NET_LOCK();
2151 			error = ((*ifp->if_ioctl)(ifp, cmd, data));
2152 			NET_UNLOCK();
2153 		}
2154 		break;
2155 	}
2156 
2157 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
2158 		rtm_ifchg(ifp);
2159 
2160 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2161 		getmicrotime(&ifp->if_lastchange);
2162 
2163 	return (error);
2164 }
2165 
/*
 * Handle the read-only subset of interface ioctls under the shared
 * (read) network lock.  Called from ifioctl() only; any other cmd
 * reaching the final switch is a programming error, hence the panic.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Requests that do not name a single interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK();
		error = ifconf(data);
		NET_RUNLOCK();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK();
		error = if_getgroupmembers(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK();
		error = if_getgroupattribs(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK();
		error = if_getgrouplist(data);
		NET_RUNLOCK();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Internal-only bits are hidden from userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK();

	return (error);
}
2279 
2280 /*
2281  * Return interface configuration
2282  * of system.  List may be used
2283  * in later ioctl's (above) to get
2284  * other information.
2285  */
2286 int
2287 ifconf(caddr_t data)
2288 {
2289 	struct ifconf *ifc = (struct ifconf *)data;
2290 	struct ifnet *ifp;
2291 	struct ifaddr *ifa;
2292 	struct ifreq ifr, *ifrp;
2293 	int space = ifc->ifc_len, error = 0;
2294 
2295 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2296 	if (space == 0) {
2297 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2298 			struct sockaddr *sa;
2299 
2300 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2301 				space += sizeof (ifr);
2302 			else
2303 				TAILQ_FOREACH(ifa,
2304 				    &ifp->if_addrlist, ifa_list) {
2305 					sa = ifa->ifa_addr;
2306 					if (sa->sa_len > sizeof(*sa))
2307 						space += sa->sa_len -
2308 						    sizeof(*sa);
2309 					space += sizeof(ifr);
2310 				}
2311 		}
2312 		ifc->ifc_len = space;
2313 		return (0);
2314 	}
2315 
2316 	ifrp = ifc->ifc_req;
2317 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2318 		if (space < sizeof(ifr))
2319 			break;
2320 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2321 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2322 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2323 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2324 			    sizeof(ifr));
2325 			if (error)
2326 				break;
2327 			space -= sizeof (ifr), ifrp++;
2328 		} else
2329 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2330 				struct sockaddr *sa = ifa->ifa_addr;
2331 
2332 				if (space < sizeof(ifr))
2333 					break;
2334 				if (sa->sa_len <= sizeof(*sa)) {
2335 					ifr.ifr_addr = *sa;
2336 					error = copyout((caddr_t)&ifr,
2337 					    (caddr_t)ifrp, sizeof (ifr));
2338 					ifrp++;
2339 				} else {
2340 					space -= sa->sa_len - sizeof(*sa);
2341 					if (space < sizeof (ifr))
2342 						break;
2343 					error = copyout((caddr_t)&ifr,
2344 					    (caddr_t)ifrp,
2345 					    sizeof(ifr.ifr_name));
2346 					if (error == 0)
2347 						error = copyout((caddr_t)sa,
2348 						    (caddr_t)&ifrp->ifr_addr,
2349 						    sa->sa_len);
2350 					ifrp = (struct ifreq *)(sa->sa_len +
2351 					    (caddr_t)&ifrp->ifr_addr);
2352 				}
2353 				if (error)
2354 					break;
2355 				space -= sizeof (ifr);
2356 			}
2357 	}
2358 	ifc->ifc_len -= space;
2359 	return (error);
2360 }
2361 
/*
 * Snapshot the interface statistics into ``data'', folding in the
 * per-queue counters of all transmit and receive queues.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2381 
2382 /*
2383  * Dummy functions replaced in ifnet during detach (if protocols decide to
2384  * fiddle with the if during detach.
2385  */
2386 void
2387 if_detached_qstart(struct ifqueue *ifq)
2388 {
2389 	ifq_purge(ifq);
2390 }
2391 
2392 int
2393 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2394 {
2395 	return ENODEV;
2396 }
2397 
2398 /*
2399  * Create interface group without members
2400  */
2401 struct ifg_group *
2402 if_creategroup(const char *groupname)
2403 {
2404 	struct ifg_group	*ifg;
2405 
2406 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2407 		return (NULL);
2408 
2409 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2410 	ifg->ifg_refcnt = 0;
2411 	ifg->ifg_carp_demoted = 0;
2412 	TAILQ_INIT(&ifg->ifg_members);
2413 #if NPF > 0
2414 	pfi_attach_ifgroup(ifg);
2415 #endif
2416 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2417 
2418 	return (ifg);
2419 }
2420 
2421 /*
2422  * Add a group to an interface
2423  */
2424 int
2425 if_addgroup(struct ifnet *ifp, const char *groupname)
2426 {
2427 	struct ifg_list		*ifgl;
2428 	struct ifg_group	*ifg = NULL;
2429 	struct ifg_member	*ifgm;
2430 
2431 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2432 	    groupname[strlen(groupname) - 1] <= '9')
2433 		return (EINVAL);
2434 
2435 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2436 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2437 			return (EEXIST);
2438 
2439 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2440 		return (ENOMEM);
2441 
2442 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2443 		free(ifgl, M_TEMP, sizeof(*ifgl));
2444 		return (ENOMEM);
2445 	}
2446 
2447 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2448 		if (!strcmp(ifg->ifg_group, groupname))
2449 			break;
2450 
2451 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
2452 		free(ifgl, M_TEMP, sizeof(*ifgl));
2453 		free(ifgm, M_TEMP, sizeof(*ifgm));
2454 		return (ENOMEM);
2455 	}
2456 
2457 	ifg->ifg_refcnt++;
2458 	ifgl->ifgl_group = ifg;
2459 	ifgm->ifgm_ifp = ifp;
2460 
2461 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2462 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2463 
2464 #if NPF > 0
2465 	pfi_group_addmember(groupname, ifp);
2466 #endif
2467 
2468 	return (0);
2469 }
2470 
2471 /*
2472  * Remove a group from an interface
2473  */
2474 int
2475 if_delgroup(struct ifnet *ifp, const char *groupname)
2476 {
2477 	struct ifg_list		*ifgl;
2478 	struct ifg_member	*ifgm;
2479 
2480 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2481 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2482 			break;
2483 	if (ifgl == NULL)
2484 		return (ENOENT);
2485 
2486 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2487 
2488 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2489 		if (ifgm->ifgm_ifp == ifp)
2490 			break;
2491 
2492 	if (ifgm != NULL) {
2493 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2494 		free(ifgm, M_TEMP, sizeof(*ifgm));
2495 	}
2496 
2497 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2498 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2499 #if NPF > 0
2500 		pfi_detach_ifgroup(ifgl->ifgl_group);
2501 #endif
2502 		free(ifgl->ifgl_group, M_TEMP, 0);
2503 	}
2504 
2505 	free(ifgl, M_TEMP, sizeof(*ifgl));
2506 
2507 #if NPF > 0
2508 	pfi_group_change(groupname);
2509 #endif
2510 
2511 	return (0);
2512 }
2513 
2514 /*
2515  * Stores all groups from an interface in memory pointed
2516  * to by data
2517  */
2518 int
2519 if_getgroup(caddr_t data, struct ifnet *ifp)
2520 {
2521 	int			 len, error;
2522 	struct ifg_list		*ifgl;
2523 	struct ifg_req		 ifgrq, *ifgp;
2524 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2525 
2526 	if (ifgr->ifgr_len == 0) {
2527 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2528 			ifgr->ifgr_len += sizeof(struct ifg_req);
2529 		return (0);
2530 	}
2531 
2532 	len = ifgr->ifgr_len;
2533 	ifgp = ifgr->ifgr_groups;
2534 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2535 		if (len < sizeof(ifgrq))
2536 			return (EINVAL);
2537 		bzero(&ifgrq, sizeof ifgrq);
2538 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2539 		    sizeof(ifgrq.ifgrq_group));
2540 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2541 		    sizeof(struct ifg_req))))
2542 			return (error);
2543 		len -= sizeof(ifgrq);
2544 		ifgp++;
2545 	}
2546 
2547 	return (0);
2548 }
2549 
2550 /*
2551  * Stores all members of a group in memory pointed to by data
2552  */
2553 int
2554 if_getgroupmembers(caddr_t data)
2555 {
2556 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2557 	struct ifg_group	*ifg;
2558 	struct ifg_member	*ifgm;
2559 	struct ifg_req		 ifgrq, *ifgp;
2560 	int			 len, error;
2561 
2562 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2563 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2564 			break;
2565 	if (ifg == NULL)
2566 		return (ENOENT);
2567 
2568 	if (ifgr->ifgr_len == 0) {
2569 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2570 			ifgr->ifgr_len += sizeof(ifgrq);
2571 		return (0);
2572 	}
2573 
2574 	len = ifgr->ifgr_len;
2575 	ifgp = ifgr->ifgr_groups;
2576 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2577 		if (len < sizeof(ifgrq))
2578 			return (EINVAL);
2579 		bzero(&ifgrq, sizeof ifgrq);
2580 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2581 		    sizeof(ifgrq.ifgrq_member));
2582 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2583 		    sizeof(struct ifg_req))))
2584 			return (error);
2585 		len -= sizeof(ifgrq);
2586 		ifgp++;
2587 	}
2588 
2589 	return (0);
2590 }
2591 
2592 int
2593 if_getgroupattribs(caddr_t data)
2594 {
2595 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2596 	struct ifg_group	*ifg;
2597 
2598 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2599 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2600 			break;
2601 	if (ifg == NULL)
2602 		return (ENOENT);
2603 
2604 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2605 
2606 	return (0);
2607 }
2608 
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/*
	 * demote is a relative adjustment; reject any change that would
	 * push the group counter outside [0, 0xff].
	 */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* Let every member interface react to the attribute change. */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2635 
2636 /*
2637  * Stores all groups in memory pointed to by data
2638  */
2639 int
2640 if_getgrouplist(caddr_t data)
2641 {
2642 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2643 	struct ifg_group	*ifg;
2644 	struct ifg_req		 ifgrq, *ifgp;
2645 	int			 len, error;
2646 
2647 	if (ifgr->ifgr_len == 0) {
2648 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2649 			ifgr->ifgr_len += sizeof(ifgrq);
2650 		return (0);
2651 	}
2652 
2653 	len = ifgr->ifgr_len;
2654 	ifgp = ifgr->ifgr_groups;
2655 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2656 		if (len < sizeof(ifgrq))
2657 			return (EINVAL);
2658 		bzero(&ifgrq, sizeof ifgrq);
2659 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2660                     sizeof(ifgrq.ifgrq_group));
2661 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2662                     sizeof(struct ifg_req))))
2663 			return (error);
2664 		len -= sizeof(ifgrq);
2665 		ifgp++;
2666 	}
2667 
2668 	return (0);
2669 }
2670 
/*
 * A default route (0/0 or ::/0) was added or deleted: the set of
 * egress interfaces may have changed, so rebuild the "egress" group.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		/* All-zero destination with a zero (or absent) mask. */
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2692 
/*
 * Rebuild the "egress" interface group from the interfaces that
 * currently carry a default route in routing table 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* First flush the current egress membership, if any. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk all IPv4 default routes and re-add their interfaces. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same for IPv6 default routes. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2742 
2743 /*
2744  * Set/clear promiscuous mode on interface ifp based on the truth value
2745  * of pswitch.  The calls are reference counted so that only the first
2746  * "on" request actually has an effect, as does the final "off" request.
2747  * Results are undefined if the "off" and "on" requests are not matched.
2748  */
2749 int
2750 ifpromisc(struct ifnet *ifp, int pswitch)
2751 {
2752 	struct ifreq ifr;
2753 	unsigned short oif_flags;
2754 	int oif_pcount, error;
2755 
2756 	oif_flags = ifp->if_flags;
2757 	oif_pcount = ifp->if_pcount;
2758 	if (pswitch) {
2759 		if (ifp->if_pcount++ != 0)
2760 			return (0);
2761 		ifp->if_flags |= IFF_PROMISC;
2762 	} else {
2763 		if (--ifp->if_pcount > 0)
2764 			return (0);
2765 		ifp->if_flags &= ~IFF_PROMISC;
2766 	}
2767 
2768 	if ((ifp->if_flags & IFF_UP) == 0)
2769 		return (0);
2770 
2771 	memset(&ifr, 0, sizeof(ifr));
2772 	ifr.ifr_flags = ifp->if_flags;
2773 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
2774 	if (error) {
2775 		ifp->if_flags = oif_flags;
2776 		ifp->if_pcount = oif_pcount;
2777 	}
2778 
2779 	return (error);
2780 }
2781 
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	/* Append the address to the interface's address list. */
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2787 
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	/* Unlink the address from the interface's address list. */
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2793 
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	/* Only same-length replacement is supported; anything else panics. */
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
2801 
2802 #ifdef DDB
/* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	/* Dump every INET/INET6 address together with its interface. */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
2832 #endif /* DDB */
2833 
/*
 * The interface's link-layer address changed: bounce the interface so
 * drivers reprogram the new address, and refresh the IPv6 link-local
 * address derived from it.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring it up so the driver picks up the new lladdr. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		/*
		 * NOTE(review): &...->ia_ifa is taken before the NULL
		 * check below; this is only NULL-safe if ia_ifa is the
		 * first member of struct in6_ifaddr -- confirm against
		 * netinet6/in6_var.h.
		 */
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			dohooks(ifp->if_addrhooks, 0);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
2880 
2881 int net_ticks;
2882 u_int net_livelocks;
2883 
/*
 * Periodic watchdog: if more than one tick elapsed since the last run,
 * the network taskq fell behind and a livelock is recorded.
 */
void
net_tick(void *null)
{
	extern int ticks;

	if (ticks - net_ticks > 1)
		net_livelocks++;

	net_ticks = ticks;

	/* Re-arm for the next tick. */
	timeout_add(&net_tick_to, 1);
}
2896 
2897 int
2898 net_livelocked(void)
2899 {
2900 	extern int ticks;
2901 
2902 	return (ticks - net_ticks > 1);
2903 }
2904 
2905 void
2906 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
2907 {
2908 	extern int ticks;
2909 
2910 	memset(rxr, 0, sizeof(*rxr));
2911 
2912 	rxr->rxr_adjusted = ticks;
2913 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
2914 	rxr->rxr_hwm = hwm;
2915 }
2916 
/*
 * Move the current watermark one step: down towards the low watermark
 * while the system is livelocked, up towards the high watermark while
 * the ring is running dry; otherwise leave it alone.
 */
static inline void
if_rxr_adjust_cwm(struct if_rxring *rxr)
{
	extern int ticks;

	if (net_livelocked()) {
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;
		else
			return;
	} else if (rxr->rxr_alive >= rxr->rxr_lwm)
		return;
	else if (rxr->rxr_cwm < rxr->rxr_hwm)
		rxr->rxr_cwm++;

	/* Only reached when the watermark actually moved. */
	rxr->rxr_adjusted = ticks;
}
2934 
2935 void
2936 if_rxr_livelocked(struct if_rxring *rxr)
2937 {
2938 	extern int ticks;
2939 
2940 	if (ticks - rxr->rxr_adjusted >= 1) {
2941 		if (rxr->rxr_cwm > rxr->rxr_lwm)
2942 			rxr->rxr_cwm--;
2943 
2944 		rxr->rxr_adjusted = ticks;
2945 	}
2946 }
2947 
/*
 * Grant the caller up to max new ring slots, bounded by the current
 * watermark; returns the number of slots actually granted.
 */
u_int
if_rxr_get(struct if_rxring *rxr, u_int max)
{
	extern int ticks;
	u_int diff;

	if (ticks - rxr->rxr_adjusted >= 1) {
		/* we're free to try for an adjustment */
		if_rxr_adjust_cwm(rxr);
	}

	/* The ring already holds as many slots as the watermark allows. */
	if (rxr->rxr_alive >= rxr->rxr_cwm)
		return (0);

	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
	rxr->rxr_alive += diff;

	return (diff);
}
2967 
2968 int
2969 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
2970 {
2971 	struct if_rxrinfo kifri;
2972 	int error;
2973 	u_int n;
2974 
2975 	error = copyin(uifri, &kifri, sizeof(kifri));
2976 	if (error)
2977 		return (error);
2978 
2979 	n = min(t, kifri.ifri_total);
2980 	kifri.ifri_total = t;
2981 
2982 	if (n > 0) {
2983 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
2984 		if (error)
2985 			return (error);
2986 	}
2987 
2988 	return (copyout(&kifri, uifri, sizeof(kifri)));
2989 }
2990 
2991 int
2992 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
2993     struct if_rxring *rxr)
2994 {
2995 	struct if_rxring_info ifr;
2996 
2997 	memset(&ifr, 0, sizeof(ifr));
2998 
2999 	if (name != NULL)
3000 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3001 
3002 	ifr.ifr_size = size;
3003 	ifr.ifr_info = *rxr;
3004 
3005 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3006 }
3007 
3008 /*
3009  * Network stack input queues.
3010  */
3011 
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	/* Backing mbuf queue runs at IPL_NET; isr is raised on enqueue. */
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3018 
3019 int
3020 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3021 {
3022 	int rv;
3023 
3024 	rv = mq_enqueue(&niq->ni_q, m);
3025 	if (rv == 0)
3026 		schednetisr(niq->ni_isr);
3027 	else
3028 		if_congestion();
3029 
3030 	return (rv);
3031 }
3032 
3033 int
3034 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3035 {
3036 	int rv;
3037 
3038 	rv = mq_enlist(&niq->ni_q, ml);
3039 	if (rv == 0)
3040 		schednetisr(niq->ni_isr);
3041 	else
3042 		if_congestion();
3043 
3044 	return (rv);
3045 }
3046 
__dead void
unhandled_af(int af)
{
	/* Fatal: a caller passed an address family it has no case for. */
	panic("unhandled af %d", af);
}
3052 
3053 /*
3054  * XXXSMP This tunable is here to work around the fact that IPsec
3055  * globals aren't ready to be accessed by multiple threads in
3056  * parallel.
3057  */
3058 int		 nettaskqs = NET_TASKQ;
3059 
3060 struct taskq *
3061 net_tq(unsigned int ifindex)
3062 {
3063 	struct taskq *t = NULL;
3064 
3065 	t = nettqmp[ifindex % nettaskqs];
3066 
3067 	return (t);
3068 }
3069