xref: /openbsd-src/sys/net/if.c (revision 897fc685943471cf985a0fe38ba076ea6fe74fa5)
1 /*	$OpenBSD: if.c,v 1.549 2018/03/20 08:58:19 mpi Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "trunk.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/proc.h>
88 
89 #include <dev/rndvar.h>
90 
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/netisr.h>
96 
97 #include <netinet/in.h>
98 #include <netinet/if_ether.h>
99 #include <netinet/igmp.h>
100 #ifdef MROUTING
101 #include <netinet/ip_mroute.h>
102 #endif
103 
104 #ifdef INET6
105 #include <netinet6/in6_var.h>
106 #include <netinet6/in6_ifattach.h>
107 #include <netinet6/nd6.h>
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 #endif
111 
112 #ifdef MPLS
113 #include <netmpls/mpls.h>
114 #endif
115 
116 #if NBPFILTER > 0
117 #include <net/bpf.h>
118 #endif
119 
120 #if NBRIDGE > 0
121 #include <net/if_bridge.h>
122 #endif
123 
124 #if NCARP > 0
125 #include <netinet/ip_carp.h>
126 #endif
127 
128 #if NPF > 0
129 #include <net/pfvar.h>
130 #endif
131 
/* attach/detach helpers */
void	if_attachsetup(struct ifnet *);
void	if_attachdomain(struct ifnet *);
void	if_attach_common(struct ifnet *);
int	if_setrdomain(struct ifnet *, int);
void	if_slowtimo(void *);

/* stand-ins installed on an ifnet once it has been detached */
void	if_detached_qstart(struct ifqueue *);
int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);

int	ifioctl_get(u_long, caddr_t);
int	ifconf(caddr_t);

/* interface group ioctl backends */
int	if_getgroup(caddr_t, struct ifnet *);
int	if_getgroupmembers(caddr_t);
int	if_getgroupattribs(caddr_t);
int	if_setgroupattribs(caddr_t);

void	if_linkstate(struct ifnet *);
void	if_linkstate_task(void *);

int	if_clone_list(struct if_clonereq *);
struct if_clone	*if_clone_lookup(const char *, int *);

int	if_group_egress_build(void);

void	if_watchdog_task(void *);

void	if_netisr(void *);

#ifdef DDB
void	ifa_print_all(void);
#endif

/* shim that lets legacy if_start drivers run under the ifq API */
void	if_qstart_compat(struct ifqueue *);
166 
167 /*
168  * interface index map
169  *
170  * the kernel maintains a mapping of interface indexes to struct ifnet
171  * pointers.
172  *
173  * the map is an array of struct ifnet pointers prefixed by an if_map
174  * structure. the if_map structure stores the length of its array.
175  *
176  * as interfaces are attached to the system, the map is grown on demand
177  * up to USHRT_MAX entries.
178  *
179  * interface index 0 is reserved and represents no interface. this
180  * supports the use of the interface index as the scope for IPv6 link
181  * local addresses, where scope 0 means no scope has been specified.
182  * it also supports the use of interface index as the unique identifier
183  * for network interfaces in SNMP applications as per RFC2863. therefore
184  * if_get(0) returns NULL.
185  */
186 
void if_ifp_dtor(void *, void *);	/* srp_gc callback: drops an ifnet ref */
void if_map_dtor(void *, void *);	/* srp_gc callback: tears down an if_map */
struct ifnet *if_ref(struct ifnet *);

/*
 * struct if_map
 *
 * bounded array of ifnet srp pointers used to fetch references of live
 * interfaces with if_get().
 */

struct if_map {
	unsigned long		 limit;	/* number of srp slots that follow */
	/* followed by limit ifnet srp pointers */
};
202 
203 /*
204  * struct if_idxmap
205  *
206  * infrastructure to manage updates and accesses to the current if_map.
207  */
208 
struct if_idxmap {
	unsigned int		 serial;	/* next candidate index; 0 is reserved */
	unsigned int		 count;		/* interfaces currently in the map */
	struct srp		 map;		/* srp pointer to the current if_map */
};

void	if_idxmap_init(unsigned int);
void	if_idxmap_insert(struct ifnet *);
void	if_idxmap_remove(struct ifnet *);
218 
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;		/* number of registered interface cloners */

struct timeout net_tick_to;	/* periodic timeout driving net_tick() */
void	net_tick(void *);
int	net_livelocked(void);
int	ifq_congestion;		/* nonzero while input is congested */

int		 netisr;	/* bitmask of pending soft network interrupts */

#define	NET_TASKQ	1
struct taskq	*nettqmp[NET_TASKQ];

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
240 
241 /*
242  * Network interface utility routines.
243  */
244 void
245 ifinit(void)
246 {
247 	unsigned int	i;
248 
249 	/*
250 	 * most machines boot with 4 or 5 interfaces, so size the initial map
251 	 * to accomodate this
252 	 */
253 	if_idxmap_init(8);
254 
255 	timeout_set(&net_tick_to, net_tick, &net_tick_to);
256 
257 	for (i = 0; i < NET_TASKQ; i++) {
258 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
259 		if (nettqmp[i] == NULL)
260 			panic("unable to create network taskq %d", i);
261 	}
262 
263 	net_tick(&net_tick_to);
264 }
265 
/* starts empty; if_idxmap_init() installs the first map and serial */
static struct if_idxmap if_idxmap = {
	0,
	0,
	SRP_INITIALIZER()
};

struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of all attached interfaces */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
276 
277 void
278 if_idxmap_init(unsigned int limit)
279 {
280 	struct if_map *if_map;
281 	struct srp *map;
282 	unsigned int i;
283 
284 	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */
285 
286 	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
287 	    M_IFADDR, M_WAITOK);
288 
289 	if_map->limit = limit;
290 	map = (struct srp *)(if_map + 1);
291 	for (i = 0; i < limit; i++)
292 		srp_init(&map[i]);
293 
294 	/* this is called early so there's nothing to race with */
295 	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
296 }
297 
/*
 * Assign ifp a free interface index and publish it in the index map,
 * growing the map if needed.  Sets ifp->if_index as a side effect.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	/* the map's slot will hold the interface's first reference */
	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		/* grow: double the array and migrate live entries */
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* each new slot takes its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		/* initialise the newly added second half */
		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* publish; the old map is reclaimed via if_map_gc */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
361 
/*
 * Remove ifp from the index map and wait until no other context holds
 * a reference, so the caller may safely destroy the interface.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* clearing the slot releases the map's reference via if_ifp_gc */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
387 
/* srp_gc destructor for map slots: drop the slot's ifnet reference */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
393 
394 void
395 if_map_dtor(void *null, void *m)
396 {
397 	struct if_map *if_map = m;
398 	struct srp *map = (struct srp *)(if_map + 1);
399 	unsigned int i;
400 
401 	/*
402 	 * dont need to serialize the use of update_locked since this is
403 	 * the last reference to this map. there's nothing to race against.
404 	 */
405 	for (i = 0; i < if_map->limit; i++)
406 		srp_update_locked(&if_ifp_gc, &map[i], NULL);
407 
408 	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
409 }
410 
411 /*
412  * Attach an interface to the
413  * list of "active" interfaces.
414  */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	/* every interface belongs to the "all" group */
	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	/* arm the watchdog timer */
	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	/* this assigns ifp->if_index; index 0 must never be handed out */
	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	/*
	 * the tasks carry the index rather than the pointer — presumably
	 * so a task that runs after detach fails its if_get() lookup
	 * instead of touching a freed ifnet; confirm in the task bodies.
	 */
	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
445 
446 /*
447  * Allocate the link level name for the specified interface.  This
448  * is an attachment helper.  It must be called after ifp->if_addrlen
449  * is initialized, which may not be the case when if_attach() is
450  * called.
451  */
452 void
453 if_alloc_sadl(struct ifnet *ifp)
454 {
455 	unsigned int socksize;
456 	int namelen, masklen;
457 	struct sockaddr_dl *sdl;
458 
459 	/*
460 	 * If the interface already has a link name, release it
461 	 * now.  This is useful for interfaces that can change
462 	 * link types, and thus switch link names often.
463 	 */
464 	if (ifp->if_sadl != NULL)
465 		if_free_sadl(ifp);
466 
467 	namelen = strlen(ifp->if_xname);
468 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
469 	socksize = masklen + ifp->if_addrlen;
470 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
471 	if (socksize < sizeof(*sdl))
472 		socksize = sizeof(*sdl);
473 	socksize = ROUNDUP(socksize);
474 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
475 	sdl->sdl_len = socksize;
476 	sdl->sdl_family = AF_LINK;
477 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
478 	sdl->sdl_nlen = namelen;
479 	sdl->sdl_alen = ifp->if_addrlen;
480 	sdl->sdl_index = ifp->if_index;
481 	sdl->sdl_type = ifp->if_type;
482 	ifp->if_sadl = sdl;
483 }
484 
485 /*
486  * Free the link level name for the specified interface.  This is
487  * a detach helper.  This is called from if_detach() or from
488  * link layer type specific detach functions.
489  */
490 void
491 if_free_sadl(struct ifnet *ifp)
492 {
493 	free(ifp->if_sadl, M_IFADDR, 0);
494 	ifp->if_sadl = NULL;
495 }
496 
497 void
498 if_attachdomain(struct ifnet *ifp)
499 {
500 	struct domain *dp;
501 	int i, s;
502 
503 	s = splnet();
504 
505 	/* address family dependent data region */
506 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
507 	for (i = 0; (dp = domains[i]) != NULL; i++) {
508 		if (dp->dom_ifattach)
509 			ifp->if_afdata[dp->dom_family] =
510 			    (*dp->dom_ifattach)(ifp);
511 	}
512 
513 	splx(s);
514 }
515 
/* Like if_attach(), but insert at the head of the interface list. */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
525 
/* Attach an interface: common init, then list insertion and setup. */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
535 
536 void
537 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
538 {
539 	struct ifqueue **map;
540 	struct ifqueue *ifq;
541 	int i;
542 
543 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
544 	KASSERT(nqs != 0);
545 
546 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
547 
548 	ifp->if_snd.ifq_softc = NULL;
549 	map[0] = &ifp->if_snd;
550 
551 	for (i = 1; i < nqs; i++) {
552 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
553 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
554 		ifq_init(ifq, ifp, i);
555 		map[i] = ifq;
556 	}
557 
558 	ifp->if_ifqs = map;
559 	ifp->if_nifqs = nqs;
560 }
561 
562 void
563 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
564 {
565 	struct ifiqueue **map;
566 	struct ifiqueue *ifiq;
567 	unsigned int i;
568 
569 	KASSERT(niqs != 0);
570 
571 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
572 
573 	ifp->if_rcv.ifiq_softc = NULL;
574 	map[0] = &ifp->if_rcv;
575 
576 	for (i = 1; i < niqs; i++) {
577 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
578 		ifiq_init(ifiq, ifp, i);
579 		map[i] = ifiq;
580 	}
581 
582 	ifp->if_iqs = map;
583 	ifp->if_niqs = niqs;
584 }
585 
/*
 * Initialisation shared by if_attach() and if_attachhead(): address
 * lists, send/receive queues, hook lists and defaults.  Runs before
 * the interface is published anywhere.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	/*
	 * non-MPSAFE drivers provide if_start and get the compat shim
	 * as their qstart; MPSAFE drivers must provide if_qstart only.
	 */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* the builtin send queue starts as the only transmit queue */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;

	/* likewise for the builtin receive queue */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_addrhooks);
	ifp->if_linkstatehooks = malloc(sizeof(*ifp->if_linkstatehooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_linkstatehooks);
	ifp->if_detachhooks = malloc(sizeof(*ifp->if_detachhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_detachhooks);

	/* drivers without a routing hook get the no-op default */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	ifp->if_llprio = IFQ_DEFPRIO;

	SRPL_INIT(&ifp->if_inputs);
}
633 
/* Install a queueing discipline (ifq_ops) on an interface. */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
648 
/*
 * Legacy transmit kick for non-MPSAFE drivers; only valid when the
 * if_qstart_compat() shim was installed by if_attach_common().
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy if_start handlers expect the big lock and splnet */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
676 
/*
 * Queue an mbuf for transmission on ifp and kick the chosen transmit
 * queue.  Returns 0 on success or the enqueue error (the mbuf is
 * consumed either way).
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	unsigned int idx;
	struct ifqueue *ifq;
	int error;

#if NBRIDGE > 0
	/*
	 * bridge members divert output through the bridge; NOTE(review):
	 * M_PROTO1 appears to mark packets the bridge already handled —
	 * confirm against if_bridge.c.
	 */
	if (ifp->if_bridgeport && (m->m_flags & M_PROTO1) == 0) {
		KERNEL_LOCK();
		error = bridge_output(ifp, m, NULL, NULL);
		KERNEL_UNLOCK();
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	/*
	 * use the operations on the first ifq to pick which of the array
	 * gets this mbuf.
	 */
	idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
	ifq = ifp->if_ifqs[idx];

	error = ifq_enqueue(ifq, m);
	if (error)
		return (error);

	ifq_start(ifq);

	return (0);
}
712 
/* Hand a list of received mbufs to the interface's first input queue. */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	/* 2048 presumably caps the queue backlog — confirm in ifiq_input() */
	ifiq_input(&ifp->if_rcv, ml, 2048);
}
718 
/*
 * Loop a locally destined packet straight back into the protocol
 * input path for address family af.  Consumes the mbuf; returns
 * EAFNOSUPPORT for families we cannot dispatch.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destinated to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* re-tag the header as if the packet arrived on this interface */
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* count the packet both as sent and as received */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
770 
771 int
772 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
773 {
774 	struct ifiqueue *ifiq;
775 	unsigned int flow = 0;
776 
777 	m->m_pkthdr.ph_family = af;
778 	m->m_pkthdr.ph_ifidx = ifp->if_index;
779 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
780 
781 	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
782 		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;
783 
784 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
785 
786 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
787 }
788 
/*
 * per-interface input handler, kept on the ifp->if_inputs SRP list.
 * if_input_process() tries the handlers in list order until one
 * returns nonzero, meaning it consumed the mbuf.
 */
struct ifih {
	SRPL_ENTRY(ifih)	  ifih_next;
	int			(*ifih_input)(struct ifnet *, struct mbuf *,
				      void *);
	void			 *ifih_cookie;	/* opaque handler argument */
	int			  ifih_refcnt;	/* insert/remove pairing count */
	struct refcnt		  ifih_srpcnt;	/* outstanding SRP references */
};

void	if_ih_ref(void *, void *);
void	if_ih_unref(void *, void *);

struct srpl_rc ifih_rc = SRPL_RC_INITIALIZER(if_ih_ref, if_ih_unref, NULL);
802 
/*
 * Register an input handler on ifp.  A handler already registered
 * with the same (input, cookie) pair only gains a reference instead
 * of being inserted twice.
 */
void
if_ih_insert(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie) {
			ifih->ifih_refcnt++;
			break;
		}
	}

	if (ifih == NULL) {
		ifih = malloc(sizeof(*ifih), M_DEVBUF, M_WAITOK);

		ifih->ifih_input = input;
		ifih->ifih_cookie = cookie;
		ifih->ifih_refcnt = 1;
		refcnt_init(&ifih->ifih_srpcnt);
		SRPL_INSERT_HEAD_LOCKED(&ifih_rc, &ifp->if_inputs,
		    ifih, ifih_next);
	}
}
830 
/* srpl_rc ref callback: account a new SRP reference to the handler */
void
if_ih_ref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_take(&ifih->ifih_srpcnt);
}
838 
/* srpl_rc unref callback: drop an SRP reference, waking any finalizer */
void
if_ih_unref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_rele_wake(&ifih->ifih_srpcnt);
}
846 
/*
 * Undo one if_ih_insert().  The handler is only unlinked and freed
 * once its registration count drops to zero, after waiting for all
 * in-flight SRP readers to finish with it.
 */
void
if_ih_remove(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie)
			break;
	}

	KASSERT(ifih != NULL);

	if (--ifih->ifih_refcnt == 0) {
		SRPL_REMOVE_LOCKED(&ifih_rc, &ifp->if_inputs, ifih,
		    ifih, ifih_next);

		/* sleep until no reader still holds an SRP reference */
		refcnt_finalize(&ifih->ifih_srpcnt, "ifihrm");
		free(ifih, M_DEVBUF, sizeof(*ifih));
	}
}
871 
/*
 * Run every mbuf on ml through ifp's registered input handlers.
 * An mbuf no handler claims is freed here.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;
	struct ifih *ifih;
	struct srp_ref sr;
	int s;

	if (ml_empty(ml))
		return;

	/* cloned (pseudo) interfaces carry no useful hardware entropy */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		add_net_randomness(ml_len(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists.
	 */
	NET_RLOCK();
	s = splnet();
	while ((m = ml_dequeue(ml)) != NULL) {
		/*
		 * Pass this mbuf to all input handlers of its
		 * interface until it is consumed.
		 */
		SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
			if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
				break;
		}
		SRPL_LEAVE(&sr);

		/* no handler consumed the packet: drop it */
		if (ifih == NULL)
			m_freem(m);
	}
	splx(s);
	NET_RUNLOCK();
}
917 
/*
 * Task body that drains the netisr bitmask, dispatching each pending
 * soft interrupt to its protocol handler.  Legacy handlers still run
 * under the kernel lock.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE)) {
			KERNEL_LOCK();
			bridgeintr();
			KERNEL_UNLOCK();
		}
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX)) {
			KERNEL_LOCK();
			pipexintr();
			KERNEL_UNLOCK();
		}
#endif
		/* remember everything serviced for the pfsync pass below */
		t |= n;
	}

#if NPFSYNC > 0
	/* pfsync runs once, after all other isrs have been drained */
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
996 
/*
 * Run the interface's detach hooks; called before if_detach() starts
 * tearing the interface down so pseudo-drivers can undo their changes.
 */
void
if_deactivate(struct ifnet *ifp)
{
	NET_LOCK();
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */
	dohooks(ifp->if_detachhooks, HOOK_REMOVE | HOOK_FREE);

	NET_UNLOCK();
}
1010 
1011 /*
1012  * Detach an interface from everything in the kernel.  Also deallocate
1013  * private resources.
1014  */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	ifq_clr_oactive(&ifp->if_snd);

	/* Other CPUs must not have a reference before we start destroying. */
	if_idxmap_remove(ifp);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* neuter the driver entry points before dismantling anything */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Remove the interface from the list of all interfaces.  */
	TAILQ_REMOVE(&ifnet, ifp, if_list);

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	free(ifp->if_addrhooks, M_TEMP, 0);
	free(ifp->if_linkstatehooks, M_TEMP, 0);
	free(ifp->if_detachhooks, M_TEMP, 0);

	/* let each domain reclaim its per-interface data */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	/* finally tear down the transmit and receive queue arrays;
	 * slot 0 is embedded in the ifnet and must not be freed */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1121 
1122 /*
1123  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1124  */
1125 int
1126 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1127 {
1128 	struct ifnet *ifp;
1129 	int connected = 0;
1130 
1131 	ifp = if_get(ifidx);
1132 	if (ifp == NULL)
1133 		return (0);
1134 
1135 	if (ifp0->if_index == ifp->if_index)
1136 		connected = 1;
1137 
1138 #if NBRIDGE > 0
1139 	if (SAME_BRIDGE(ifp0->if_bridgeport, ifp->if_bridgeport))
1140 		connected = 1;
1141 #endif
1142 #if NCARP > 0
1143 	if ((ifp0->if_type == IFT_CARP && ifp0->if_carpdev == ifp) ||
1144 	    (ifp->if_type == IFT_CARP && ifp->if_carpdev == ifp0))
1145 		connected = 1;
1146 #endif
1147 
1148 	if_put(ifp);
1149 	return (connected);
1150 }
1151 
1152 /*
1153  * Create a clone network interface.
1154  */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	NET_ASSERT_LOCKED();

	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	if (ifunit(name) != NULL)
		return (EEXIST);

	/* XXXSMP breaks atomicity */
	NET_UNLOCK();
	ret = (*ifc->ifc_create)(ifc, unit);
	NET_LOCK();

	/* the new interface must be findable by name once created */
	if (ret != 0 || (ifp = ifunit(name)) == NULL)
		return (ret);

	/* clones join the group named after their cloner (e.g. "vlan") */
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);

	return (ret);
}
1185 
1186 /*
1187  * Destroy a clone network interface.
1188  */
1189 int
1190 if_clone_destroy(const char *name)
1191 {
1192 	struct if_clone *ifc;
1193 	struct ifnet *ifp;
1194 	int ret;
1195 
1196 	NET_ASSERT_LOCKED();
1197 
1198 	ifc = if_clone_lookup(name, NULL);
1199 	if (ifc == NULL)
1200 		return (EINVAL);
1201 
1202 	ifp = ifunit(name);
1203 	if (ifp == NULL)
1204 		return (ENXIO);
1205 
1206 	if (ifc->ifc_destroy == NULL)
1207 		return (EOPNOTSUPP);
1208 
1209 	if (ifp->if_flags & IFF_UP) {
1210 		int s;
1211 		s = splnet();
1212 		if_down(ifp);
1213 		splx(s);
1214 	}
1215 
1216 	/* XXXSMP breaks atomicity */
1217 	NET_UNLOCK();
1218 	ret = (*ifc->ifc_destroy)(ifp);
1219 	NET_LOCK();
1220 
1221 	return (ret);
1222 }
1223 
1224 /*
1225  * Look up a network interface cloner.
1226  */
1227 struct if_clone *
1228 if_clone_lookup(const char *name, int *unitp)
1229 {
1230 	struct if_clone *ifc;
1231 	const char *cp;
1232 	int unit;
1233 
1234 	/* separate interface name from unit */
1235 	for (cp = name;
1236 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1237 	    cp++)
1238 		continue;
1239 
1240 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1241 		return (NULL);	/* No name or unit number */
1242 
1243 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1244 		return (NULL);	/* unit number 0 padded */
1245 
1246 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1247 		if (strlen(ifc->ifc_name) == cp - name &&
1248 		    !strncmp(name, ifc->ifc_name, cp - name))
1249 			break;
1250 	}
1251 
1252 	if (ifc == NULL)
1253 		return (NULL);
1254 
1255 	unit = 0;
1256 	while (cp - name < IFNAMSIZ && *cp) {
1257 		if (*cp < '0' || *cp > '9' ||
1258 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1259 			/* Bogus unit number. */
1260 			return (NULL);
1261 		}
1262 		unit = (unit * 10) + (*cp++ - '0');
1263 	}
1264 
1265 	if (unitp != NULL)
1266 		*unitp = unit;
1267 	return (ifc);
1268 }
1269 
1270 /*
1271  * Register a network interface cloner.
1272  */
1273 void
1274 if_clone_attach(struct if_clone *ifc)
1275 {
1276 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1277 	if_cloners_count++;
1278 }
1279 
1280 /*
1281  * Unregister a network interface cloner.
1282  */
1283 void
1284 if_clone_detach(struct if_clone *ifc)
1285 {
1286 
1287 	LIST_REMOVE(ifc, ifc_list);
1288 	if_cloners_count--;
1289 }
1290 
1291 /*
1292  * Provide list of interface cloners to userspace.
1293  */
1294 int
1295 if_clone_list(struct if_clonereq *ifcr)
1296 {
1297 	char outbuf[IFNAMSIZ], *dst;
1298 	struct if_clone *ifc;
1299 	int count, error = 0;
1300 
1301 	ifcr->ifcr_total = if_cloners_count;
1302 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1303 		/* Just asking how many there are. */
1304 		return (0);
1305 	}
1306 
1307 	if (ifcr->ifcr_count < 0)
1308 		return (EINVAL);
1309 
1310 	count = (if_cloners_count < ifcr->ifcr_count) ?
1311 	    if_cloners_count : ifcr->ifcr_count;
1312 
1313 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1314 		if (count == 0)
1315 			break;
1316 		bzero(outbuf, sizeof outbuf);
1317 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1318 		error = copyout(outbuf, dst, IFNAMSIZ);
1319 		if (error)
1320 			break;
1321 		count--;
1322 		dst += IFNAMSIZ;
1323 	}
1324 
1325 	return (error);
1326 }
1327 
1328 /*
1329  * set queue congestion marker
1330  */
1331 void
1332 if_congestion(void)
1333 {
1334 	extern int ticks;
1335 
1336 	ifq_congestion = ticks;
1337 }
1338 
1339 int
1340 if_congested(void)
1341 {
1342 	extern int ticks;
1343 	int diff;
1344 
1345 	diff = ticks - ifq_congestion;
1346 	if (diff < 0) {
1347 		ifq_congestion = ticks - hz;
1348 		return (0);
1349 	}
1350 
1351 	return (diff <= (hz / 100));
1352 }
1353 
/*
 * Compare two sockaddrs byte for byte, using the first one's sa_len
 * as the length.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1357 
1358 /*
1359  * Locate an interface based on a complete address.
1360  */
1361 struct ifaddr *
1362 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1363 {
1364 	struct ifnet *ifp;
1365 	struct ifaddr *ifa;
1366 	u_int rdomain;
1367 
1368 	KERNEL_ASSERT_LOCKED();
1369 	rdomain = rtable_l2(rtableid);
1370 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1371 		if (ifp->if_rdomain != rdomain)
1372 			continue;
1373 
1374 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1375 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1376 				continue;
1377 
1378 			if (equal(addr, ifa->ifa_addr))
1379 				return (ifa);
1380 		}
1381 	}
1382 	return (NULL);
1383 }
1384 
1385 /*
1386  * Locate the point to point interface with a given destination address.
1387  */
1388 struct ifaddr *
1389 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1390 {
1391 	struct ifnet *ifp;
1392 	struct ifaddr *ifa;
1393 
1394 	KERNEL_ASSERT_LOCKED();
1395 	rdomain = rtable_l2(rdomain);
1396 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1397 		if (ifp->if_rdomain != rdomain)
1398 			continue;
1399 		if (ifp->if_flags & IFF_POINTOPOINT) {
1400 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1401 				if (ifa->ifa_addr->sa_family !=
1402 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1403 					continue;
1404 				if (equal(addr, ifa->ifa_dstaddr))
1405 					return (ifa);
1406 			}
1407 		}
1408 	}
1409 	return (NULL);
1410 }
1411 
1412 /*
1413  * Find an interface address specific to an interface best matching
1414  * a given address.
1415  */
1416 struct ifaddr *
1417 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1418 {
1419 	struct ifaddr *ifa;
1420 	char *cp, *cp2, *cp3;
1421 	char *cplim;
1422 	struct ifaddr *ifa_maybe = NULL;
1423 	u_int af = addr->sa_family;
1424 
1425 	if (af >= AF_MAX)
1426 		return (NULL);
1427 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1428 		if (ifa->ifa_addr->sa_family != af)
1429 			continue;
1430 		if (ifa_maybe == NULL)
1431 			ifa_maybe = ifa;
1432 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1433 			if (equal(addr, ifa->ifa_addr) ||
1434 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1435 				return (ifa);
1436 			continue;
1437 		}
1438 		cp = addr->sa_data;
1439 		cp2 = ifa->ifa_addr->sa_data;
1440 		cp3 = ifa->ifa_netmask->sa_data;
1441 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1442 		for (; cp3 < cplim; cp3++)
1443 			if ((*cp++ ^ *cp2++) & *cp3)
1444 				break;
1445 		if (cp3 == cplim)
1446 			return (ifa);
1447 	}
1448 	return (ifa_maybe);
1449 }
1450 
/* No-op rtrequest handler for interfaces that need none. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1455 
1456 /*
1457  * Default action when installing a local route on a point-to-point
1458  * interface.
1459  */
1460 void
1461 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1462 {
1463 	struct ifnet *lo0ifp;
1464 	struct ifaddr *ifa, *lo0ifa;
1465 
1466 	switch (req) {
1467 	case RTM_ADD:
1468 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1469 			break;
1470 
1471 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1472 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1473 			    rt_key(rt)->sa_len) == 0)
1474 				break;
1475 		}
1476 
1477 		if (ifa == NULL)
1478 			break;
1479 
1480 		KASSERT(ifa == rt->rt_ifa);
1481 
1482 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1483 		KASSERT(lo0ifp != NULL);
1484 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1485 			if (lo0ifa->ifa_addr->sa_family ==
1486 			    ifa->ifa_addr->sa_family)
1487 				break;
1488 		}
1489 		if_put(lo0ifp);
1490 
1491 		if (lo0ifa == NULL)
1492 			break;
1493 
1494 		rt->rt_flags &= ~RTF_LLINFO;
1495 		break;
1496 	case RTM_DELETE:
1497 	case RTM_RESOLVE:
1498 	default:
1499 		break;
1500 	}
1501 }
1502 
1503 
1504 /*
1505  * Bring down all interfaces
1506  */
1507 void
1508 if_downall(void)
1509 {
1510 	struct ifreq ifrq;	/* XXX only partly built */
1511 	struct ifnet *ifp;
1512 
1513 	NET_LOCK();
1514 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1515 		if ((ifp->if_flags & IFF_UP) == 0)
1516 			continue;
1517 		if_down(ifp);
1518 		ifrq.ifr_flags = ifp->if_flags;
1519 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1520 	}
1521 	NET_UNLOCK();
1522 }
1523 
1524 /*
1525  * Mark an interface down and notify protocols of
1526  * the transition.
1527  */
1528 void
1529 if_down(struct ifnet *ifp)
1530 {
1531 	NET_ASSERT_LOCKED();
1532 
1533 	ifp->if_flags &= ~IFF_UP;
1534 	getmicrotime(&ifp->if_lastchange);
1535 	IFQ_PURGE(&ifp->if_snd);
1536 
1537 	if_linkstate(ifp);
1538 }
1539 
1540 /*
1541  * Mark an interface up and notify protocols of
1542  * the transition.
1543  */
1544 void
1545 if_up(struct ifnet *ifp)
1546 {
1547 	NET_ASSERT_LOCKED();
1548 
1549 	ifp->if_flags |= IFF_UP;
1550 	getmicrotime(&ifp->if_lastchange);
1551 
1552 #ifdef INET6
1553 	/* Userland expects the kernel to set ::1 on default lo(4). */
1554 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1555 		in6_ifattach(ifp);
1556 #endif
1557 
1558 	if_linkstate(ifp);
1559 }
1560 
1561 /*
1562  * Notify userland, the routing table and hooks owner of
1563  * a link-state transition.
1564  */
1565 void
1566 if_linkstate_task(void *xifidx)
1567 {
1568 	unsigned int ifidx = (unsigned long)xifidx;
1569 	struct ifnet *ifp;
1570 
1571 	KERNEL_LOCK();
1572 	NET_LOCK();
1573 
1574 	ifp = if_get(ifidx);
1575 	if (ifp != NULL)
1576 		if_linkstate(ifp);
1577 	if_put(ifp);
1578 
1579 	NET_UNLOCK();
1580 	KERNEL_UNLOCK();
1581 }
1582 
/*
 * Perform the link-state notifications synchronously: routing socket
 * message, routing table update and the interface's hooks.
 */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	rtm_ifchg(ifp);
	rt_if_track(ifp);
	dohooks(ifp->if_linkstatehooks, 0);
}
1592 
1593 /*
1594  * Schedule a link state change task.
1595  */
1596 void
1597 if_link_state_change(struct ifnet *ifp)
1598 {
1599 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1600 }
1601 
1602 /*
1603  * Handle interface watchdog timer routine.  Called
1604  * from softclock, we decrement timer (if set) and
1605  * call the appropriate interface routine on expiration.
1606  */
1607 void
1608 if_slowtimo(void *arg)
1609 {
1610 	struct ifnet *ifp = arg;
1611 	int s = splnet();
1612 
1613 	if (ifp->if_watchdog) {
1614 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1615 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1616 		timeout_add(&ifp->if_slowtimo, hz / IFNET_SLOWHZ);
1617 	}
1618 	splx(s);
1619 }
1620 
/*
 * Task counterpart of if_slowtimo(): run the driver's watchdog
 * routine from the task queue.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1641 
1642 /*
1643  * Map interface name to interface structure pointer.
1644  */
1645 struct ifnet *
1646 ifunit(const char *name)
1647 {
1648 	struct ifnet *ifp;
1649 
1650 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1651 		if (strcmp(ifp->if_xname, name) == 0)
1652 			return (ifp);
1653 	}
1654 	return (NULL);
1655 }
1656 
1657 /*
1658  * Map interface index to interface structure pointer.
1659  */
1660 struct ifnet *
1661 if_get(unsigned int index)
1662 {
1663 	struct srp_ref sr;
1664 	struct if_map *if_map;
1665 	struct srp *map;
1666 	struct ifnet *ifp = NULL;
1667 
1668 	if_map = srp_enter(&sr, &if_idxmap.map);
1669 	if (index < if_map->limit) {
1670 		map = (struct srp *)(if_map + 1);
1671 
1672 		ifp = srp_follow(&sr, &map[index]);
1673 		if (ifp != NULL) {
1674 			KASSERT(ifp->if_index == index);
1675 			if_ref(ifp);
1676 		}
1677 	}
1678 	srp_leave(&sr);
1679 
1680 	return (ifp);
1681 }
1682 
/*
 * Take an additional reference on ``ifp'' and return it, for
 * convenience in assignments.
 */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1690 
1691 void
1692 if_put(struct ifnet *ifp)
1693 {
1694 	if (ifp == NULL)
1695 		return;
1696 
1697 	refcnt_rele_wake(&ifp->if_refcnt);
1698 }
1699 
/*
 * Set the link-layer address of ``ifp'' to the ETHER_ADDR_LEN bytes
 * at ``lladdr''.
 *
 * NOTE(review): the cast assumes ``ifp'' is embedded in a struct
 * arpcom (an Ethernet-like interface) -- confirm for other types.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1711 
/*
 * Move ``ifp'' into routing domain ``rdomain'', creating the routing
 * table and its loopback interface on first use.  Returns 0 or errno.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/*
	 * Create the routing table if it does not exist, including its
	 * loopback interface with unit == rdomain.
	 */
	if (!rtable_exists(rdomain)) {
		struct ifnet *loifp;
		char loifname[IFNAMSIZ];
		unsigned int unit = rdomain;

		snprintf(loifname, sizeof(loifname), "lo%u", unit);
		error = if_clone_create(loifname, 0);

		if ((loifp = ifunit(loifname)) == NULL)
			return (ENXIO);

		/* Do not error out if creating the default lo(4) interface */
		if (error && (ifp != loifp || error != EEXIST))
			return (error);

		if ((error = rtable_add(rdomain)) == 0)
			rtable_l2set(rdomain, rdomain, loifp->if_index);
		if (error) {
			/* Undo the loopback creation on failure. */
			if_clone_destroy(loifname);
			return (error);
		}

		loifp->if_rdomain = rdomain;
	}

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	/* remove all routing entries when switching domains */
	/* XXX this is a bit ugly */
	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1798 
1799 /*
1800  * Interface ioctls.
1801  */
1802 int
1803 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1804 {
1805 	struct ifnet *ifp;
1806 	struct ifreq *ifr = (struct ifreq *)data;
1807 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1808 	struct if_afreq *ifar = (struct if_afreq *)data;
1809 	char ifdescrbuf[IFDESCRSIZE];
1810 	char ifrtlabelbuf[RTLABEL_LEN];
1811 	int s, error = 0, oif_xflags;
1812 	size_t bytesdone;
1813 	unsigned short oif_flags;
1814 
1815 	switch (cmd) {
1816 	case SIOCIFCREATE:
1817 		if ((error = suser(p)) != 0)
1818 			return (error);
1819 		NET_LOCK();
1820 		error = if_clone_create(ifr->ifr_name, 0);
1821 		NET_UNLOCK();
1822 		return (error);
1823 	case SIOCIFDESTROY:
1824 		if ((error = suser(p)) != 0)
1825 			return (error);
1826 		NET_LOCK();
1827 		error = if_clone_destroy(ifr->ifr_name);
1828 		NET_UNLOCK();
1829 		return (error);
1830 	case SIOCSIFGATTR:
1831 		if ((error = suser(p)) != 0)
1832 			return (error);
1833 		NET_LOCK();
1834 		error = if_setgroupattribs(data);
1835 		NET_UNLOCK();
1836 		return (error);
1837 	case SIOCGIFCONF:
1838 	case SIOCIFGCLONERS:
1839 	case SIOCGIFGMEMB:
1840 	case SIOCGIFGATTR:
1841 	case SIOCGIFFLAGS:
1842 	case SIOCGIFXFLAGS:
1843 	case SIOCGIFMETRIC:
1844 	case SIOCGIFMTU:
1845 	case SIOCGIFHARDMTU:
1846 	case SIOCGIFDATA:
1847 	case SIOCGIFDESCR:
1848 	case SIOCGIFRTLABEL:
1849 	case SIOCGIFPRIORITY:
1850 	case SIOCGIFRDOMAIN:
1851 	case SIOCGIFGROUP:
1852 	case SIOCGIFLLPRIO:
1853 		return (ifioctl_get(cmd, data));
1854 	}
1855 
1856 	ifp = ifunit(ifr->ifr_name);
1857 	if (ifp == NULL)
1858 		return (ENXIO);
1859 	oif_flags = ifp->if_flags;
1860 	oif_xflags = ifp->if_xflags;
1861 
1862 	switch (cmd) {
1863 	case SIOCIFAFATTACH:
1864 	case SIOCIFAFDETACH:
1865 		if ((error = suser(p)) != 0)
1866 			break;
1867 		NET_LOCK();
1868 		switch (ifar->ifar_af) {
1869 		case AF_INET:
1870 			/* attach is a noop for AF_INET */
1871 			if (cmd == SIOCIFAFDETACH)
1872 				in_ifdetach(ifp);
1873 			break;
1874 #ifdef INET6
1875 		case AF_INET6:
1876 			if (cmd == SIOCIFAFATTACH)
1877 				error = in6_ifattach(ifp);
1878 			else
1879 				in6_ifdetach(ifp);
1880 			break;
1881 #endif /* INET6 */
1882 		default:
1883 			error = EAFNOSUPPORT;
1884 		}
1885 		NET_UNLOCK();
1886 		break;
1887 
1888 	case SIOCSIFFLAGS:
1889 		if ((error = suser(p)) != 0)
1890 			break;
1891 
1892 		NET_LOCK();
1893 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1894 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1895 
1896 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1897 		if (error != 0) {
1898 			ifp->if_flags = oif_flags;
1899 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
1900 			s = splnet();
1901 			if (ISSET(ifp->if_flags, IFF_UP))
1902 				if_up(ifp);
1903 			else
1904 				if_down(ifp);
1905 			splx(s);
1906 		}
1907 		NET_UNLOCK();
1908 		break;
1909 
1910 	case SIOCSIFXFLAGS:
1911 		if ((error = suser(p)) != 0)
1912 			break;
1913 
1914 		NET_LOCK();
1915 #ifdef INET6
1916 		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
1917 			error = in6_ifattach(ifp);
1918 			if (error != 0) {
1919 				NET_UNLOCK();
1920 				break;
1921 			}
1922 		}
1923 
1924 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1925 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1926 			ifp->if_xflags |= IFXF_INET6_NOSOII;
1927 			in6_soiiupdate(ifp);
1928 		}
1929 
1930 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1931 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1932 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
1933 			in6_soiiupdate(ifp);
1934 		}
1935 
1936 #endif	/* INET6 */
1937 
1938 #ifdef MPLS
1939 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
1940 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
1941 			s = splnet();
1942 			ifp->if_xflags |= IFXF_MPLS;
1943 			ifp->if_ll_output = ifp->if_output;
1944 			ifp->if_output = mpls_output;
1945 			splx(s);
1946 		}
1947 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
1948 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
1949 			s = splnet();
1950 			ifp->if_xflags &= ~IFXF_MPLS;
1951 			ifp->if_output = ifp->if_ll_output;
1952 			ifp->if_ll_output = NULL;
1953 			splx(s);
1954 		}
1955 #endif	/* MPLS */
1956 
1957 #ifndef SMALL_KERNEL
1958 		if (ifp->if_capabilities & IFCAP_WOL) {
1959 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
1960 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
1961 				s = splnet();
1962 				ifp->if_xflags |= IFXF_WOL;
1963 				error = ifp->if_wol(ifp, 1);
1964 				splx(s);
1965 			}
1966 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
1967 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
1968 				s = splnet();
1969 				ifp->if_xflags &= ~IFXF_WOL;
1970 				error = ifp->if_wol(ifp, 0);
1971 				splx(s);
1972 			}
1973 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
1974 			ifr->ifr_flags &= ~IFXF_WOL;
1975 			error = ENOTSUP;
1976 		}
1977 #endif
1978 
1979 		if (error == 0)
1980 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
1981 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
1982 		NET_UNLOCK();
1983 		break;
1984 
1985 	case SIOCSIFMETRIC:
1986 		if ((error = suser(p)) != 0)
1987 			break;
1988 		NET_LOCK();
1989 		ifp->if_metric = ifr->ifr_metric;
1990 		NET_UNLOCK();
1991 		break;
1992 
1993 	case SIOCSIFMTU:
1994 		if ((error = suser(p)) != 0)
1995 			break;
1996 		NET_LOCK();
1997 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1998 		NET_UNLOCK();
1999 		if (!error)
2000 			rtm_ifchg(ifp);
2001 		break;
2002 
2003 	case SIOCSIFDESCR:
2004 		if ((error = suser(p)) != 0)
2005 			break;
2006 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2007 		    IFDESCRSIZE, &bytesdone);
2008 		if (error == 0) {
2009 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2010 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2011 		}
2012 		break;
2013 
2014 	case SIOCSIFRTLABEL:
2015 		if ((error = suser(p)) != 0)
2016 			break;
2017 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2018 		    RTLABEL_LEN, &bytesdone);
2019 		if (error == 0) {
2020 			rtlabel_unref(ifp->if_rtlabelid);
2021 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2022 		}
2023 		break;
2024 
2025 	case SIOCSIFPRIORITY:
2026 		if ((error = suser(p)) != 0)
2027 			break;
2028 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2029 			error = EINVAL;
2030 			break;
2031 		}
2032 		ifp->if_priority = ifr->ifr_metric;
2033 		break;
2034 
2035 	case SIOCSIFRDOMAIN:
2036 		if ((error = suser(p)) != 0)
2037 			break;
2038 		NET_LOCK();
2039 		error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2040 		NET_UNLOCK();
2041 		break;
2042 
2043 	case SIOCAIFGROUP:
2044 		if ((error = suser(p)))
2045 			break;
2046 		NET_LOCK();
2047 		error = if_addgroup(ifp, ifgr->ifgr_group);
2048 		if (error == 0) {
2049 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2050 			if (error == ENOTTY)
2051 				error = 0;
2052 		}
2053 		NET_UNLOCK();
2054 		break;
2055 
2056 	case SIOCDIFGROUP:
2057 		if ((error = suser(p)))
2058 			break;
2059 		NET_LOCK();
2060 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2061 		if (error == ENOTTY)
2062 			error = 0;
2063 		if (error == 0)
2064 			error = if_delgroup(ifp, ifgr->ifgr_group);
2065 		NET_UNLOCK();
2066 		break;
2067 
2068 	case SIOCSIFLLADDR:
2069 		if ((error = suser(p)))
2070 			break;
2071 		if ((ifp->if_sadl == NULL) ||
2072 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2073 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2074 			error = EINVAL;
2075 			break;
2076 		}
2077 		NET_LOCK();
2078 		switch (ifp->if_type) {
2079 		case IFT_ETHER:
2080 		case IFT_CARP:
2081 		case IFT_XETHER:
2082 		case IFT_ISO88025:
2083 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2084 			if (error == ENOTTY)
2085 				error = 0;
2086 			if (error == 0)
2087 				error = if_setlladdr(ifp,
2088 				    ifr->ifr_addr.sa_data);
2089 			break;
2090 		default:
2091 			error = ENODEV;
2092 		}
2093 
2094 		if (error == 0)
2095 			ifnewlladdr(ifp);
2096 		NET_UNLOCK();
2097 		break;
2098 
2099 	case SIOCSIFLLPRIO:
2100 		if ((error = suser(p)))
2101 			break;
2102 		if (ifr->ifr_llprio > UCHAR_MAX) {
2103 			error = EINVAL;
2104 			break;
2105 		}
2106 		NET_LOCK();
2107 		ifp->if_llprio = ifr->ifr_llprio;
2108 		NET_UNLOCK();
2109 		break;
2110 
2111 	case SIOCSETKALIVE:
2112 	case SIOCDIFPHYADDR:
2113 	case SIOCSLIFPHYADDR:
2114 	case SIOCSLIFPHYRTABLE:
2115 	case SIOCSLIFPHYTTL:
2116 	case SIOCSLIFPHYDF:
2117 	case SIOCADDMULTI:
2118 	case SIOCDELMULTI:
2119 	case SIOCSIFMEDIA:
2120 	case SIOCSVNETID:
2121 	case SIOCSVNETFLOWID:
2122 	case SIOCSIFPAIR:
2123 	case SIOCSIFPARENT:
2124 	case SIOCDIFPARENT:
2125 		if ((error = suser(p)) != 0)
2126 			break;
2127 		/* FALLTHROUGH */
2128 	default:
2129 		NET_LOCK();
2130 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
2131 			(struct mbuf *) cmd, (struct mbuf *) data,
2132 			(struct mbuf *) ifp, p));
2133 		if (error == EOPNOTSUPP)
2134 			error = ((*ifp->if_ioctl)(ifp, cmd, data));
2135 		NET_UNLOCK();
2136 		break;
2137 	}
2138 
2139 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
2140 		rtm_ifchg(ifp);
2141 
2142 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2143 		getmicrotime(&ifp->if_lastchange);
2144 
2145 	return (error);
2146 }
2147 
/*
 * Read-only interface ioctls, executed under the shared netlock.
 * ``data'' is interpreted according to ``cmd''; commands that do not
 * name an interface are handled first.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK();
		error = ifconf(data);
		NET_RUNLOCK();
		return (error);
	case SIOCIFGCLONERS:
		NET_RLOCK();
		error = if_clone_list((struct if_clonereq *)data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK();
		error = if_getgroupmembers(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK();
		error = if_getgroupattribs(data);
		NET_RUNLOCK();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* Only commands routed here by ifioctl() are expected. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK();

	return (error);
}
2258 
2259 /*
2260  * Return interface configuration
2261  * of system.  List may be used
2262  * in later ioctl's (above) to get
2263  * other information.
2264  */
2265 int
2266 ifconf(caddr_t data)
2267 {
2268 	struct ifconf *ifc = (struct ifconf *)data;
2269 	struct ifnet *ifp;
2270 	struct ifaddr *ifa;
2271 	struct ifreq ifr, *ifrp;
2272 	int space = ifc->ifc_len, error = 0;
2273 
2274 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2275 	if (space == 0) {
2276 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2277 			struct sockaddr *sa;
2278 
2279 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2280 				space += sizeof (ifr);
2281 			else
2282 				TAILQ_FOREACH(ifa,
2283 				    &ifp->if_addrlist, ifa_list) {
2284 					sa = ifa->ifa_addr;
2285 					if (sa->sa_len > sizeof(*sa))
2286 						space += sa->sa_len -
2287 						    sizeof(*sa);
2288 					space += sizeof(ifr);
2289 				}
2290 		}
2291 		ifc->ifc_len = space;
2292 		return (0);
2293 	}
2294 
2295 	ifrp = ifc->ifc_req;
2296 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2297 		if (space < sizeof(ifr))
2298 			break;
2299 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2300 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2301 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2302 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2303 			    sizeof(ifr));
2304 			if (error)
2305 				break;
2306 			space -= sizeof (ifr), ifrp++;
2307 		} else
2308 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2309 				struct sockaddr *sa = ifa->ifa_addr;
2310 
2311 				if (space < sizeof(ifr))
2312 					break;
2313 				if (sa->sa_len <= sizeof(*sa)) {
2314 					ifr.ifr_addr = *sa;
2315 					error = copyout((caddr_t)&ifr,
2316 					    (caddr_t)ifrp, sizeof (ifr));
2317 					ifrp++;
2318 				} else {
2319 					space -= sa->sa_len - sizeof(*sa);
2320 					if (space < sizeof (ifr))
2321 						break;
2322 					error = copyout((caddr_t)&ifr,
2323 					    (caddr_t)ifrp,
2324 					    sizeof(ifr.ifr_name));
2325 					if (error == 0)
2326 						error = copyout((caddr_t)sa,
2327 						    (caddr_t)&ifrp->ifr_addr,
2328 						    sa->sa_len);
2329 					ifrp = (struct ifreq *)(sa->sa_len +
2330 					    (caddr_t)&ifrp->ifr_addr);
2331 				}
2332 				if (error)
2333 					break;
2334 				space -= sizeof (ifr);
2335 			}
2336 	}
2337 	ifc->ifc_len -= space;
2338 	return (error);
2339 }
2340 
2341 void
2342 if_getdata(struct ifnet *ifp, struct if_data *data)
2343 {
2344 	unsigned int i;
2345 
2346 	*data = ifp->if_data;
2347 
2348 	for (i = 0; i < ifp->if_nifqs; i++) {
2349 		struct ifqueue *ifq = ifp->if_ifqs[i];
2350 
2351 		ifq_add_data(ifq, data);
2352 	}
2353 
2354 	for (i = 0; i < ifp->if_niqs; i++) {
2355 		struct ifiqueue *ifiq = ifp->if_iqs[i];
2356 
2357 		ifiq_add_data(ifiq, data);
2358 	}
2359 }
2360 
2361 /*
2362  * Dummy functions replaced in ifnet during detach (if protocols decide to
2363  * fiddle with the if during detach.
2364  */
/* Detached send queues are only ever purged, never started. */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2370 
2371 int
2372 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2373 {
2374 	return ENODEV;
2375 }
2376 
2377 /*
2378  * Create interface group without members
2379  */
2380 struct ifg_group *
2381 if_creategroup(const char *groupname)
2382 {
2383 	struct ifg_group	*ifg;
2384 
2385 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2386 		return (NULL);
2387 
2388 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2389 	ifg->ifg_refcnt = 0;
2390 	ifg->ifg_carp_demoted = 0;
2391 	TAILQ_INIT(&ifg->ifg_members);
2392 #if NPF > 0
2393 	pfi_attach_ifgroup(ifg);
2394 #endif
2395 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2396 
2397 	return (ifg);
2398 }
2399 
2400 /*
2401  * Add a group to an interface
2402  */
2403 int
2404 if_addgroup(struct ifnet *ifp, const char *groupname)
2405 {
2406 	struct ifg_list		*ifgl;
2407 	struct ifg_group	*ifg = NULL;
2408 	struct ifg_member	*ifgm;
2409 
2410 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2411 	    groupname[strlen(groupname) - 1] <= '9')
2412 		return (EINVAL);
2413 
2414 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2415 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2416 			return (EEXIST);
2417 
2418 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2419 		return (ENOMEM);
2420 
2421 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2422 		free(ifgl, M_TEMP, sizeof(*ifgl));
2423 		return (ENOMEM);
2424 	}
2425 
2426 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2427 		if (!strcmp(ifg->ifg_group, groupname))
2428 			break;
2429 
2430 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
2431 		free(ifgl, M_TEMP, sizeof(*ifgl));
2432 		free(ifgm, M_TEMP, sizeof(*ifgm));
2433 		return (ENOMEM);
2434 	}
2435 
2436 	ifg->ifg_refcnt++;
2437 	ifgl->ifgl_group = ifg;
2438 	ifgm->ifgm_ifp = ifp;
2439 
2440 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2441 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2442 
2443 #if NPF > 0
2444 	pfi_group_change(groupname);
2445 #endif
2446 
2447 	return (0);
2448 }
2449 
2450 /*
2451  * Remove a group from an interface
2452  */
2453 int
2454 if_delgroup(struct ifnet *ifp, const char *groupname)
2455 {
2456 	struct ifg_list		*ifgl;
2457 	struct ifg_member	*ifgm;
2458 
2459 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2460 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2461 			break;
2462 	if (ifgl == NULL)
2463 		return (ENOENT);
2464 
2465 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2466 
2467 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2468 		if (ifgm->ifgm_ifp == ifp)
2469 			break;
2470 
2471 	if (ifgm != NULL) {
2472 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2473 		free(ifgm, M_TEMP, sizeof(*ifgm));
2474 	}
2475 
2476 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2477 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2478 #if NPF > 0
2479 		pfi_detach_ifgroup(ifgl->ifgl_group);
2480 #endif
2481 		free(ifgl->ifgl_group, M_TEMP, 0);
2482 	}
2483 
2484 	free(ifgl, M_TEMP, sizeof(*ifgl));
2485 
2486 #if NPF > 0
2487 	pfi_group_change(groupname);
2488 #endif
2489 
2490 	return (0);
2491 }
2492 
2493 /*
2494  * Stores all groups from an interface in memory pointed
2495  * to by data
2496  */
2497 int
2498 if_getgroup(caddr_t data, struct ifnet *ifp)
2499 {
2500 	int			 len, error;
2501 	struct ifg_list		*ifgl;
2502 	struct ifg_req		 ifgrq, *ifgp;
2503 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2504 
2505 	if (ifgr->ifgr_len == 0) {
2506 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2507 			ifgr->ifgr_len += sizeof(struct ifg_req);
2508 		return (0);
2509 	}
2510 
2511 	len = ifgr->ifgr_len;
2512 	ifgp = ifgr->ifgr_groups;
2513 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2514 		if (len < sizeof(ifgrq))
2515 			return (EINVAL);
2516 		bzero(&ifgrq, sizeof ifgrq);
2517 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2518 		    sizeof(ifgrq.ifgrq_group));
2519 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2520 		    sizeof(struct ifg_req))))
2521 			return (error);
2522 		len -= sizeof(ifgrq);
2523 		ifgp++;
2524 	}
2525 
2526 	return (0);
2527 }
2528 
2529 /*
2530  * Stores all members of a group in memory pointed to by data
2531  */
2532 int
2533 if_getgroupmembers(caddr_t data)
2534 {
2535 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2536 	struct ifg_group	*ifg;
2537 	struct ifg_member	*ifgm;
2538 	struct ifg_req		 ifgrq, *ifgp;
2539 	int			 len, error;
2540 
2541 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2542 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2543 			break;
2544 	if (ifg == NULL)
2545 		return (ENOENT);
2546 
2547 	if (ifgr->ifgr_len == 0) {
2548 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2549 			ifgr->ifgr_len += sizeof(ifgrq);
2550 		return (0);
2551 	}
2552 
2553 	len = ifgr->ifgr_len;
2554 	ifgp = ifgr->ifgr_groups;
2555 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2556 		if (len < sizeof(ifgrq))
2557 			return (EINVAL);
2558 		bzero(&ifgrq, sizeof ifgrq);
2559 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2560 		    sizeof(ifgrq.ifgrq_member));
2561 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2562 		    sizeof(struct ifg_req))))
2563 			return (error);
2564 		len -= sizeof(ifgrq);
2565 		ifgp++;
2566 	}
2567 
2568 	return (0);
2569 }
2570 
2571 int
2572 if_getgroupattribs(caddr_t data)
2573 {
2574 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2575 	struct ifg_group	*ifg;
2576 
2577 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2578 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2579 			break;
2580 	if (ifg == NULL)
2581 		return (ENOENT);
2582 
2583 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2584 
2585 	return (0);
2586 }
2587 
2588 int
2589 if_setgroupattribs(caddr_t data)
2590 {
2591 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2592 	struct ifg_group	*ifg;
2593 	struct ifg_member	*ifgm;
2594 	int			 demote;
2595 
2596 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2597 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2598 			break;
2599 	if (ifg == NULL)
2600 		return (ENOENT);
2601 
2602 	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
2603 	if (demote + ifg->ifg_carp_demoted > 0xff ||
2604 	    demote + ifg->ifg_carp_demoted < 0)
2605 		return (EINVAL);
2606 
2607 	ifg->ifg_carp_demoted += demote;
2608 
2609 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2610 		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);
2611 
2612 	return (0);
2613 }
2614 
2615 void
2616 if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
2617 {
2618 	switch (dst->sa_family) {
2619 	case AF_INET:
2620 		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
2621 		    mask && (mask->sa_len == 0 ||
2622 		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
2623 			if_group_egress_build();
2624 		break;
2625 #ifdef INET6
2626 	case AF_INET6:
2627 		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
2628 		    &in6addr_any) && mask && (mask->sa_len == 0 ||
2629 		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
2630 		    &in6addr_any)))
2631 			if_group_egress_build();
2632 		break;
2633 #endif
2634 	}
2635 }
2636 
/*
 * Rebuild the "egress" interface group: empty it, then re-add every
 * interface that a default route (IPv4, and IPv6 when configured)
 * points at.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Find the existing egress group, if any. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* FOREACH_SAFE: if_delgroup() removes entries while we iterate. */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/*
	 * Look up the IPv4 default route (all-zero dst and mask) in
	 * rtable 0 and tag each matching route's interface.
	 */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);	/* balance the if_get() reference */
		}
		/*
		 * NOTE(review): rtable_iterate() appears to advance to the
		 * next matching route (multipath) and release the previous
		 * one -- confirm against rtable_iterate(9).
		 */
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same walk for the IPv6 default route(s). */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2686 
2687 /*
2688  * Set/clear promiscuous mode on interface ifp based on the truth value
2689  * of pswitch.  The calls are reference counted so that only the first
2690  * "on" request actually has an effect, as does the final "off" request.
2691  * Results are undefined if the "off" and "on" requests are not matched.
2692  */
2693 int
2694 ifpromisc(struct ifnet *ifp, int pswitch)
2695 {
2696 	struct ifreq ifr;
2697 	unsigned short oif_flags;
2698 	int oif_pcount, error;
2699 
2700 	oif_flags = ifp->if_flags;
2701 	oif_pcount = ifp->if_pcount;
2702 	if (pswitch) {
2703 		if (ifp->if_pcount++ != 0)
2704 			return (0);
2705 		ifp->if_flags |= IFF_PROMISC;
2706 	} else {
2707 		if (--ifp->if_pcount > 0)
2708 			return (0);
2709 		ifp->if_flags &= ~IFF_PROMISC;
2710 	}
2711 
2712 	if ((ifp->if_flags & IFF_UP) == 0)
2713 		return (0);
2714 
2715 	memset(&ifr, 0, sizeof(ifr));
2716 	ifr.ifr_flags = ifp->if_flags;
2717 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
2718 	if (error) {
2719 		ifp->if_flags = oif_flags;
2720 		ifp->if_pcount = oif_pcount;
2721 	}
2722 
2723 	return (error);
2724 }
2725 
/* Append address `ifa' to interface `ifp's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2731 
/* Remove address `ifa' from interface `ifp's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2737 
2738 void
2739 ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
2740 {
2741 	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
2742 		panic("ifa_update_broadaddr does not support dynamic length");
2743 	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
2744 }
2745 
2746 #ifdef DDB
2747 /* debug function, can be called from ddb> */
2748 void
2749 ifa_print_all(void)
2750 {
2751 	struct ifnet *ifp;
2752 	struct ifaddr *ifa;
2753 
2754 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2755 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2756 			char addr[INET6_ADDRSTRLEN];
2757 
2758 			switch (ifa->ifa_addr->sa_family) {
2759 			case AF_INET:
2760 				printf("%s", inet_ntop(AF_INET,
2761 				    &satosin(ifa->ifa_addr)->sin_addr,
2762 				    addr, sizeof(addr)));
2763 				break;
2764 #ifdef INET6
2765 			case AF_INET6:
2766 				printf("%s", inet_ntop(AF_INET6,
2767 				    &(satosin6(ifa->ifa_addr))->sin6_addr,
2768 				    addr, sizeof(addr)));
2769 				break;
2770 #endif
2771 			}
2772 			printf(" on %s\n", ifp->if_xname);
2773 		}
2774 	}
2775 }
2776 #endif /* DDB */
2777 
/*
 * Propagate a link-layer address change: bounce the interface through
 * SIOCSIFFLAGS so the driver reprograms its hardware address, and
 * refresh the IPv6 link-local address derived from it.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring it up so the driver latches the new lladdr. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		/*
		 * NOTE(review): relies on ia_ifa being the first member of
		 * struct in6_ifaddr, so a NULL return from
		 * in6ifa_ifpforlinklocal() yields a NULL ifa -- confirm.
		 */
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			/* Drop the stale link-local and re-derive it. */
			in6_purgeaddr(ifa);
			dohooks(ifp->if_addrhooks, 0);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
2824 
2825 int net_ticks;
2826 u_int net_livelocks;
2827 
/*
 * Periodic timeout: detect network-stack livelock by noticing when
 * more than one tick elapsed between invocations.
 */
void
net_tick(void *null)
{
	extern int ticks;

	/* Were we delayed by more than one tick since the last run? */
	if (ticks - net_ticks > 1)
		net_livelocks++;

	net_ticks = ticks;

	/* Re-arm for the next tick. */
	timeout_add(&net_tick_to, 1);
}
2840 
2841 int
2842 net_livelocked(void)
2843 {
2844 	extern int ticks;
2845 
2846 	return (ticks - net_ticks > 1);
2847 }
2848 
2849 void
2850 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
2851 {
2852 	extern int ticks;
2853 
2854 	memset(rxr, 0, sizeof(*rxr));
2855 
2856 	rxr->rxr_adjusted = ticks;
2857 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
2858 	rxr->rxr_hwm = hwm;
2859 }
2860 
2861 static inline void
2862 if_rxr_adjust_cwm(struct if_rxring *rxr)
2863 {
2864 	extern int ticks;
2865 
2866 	if (net_livelocked()) {
2867 		if (rxr->rxr_cwm > rxr->rxr_lwm)
2868 			rxr->rxr_cwm--;
2869 		else
2870 			return;
2871 	} else if (rxr->rxr_alive >= rxr->rxr_lwm)
2872 		return;
2873 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
2874 		rxr->rxr_cwm++;
2875 
2876 	rxr->rxr_adjusted = ticks;
2877 }
2878 
2879 void
2880 if_rxr_livelocked(struct if_rxring *rxr)
2881 {
2882 	extern int ticks;
2883 
2884 	if (ticks - rxr->rxr_adjusted >= 1) {
2885 		if (rxr->rxr_cwm > rxr->rxr_lwm)
2886 			rxr->rxr_cwm--;
2887 
2888 		rxr->rxr_adjusted = ticks;
2889 	}
2890 }
2891 
2892 u_int
2893 if_rxr_get(struct if_rxring *rxr, u_int max)
2894 {
2895 	extern int ticks;
2896 	u_int diff;
2897 
2898 	if (ticks - rxr->rxr_adjusted >= 1) {
2899 		/* we're free to try for an adjustment */
2900 		if_rxr_adjust_cwm(rxr);
2901 	}
2902 
2903 	if (rxr->rxr_alive >= rxr->rxr_cwm)
2904 		return (0);
2905 
2906 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
2907 	rxr->rxr_alive += diff;
2908 
2909 	return (diff);
2910 }
2911 
2912 int
2913 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
2914 {
2915 	struct if_rxrinfo kifri;
2916 	int error;
2917 	u_int n;
2918 
2919 	error = copyin(uifri, &kifri, sizeof(kifri));
2920 	if (error)
2921 		return (error);
2922 
2923 	n = min(t, kifri.ifri_total);
2924 	kifri.ifri_total = t;
2925 
2926 	if (n > 0) {
2927 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
2928 		if (error)
2929 			return (error);
2930 	}
2931 
2932 	return (copyout(&kifri, uifri, sizeof(kifri)));
2933 }
2934 
2935 int
2936 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
2937     struct if_rxring *rxr)
2938 {
2939 	struct if_rxring_info ifr;
2940 
2941 	memset(&ifr, 0, sizeof(ifr));
2942 
2943 	if (name != NULL)
2944 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
2945 
2946 	ifr.ifr_size = size;
2947 	ifr.ifr_info = *rxr;
2948 
2949 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
2950 }
2951 
2952 /*
2953  * Network stack input queues.
2954  */
2955 
/*
 * Initialize a network input queue: an mbuf queue (bounded by
 * `maxlen', protected at IPL_NET) plus the netisr number to schedule
 * when packets are queued.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
2962 
2963 int
2964 niq_enqueue(struct niqueue *niq, struct mbuf *m)
2965 {
2966 	int rv;
2967 
2968 	rv = mq_enqueue(&niq->ni_q, m);
2969 	if (rv == 0)
2970 		schednetisr(niq->ni_isr);
2971 	else
2972 		if_congestion();
2973 
2974 	return (rv);
2975 }
2976 
2977 int
2978 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
2979 {
2980 	int rv;
2981 
2982 	rv = mq_enlist(&niq->ni_q, ml);
2983 	if (rv == 0)
2984 		schednetisr(niq->ni_isr);
2985 	else
2986 		if_congestion();
2987 
2988 	return (rv);
2989 }
2990 
/* Fatal: code reached a switch arm for an address family it cannot handle. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
2996 
2997 /*
2998  * XXXSMP This tunable is here to work around the fact that IPsec
2999  * globals aren't ready to be accessed by multiple threads in
3000  * parallel.
3001  */
3002 int		 nettaskqs = NET_TASKQ;
3003 
3004 struct taskq *
3005 net_tq(unsigned int ifindex)
3006 {
3007 	struct taskq *t = NULL;
3008 
3009 	t = nettqmp[ifindex % nettaskqs];
3010 
3011 	return (t);
3012 }
3013