xref: /openbsd-src/sys/net/if.c (revision f763167468dba5339ed4b14b7ecaca2a397ab0f6)
1 /*	$OpenBSD: if.c,v 1.512 2017/08/22 15:02:34 mpi Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "trunk.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/sysctl.h>
86 #include <sys/task.h>
87 #include <sys/atomic.h>
88 #include <sys/proc.h>
89 
90 #include <dev/rndvar.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include <netinet/in.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/igmp.h>
101 #ifdef MROUTING
102 #include <netinet/ip_mroute.h>
103 #endif
104 
105 #ifdef INET6
106 #include <netinet6/in6_var.h>
107 #include <netinet6/in6_ifattach.h>
108 #include <netinet6/nd6.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #endif
112 
113 #ifdef MPLS
114 #include <netmpls/mpls.h>
115 #endif
116 
117 #if NBPFILTER > 0
118 #include <net/bpf.h>
119 #endif
120 
121 #if NBRIDGE > 0
122 #include <net/if_bridge.h>
123 #endif
124 
125 #if NCARP > 0
126 #include <netinet/ip_carp.h>
127 #endif
128 
129 #if NPF > 0
130 #include <net/pfvar.h>
131 #endif
132 
133 void	if_attachsetup(struct ifnet *);
134 void	if_attachdomain(struct ifnet *);
135 void	if_attach_common(struct ifnet *);
136 int	if_setrdomain(struct ifnet *, int);
137 void	if_slowtimo(void *);
138 
139 void	if_detached_qstart(struct ifqueue *);
140 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
141 
142 int	if_getgroup(caddr_t, struct ifnet *);
143 int	if_getgroupmembers(caddr_t);
144 int	if_getgroupattribs(caddr_t);
145 int	if_setgroupattribs(caddr_t);
146 
147 void	if_linkstate(struct ifnet *);
148 void	if_linkstate_task(void *);
149 
150 int	if_clone_list(struct if_clonereq *);
151 struct if_clone	*if_clone_lookup(const char *, int *);
152 
153 int	if_group_egress_build(void);
154 
155 void	if_watchdog_task(void *);
156 
157 void	if_input_process(void *);
158 void	if_netisr(void *);
159 
160 #ifdef DDB
161 void	ifa_print_all(void);
162 #endif
163 
164 void	if_qstart_compat(struct ifqueue *);
165 
166 /*
167  * interface index map
168  *
169  * the kernel maintains a mapping of interface indexes to struct ifnet
170  * pointers.
171  *
172  * the map is an array of struct ifnet pointers prefixed by an if_map
173  * structure. the if_map structure stores the length of its array.
174  *
175  * as interfaces are attached to the system, the map is grown on demand
176  * up to USHRT_MAX entries.
177  *
178  * interface index 0 is reserved and represents no interface. this
179  * supports the use of the interface index as the scope for IPv6 link
180  * local addresses, where scope 0 means no scope has been specified.
181  * it also supports the use of interface index as the unique identifier
182  * for network interfaces in SNMP applications as per RFC2863. therefore
183  * if_get(0) returns NULL.
184  */
185 
186 void if_ifp_dtor(void *, void *);
187 void if_map_dtor(void *, void *);
188 struct ifnet *if_ref(struct ifnet *);
189 
190 /*
191  * struct if_map
192  *
193  * bounded array of ifnet srp pointers used to fetch references of live
194  * interfaces with if_get().
195  */
196 
197 struct if_map {
198 	unsigned long		 limit;
199 	/* followed by limit ifnet srp pointers */
200 };
201 
202 /*
203  * struct if_idxmap
204  *
205  * infrastructure to manage updates and accesses to the current if_map.
206  */
207 
208 struct if_idxmap {
209 	unsigned int		 serial;
210 	unsigned int		 count;
211 	struct srp		 map;
212 };
213 
214 void	if_idxmap_init(unsigned int);
215 void	if_idxmap_insert(struct ifnet *);
216 void	if_idxmap_remove(struct ifnet *);
217 
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;	/* number of entries on if_cloners */

struct timeout net_tick_to;	/* periodic network tick, armed in ifinit() */
void	net_tick(void *);
int	net_livelocked(void);
int	ifq_congestion;

int		 netisr;	/* bitmask of pending NETISR_* soft interrupts */
struct taskq	*softnettq;	/* taskq the stack defers packet work onto */

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
237 
/*
 * Network interface utility routines.
 */

/*
 * One-time network stack initialisation: set up the interface index
 * map, the periodic network tick and the softnet task queue used for
 * deferred packet processing.
 */
void
ifinit(void)
{
	/*
	 * most machines boot with 4 or 5 interfaces, so size the initial map
	 * to accomodate this
	 */
	if_idxmap_init(8);

	timeout_set(&net_tick_to, net_tick, &net_tick_to);

	softnettq = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
	if (softnettq == NULL)
		panic("unable to create softnet taskq");

	/*
	 * kick off the first tick; net_tick() presumably re-arms itself
	 * through net_tick_to — defined elsewhere, confirm there.
	 */
	net_tick(&net_tick_to);
}
258 
/* the global index map; zeroed until if_idxmap_init() runs */
static struct if_idxmap if_idxmap = {
	0,
	0,
	SRP_INITIALIZER()
};

/* SRP garbage collectors for published ifnet pointers and if_map arrays */
struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of all attached interfaces; modified under the NET_LOCK() */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
269 
/*
 * Allocate and publish the initial interface index map with room for
 * `limit' entries.  Called once from ifinit() before any interface
 * attaches, so no synchronisation is needed.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int i;

	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */

	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
	    M_IFADDR, M_WAITOK);

	if_map->limit = limit;
	map = (struct srp *)(if_map + 1);	/* slots follow the header */
	for (i = 0; i < limit; i++)
		srp_init(&map[i]);

	/* this is called early so there's nothing to race with */
	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
}
290 
/*
 * Allocate an interface index for `ifp' and publish the ifnet pointer
 * in the index map, growing the map (by doubling) when the next serial
 * number falls outside the current array.  Sets ifp->if_index and
 * initialises its refcnt as side effects.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		/* grow: copy live entries into a doubled array, then swap */
		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* each new slot takes its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* old map is torn down by if_map_dtor() via the srp gc */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index (index 0 is never handed out) */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
354 
/*
 * Unpublish `ifp' from the interface index map and sleep until every
 * outstanding reference on it has been released, so the caller can
 * safely start tearing the interface down.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
380 
/*
 * SRP gc callback: drop the index map's reference on an ifnet once no
 * reader can see the old pointer anymore.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
386 
/*
 * SRP gc callback for a retired if_map: release every ifnet reference
 * the map still holds, then free the array itself.
 */
void
if_map_dtor(void *null, void *m)
{
	struct if_map *if_map = m;
	struct srp *map = (struct srp *)(if_map + 1);
	unsigned int i;

	/*
	 * dont need to serialize the use of update_locked since this is
	 * the last reference to this map. there's nothing to race against.
	 */
	for (i = 0; i < if_map->limit; i++)
		srp_update_locked(&if_ifp_gc, &map[i], NULL);

	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
}
403 
/*
 * Attach an interface to the
 * list of "active" interfaces.
 *
 * Second stage of attachment, called with the NET_LOCK() held: join
 * the "all" group, attach per-domain data, register with pf, arm the
 * slow timer, allocate the interface index and set up the deferred
 * tasks.  The tasks carry the interface *index* rather than the
 * pointer, so each task re-resolves the interface via if_get() and
 * becomes a no-op after detach.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	mq_init(&ifp->if_inputqueue, 8192, IPL_NET);
	task_set(ifp->if_inputtask, if_input_process, (void *)ifidx);
	task_set(ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
440 
/*
 * Allocate the link level name for the specified interface.  This
 * is an attachment helper.  It must be called after ifp->if_addrlen
 * is initialized, which may not be the case when if_attach() is
 * called.
 */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if (ifp->if_sadl != NULL)
		if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* round up to the alignment of a long */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	/* sdl_len records the full allocation size for later free() */
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
479 
480 /*
481  * Free the link level name for the specified interface.  This is
482  * a detach helper.  This is called from if_detach() or from
483  * link layer type specific detach functions.
484  */
485 void
486 if_free_sadl(struct ifnet *ifp)
487 {
488 	free(ifp->if_sadl, M_IFADDR, 0);
489 	ifp->if_sadl = NULL;
490 }
491 
/*
 * Give every protocol domain a chance to attach per-interface state
 * (stored in ifp->if_afdata, indexed by address family).
 */
void
if_attachdomain(struct ifnet *ifp)
{
	struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
510 
/*
 * Like if_attach(), but inserts the interface at the head of the
 * global interface list instead of the tail.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
520 
/*
 * Attach an interface: initialise the driver-independent parts of the
 * ifnet, append it to the global interface list and finish setup under
 * the NET_LOCK().
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
530 
531 void
532 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
533 {
534 	struct ifqueue **map;
535 	struct ifqueue *ifq;
536 	int i;
537 
538 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
539 	KASSERT(nqs != 0);
540 
541 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
542 
543 	ifp->if_snd.ifq_softc = NULL;
544 	map[0] = &ifp->if_snd;
545 
546 	for (i = 1; i < nqs; i++) {
547 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
548 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
549 		ifq_init(ifq, ifp, i);
550 		map[i] = ifq;
551 	}
552 
553 	ifp->if_ifqs = map;
554 	ifp->if_nifqs = nqs;
555 }
556 
/*
 * Driver-independent first stage of attachment (no locks required):
 * initialise address lists, the send queue, the hook lists and the
 * task/timeout allocations used by the deferred machinery.  Enforces
 * the if_start/if_qstart contract: MPSAFE drivers must provide
 * if_qstart, legacy drivers get the if_qstart_compat shim.
 */
void
if_attach_common(struct ifnet *ifp)
{
	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	ifq_init(&ifp->if_snd, ifp, 0);

	/* start out with a single transmit queue backed by if_snd */
	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;

	ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_addrhooks);
	ifp->if_linkstatehooks = malloc(sizeof(*ifp->if_linkstatehooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_linkstatehooks);
	ifp->if_detachhooks = malloc(sizeof(*ifp->if_detachhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_detachhooks);

	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	ifp->if_slowtimo = malloc(sizeof(*ifp->if_slowtimo), M_TEMP,
	    M_WAITOK|M_ZERO);
	ifp->if_watchdogtask = malloc(sizeof(*ifp->if_watchdogtask),
	    M_TEMP, M_WAITOK|M_ZERO);
	ifp->if_linkstatetask = malloc(sizeof(*ifp->if_linkstatetask),
	    M_TEMP, M_WAITOK|M_ZERO);
	ifp->if_inputtask = malloc(sizeof(*ifp->if_inputtask),
	    M_TEMP, M_WAITOK|M_ZERO);
	ifp->if_llprio = IFQ_DEFPRIO;

	SRPL_INIT(&ifp->if_inputs);
}
604 
/*
 * Install a queueing discipline (ifq_ops) on an interface.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
619 
/*
 * Kick transmission on a legacy (non-MPSAFE) interface.  Only valid
 * when if_attach_common() installed the compat shim, which takes the
 * kernel lock and calls the driver's own if_start.
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy drivers expect the kernel lock and splnet protection */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
647 
/*
 * Transmit path entry point: hand `m' to one of `ifp's send queues and
 * kick that queue.  Packets on a bridge member are diverted to
 * bridge_output() instead, unless M_PROTO1 is set — NOTE(review):
 * presumably the bridge marks packets it has already handled; confirm
 * in if_bridge.  Returns 0 on success or an errno from
 * bridge_output()/ifq_enqueue().
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	unsigned int idx;
	struct ifqueue *ifq;
	int error;

#if NBRIDGE > 0
	if (ifp->if_bridgeport && (m->m_flags & M_PROTO1) == 0) {
		KERNEL_LOCK();
		error = bridge_output(ifp, m, NULL, NULL);
		KERNEL_UNLOCK();
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_unlink_state_key(m);
#endif	/* NPF > 0 */

	/*
	 * use the operations on the first ifq to pick which of the array
	 * gets this mbuf.
	 */
	idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
	ifq = ifp->if_ifqs[idx];

	error = ifq_enqueue(ifq, m);
	if (error)
		return (error);

	ifq_start(ifq);

	return (0);
}
683 
/*
 * Receive path entry point for drivers: queue a list of packets for
 * deferred processing.  Stamps each mbuf with the receiving interface
 * index and routing domain, updates input counters, lets bpf listeners
 * filter the list, then enqueues the remainder and schedules
 * if_input_process() on the softnet taskq.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;
	size_t ibytes = 0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return;

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		ibytes += m->m_pkthdr.len;
	}

	ifp->if_ipackets += ml_len(ml);
	ifp->if_ibytes += ibytes;

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0;

		/* run every packet past bpf; it may filter some out */
		ml_init(&ml0);
		ml_enlist(&ml0, ml);
		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml))
			return;
	}
#endif

	/* schedule the input task only when the queue was empty */
	if (mq_enlist(&ifp->if_inputqueue, ml) == 0)
		task_add(softnettq, ifp->if_inputtask);
}
729 
/*
 * Loop a locally generated packet back into the input path of `ifp'
 * for address family `af'.  Updates both output and input counters,
 * since the packet traverses both directions.  Returns 0, or
 * EAFNOSUPPORT (freeing the mbuf) for an unhandled family.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
781 
/*
 * Interface input handler.  Incoming packets are offered to each
 * handler on the interface's if_inputs list in turn until one of them
 * claims the mbuf (see if_input_process()).
 */
struct ifih {
	SRPL_ENTRY(ifih)	  ifih_next;	/* SRP list entry on if_inputs */
	int			(*ifih_input)(struct ifnet *, struct mbuf *,
				      void *);	/* returns nonzero when the mbuf is consumed */
	void			 *ifih_cookie;	/* opaque argument for ifih_input */
	int			  ifih_refcnt;	/* insert/remove pairing count */
	struct refcnt		  ifih_srpcnt;	/* SRP read-side references */
};
790 
791 void	if_ih_ref(void *, void *);
792 void	if_ih_unref(void *, void *);
793 
794 struct srpl_rc ifih_rc = SRPL_RC_INITIALIZER(if_ih_ref, if_ih_unref, NULL);
795 
/*
 * Register an input handler on `ifp'.  A duplicate (same function and
 * cookie) only bumps the existing handler's refcount, so insert and
 * remove calls must be paired.  New handlers go on the head of the
 * list.
 */
void
if_ih_insert(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie) {
			ifih->ifih_refcnt++;
			break;
		}
	}

	if (ifih == NULL) {
		ifih = malloc(sizeof(*ifih), M_DEVBUF, M_WAITOK);

		ifih->ifih_input = input;
		ifih->ifih_cookie = cookie;
		ifih->ifih_refcnt = 1;
		refcnt_init(&ifih->ifih_srpcnt);
		SRPL_INSERT_HEAD_LOCKED(&ifih_rc, &ifp->if_inputs,
		    ifih, ifih_next);
	}
}
823 
824 void
825 if_ih_ref(void *null, void *i)
826 {
827 	struct ifih *ifih = i;
828 
829 	refcnt_take(&ifih->ifih_srpcnt);
830 }
831 
832 void
833 if_ih_unref(void *null, void *i)
834 {
835 	struct ifih *ifih = i;
836 
837 	refcnt_rele_wake(&ifih->ifih_srpcnt);
838 }
839 
/*
 * Unregister an input handler previously added with if_ih_insert().
 * The handler is only unlinked and freed when the last paired insert
 * is removed; the free waits for all SRP readers to finish first.
 */
void
if_ih_remove(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie)
			break;
	}

	KASSERT(ifih != NULL);

	if (--ifih->ifih_refcnt == 0) {
		SRPL_REMOVE_LOCKED(&ifih_rc, &ifp->if_inputs, ifih,
		    ifih, ifih_next);

		/* wait for concurrent readers before freeing */
		refcnt_finalize(&ifih->ifih_srpcnt, "ifihrm");
		free(ifih, M_DEVBUF, sizeof(*ifih));
	}
}
864 
/*
 * Softnet task: drain the interface's input queue and run each packet
 * through the registered input handlers until one consumes it;
 * unclaimed packets are freed.  The interface is resolved by index, so
 * the task quietly does nothing after a detach.
 */
void
if_input_process(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct mbuf_list ml;
	struct mbuf *m;
	struct ifnet *ifp;
	struct ifih *ifih;
	struct srp_ref sr;
	int s;
#ifdef IPSEC
	int locked = 0;
#endif /* IPSEC */

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	mq_delist(&ifp->if_inputqueue, &ml);
	if (ml_empty(&ml))
		goto out;

	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		add_net_randomness(ml_len(&ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists.
	 */
	NET_LOCK();
	s = splnet();

#ifdef IPSEC
	/*
	 * IPsec is not ready to run without KERNEL_LOCK().  So all
	 * the traffic on your machine is punished if you have IPsec
	 * enabled.
	 *
	 * The NET_LOCK() is dropped and retaken around KERNEL_LOCK()
	 * to preserve the lock ordering.
	 */
	extern int ipsec_in_use;
	if (ipsec_in_use) {
		NET_UNLOCK();
		KERNEL_LOCK();
		NET_LOCK();
		locked = 1;
	}
#endif /* IPSEC */

	while ((m = ml_dequeue(&ml)) != NULL) {
		/*
		 * Pass this mbuf to all input handlers of its
		 * interface until it is consumed.
		 */
		SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
			if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
				break;
		}
		SRPL_LEAVE(&sr);

		/* no handler claimed the packet: drop it */
		if (ifih == NULL)
			m_freem(m);
	}
	splx(s);
	NET_UNLOCK();

#ifdef IPSEC
	if (locked)
		KERNEL_UNLOCK();
#endif /* IPSEC */
out:
	if_put(ifp);
}
944 
/*
 * Softnet task servicing the legacy soft-interrupt protocol handlers.
 * Atomically fetches and clears pending bits from `netisr' and calls
 * the matching protocol interrupt routines; pfsync is serviced once at
 * the end, after all other pending work has been drained.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	KERNEL_LOCK();
	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP))
			arpintr();
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP))
			pppintr();
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH))
			switchintr();
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE))
			pppoeintr();
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX))
			pipexintr();
#endif
		/* accumulate everything seen for the pfsync check below */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC))
		pfsyncintr();
#endif

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1004 
/*
 * First phase of interface teardown: run (and free) the detach hooks
 * and pull the interface out of any carp group, under the NET_LOCK().
 * Called from if_detach() and by pseudo-drivers.
 */
void
if_deactivate(struct ifnet *ifp)
{
	NET_LOCK();
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */
	dohooks(ifp->if_detachhooks, HOOK_REMOVE | HOOK_FREE);

#if NCARP > 0
	/* Remove the interface from any carp group it is a part of.  */
	if (ifp->if_carp && ifp->if_type != IFT_CARP)
		carp_ifdetach(ifp);
#endif
	NET_UNLOCK();
}
1023 
1024 /*
1025  * Detach an interface from everything in the kernel.  Also deallocate
1026  * private resources.
1027  */
1028 void
1029 if_detach(struct ifnet *ifp)
1030 {
1031 	struct ifaddr *ifa;
1032 	struct ifg_list *ifg;
1033 	struct domain *dp;
1034 	int i, s;
1035 
1036 	/* Undo pseudo-driver changes. */
1037 	if_deactivate(ifp);
1038 
1039 	ifq_clr_oactive(&ifp->if_snd);
1040 
1041 	/* Other CPUs must not have a reference before we start destroying. */
1042 	if_idxmap_remove(ifp);
1043 
1044 	NET_LOCK();
1045 	s = splnet();
1046 	ifp->if_qstart = if_detached_qstart;
1047 	ifp->if_ioctl = if_detached_ioctl;
1048 	ifp->if_watchdog = NULL;
1049 
1050 	/* Remove the input task */
1051 	task_del(softnettq, ifp->if_inputtask);
1052 	mq_purge(&ifp->if_inputqueue);
1053 
1054 	/* Remove the watchdog timeout & task */
1055 	timeout_del(ifp->if_slowtimo);
1056 	task_del(softnettq, ifp->if_watchdogtask);
1057 
1058 	/* Remove the link state task */
1059 	task_del(softnettq, ifp->if_linkstatetask);
1060 
1061 #if NBPFILTER > 0
1062 	bpfdetach(ifp);
1063 #endif
1064 	rti_delete(ifp);
1065 #if NETHER > 0 && defined(NFSCLIENT)
1066 	if (ifp->if_index == revarp_ifidx)
1067 		revarp_ifidx = 0;
1068 #endif
1069 #ifdef MROUTING
1070 	vif_delete(ifp);
1071 #endif
1072 	in_ifdetach(ifp);
1073 #ifdef INET6
1074 	in6_ifdetach(ifp);
1075 #endif
1076 #if NPF > 0
1077 	pfi_detach_ifnet(ifp);
1078 #endif
1079 
1080 	/* Remove the interface from the list of all interfaces.  */
1081 	TAILQ_REMOVE(&ifnet, ifp, if_list);
1082 
1083 	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
1084 		if_delgroup(ifp, ifg->ifgl_group->ifg_group);
1085 
1086 	if_free_sadl(ifp);
1087 
1088 	/* We should not have any address left at this point. */
1089 	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
1090 #ifdef DIAGNOSTIC
1091 		printf("%s: address list non empty\n", ifp->if_xname);
1092 #endif
1093 		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
1094 			ifa_del(ifp, ifa);
1095 			ifa->ifa_ifp = NULL;
1096 			ifafree(ifa);
1097 		}
1098 	}
1099 
1100 	free(ifp->if_addrhooks, M_TEMP, 0);
1101 	free(ifp->if_linkstatehooks, M_TEMP, 0);
1102 	free(ifp->if_detachhooks, M_TEMP, 0);
1103 
1104 	free(ifp->if_slowtimo, M_TEMP, sizeof(*ifp->if_slowtimo));
1105 	free(ifp->if_watchdogtask, M_TEMP, sizeof(*ifp->if_watchdogtask));
1106 	free(ifp->if_linkstatetask, M_TEMP, sizeof(*ifp->if_linkstatetask));
1107 	free(ifp->if_inputtask, M_TEMP, sizeof(*ifp->if_inputtask));
1108 
1109 	for (i = 0; (dp = domains[i]) != NULL; i++) {
1110 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
1111 			(*dp->dom_ifdetach)(ifp,
1112 			    ifp->if_afdata[dp->dom_family]);
1113 	}
1114 
1115 	/* Announce that the interface is gone. */
1116 	rtm_ifannounce(ifp, IFAN_DEPARTURE);
1117 	splx(s);
1118 	NET_UNLOCK();
1119 
1120 	for (i = 0; i < ifp->if_nifqs; i++)
1121 		ifq_destroy(ifp->if_ifqs[i]);
1122 	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
1123 		for (i = 1; i < ifp->if_nifqs; i++) {
1124 			free(ifp->if_ifqs[i], M_DEVBUF,
1125 			    sizeof(struct ifqueue));
1126 		}
1127 		free(ifp->if_ifqs, M_DEVBUF,
1128 		    sizeof(struct ifqueue *) * ifp->if_nifqs);
1129 	}
1130 }
1131 
1132 /*
1133  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1134  */
int
if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
{
	struct ifnet *ifp;
	int connected = 0;

	/* Take a reference on the interface with index ``ifidx''. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return (0);

	/* An interface is trivially connected to itself. */
	if (ifp0->if_index == ifp->if_index)
		connected = 1;

#if NBRIDGE > 0
	/* Both interfaces are ports of the same bridge(4). */
	if (SAME_BRIDGE(ifp0->if_bridgeport, ifp->if_bridgeport))
		connected = 1;
#endif
#if NCARP > 0
	/* One interface is the carp(4) child of the other. */
	if ((ifp0->if_type == IFT_CARP && ifp0->if_carpdev == ifp) ||
	    (ifp->if_type == IFT_CARP && ifp->if_carpdev == ifp0))
		connected = 1;
#endif

	if_put(ifp);
	return (connected);
}
1161 
1162 /*
1163  * Create a clone network interface.
1164  */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	NET_ASSERT_LOCKED();

	/* Split ``name'' into cloner name and unit number. */
	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	/* Refuse to create an interface that already exists. */
	if (ifunit(name) != NULL)
		return (EEXIST);

	/* XXXSMP breaks atomicity */
	NET_UNLOCK();
	ret = (*ifc->ifc_create)(ifc, unit);
	NET_LOCK();

	/*
	 * The lock was released above, so look the interface up again
	 * rather than trusting any pointer from before.
	 */
	if (ret != 0 || (ifp = ifunit(name)) == NULL)
		return (ret);

	/* Every clone is a member of the group named after its cloner. */
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);

	return (ret);
}
1195 
1196 /*
1197  * Destroy a clone network interface.
1198  */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	NET_ASSERT_LOCKED();

	/* The unit number is not needed here, only the cloner. */
	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	ifp = ifunit(name);
	if (ifp == NULL)
		return (ENXIO);

	/* Some cloners (e.g. built-in instances) cannot be destroyed. */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	/* Bring the interface down before tearing it apart. */
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}

	/* XXXSMP breaks atomicity */
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);
	NET_LOCK();

	return (ret);
}
1233 
1234 /*
1235  * Look up a network interface cloner.
1236  */
/*
 * Matches ``name'' of the form <cloner><unit> (e.g. "vlan5") against the
 * registered cloners; on success the decoded unit number is stored in
 * ``*unitp'' when unitp is not NULL.
 */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */

	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	/* Find the cloner whose name matches the non-digit prefix. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}

	if (ifc == NULL)
		return (NULL);

	/* Decode the unit number, rejecting overflow and non-digits. */
	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}
1279 
1280 /*
1281  * Register a network interface cloner.
1282  */
1283 void
1284 if_clone_attach(struct if_clone *ifc)
1285 {
1286 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1287 	if_cloners_count++;
1288 }
1289 
1290 /*
1291  * Unregister a network interface cloner.
1292  */
1293 void
1294 if_clone_detach(struct if_clone *ifc)
1295 {
1296 
1297 	LIST_REMOVE(ifc, ifc_list);
1298 	if_cloners_count--;
1299 }
1300 
1301 /*
1302  * Provide list of interface cloners to userspace.
1303  */
int
if_clone_list(struct if_clonereq *ifcr)
{
	char outbuf[IFNAMSIZ], *dst;
	struct if_clone *ifc;
	int count, error = 0;

	/* Always report the total, even on a size-probe call. */
	ifcr->ifcr_total = if_cloners_count;
	if ((dst = ifcr->ifcr_buffer) == NULL) {
		/* Just asking how many there are. */
		return (0);
	}

	if (ifcr->ifcr_count < 0)
		return (EINVAL);

	/* Copy out at most as many names as the caller's buffer holds. */
	count = (if_cloners_count < ifcr->ifcr_count) ?
	    if_cloners_count : ifcr->ifcr_count;

	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (count == 0)
			break;
		/* Zero first so no kernel stack bytes leak to userland. */
		bzero(outbuf, sizeof outbuf);
		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
		error = copyout(outbuf, dst, IFNAMSIZ);
		if (error)
			break;
		count--;
		dst += IFNAMSIZ;
	}

	return (error);
}
1337 
1338 /*
1339  * set queue congestion marker
1340  */
void
if_congestion(void)
{
	extern int ticks;

	/* Remember when congestion was last seen; read by if_congested(). */
	ifq_congestion = ticks;
}
1348 
1349 int
1350 if_congested(void)
1351 {
1352 	extern int ticks;
1353 	int diff;
1354 
1355 	diff = ticks - ifq_congestion;
1356 	if (diff < 0) {
1357 		ifq_congestion = ticks - hz;
1358 		return (0);
1359 	}
1360 
1361 	return (diff <= (hz / 100));
1362 }
1363 
/*
 * Sockaddr equality: byte-compare bounded by the first argument's
 * sa_len, so it covers family, length and address data in one go.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1367 
1368 /*
1369  * Locate an interface based on a complete address.
1370  */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	u_int rdomain;

	KERNEL_ASSERT_LOCKED();
	/* Only interfaces in the same routing domain are candidates. */
	rdomain = rtable_l2(rtableid);
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			/* Skip addresses of a different family. */
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;

			if (equal(addr, ifa->ifa_addr))
				return (ifa);
		}
	}
	return (NULL);
}
1394 
1395 /*
1396  * Locate the point to point interface with a given destination address.
1397  */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	KERNEL_ASSERT_LOCKED();
	/* Normalize a table id into its routing domain. */
	rdomain = rtable_l2(rdomain);
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;
		/* Only point-to-point interfaces carry a dstaddr. */
		if (ifp->if_flags & IFF_POINTOPOINT) {
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				if (ifa->ifa_addr->sa_family !=
				    addr->sa_family || ifa->ifa_dstaddr == NULL)
					continue;
				if (equal(addr, ifa->ifa_dstaddr))
					return (ifa);
			}
		}
	}
	return (NULL);
}
1421 
1422 /*
1423  * Find an interface address specific to an interface best matching
1424  * a given address.
1425  */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	/* Fallback: first address of the right family, if no better match. */
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		/* Without a netmask, only exact (or dstaddr) matches count. */
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/*
		 * Masked compare: walk the netmask bytes and check that
		 * ``addr'' and the interface address agree everywhere the
		 * mask has bits set.  The mask's sa_len bounds the walk.
		 */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1460 
/* Default no-op if_rtrequest handler. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1465 
1466 /*
1467  * Default action when installing a local route on a point-to-point
1468  * interface.
1469  */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		/* Only local routes get the special treatment below. */
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the interface address the route was added for. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/*
		 * Check that the loopback interface of this routing
		 * domain has an address of the same family; if_put()
		 * before inspecting the result, the address list is
		 * only used for the presence test.
		 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1512 
1513 
1514 /*
1515  * Bring down all interfaces
1516  */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* Tell the driver about the cleared IFF_UP flag. */
		if (ifp->if_ioctl) {
			ifrq.ifr_flags = ifp->if_flags;
			(void) (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS,
			    (caddr_t)&ifrq);
		}
	}
	NET_UNLOCK();
}
1536 
1537 /*
1538  * Mark an interface down and notify protocols of
1539  * the transition.
1540  */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Drop any packets still queued for transmission. */
	IFQ_PURGE(&ifp->if_snd);

	if_linkstate(ifp);
}
1552 
1553 /*
1554  * Mark an interface up and notify protocols of
1555  * the transition.
1556  */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	if_linkstate(ifp);
}
1573 
1574 /*
1575  * Notify userland, the routing table and hooks owner of
1576  * a link-state transition.
1577  */
/*
 * Task run from softnettq: resolve the index passed as the task
 * argument back into an interface and notify about its link state.
 */
void
if_linkstate_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	/* The interface may have been detached since the task was queued. */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1595 
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Routing socket message, route revalidation, then hook owners. */
	rtm_ifchg(ifp);
	rt_if_track(ifp);
	dohooks(ifp->if_linkstatehooks, 0);
}
1605 
1606 /*
1607  * Schedule a link state change task.
1608  */
void
if_link_state_change(struct ifnet *ifp)
{
	/* Defer the notification work to the softnet task queue. */
	task_add(softnettq, ifp->if_linkstatetask);
}
1614 
1615 /*
1616  * Handle interface watchdog timer routine.  Called
1617  * from softclock, we decrement timer (if set) and
1618  * call the appropriate interface routine on expiration.
1619  */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* Fire the watchdog task when the timer hits zero. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(softnettq, ifp->if_watchdogtask);
		/* Rearm only while a watchdog routine is installed. */
		timeout_add(ifp->if_slowtimo, hz / IFNET_SLOWHZ);
	}
	splx(s);
}
1633 
/*
 * Task body for a watchdog expiry: resolve the interface index and
 * invoke the driver's watchdog routine under the kernel lock.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1654 
1655 /*
1656  * Map interface name to interface structure pointer.
1657  */
1658 struct ifnet *
1659 ifunit(const char *name)
1660 {
1661 	struct ifnet *ifp;
1662 
1663 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1664 		if (strcmp(ifp->if_xname, name) == 0)
1665 			return (ifp);
1666 	}
1667 	return (NULL);
1668 }
1669 
1670 /*
1671  * Map interface index to interface structure pointer.
1672  */
/*
 * On success a reference is taken on the interface; the caller must
 * release it with if_put().
 */
struct ifnet *
if_get(unsigned int index)
{
	struct srp_ref sr;
	struct if_map *if_map;
	struct srp *map;
	struct ifnet *ifp = NULL;

	/* Read the index map inside an SRP critical section. */
	if_map = srp_enter(&sr, &if_idxmap.map);
	if (index < if_map->limit) {
		/* The per-index SRP array follows the map header. */
		map = (struct srp *)(if_map + 1);

		ifp = srp_follow(&sr, &map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Pin the interface before leaving the SRP section. */
			if_ref(ifp);
		}
	}
	srp_leave(&sr);

	return (ifp);
}
1695 
/* Take an additional reference; returns ``ifp'' for call chaining. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1703 
1704 void
1705 if_put(struct ifnet *ifp)
1706 {
1707 	if (ifp == NULL)
1708 		return;
1709 
1710 	refcnt_rele_wake(&ifp->if_refcnt);
1711 }
1712 
/*
 * Change the link-layer address of ``ifp'' to ``lladdr''.
 * NOTE(review): the cast assumes ifp is embedded in a struct arpcom,
 * i.e. an ethernet-like interface -- callers gate on if_type; confirm
 * before adding new call sites.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	/* Update both the arp view and the link-level sockaddr. */
	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1724 
/*
 * Move ``ifp'' into routing domain ``rdomain'', creating the routing
 * table and its lo(4) interface on first use.  Returns 0 or an errno.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/*
	 * Create the routing table if it does not exist, including its
	 * loopback interface with unit == rdomain.
	 */
	if (!rtable_exists(rdomain)) {
		struct ifnet *loifp;
		char loifname[IFNAMSIZ];
		unsigned int unit = rdomain;

		snprintf(loifname, sizeof(loifname), "lo%u", unit);
		error = if_clone_create(loifname, 0);

		if ((loifp = ifunit(loifname)) == NULL)
			return (ENXIO);

		/* Do not error out if creating the default lo(4) interface */
		if (error && (ifp != loifp || error != EEXIST))
			return (error);

		if ((error = rtable_add(rdomain)) == 0)
			rtable_l2set(rdomain, rdomain, loifp->if_index);
		if (error) {
			/* Undo the clone created above. */
			if_clone_destroy(loifname);
			return (error);
		}

		loifp->if_rdomain = rdomain;
	}

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	/* remove all routing entries when switching domains */
	/* XXX this is a bit ugly */
	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1811 
1812 /*
1813  * Interface ioctls.
1814  */
1815 int
1816 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1817 {
1818 	struct ifnet *ifp;
1819 	struct ifreq *ifr;
1820 	struct sockaddr_dl *sdl;
1821 	struct ifgroupreq *ifgr;
1822 	struct if_afreq *ifar;
1823 	char ifdescrbuf[IFDESCRSIZE];
1824 	char ifrtlabelbuf[RTLABEL_LEN];
1825 	int s, error = 0, oif_xflags;
1826 	size_t bytesdone;
1827 	short oif_flags;
1828 	const char *label;
1829 
1830 	switch (cmd) {
1831 
1832 	case SIOCGIFCONF:
1833 		return (ifconf(cmd, data));
1834 	}
1835 	ifr = (struct ifreq *)data;
1836 
1837 	switch (cmd) {
1838 	case SIOCIFCREATE:
1839 	case SIOCIFDESTROY:
1840 		if ((error = suser(p, 0)) != 0)
1841 			return (error);
1842 		return ((cmd == SIOCIFCREATE) ?
1843 		    if_clone_create(ifr->ifr_name, 0) :
1844 		    if_clone_destroy(ifr->ifr_name));
1845 	case SIOCIFGCLONERS:
1846 		return (if_clone_list((struct if_clonereq *)data));
1847 	case SIOCGIFGMEMB:
1848 		return (if_getgroupmembers(data));
1849 	case SIOCGIFGATTR:
1850 		return (if_getgroupattribs(data));
1851 	case SIOCSIFGATTR:
1852 		if ((error = suser(p, 0)) != 0)
1853 			return (error);
1854 		return (if_setgroupattribs(data));
1855 	case SIOCIFAFATTACH:
1856 	case SIOCIFAFDETACH:
1857 		if ((error = suser(p, 0)) != 0)
1858 			return (error);
1859 		ifar = (struct if_afreq *)data;
1860 		if ((ifp = ifunit(ifar->ifar_name)) == NULL)
1861 			return (ENXIO);
1862 		oif_flags = ifp->if_flags;
1863 		oif_xflags = ifp->if_xflags;
1864 		switch (ifar->ifar_af) {
1865 		case AF_INET:
1866 			/* attach is a noop for AF_INET */
1867 			if (cmd == SIOCIFAFDETACH)
1868 				in_ifdetach(ifp);
1869 			break;
1870 #ifdef INET6
1871 		case AF_INET6:
1872 			if (cmd == SIOCIFAFATTACH)
1873 				error = in6_ifattach(ifp);
1874 			else
1875 				in6_ifdetach(ifp);
1876 			break;
1877 #endif /* INET6 */
1878 		default:
1879 			return (EAFNOSUPPORT);
1880 		}
1881 		if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
1882 			rtm_ifchg(ifp);
1883 		return (error);
1884 	}
1885 
1886 	ifp = ifunit(ifr->ifr_name);
1887 	if (ifp == 0)
1888 		return (ENXIO);
1889 	oif_flags = ifp->if_flags;
1890 	switch (cmd) {
1891 
1892 	case SIOCGIFFLAGS:
1893 		ifr->ifr_flags = ifp->if_flags;
1894 		if (ifq_is_oactive(&ifp->if_snd))
1895 			ifr->ifr_flags |= IFF_OACTIVE;
1896 		break;
1897 
1898 	case SIOCGIFXFLAGS:
1899 		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
1900 		break;
1901 
1902 	case SIOCGIFMETRIC:
1903 		ifr->ifr_metric = ifp->if_metric;
1904 		break;
1905 
1906 	case SIOCGIFMTU:
1907 		ifr->ifr_mtu = ifp->if_mtu;
1908 		break;
1909 
1910 	case SIOCGIFHARDMTU:
1911 		ifr->ifr_hardmtu = ifp->if_hardmtu;
1912 		break;
1913 
1914 	case SIOCGIFDATA: {
1915 		struct if_data ifdata;
1916 		if_getdata(ifp, &ifdata);
1917 		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
1918 		break;
1919 	}
1920 
1921 	case SIOCSIFFLAGS:
1922 		if ((error = suser(p, 0)) != 0)
1923 			return (error);
1924 
1925 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1926 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1927 
1928 		if (ifp->if_ioctl != NULL) {
1929 			error = (*ifp->if_ioctl)(ifp, cmd, data);
1930 			if (error != 0) {
1931 				ifp->if_flags = oif_flags;
1932 				break;
1933 			}
1934 		}
1935 
1936 		if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
1937 			s = splnet();
1938 			if (ISSET(ifp->if_flags, IFF_UP))
1939 				if_up(ifp);
1940 			else
1941 				if_down(ifp);
1942 			splx(s);
1943 		}
1944 		break;
1945 
1946 	case SIOCSIFXFLAGS:
1947 		if ((error = suser(p, 0)) != 0)
1948 			return (error);
1949 
1950 #ifdef INET6
1951 		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
1952 			error = in6_ifattach(ifp);
1953 			if (error != 0)
1954 				return (error);
1955 		}
1956 #endif	/* INET6 */
1957 
1958 #ifdef MPLS
1959 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
1960 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
1961 			s = splnet();
1962 			ifp->if_xflags |= IFXF_MPLS;
1963 			ifp->if_ll_output = ifp->if_output;
1964 			ifp->if_output = mpls_output;
1965 			splx(s);
1966 		}
1967 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
1968 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
1969 			s = splnet();
1970 			ifp->if_xflags &= ~IFXF_MPLS;
1971 			ifp->if_output = ifp->if_ll_output;
1972 			ifp->if_ll_output = NULL;
1973 			splx(s);
1974 		}
1975 #endif	/* MPLS */
1976 
1977 #ifndef SMALL_KERNEL
1978 		if (ifp->if_capabilities & IFCAP_WOL) {
1979 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
1980 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
1981 				s = splnet();
1982 				ifp->if_xflags |= IFXF_WOL;
1983 				error = ifp->if_wol(ifp, 1);
1984 				splx(s);
1985 				if (error)
1986 					return (error);
1987 			}
1988 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
1989 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
1990 				s = splnet();
1991 				ifp->if_xflags &= ~IFXF_WOL;
1992 				error = ifp->if_wol(ifp, 0);
1993 				splx(s);
1994 				if (error)
1995 					return (error);
1996 			}
1997 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
1998 			ifr->ifr_flags &= ~IFXF_WOL;
1999 			error = ENOTSUP;
2000 		}
2001 #endif
2002 
2003 		ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
2004 			(ifr->ifr_flags & ~IFXF_CANTCHANGE);
2005 		rtm_ifchg(ifp);
2006 		break;
2007 
2008 	case SIOCSIFMETRIC:
2009 		if ((error = suser(p, 0)) != 0)
2010 			return (error);
2011 		ifp->if_metric = ifr->ifr_metric;
2012 		break;
2013 
2014 	case SIOCSIFMTU:
2015 		if ((error = suser(p, 0)) != 0)
2016 			return (error);
2017 		if (ifp->if_ioctl == NULL)
2018 			return (EOPNOTSUPP);
2019 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2020 		if (!error)
2021 			rtm_ifchg(ifp);
2022 		break;
2023 
2024 	case SIOCSIFPHYADDR:
2025 	case SIOCDIFPHYADDR:
2026 #ifdef INET6
2027 	case SIOCSIFPHYADDR_IN6:
2028 #endif
2029 	case SIOCSLIFPHYADDR:
2030 	case SIOCSLIFPHYRTABLE:
2031 	case SIOCSLIFPHYTTL:
2032 	case SIOCADDMULTI:
2033 	case SIOCDELMULTI:
2034 	case SIOCSIFMEDIA:
2035 	case SIOCSVNETID:
2036 	case SIOCSIFPAIR:
2037 	case SIOCSIFPARENT:
2038 	case SIOCDIFPARENT:
2039 		if ((error = suser(p, 0)) != 0)
2040 			return (error);
2041 		/* FALLTHROUGH */
2042 	case SIOCGIFPSRCADDR:
2043 	case SIOCGIFPDSTADDR:
2044 	case SIOCGLIFPHYADDR:
2045 	case SIOCGLIFPHYRTABLE:
2046 	case SIOCGLIFPHYTTL:
2047 	case SIOCGIFMEDIA:
2048 	case SIOCGVNETID:
2049 	case SIOCGIFPAIR:
2050 	case SIOCGIFPARENT:
2051 		if (ifp->if_ioctl == 0)
2052 			return (EOPNOTSUPP);
2053 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2054 		break;
2055 
2056 	case SIOCGIFDESCR:
2057 		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
2058 		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
2059 		    &bytesdone);
2060 		break;
2061 
2062 	case SIOCSIFDESCR:
2063 		if ((error = suser(p, 0)) != 0)
2064 			return (error);
2065 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2066 		    IFDESCRSIZE, &bytesdone);
2067 		if (error == 0) {
2068 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2069 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2070 		}
2071 		break;
2072 
2073 	case SIOCGIFRTLABEL:
2074 		if (ifp->if_rtlabelid &&
2075 		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
2076 			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
2077 			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
2078 			    RTLABEL_LEN, &bytesdone);
2079 		} else
2080 			error = ENOENT;
2081 		break;
2082 
2083 	case SIOCSIFRTLABEL:
2084 		if ((error = suser(p, 0)) != 0)
2085 			return (error);
2086 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2087 		    RTLABEL_LEN, &bytesdone);
2088 		if (error == 0) {
2089 			rtlabel_unref(ifp->if_rtlabelid);
2090 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2091 		}
2092 		break;
2093 
2094 	case SIOCGIFPRIORITY:
2095 		ifr->ifr_metric = ifp->if_priority;
2096 		break;
2097 
2098 	case SIOCSIFPRIORITY:
2099 		if ((error = suser(p, 0)) != 0)
2100 			return (error);
2101 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15)
2102 			return (EINVAL);
2103 		ifp->if_priority = ifr->ifr_metric;
2104 		break;
2105 
2106 	case SIOCGIFRDOMAIN:
2107 		ifr->ifr_rdomainid = ifp->if_rdomain;
2108 		break;
2109 
2110 	case SIOCSIFRDOMAIN:
2111 		if ((error = suser(p, 0)) != 0)
2112 			return (error);
2113 		if ((error = if_setrdomain(ifp, ifr->ifr_rdomainid)) != 0)
2114 			return (error);
2115 		break;
2116 
2117 	case SIOCAIFGROUP:
2118 		if ((error = suser(p, 0)))
2119 			return (error);
2120 		ifgr = (struct ifgroupreq *)data;
2121 		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
2122 			return (error);
2123 		(*ifp->if_ioctl)(ifp, cmd, data); /* XXX error check */
2124 		break;
2125 
2126 	case SIOCGIFGROUP:
2127 		if ((error = if_getgroup(data, ifp)))
2128 			return (error);
2129 		break;
2130 
2131 	case SIOCDIFGROUP:
2132 		if ((error = suser(p, 0)))
2133 			return (error);
2134 		(*ifp->if_ioctl)(ifp, cmd, data); /* XXX error check */
2135 		ifgr = (struct ifgroupreq *)data;
2136 		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
2137 			return (error);
2138 		break;
2139 
2140 	case SIOCSIFLLADDR:
2141 		if ((error = suser(p, 0)))
2142 			return (error);
2143 		sdl = ifp->if_sadl;
2144 		if (sdl == NULL)
2145 			return (EINVAL);
2146 		if (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN)
2147 			return (EINVAL);
2148 		if (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))
2149 			return (EINVAL);
2150 		switch (ifp->if_type) {
2151 		case IFT_ETHER:
2152 		case IFT_CARP:
2153 		case IFT_XETHER:
2154 		case IFT_ISO88025:
2155 			if_setlladdr(ifp, ifr->ifr_addr.sa_data);
2156 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2157 			if (error == ENOTTY)
2158 				error = 0;
2159 			break;
2160 		default:
2161 			return (ENODEV);
2162 		}
2163 
2164 		ifnewlladdr(ifp);
2165 		break;
2166 
2167 	case SIOCGIFLLPRIO:
2168 		ifr->ifr_llprio = ifp->if_llprio;
2169 		break;
2170 
2171 	case SIOCSIFLLPRIO:
2172 		if ((error = suser(p, 0)))
2173 			return (error);
2174 		if (ifr->ifr_llprio > UCHAR_MAX)
2175 			return (EINVAL);
2176 		ifp->if_llprio = ifr->ifr_llprio;
2177 		break;
2178 
2179 	default:
2180 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
2181 			(struct mbuf *) cmd, (struct mbuf *) data,
2182 			(struct mbuf *) ifp, p));
2183 		break;
2184 	}
2185 
2186 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2187 		getmicrotime(&ifp->if_lastchange);
2188 
2189 	return (error);
2190 }
2191 
2192 /*
2193  * Return interface configuration
2194  * of system.  List may be used
2195  * in later ioctl's (above) to get
2196  * other information.
2197  */
int
ifconf(u_long cmd, caddr_t data)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
	if (space == 0) {
		TAILQ_FOREACH(ifp, &ifnet, if_list) {
			struct sockaddr *sa;

			if (TAILQ_EMPTY(&ifp->if_addrlist))
				space += sizeof (ifr);
			else
				TAILQ_FOREACH(ifa,
				    &ifp->if_addrlist, ifa_list) {
					sa = ifa->ifa_addr;
					/* Oversized sockaddrs need extra room. */
					if (sa->sa_len > sizeof(*sa))
						space += sa->sa_len -
						    sizeof(*sa);
					space += sizeof(ifr);
				}
		}
		ifc->ifc_len = space;
		return (0);
	}

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (space < sizeof(ifr))
			break;
		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
			/* Addressless interface: emit a zeroed sockaddr. */
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
			    sizeof(ifr));
			if (error)
				break;
			space -= sizeof (ifr), ifrp++;
		} else
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				struct sockaddr *sa = ifa->ifa_addr;

				if (space < sizeof(ifr))
					break;
				if (sa->sa_len <= sizeof(*sa)) {
					/* Fits inside a standard ifreq. */
					ifr.ifr_addr = *sa;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp, sizeof (ifr));
					ifrp++;
				} else {
					/*
					 * Oversized sockaddr: copy the name,
					 * then the full sockaddr, and step
					 * the output pointer past it.
					 */
					space -= sa->sa_len - sizeof(*sa);
					if (space < sizeof (ifr))
						break;
					error = copyout((caddr_t)&ifr,
					    (caddr_t)ifrp,
					    sizeof(ifr.ifr_name));
					if (error == 0)
						error = copyout((caddr_t)sa,
						    (caddr_t)&ifrp->ifr_addr,
						    sa->sa_len);
					ifrp = (struct ifreq *)(sa->sa_len +
					    (caddr_t)&ifrp->ifr_addr);
				}
				if (error)
					break;
				space -= sizeof (ifr);
			}
	}
	/* Report how many bytes were actually used. */
	ifc->ifc_len -= space;
	return (error);
}
2273 
2274 void
2275 if_getdata(struct ifnet *ifp, struct if_data *data)
2276 {
2277 	unsigned int i;
2278 	struct ifqueue *ifq;
2279 	uint64_t opackets = 0;
2280 	uint64_t obytes = 0;
2281 	uint64_t omcasts = 0;
2282 	uint64_t oqdrops = 0;
2283 
2284 	for (i = 0; i < ifp->if_nifqs; i++) {
2285 		ifq = ifp->if_ifqs[i];
2286 
2287 		mtx_enter(&ifq->ifq_mtx);
2288 		opackets += ifq->ifq_packets;
2289 		obytes += ifq->ifq_bytes;
2290 		oqdrops += ifq->ifq_qdrops;
2291 		omcasts += ifq->ifq_mcasts;
2292 		mtx_leave(&ifq->ifq_mtx);
2293 		/* ifq->ifq_errors */
2294 	}
2295 
2296 	*data = ifp->if_data;
2297 	data->ifi_opackets += opackets;
2298 	data->ifi_obytes += obytes;
2299 	data->ifi_oqdrops += oqdrops;
2300 	data->ifi_omcasts += omcasts;
2301 	/* ifp->if_data.ifi_oerrors */
2302 }
2303 
2304 /*
2305  * Dummy functions replaced in ifnet during detach (if protocols decide to
2306  * fiddle with the if during detach.
2307  */
/* Start routine for a detached interface: discard everything queued. */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2313 
2314 int
2315 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2316 {
2317 	return ENODEV;
2318 }
2319 
2320 /*
2321  * Create interface group without members
2322  */
/*
 * Returns the new group with a zero refcount, or NULL if allocation
 * failed.  The caller is expected to take the first reference.
 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group	*ifg;

	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	/* Make the group visible to pf(4). */
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}
2342 
2343 /*
2344  * Add a group to an interface
2345  */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;

	/* Names ending in a digit are reserved for interface names. */
	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	/* Already a member? */
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	/* Allocate both link structures before touching any list. */
	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		free(ifgl, M_TEMP, sizeof(*ifgl));
		return (ENOMEM);
	}

	/* Reuse an existing group of that name, or create one. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
		free(ifgl, M_TEMP, sizeof(*ifgl));
		free(ifgm, M_TEMP, sizeof(*ifgm));
		return (ENOMEM);
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}
2392 
2393 /*
2394  * Remove a group from an interface
2395  */
2396 int
2397 if_delgroup(struct ifnet *ifp, const char *groupname)
2398 {
2399 	struct ifg_list		*ifgl;
2400 	struct ifg_member	*ifgm;
2401 
2402 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2403 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2404 			break;
2405 	if (ifgl == NULL)
2406 		return (ENOENT);
2407 
2408 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2409 
2410 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2411 		if (ifgm->ifgm_ifp == ifp)
2412 			break;
2413 
2414 	if (ifgm != NULL) {
2415 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2416 		free(ifgm, M_TEMP, sizeof(*ifgm));
2417 	}
2418 
2419 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2420 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2421 #if NPF > 0
2422 		pfi_detach_ifgroup(ifgl->ifgl_group);
2423 #endif
2424 		free(ifgl->ifgl_group, M_TEMP, 0);
2425 	}
2426 
2427 	free(ifgl, M_TEMP, sizeof(*ifgl));
2428 
2429 #if NPF > 0
2430 	pfi_group_change(groupname);
2431 #endif
2432 
2433 	return (0);
2434 }
2435 
2436 /*
2437  * Stores all groups from an interface in memory pointed
2438  * to by data
2439  */
2440 int
2441 if_getgroup(caddr_t data, struct ifnet *ifp)
2442 {
2443 	int			 len, error;
2444 	struct ifg_list		*ifgl;
2445 	struct ifg_req		 ifgrq, *ifgp;
2446 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2447 
2448 	if (ifgr->ifgr_len == 0) {
2449 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2450 			ifgr->ifgr_len += sizeof(struct ifg_req);
2451 		return (0);
2452 	}
2453 
2454 	len = ifgr->ifgr_len;
2455 	ifgp = ifgr->ifgr_groups;
2456 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2457 		if (len < sizeof(ifgrq))
2458 			return (EINVAL);
2459 		bzero(&ifgrq, sizeof ifgrq);
2460 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2461 		    sizeof(ifgrq.ifgrq_group));
2462 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2463 		    sizeof(struct ifg_req))))
2464 			return (error);
2465 		len -= sizeof(ifgrq);
2466 		ifgp++;
2467 	}
2468 
2469 	return (0);
2470 }
2471 
2472 /*
2473  * Stores all members of a group in memory pointed to by data
2474  */
2475 int
2476 if_getgroupmembers(caddr_t data)
2477 {
2478 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2479 	struct ifg_group	*ifg;
2480 	struct ifg_member	*ifgm;
2481 	struct ifg_req		 ifgrq, *ifgp;
2482 	int			 len, error;
2483 
2484 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2485 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2486 			break;
2487 	if (ifg == NULL)
2488 		return (ENOENT);
2489 
2490 	if (ifgr->ifgr_len == 0) {
2491 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2492 			ifgr->ifgr_len += sizeof(ifgrq);
2493 		return (0);
2494 	}
2495 
2496 	len = ifgr->ifgr_len;
2497 	ifgp = ifgr->ifgr_groups;
2498 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2499 		if (len < sizeof(ifgrq))
2500 			return (EINVAL);
2501 		bzero(&ifgrq, sizeof ifgrq);
2502 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2503 		    sizeof(ifgrq.ifgrq_member));
2504 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2505 		    sizeof(struct ifg_req))))
2506 			return (error);
2507 		len -= sizeof(ifgrq);
2508 		ifgp++;
2509 	}
2510 
2511 	return (0);
2512 }
2513 
2514 int
2515 if_getgroupattribs(caddr_t data)
2516 {
2517 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2518 	struct ifg_group	*ifg;
2519 
2520 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2521 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2522 			break;
2523 	if (ifg == NULL)
2524 		return (ENOENT);
2525 
2526 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2527 
2528 	return (0);
2529 }
2530 
2531 int
2532 if_setgroupattribs(caddr_t data)
2533 {
2534 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2535 	struct ifg_group	*ifg;
2536 	struct ifg_member	*ifgm;
2537 	int			 demote;
2538 
2539 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2540 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2541 			break;
2542 	if (ifg == NULL)
2543 		return (ENOENT);
2544 
2545 	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
2546 	if (demote + ifg->ifg_carp_demoted > 0xff ||
2547 	    demote + ifg->ifg_carp_demoted < 0)
2548 		return (EINVAL);
2549 
2550 	ifg->ifg_carp_demoted += demote;
2551 
2552 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2553 		if (ifgm->ifgm_ifp->if_ioctl)
2554 			ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp,
2555 			    SIOCSIFGATTR, data);
2556 	return (0);
2557 }
2558 
2559 void
2560 if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
2561 {
2562 	switch (dst->sa_family) {
2563 	case AF_INET:
2564 		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
2565 		    mask && (mask->sa_len == 0 ||
2566 		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
2567 			if_group_egress_build();
2568 		break;
2569 #ifdef INET6
2570 	case AF_INET6:
2571 		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
2572 		    &in6addr_any) && mask && (mask->sa_len == 0 ||
2573 		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
2574 		    &in6addr_any)))
2575 			if_group_egress_build();
2576 		break;
2577 #endif
2578 	}
2579 }
2580 
/*
 * Rebuild the "egress" interface group: remove all current members,
 * then re-add every interface that carries a default route (IPv4
 * 0.0.0.0/0 and, when INET6 is enabled, IPv6 ::/0) in rdomain 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Find the current egress group, if one exists. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/*
	 * Empty the group.  FOREACH_SAFE is required because
	 * if_delgroup() removes and frees the entry being visited.
	 */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* All-zeros sockaddr_in: the IPv4 default destination/mask. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		/*
		 * NOTE(review): rtable_iterate() presumably releases rt
		 * and returns the next route of the same destination
		 * (multipath) or NULL — confirm against rtable.c.
		 */
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same walk for the IPv6 default route (::/0). */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2630 
2631 /*
2632  * Set/clear promiscuous mode on interface ifp based on the truth value
2633  * of pswitch.  The calls are reference counted so that only the first
2634  * "on" request actually has an effect, as does the final "off" request.
2635  * Results are undefined if the "off" and "on" requests are not matched.
2636  */
2637 int
2638 ifpromisc(struct ifnet *ifp, int pswitch)
2639 {
2640 	struct ifreq ifr;
2641 
2642 	if (pswitch) {
2643 		/*
2644 		 * If the device is not configured up, we cannot put it in
2645 		 * promiscuous mode.
2646 		 */
2647 		if ((ifp->if_flags & IFF_UP) == 0)
2648 			return (ENETDOWN);
2649 		if (ifp->if_pcount++ != 0)
2650 			return (0);
2651 		ifp->if_flags |= IFF_PROMISC;
2652 	} else {
2653 		if (--ifp->if_pcount > 0)
2654 			return (0);
2655 		ifp->if_flags &= ~IFF_PROMISC;
2656 		/*
2657 		 * If the device is not configured up, we should not need to
2658 		 * turn off promiscuous mode (device should have turned it
2659 		 * off when interface went down; and will look at IFF_PROMISC
2660 		 * again next time interface comes up).
2661 		 */
2662 		if ((ifp->if_flags & IFF_UP) == 0)
2663 			return (0);
2664 	}
2665 	ifr.ifr_flags = ifp->if_flags;
2666 	return ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
2667 }
2668 
/*
 * sysctl handler for an mbuf_queue: exposes its length, length limit
 * and drop counter under the IFQCTL_* names.
 */
int
sysctl_mq(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen, struct mbuf_queue *mq)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case IFQCTL_LEN:
		/* Read-only: current number of mbufs queued. */
		return (sysctl_rdint(oldp, oldlenp, newp, mq_len(mq)));
	case IFQCTL_MAXLEN:
		/* Read/write: queue length limit. */
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &mq->mq_maxlen)); /* XXX directly accessing maxlen */
	case IFQCTL_DROPS:
		/* Read-only: number of mbufs dropped because of overflow. */
		return (sysctl_rdint(oldp, oldlenp, newp, mq_drops(mq)));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
2690 
/* Append address ifa to interface ifp's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2696 
/* Remove address ifa from interface ifp's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2702 
/*
 * Overwrite ifa's broadcast address with sa.  The new address must
 * have the same sa_len as the old one; resizing is not supported.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
2710 
2711 #ifdef DDB
2712 /* debug function, can be called from ddb> */
2713 void
2714 ifa_print_all(void)
2715 {
2716 	struct ifnet *ifp;
2717 	struct ifaddr *ifa;
2718 
2719 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2720 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2721 			char addr[INET6_ADDRSTRLEN];
2722 
2723 			switch (ifa->ifa_addr->sa_family) {
2724 			case AF_INET:
2725 				printf("%s", inet_ntop(AF_INET,
2726 				    &satosin(ifa->ifa_addr)->sin_addr,
2727 				    addr, sizeof(addr)));
2728 				break;
2729 #ifdef INET6
2730 			case AF_INET6:
2731 				printf("%s", inet_ntop(AF_INET6,
2732 				    &(satosin6(ifa->ifa_addr))->sin6_addr,
2733 				    addr, sizeof(addr)));
2734 				break;
2735 #endif
2736 			}
2737 			printf(" on %s\n", ifp->if_xname);
2738 		}
2739 	}
2740 }
2741 #endif /* DDB */
2742 
/*
 * Propagate a changed link-layer address: bounce the interface through
 * an UP transition so the driver reprograms its hardware with the new
 * lladdr, and regenerate the IPv6 link-local address derived from it.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	/* Remember whether the interface was up so we can restore it. */
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* ...then up again, making the driver pick up the new lladdr. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		/*
		 * NOTE(review): if in6ifa_ifpforlinklocal() returns NULL
		 * this relies on &NULL->ia_ifa comparing as NULL (ia_ifa
		 * being the first member) — confirm.
		 */
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			dohooks(ifp->if_addrhooks, 0);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
2789 
int net_ticks;		/* value of `ticks' at the last net_tick() run */
u_int net_livelocks;	/* number of livelock events net_tick() detected */
2792 
/*
 * Once-per-tick timeout used to detect livelock: if more than one
 * tick passed since the previous run, the timeout was starved.
 */
void
net_tick(void *null)
{
	extern int ticks;

	if (ticks - net_ticks > 1)
		net_livelocks++;

	net_ticks = ticks;

	/* Re-arm ourselves for the next tick. */
	timeout_add(&net_tick_to, 1);
}
2805 
/*
 * Return non-zero when net_tick() was recently starved, i.e. the
 * system currently looks livelocked.
 */
int
net_livelocked(void)
{
	extern int ticks;

	return (ticks - net_ticks > 1);
}
2813 
2814 void
2815 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
2816 {
2817 	extern int ticks;
2818 
2819 	memset(rxr, 0, sizeof(*rxr));
2820 
2821 	rxr->rxr_adjusted = ticks;
2822 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
2823 	rxr->rxr_hwm = hwm;
2824 }
2825 
2826 static inline void
2827 if_rxr_adjust_cwm(struct if_rxring *rxr)
2828 {
2829 	extern int ticks;
2830 
2831 	if (net_livelocked()) {
2832 		if (rxr->rxr_cwm > rxr->rxr_lwm)
2833 			rxr->rxr_cwm--;
2834 		else
2835 			return;
2836 	} else if (rxr->rxr_alive >= rxr->rxr_lwm)
2837 		return;
2838 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
2839 		rxr->rxr_cwm++;
2840 
2841 	rxr->rxr_adjusted = ticks;
2842 }
2843 
2844 u_int
2845 if_rxr_get(struct if_rxring *rxr, u_int max)
2846 {
2847 	extern int ticks;
2848 	u_int diff;
2849 
2850 	if (ticks - rxr->rxr_adjusted >= 1) {
2851 		/* we're free to try for an adjustment */
2852 		if_rxr_adjust_cwm(rxr);
2853 	}
2854 
2855 	if (rxr->rxr_alive >= rxr->rxr_cwm)
2856 		return (0);
2857 
2858 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
2859 	rxr->rxr_alive += diff;
2860 
2861 	return (diff);
2862 }
2863 
2864 int
2865 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
2866 {
2867 	struct if_rxrinfo kifri;
2868 	int error;
2869 	u_int n;
2870 
2871 	error = copyin(uifri, &kifri, sizeof(kifri));
2872 	if (error)
2873 		return (error);
2874 
2875 	n = min(t, kifri.ifri_total);
2876 	kifri.ifri_total = t;
2877 
2878 	if (n > 0) {
2879 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
2880 		if (error)
2881 			return (error);
2882 	}
2883 
2884 	return (copyout(&kifri, uifri, sizeof(kifri)));
2885 }
2886 
2887 int
2888 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
2889     struct if_rxring *rxr)
2890 {
2891 	struct if_rxring_info ifr;
2892 
2893 	memset(&ifr, 0, sizeof(ifr));
2894 
2895 	if (name != NULL)
2896 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
2897 
2898 	ifr.ifr_size = size;
2899 	ifr.ifr_info = *rxr;
2900 
2901 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
2902 }
2903 
2904 /*
2905  * Network stack input queues.
2906  */
2907 
/*
 * Initialize a network input queue: a bounded mbuf queue at IPL_NET
 * plus the soft interrupt to raise when packets are enqueued.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
2914 
2915 int
2916 niq_enqueue(struct niqueue *niq, struct mbuf *m)
2917 {
2918 	int rv;
2919 
2920 	rv = mq_enqueue(&niq->ni_q, m);
2921 	if (rv == 0)
2922 		schednetisr(niq->ni_isr);
2923 	else
2924 		if_congestion();
2925 
2926 	return (rv);
2927 }
2928 
2929 int
2930 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
2931 {
2932 	int rv;
2933 
2934 	rv = mq_enlist(&niq->ni_q, ml);
2935 	if (rv == 0)
2936 		schednetisr(niq->ni_isr);
2937 	else
2938 		if_congestion();
2939 
2940 	return (rv);
2941 }
2942 
/* Central panic for an address family no caller knows how to handle. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
2948