xref: /openbsd-src/sys/net/if.c (revision 9f11ffb7133c203312a01e4b986886bc88c7d74b)
1 /*	$OpenBSD: if.c,v 1.571 2019/01/09 01:14:21 dlg Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 #include "trunk.h"
74 
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/mbuf.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/timeout.h>
81 #include <sys/protosw.h>
82 #include <sys/kernel.h>
83 #include <sys/ioctl.h>
84 #include <sys/domain.h>
85 #include <sys/task.h>
86 #include <sys/atomic.h>
87 #include <sys/percpu.h>
88 #include <sys/proc.h>
89 
90 #include <dev/rndvar.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include <netinet/in.h>
99 #include <netinet/if_ether.h>
100 #include <netinet/igmp.h>
101 #ifdef MROUTING
102 #include <netinet/ip_mroute.h>
103 #endif
104 
105 #ifdef INET6
106 #include <netinet6/in6_var.h>
107 #include <netinet6/in6_ifattach.h>
108 #include <netinet6/nd6.h>
109 #include <netinet/ip6.h>
110 #include <netinet6/ip6_var.h>
111 #endif
112 
113 #ifdef MPLS
114 #include <netmpls/mpls.h>
115 #endif
116 
117 #if NBPFILTER > 0
118 #include <net/bpf.h>
119 #endif
120 
121 #if NBRIDGE > 0
122 #include <net/if_bridge.h>
123 #endif
124 
125 #if NCARP > 0
126 #include <netinet/ip_carp.h>
127 #endif
128 
129 #if NPF > 0
130 #include <net/pfvar.h>
131 #endif
132 
133 #include <sys/device.h>
134 
135 void	if_attachsetup(struct ifnet *);
136 void	if_attachdomain(struct ifnet *);
137 void	if_attach_common(struct ifnet *);
138 int	if_createrdomain(int, struct ifnet *);
139 int	if_setrdomain(struct ifnet *, int);
140 void	if_slowtimo(void *);
141 
142 void	if_detached_qstart(struct ifqueue *);
143 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
144 
145 int	ifioctl_get(u_long, caddr_t);
146 int	ifconf(caddr_t);
147 
148 int	if_getgroup(caddr_t, struct ifnet *);
149 int	if_getgroupmembers(caddr_t);
150 int	if_getgroupattribs(caddr_t);
151 int	if_setgroupattribs(caddr_t);
152 int	if_getgrouplist(caddr_t);
153 
154 void	if_linkstate(struct ifnet *);
155 void	if_linkstate_task(void *);
156 
157 int	if_clone_list(struct if_clonereq *);
158 struct if_clone	*if_clone_lookup(const char *, int *);
159 
160 int	if_group_egress_build(void);
161 
162 void	if_watchdog_task(void *);
163 
164 void	if_netisr(void *);
165 
166 #ifdef DDB
167 void	ifa_print_all(void);
168 #endif
169 
170 void	if_qstart_compat(struct ifqueue *);
171 
172 /*
173  * interface index map
174  *
175  * the kernel maintains a mapping of interface indexes to struct ifnet
176  * pointers.
177  *
178  * the map is an array of struct ifnet pointers prefixed by an if_map
179  * structure. the if_map structure stores the length of its array.
180  *
181  * as interfaces are attached to the system, the map is grown on demand
182  * up to USHRT_MAX entries.
183  *
184  * interface index 0 is reserved and represents no interface. this
185  * supports the use of the interface index as the scope for IPv6 link
186  * local addresses, where scope 0 means no scope has been specified.
187  * it also supports the use of interface index as the unique identifier
188  * for network interfaces in SNMP applications as per RFC2863. therefore
189  * if_get(0) returns NULL.
190  */
191 
192 void if_ifp_dtor(void *, void *);
193 void if_map_dtor(void *, void *);
194 struct ifnet *if_ref(struct ifnet *);
195 
196 /*
197  * struct if_map
198  *
199  * bounded array of ifnet srp pointers used to fetch references of live
200  * interfaces with if_get().
201  */
202 
struct if_map {
	unsigned long		 limit;	/* number of srp slots that follow */
	/* followed by limit ifnet srp pointers */
};
207 
208 /*
209  * struct if_idxmap
210  *
211  * infrastructure to manage updates and accesses to the current if_map.
212  */
213 
struct if_idxmap {
	unsigned int		 serial;	/* next candidate index; 0 is reserved */
	unsigned int		 count;		/* interfaces currently in the map */
	struct srp		 map;		/* the live struct if_map */
};
219 
220 void	if_idxmap_init(unsigned int);
221 void	if_idxmap_insert(struct ifnet *);
222 void	if_idxmap_remove(struct ifnet *);
223 
/* list of all interface groups in the system */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/* registered interface cloners and how many there are */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;

struct timeout net_tick_to;	/* periodic tick armed from ifinit() */
void	net_tick(void *);
int	net_livelocked(void);
int	ifq_congestion;

int		 netisr;	/* bitmask of pending NETISR_* soft interrupts */

#define	NET_TASKQ	1
struct taskq	*nettqmp[NET_TASKQ];	/* softnet task queues, see ifinit() */

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
246 
247 /*
248  * Network interface utility routines.
249  */
250 void
251 ifinit(void)
252 {
253 	unsigned int	i;
254 
255 	/*
256 	 * most machines boot with 4 or 5 interfaces, so size the initial map
257 	 * to accomodate this
258 	 */
259 	if_idxmap_init(8);
260 
261 	timeout_set(&net_tick_to, net_tick, &net_tick_to);
262 
263 	for (i = 0; i < NET_TASKQ; i++) {
264 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
265 		if (nettqmp[i] == NULL)
266 			panic("unable to create network taskq %d", i);
267 	}
268 
269 	net_tick(&net_tick_to);
270 }
271 
/* the current index map state; ifinit() installs the first map */
static struct if_idxmap if_idxmap = {
	0,
	0,
	SRP_INITIALIZER()
};

/* garbage collectors for SRP-published ifnet pointers and index maps */
struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of all attached interfaces */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
282 
/*
 * Allocate and publish the initial interface index map with room for
 * "limit" entries.  Called once from ifinit() during early boot, so
 * the locked SRP update below cannot race with any reader.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int i;

	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */

	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
	    M_IFADDR, M_WAITOK);

	if_map->limit = limit;
	/* the srp slot array lives immediately after the if_map header */
	map = (struct srp *)(if_map + 1);
	for (i = 0; i < limit; i++)
		srp_init(&map[i]);

	/* this is called early so there's nothing to race with */
	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
}
303 
/*
 * Assign ifp a free interface index and publish it in the index map,
 * growing the map (doubling its limit) when the candidate index falls
 * outside the current array.  Readers may run concurrently via SRP;
 * writers are serialised by the kernel lock.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* indexes wrap at USHRT_MAX; 0 stays reserved (serial starts at 1) */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		/* copy the live entries, taking a reference for the new map */
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		/* initialise the newly added (empty) tail of the array */
		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* swap the maps; the old one is freed by if_map_dtor via GC */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
367 
/*
 * Unpublish ifp from the index map and wait for every other CPU to
 * drop its reference before returning, after which the interface can
 * be torn down safely.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* clearing the slot releases the map's reference via if_ifp_dtor */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
393 
/*
 * SRP garbage-collector callback: drop the reference the index map
 * held on an interface once no reader can see it any more.
 */
void
if_ifp_dtor(void *null, void *v)
{
	struct ifnet *ifp = v;

	if_put(ifp);
}
399 
400 void
401 if_map_dtor(void *null, void *m)
402 {
403 	struct if_map *if_map = m;
404 	struct srp *map = (struct srp *)(if_map + 1);
405 	unsigned int i;
406 
407 	/*
408 	 * dont need to serialize the use of update_locked since this is
409 	 * the last reference to this map. there's nothing to race against.
410 	 */
411 	for (i = 0; i < if_map->limit; i++)
412 		srp_update_locked(&if_ifp_gc, &map[i], NULL);
413 
414 	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
415 }
416 
417 /*
418  * Attach an interface to the
419  * list of "active" interfaces.
420  */
/*
 * Finish attaching an interface that is already on the ifnet list:
 * join the "all" group, attach per-domain data, start the slow timer,
 * publish the interface in the index map and announce its arrival via
 * the routing socket.  Runs under the net lock.
 */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	/* tasks carry the index, not the pointer, so stale tasks stay safe */
	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
451 
452 /*
453  * Allocate the link level name for the specified interface.  This
454  * is an attachment helper.  It must be called after ifp->if_addrlen
455  * is initialized, which may not be the case when if_attach() is
456  * called.
457  */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if (ifp->if_sadl != NULL)
		if_free_sadl(ifp);

	namelen = strlen(ifp->if_xname);
	/* header plus the interface name stored in sdl_data */
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* round up to the pointer size so the sockaddr is suitably aligned */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	/* sdl_len doubles as the record of the allocation size */
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
490 
491 /*
492  * Free the link level name for the specified interface.  This is
493  * a detach helper.  This is called from if_detach() or from
494  * link layer type specific detach functions.
495  */
496 void
497 if_free_sadl(struct ifnet *ifp)
498 {
499 	free(ifp->if_sadl, M_IFADDR, 0);
500 	ifp->if_sadl = NULL;
501 }
502 
/*
 * Give every registered protocol domain a chance to hang its
 * per-interface data off ifp->if_afdata, indexed by address family.
 */
void
if_attachdomain(struct ifnet *ifp)
{
	struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
521 
/*
 * Like if_attach(), but insert the interface at the head of the
 * global interface list instead of the tail.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
531 
/*
 * Attach an interface: initialise the common state, append it to the
 * global interface list and complete setup under the net lock.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
541 
542 void
543 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
544 {
545 	struct ifqueue **map;
546 	struct ifqueue *ifq;
547 	int i;
548 
549 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
550 	KASSERT(nqs != 0);
551 
552 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
553 
554 	ifp->if_snd.ifq_softc = NULL;
555 	map[0] = &ifp->if_snd;
556 
557 	for (i = 1; i < nqs; i++) {
558 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
559 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
560 		ifq_init(ifq, ifp, i);
561 		map[i] = ifq;
562 	}
563 
564 	ifp->if_ifqs = map;
565 	ifp->if_nifqs = nqs;
566 }
567 
568 void
569 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
570 {
571 	struct ifiqueue **map;
572 	struct ifiqueue *ifiq;
573 	unsigned int i;
574 
575 	KASSERT(niqs != 0);
576 
577 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
578 
579 	ifp->if_rcv.ifiq_softc = NULL;
580 	map[0] = &ifp->if_rcv;
581 
582 	for (i = 1; i < niqs; i++) {
583 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
584 		ifiq_init(ifiq, ifp, i);
585 		map[i] = ifiq;
586 	}
587 
588 	ifp->if_iqs = map;
589 	ifp->if_niqs = niqs;
590 }
591 
/*
 * Initialise the driver-independent parts of an ifnet: address lists,
 * the default send/receive queues, the hook lists and fallback
 * methods.  Legacy (non-MPSAFE) drivers get their if_start wrapped by
 * if_qstart_compat so the stack can treat everything as multi-queue.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	/* every interface starts with one send and one receive queue */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;

	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	ifp->if_addrhooks = malloc(sizeof(*ifp->if_addrhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_addrhooks);
	ifp->if_linkstatehooks = malloc(sizeof(*ifp->if_linkstatehooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_linkstatehooks);
	ifp->if_detachhooks = malloc(sizeof(*ifp->if_detachhooks),
	    M_TEMP, M_WAITOK);
	TAILQ_INIT(ifp->if_detachhooks);

	/* fall back to the generic implementations when unset */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;

	SRPL_INIT(&ifp->if_inputs);
}
641 
/*
 * Switch the queueing discipline (ifq_ops) of an interface's send
 * queue.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
656 
657 void
658 if_start(struct ifnet *ifp)
659 {
660 	KASSERT(ifp->if_qstart == if_qstart_compat);
661 	if_qstart_compat(&ifp->if_snd);
662 }
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy if_start expects the kernel lock and splnet */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
684 
/*
 * Hand an outgoing mbuf to an interface, diverting it through pf
 * packet delay or the bridge output path first where applicable.
 * Returns 0 or an errno from the enqueue path; the mbuf is always
 * consumed.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
#if NPF > 0
	/* pf may have tagged the packet for artificial delay */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* M_PROTO1 marks packets already processed by the bridge */
	if (ifp->if_bridgeport && (m->m_flags & M_PROTO1) == 0) {
		int error;

		KERNEL_LOCK();
		error = bridge_output(ifp, m, NULL, NULL);
		KERNEL_UNLOCK();
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
710 
711 int
712 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
713 {
714 	struct ifqueue *ifq = &ifp->if_snd;
715 	int error;
716 
717 	if (ifp->if_nifqs > 1) {
718 		unsigned int idx;
719 
720 		/*
721 		 * use the operations on the first ifq to pick which of
722 		 * the array gets this mbuf.
723 		 */
724 
725 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
726 		ifq = ifp->if_ifqs[idx];
727 	}
728 
729 	error = ifq_enqueue(ifq, m);
730 	if (error)
731 		return (error);
732 
733 	ifq_start(ifq);
734 
735 	return (0);
736 }
737 
/*
 * Feed a list of received packets to the interface's first receive
 * queue.  NOTE(review): 2048 is passed straight to ifiq_input();
 * presumably a backlog/pressure limit -- confirm against ifiq.c.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml, 2048);
}
743 
/*
 * Loop a packet back into the local protocol stack for the given
 * address family, updating both output and input statistics since the
 * packet logically leaves and re-enters the interface.  Consumes the
 * mbuf; returns 0 or EAFNOSUPPORT for unhandled families.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destinated to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* re-tag the mbuf as if it had just been received on ifp */
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
795 
/*
 * Queue a locally generated packet on one of the interface's receive
 * queues for deferred input processing, spreading across queues by
 * the mbuf's flow id when one is set.  Consumes the mbuf; returns 0
 * or ENOBUFS if the receive queue rejected it.
 */
int
if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	struct ifiqueue *ifiq;
	unsigned int flow = 0;

	m->m_pkthdr.ph_family = af;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	ifiq = ifp->if_iqs[flow % ifp->if_niqs];

	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
}
813 
/*
 * Interface input handler: an entry on an interface's if_inputs SRP
 * list.  Each handler gets a shot at every received mbuf until one of
 * them consumes it (see if_input_process()).
 */
struct ifih {
	SRPL_ENTRY(ifih)	  ifih_next;
	int			(*ifih_input)(struct ifnet *, struct mbuf *,
				      void *);
	void			 *ifih_cookie;	/* opaque arg for ifih_input */
	int			  ifih_refcnt;	/* insert/remove pairs, kernel lock */
	struct refcnt		  ifih_srpcnt;	/* SRP readers still using entry */
};
822 
void	if_ih_ref(void *, void *);
void	if_ih_unref(void *, void *);

/* ref/unref callbacks applied when handlers are read via the SRP list */
struct srpl_rc ifih_rc = SRPL_RC_INITIALIZER(if_ih_ref, if_ih_unref, NULL);
827 
/*
 * Register an input handler on an interface.  A handler already
 * present with the same function and cookie is reference-counted
 * instead of duplicated; new handlers go to the head of the list.
 */
void
if_ih_insert(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie) {
			ifih->ifih_refcnt++;
			break;
		}
	}

	/* ifih is NULL here only when the loop found no match */
	if (ifih == NULL) {
		ifih = malloc(sizeof(*ifih), M_DEVBUF, M_WAITOK);

		ifih->ifih_input = input;
		ifih->ifih_cookie = cookie;
		ifih->ifih_refcnt = 1;
		refcnt_init(&ifih->ifih_srpcnt);
		SRPL_INSERT_HEAD_LOCKED(&ifih_rc, &ifp->if_inputs,
		    ifih, ifih_next);
	}
}
855 
856 void
857 if_ih_ref(void *null, void *i)
858 {
859 	struct ifih *ifih = i;
860 
861 	refcnt_take(&ifih->ifih_srpcnt);
862 }
863 
864 void
865 if_ih_unref(void *null, void *i)
866 {
867 	struct ifih *ifih = i;
868 
869 	refcnt_rele_wake(&ifih->ifih_srpcnt);
870 }
871 
/*
 * Unregister an input handler.  The entry is only unlinked and freed
 * once the last matching if_ih_insert() has been undone, and freeing
 * waits for all concurrent SRP readers to finish with the entry.
 */
void
if_ih_remove(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie)
			break;
	}

	KASSERT(ifih != NULL);

	if (--ifih->ifih_refcnt == 0) {
		SRPL_REMOVE_LOCKED(&ifih_rc, &ifp->if_inputs, ifih,
		    ifih, ifih_next);

		/* wait for SRP readers before freeing the entry */
		refcnt_finalize(&ifih->ifih_srpcnt, "ifihrm");
		free(ifih, M_DEVBUF, sizeof(*ifih));
	}
}
896 
/*
 * Run a list of received packets through the interface's input
 * handlers.  A handler returning non-zero has consumed the mbuf;
 * packets no handler claims are freed.  Packet arrival also feeds the
 * random pool, except for cloned (pseudo) interfaces.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;
	struct ifih *ifih;
	struct srp_ref sr;
	int s;

	if (ml_empty(ml))
		return;

	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists.
	 */
	NET_RLOCK();
	s = splnet();
	while ((m = ml_dequeue(ml)) != NULL) {
		/*
		 * Pass this mbuf to all input handlers of its
		 * interface until it is consumed.
		 */
		SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
			if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
				break;
		}
		SRPL_LEAVE(&sr);

		/* no handler consumed the packet; drop it */
		if (ifih == NULL)
			m_freem(m);
	}
	splx(s);
	NET_RUNLOCK();
}
942 
/*
 * Soft-interrupt dispatcher: drain the netisr bitmask and run the
 * protocol soft interrupt routine for every bit that is set.  Legacy
 * (non-MPSAFE) handlers are wrapped in the kernel lock; pfsync is
 * deferred until all other work is done.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX)) {
			KERNEL_LOCK();
			pipexintr();
			KERNEL_UNLOCK();
		}
#endif
		/* remember everything we serviced for the pfsync check */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
1018 
/*
 * Run (and discard) the interface's detach hooks so pseudo-drivers
 * stacked on top of it can undo their changes before teardown.
 */
void
if_deactivate(struct ifnet *ifp)
{
	NET_LOCK();
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */
	dohooks(ifp->if_detachhooks, HOOK_REMOVE | HOOK_FREE);

	NET_UNLOCK();
}
1032 
1033 /*
1034  * Detach an interface from everything in the kernel.  Also deallocate
1035  * private resources.
1036  */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	ifq_clr_oactive(&ifp->if_snd);

	/* Other CPUs must not have a reference before we start destroying. */
	if_idxmap_remove(ifp);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* redirect the methods so late callers fail gracefully */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Remove the interface from the list of all interfaces.  */
	TAILQ_REMOVE(&ifnet, ifp, if_list);

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	free(ifp->if_addrhooks, M_TEMP, 0);
	free(ifp->if_linkstatehooks, M_TEMP, 0);
	free(ifp->if_detachhooks, M_TEMP, 0);

	/* let each protocol domain release its per-interface data */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* tear down the transmit queues; slot 0 is embedded in the ifnet */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* same for the receive queues; slot 0 is the embedded if_rcv */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1146 
1147 /*
1148  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1149  */
1150 int
1151 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1152 {
1153 	struct ifnet *ifp;
1154 	int connected = 0;
1155 
1156 	ifp = if_get(ifidx);
1157 	if (ifp == NULL)
1158 		return (0);
1159 
1160 	if (ifp0->if_index == ifp->if_index)
1161 		connected = 1;
1162 
1163 #if NBRIDGE > 0
1164 	if (SAME_BRIDGE(ifp0->if_bridgeport, ifp->if_bridgeport))
1165 		connected = 1;
1166 #endif
1167 #if NCARP > 0
1168 	if ((ifp0->if_type == IFT_CARP && ifp0->if_carpdev == ifp) ||
1169 	    (ifp->if_type == IFT_CARP && ifp->if_carpdev == ifp0))
1170 		connected = 1;
1171 #endif
1172 
1173 	if_put(ifp);
1174 	return (connected);
1175 }
1176 
1177 /*
1178  * Create a clone network interface.
1179  */
1180 int
1181 if_clone_create(const char *name, int rdomain)
1182 {
1183 	struct if_clone *ifc;
1184 	struct ifnet *ifp;
1185 	int unit, ret;
1186 
1187 	NET_ASSERT_LOCKED();
1188 
1189 	ifc = if_clone_lookup(name, &unit);
1190 	if (ifc == NULL)
1191 		return (EINVAL);
1192 
1193 	if (ifunit(name) != NULL)
1194 		return (EEXIST);
1195 
1196 	/* XXXSMP breaks atomicity */
1197 	NET_UNLOCK();
1198 	ret = (*ifc->ifc_create)(ifc, unit);
1199 	NET_LOCK();
1200 
1201 	if (ret != 0 || (ifp = ifunit(name)) == NULL)
1202 		return (ret);
1203 
1204 	if_addgroup(ifp, ifc->ifc_name);
1205 	if (rdomain != 0)
1206 		if_setrdomain(ifp, rdomain);
1207 
1208 	return (ret);
1209 }
1210 
1211 /*
1212  * Destroy a clone network interface.
1213  */
1214 int
1215 if_clone_destroy(const char *name)
1216 {
1217 	struct if_clone *ifc;
1218 	struct ifnet *ifp;
1219 	int ret;
1220 
1221 	NET_ASSERT_LOCKED();
1222 
1223 	ifc = if_clone_lookup(name, NULL);
1224 	if (ifc == NULL)
1225 		return (EINVAL);
1226 
1227 	ifp = ifunit(name);
1228 	if (ifp == NULL)
1229 		return (ENXIO);
1230 
1231 	if (ifc->ifc_destroy == NULL)
1232 		return (EOPNOTSUPP);
1233 
1234 	if (ifp->if_flags & IFF_UP) {
1235 		int s;
1236 		s = splnet();
1237 		if_down(ifp);
1238 		splx(s);
1239 	}
1240 
1241 	/* XXXSMP breaks atomicity */
1242 	NET_UNLOCK();
1243 	ret = (*ifc->ifc_destroy)(ifp);
1244 	NET_LOCK();
1245 
1246 	return (ret);
1247 }
1248 
1249 /*
1250  * Look up a network interface cloner.
1251  */
1252 struct if_clone *
1253 if_clone_lookup(const char *name, int *unitp)
1254 {
1255 	struct if_clone *ifc;
1256 	const char *cp;
1257 	int unit;
1258 
1259 	/* separate interface name from unit */
1260 	for (cp = name;
1261 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1262 	    cp++)
1263 		continue;
1264 
1265 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1266 		return (NULL);	/* No name or unit number */
1267 
1268 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1269 		return (NULL);	/* unit number 0 padded */
1270 
1271 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1272 		if (strlen(ifc->ifc_name) == cp - name &&
1273 		    !strncmp(name, ifc->ifc_name, cp - name))
1274 			break;
1275 	}
1276 
1277 	if (ifc == NULL)
1278 		return (NULL);
1279 
1280 	unit = 0;
1281 	while (cp - name < IFNAMSIZ && *cp) {
1282 		if (*cp < '0' || *cp > '9' ||
1283 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1284 			/* Bogus unit number. */
1285 			return (NULL);
1286 		}
1287 		unit = (unit * 10) + (*cp++ - '0');
1288 	}
1289 
1290 	if (unitp != NULL)
1291 		*unitp = unit;
1292 	return (ifc);
1293 }
1294 
1295 /*
1296  * Register a network interface cloner.
1297  */
1298 void
1299 if_clone_attach(struct if_clone *ifc)
1300 {
1301 	/*
1302 	 * we are called at kernel boot by main(), when pseudo devices are
1303 	 * being attached. The main() is the only guy which may alter the
1304 	 * if_cloners. While system is running and main() is done with
1305 	 * initialization, the if_cloners becomes immutable.
1306 	 */
1307 	KASSERT(pdevinit_done == 0);
1308 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1309 	if_cloners_count++;
1310 }
1311 
1312 /*
1313  * Provide list of interface cloners to userspace.
1314  */
1315 int
1316 if_clone_list(struct if_clonereq *ifcr)
1317 {
1318 	char outbuf[IFNAMSIZ], *dst;
1319 	struct if_clone *ifc;
1320 	int count, error = 0;
1321 
1322 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1323 		/* Just asking how many there are. */
1324 		ifcr->ifcr_total = if_cloners_count;
1325 		return (0);
1326 	}
1327 
1328 	if (ifcr->ifcr_count < 0)
1329 		return (EINVAL);
1330 
1331 	ifcr->ifcr_total = if_cloners_count;
1332 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1333 
1334 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1335 		if (count == 0)
1336 			break;
1337 		bzero(outbuf, sizeof outbuf);
1338 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1339 		error = copyout(outbuf, dst, IFNAMSIZ);
1340 		if (error)
1341 			break;
1342 		count--;
1343 		dst += IFNAMSIZ;
1344 	}
1345 
1346 	return (error);
1347 }
1348 
1349 /*
1350  * set queue congestion marker
1351  */
1352 void
1353 if_congestion(void)
1354 {
1355 	extern int ticks;
1356 
1357 	ifq_congestion = ticks;
1358 }
1359 
1360 int
1361 if_congested(void)
1362 {
1363 	extern int ticks;
1364 	int diff;
1365 
1366 	diff = ticks - ifq_congestion;
1367 	if (diff < 0) {
1368 		ifq_congestion = ticks - hz;
1369 		return (0);
1370 	}
1371 
1372 	return (diff <= (hz / 100));
1373 }
1374 
/* Compare two sockaddrs byte for byte, using a1's recorded length. */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1378 
1379 /*
1380  * Locate an interface based on a complete address.
1381  */
1382 struct ifaddr *
1383 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1384 {
1385 	struct ifnet *ifp;
1386 	struct ifaddr *ifa;
1387 	u_int rdomain;
1388 
1389 	rdomain = rtable_l2(rtableid);
1390 	KERNEL_LOCK();
1391 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1392 		if (ifp->if_rdomain != rdomain)
1393 			continue;
1394 
1395 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1396 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1397 				continue;
1398 
1399 			if (equal(addr, ifa->ifa_addr)) {
1400 				KERNEL_UNLOCK();
1401 				return (ifa);
1402 			}
1403 		}
1404 	}
1405 	KERNEL_UNLOCK();
1406 	return (NULL);
1407 }
1408 
1409 /*
1410  * Locate the point to point interface with a given destination address.
1411  */
1412 struct ifaddr *
1413 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1414 {
1415 	struct ifnet *ifp;
1416 	struct ifaddr *ifa;
1417 
1418 	rdomain = rtable_l2(rdomain);
1419 	KERNEL_LOCK();
1420 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1421 		if (ifp->if_rdomain != rdomain)
1422 			continue;
1423 		if (ifp->if_flags & IFF_POINTOPOINT) {
1424 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1425 				if (ifa->ifa_addr->sa_family !=
1426 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1427 					continue;
1428 				if (equal(addr, ifa->ifa_dstaddr)) {
1429 					KERNEL_UNLOCK();
1430 					return (ifa);
1431 				}
1432 			}
1433 		}
1434 	}
1435 	KERNEL_UNLOCK();
1436 	return (NULL);
1437 }
1438 
1439 /*
1440  * Find an interface address specific to an interface best matching
1441  * a given address.
1442  */
1443 struct ifaddr *
1444 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1445 {
1446 	struct ifaddr *ifa;
1447 	char *cp, *cp2, *cp3;
1448 	char *cplim;
1449 	struct ifaddr *ifa_maybe = NULL;
1450 	u_int af = addr->sa_family;
1451 
1452 	if (af >= AF_MAX)
1453 		return (NULL);
1454 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1455 		if (ifa->ifa_addr->sa_family != af)
1456 			continue;
1457 		if (ifa_maybe == NULL)
1458 			ifa_maybe = ifa;
1459 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1460 			if (equal(addr, ifa->ifa_addr) ||
1461 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1462 				return (ifa);
1463 			continue;
1464 		}
1465 		cp = addr->sa_data;
1466 		cp2 = ifa->ifa_addr->sa_data;
1467 		cp3 = ifa->ifa_netmask->sa_data;
1468 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1469 		for (; cp3 < cplim; cp3++)
1470 			if ((*cp++ ^ *cp2++) & *cp3)
1471 				break;
1472 		if (cp3 == cplim)
1473 			return (ifa);
1474 	}
1475 	return (ifa_maybe);
1476 }
1477 
/* No-op if_rtrequest handler for interfaces needing no route hook. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1482 
1483 /*
1484  * Default action when installing a local route on a point-to-point
1485  * interface.
1486  */
1487 void
1488 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1489 {
1490 	struct ifnet *lo0ifp;
1491 	struct ifaddr *ifa, *lo0ifa;
1492 
1493 	switch (req) {
1494 	case RTM_ADD:
1495 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1496 			break;
1497 
1498 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1499 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1500 			    rt_key(rt)->sa_len) == 0)
1501 				break;
1502 		}
1503 
1504 		if (ifa == NULL)
1505 			break;
1506 
1507 		KASSERT(ifa == rt->rt_ifa);
1508 
1509 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1510 		KASSERT(lo0ifp != NULL);
1511 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1512 			if (lo0ifa->ifa_addr->sa_family ==
1513 			    ifa->ifa_addr->sa_family)
1514 				break;
1515 		}
1516 		if_put(lo0ifp);
1517 
1518 		if (lo0ifa == NULL)
1519 			break;
1520 
1521 		rt->rt_flags &= ~RTF_LLINFO;
1522 		break;
1523 	case RTM_DELETE:
1524 	case RTM_RESOLVE:
1525 	default:
1526 		break;
1527 	}
1528 }
1529 
1530 
1531 /*
1532  * Bring down all interfaces
1533  */
1534 void
1535 if_downall(void)
1536 {
1537 	struct ifreq ifrq;	/* XXX only partly built */
1538 	struct ifnet *ifp;
1539 
1540 	NET_LOCK();
1541 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1542 		if ((ifp->if_flags & IFF_UP) == 0)
1543 			continue;
1544 		if_down(ifp);
1545 		ifrq.ifr_flags = ifp->if_flags;
1546 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1547 	}
1548 	NET_UNLOCK();
1549 }
1550 
1551 /*
1552  * Mark an interface down and notify protocols of
1553  * the transition.
1554  */
1555 void
1556 if_down(struct ifnet *ifp)
1557 {
1558 	NET_ASSERT_LOCKED();
1559 
1560 	ifp->if_flags &= ~IFF_UP;
1561 	getmicrotime(&ifp->if_lastchange);
1562 	IFQ_PURGE(&ifp->if_snd);
1563 
1564 	if_linkstate(ifp);
1565 }
1566 
1567 /*
1568  * Mark an interface up and notify protocols of
1569  * the transition.
1570  */
1571 void
1572 if_up(struct ifnet *ifp)
1573 {
1574 	NET_ASSERT_LOCKED();
1575 
1576 	ifp->if_flags |= IFF_UP;
1577 	getmicrotime(&ifp->if_lastchange);
1578 
1579 #ifdef INET6
1580 	/* Userland expects the kernel to set ::1 on default lo(4). */
1581 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1582 		in6_ifattach(ifp);
1583 #endif
1584 
1585 	if_linkstate(ifp);
1586 }
1587 
1588 /*
1589  * Notify userland, the routing table and hooks owner of
1590  * a link-state transition.
1591  */
1592 void
1593 if_linkstate_task(void *xifidx)
1594 {
1595 	unsigned int ifidx = (unsigned long)xifidx;
1596 	struct ifnet *ifp;
1597 
1598 	KERNEL_LOCK();
1599 	NET_LOCK();
1600 
1601 	ifp = if_get(ifidx);
1602 	if (ifp != NULL)
1603 		if_linkstate(ifp);
1604 	if_put(ifp);
1605 
1606 	NET_UNLOCK();
1607 	KERNEL_UNLOCK();
1608 }
1609 
/* Propagate a link-state change for ``ifp''. */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Notify userland via the routing socket ... */
	rtm_ifchg(ifp);
	/* ... re-evaluate the routes using this interface ... */
	rt_if_track(ifp);
	/* ... and run the registered link-state hooks. */
	dohooks(ifp->if_linkstatehooks, 0);
}
1619 
1620 /*
1621  * Schedule a link state change task.
1622  */
1623 void
1624 if_link_state_change(struct ifnet *ifp)
1625 {
1626 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1627 }
1628 
1629 /*
1630  * Handle interface watchdog timer routine.  Called
1631  * from softclock, we decrement timer (if set) and
1632  * call the appropriate interface routine on expiration.
1633  */
1634 void
1635 if_slowtimo(void *arg)
1636 {
1637 	struct ifnet *ifp = arg;
1638 	int s = splnet();
1639 
1640 	if (ifp->if_watchdog) {
1641 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1642 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1643 		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
1644 	}
1645 	splx(s);
1646 }
1647 
/* Task wrapper invoking an interface's watchdog routine. */
void
if_watchdog_task(void *xifidx)
{
	/* The interface index travels in the task argument pointer. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	/* The interface may have been detached in the meantime. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1668 
1669 /*
1670  * Map interface name to interface structure pointer.
1671  */
1672 struct ifnet *
1673 ifunit(const char *name)
1674 {
1675 	struct ifnet *ifp;
1676 
1677 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1678 		if (strcmp(ifp->if_xname, name) == 0)
1679 			return (ifp);
1680 	}
1681 	return (NULL);
1682 }
1683 
1684 /*
1685  * Map interface index to interface structure pointer.
1686  */
1687 struct ifnet *
1688 if_get(unsigned int index)
1689 {
1690 	struct srp_ref sr;
1691 	struct if_map *if_map;
1692 	struct srp *map;
1693 	struct ifnet *ifp = NULL;
1694 
1695 	if_map = srp_enter(&sr, &if_idxmap.map);
1696 	if (index < if_map->limit) {
1697 		map = (struct srp *)(if_map + 1);
1698 
1699 		ifp = srp_follow(&sr, &map[index]);
1700 		if (ifp != NULL) {
1701 			KASSERT(ifp->if_index == index);
1702 			if_ref(ifp);
1703 		}
1704 	}
1705 	srp_leave(&sr);
1706 
1707 	return (ifp);
1708 }
1709 
/* Take a reference on ``ifp'' and return it for caller convenience. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1717 
/* Release a reference taken with if_get()/if_ref(); NULL is a no-op. */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1726 
/*
 * Change the link-layer address of ``ifp''.
 * NOTE(review): the cast assumes ifp is embedded in a struct arpcom;
 * ifioctl() only calls this for Ether-like interface types — confirm
 * before adding new callers.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1738 
/*
 * Create routing domain ``rdomain'' and its loopback interface.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	/* Refuse to take over a routing table already in use. */
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	/* A pre-existing loopback is only acceptable if it is ``ifp''. */
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1765 
/*
 * Move ``ifp'' into routing domain ``rdomain''.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* An rdomain's own loopback interface may not leave it. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1829 
1830 /*
1831  * Interface ioctls.
1832  */
1833 int
1834 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1835 {
1836 	struct ifnet *ifp;
1837 	struct ifreq *ifr = (struct ifreq *)data;
1838 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1839 	struct if_afreq *ifar = (struct if_afreq *)data;
1840 	char ifdescrbuf[IFDESCRSIZE];
1841 	char ifrtlabelbuf[RTLABEL_LEN];
1842 	int s, error = 0, oif_xflags;
1843 	size_t bytesdone;
1844 	unsigned short oif_flags;
1845 
1846 	switch (cmd) {
1847 	case SIOCIFCREATE:
1848 		if ((error = suser(p)) != 0)
1849 			return (error);
1850 		NET_LOCK();
1851 		error = if_clone_create(ifr->ifr_name, 0);
1852 		NET_UNLOCK();
1853 		return (error);
1854 	case SIOCIFDESTROY:
1855 		if ((error = suser(p)) != 0)
1856 			return (error);
1857 		NET_LOCK();
1858 		error = if_clone_destroy(ifr->ifr_name);
1859 		NET_UNLOCK();
1860 		return (error);
1861 	case SIOCSIFGATTR:
1862 		if ((error = suser(p)) != 0)
1863 			return (error);
1864 		NET_LOCK();
1865 		error = if_setgroupattribs(data);
1866 		NET_UNLOCK();
1867 		return (error);
1868 	case SIOCGIFCONF:
1869 	case SIOCIFGCLONERS:
1870 	case SIOCGIFGMEMB:
1871 	case SIOCGIFGATTR:
1872 	case SIOCGIFGLIST:
1873 	case SIOCGIFFLAGS:
1874 	case SIOCGIFXFLAGS:
1875 	case SIOCGIFMETRIC:
1876 	case SIOCGIFMTU:
1877 	case SIOCGIFHARDMTU:
1878 	case SIOCGIFDATA:
1879 	case SIOCGIFDESCR:
1880 	case SIOCGIFRTLABEL:
1881 	case SIOCGIFPRIORITY:
1882 	case SIOCGIFRDOMAIN:
1883 	case SIOCGIFGROUP:
1884 	case SIOCGIFLLPRIO:
1885 		return (ifioctl_get(cmd, data));
1886 	}
1887 
1888 	ifp = ifunit(ifr->ifr_name);
1889 	if (ifp == NULL)
1890 		return (ENXIO);
1891 	oif_flags = ifp->if_flags;
1892 	oif_xflags = ifp->if_xflags;
1893 
1894 	switch (cmd) {
1895 	case SIOCIFAFATTACH:
1896 	case SIOCIFAFDETACH:
1897 		if ((error = suser(p)) != 0)
1898 			break;
1899 		NET_LOCK();
1900 		switch (ifar->ifar_af) {
1901 		case AF_INET:
1902 			/* attach is a noop for AF_INET */
1903 			if (cmd == SIOCIFAFDETACH)
1904 				in_ifdetach(ifp);
1905 			break;
1906 #ifdef INET6
1907 		case AF_INET6:
1908 			if (cmd == SIOCIFAFATTACH)
1909 				error = in6_ifattach(ifp);
1910 			else
1911 				in6_ifdetach(ifp);
1912 			break;
1913 #endif /* INET6 */
1914 		default:
1915 			error = EAFNOSUPPORT;
1916 		}
1917 		NET_UNLOCK();
1918 		break;
1919 
1920 	case SIOCSIFFLAGS:
1921 		if ((error = suser(p)) != 0)
1922 			break;
1923 
1924 		NET_LOCK();
1925 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1926 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
1927 
1928 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1929 		if (error != 0) {
1930 			ifp->if_flags = oif_flags;
1931 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
1932 			s = splnet();
1933 			if (ISSET(ifp->if_flags, IFF_UP))
1934 				if_up(ifp);
1935 			else
1936 				if_down(ifp);
1937 			splx(s);
1938 		}
1939 		NET_UNLOCK();
1940 		break;
1941 
1942 	case SIOCSIFXFLAGS:
1943 		if ((error = suser(p)) != 0)
1944 			break;
1945 
1946 		NET_LOCK();
1947 #ifdef INET6
1948 		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
1949 			error = in6_ifattach(ifp);
1950 			if (error != 0) {
1951 				NET_UNLOCK();
1952 				break;
1953 			}
1954 		}
1955 
1956 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1957 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1958 			ifp->if_xflags |= IFXF_INET6_NOSOII;
1959 			in6_soiiupdate(ifp);
1960 		}
1961 
1962 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
1963 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII)) {
1964 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
1965 			in6_soiiupdate(ifp);
1966 		}
1967 
1968 #endif	/* INET6 */
1969 
1970 #ifdef MPLS
1971 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
1972 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
1973 			s = splnet();
1974 			ifp->if_xflags |= IFXF_MPLS;
1975 			ifp->if_ll_output = ifp->if_output;
1976 			ifp->if_output = mpls_output;
1977 			splx(s);
1978 		}
1979 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
1980 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
1981 			s = splnet();
1982 			ifp->if_xflags &= ~IFXF_MPLS;
1983 			ifp->if_output = ifp->if_ll_output;
1984 			ifp->if_ll_output = NULL;
1985 			splx(s);
1986 		}
1987 #endif	/* MPLS */
1988 
1989 #ifndef SMALL_KERNEL
1990 		if (ifp->if_capabilities & IFCAP_WOL) {
1991 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
1992 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
1993 				s = splnet();
1994 				ifp->if_xflags |= IFXF_WOL;
1995 				error = ifp->if_wol(ifp, 1);
1996 				splx(s);
1997 			}
1998 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
1999 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
2000 				s = splnet();
2001 				ifp->if_xflags &= ~IFXF_WOL;
2002 				error = ifp->if_wol(ifp, 0);
2003 				splx(s);
2004 			}
2005 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
2006 			ifr->ifr_flags &= ~IFXF_WOL;
2007 			error = ENOTSUP;
2008 		}
2009 #endif
2010 
2011 		if (error == 0)
2012 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
2013 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
2014 		NET_UNLOCK();
2015 		break;
2016 
2017 	case SIOCSIFMETRIC:
2018 		if ((error = suser(p)) != 0)
2019 			break;
2020 		NET_LOCK();
2021 		ifp->if_metric = ifr->ifr_metric;
2022 		NET_UNLOCK();
2023 		break;
2024 
2025 	case SIOCSIFMTU:
2026 		if ((error = suser(p)) != 0)
2027 			break;
2028 		NET_LOCK();
2029 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2030 		NET_UNLOCK();
2031 		if (!error)
2032 			rtm_ifchg(ifp);
2033 		break;
2034 
2035 	case SIOCSIFDESCR:
2036 		if ((error = suser(p)) != 0)
2037 			break;
2038 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2039 		    IFDESCRSIZE, &bytesdone);
2040 		if (error == 0) {
2041 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2042 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2043 		}
2044 		break;
2045 
2046 	case SIOCSIFRTLABEL:
2047 		if ((error = suser(p)) != 0)
2048 			break;
2049 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2050 		    RTLABEL_LEN, &bytesdone);
2051 		if (error == 0) {
2052 			rtlabel_unref(ifp->if_rtlabelid);
2053 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2054 		}
2055 		break;
2056 
2057 	case SIOCSIFPRIORITY:
2058 		if ((error = suser(p)) != 0)
2059 			break;
2060 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2061 			error = EINVAL;
2062 			break;
2063 		}
2064 		ifp->if_priority = ifr->ifr_metric;
2065 		break;
2066 
2067 	case SIOCSIFRDOMAIN:
2068 		if ((error = suser(p)) != 0)
2069 			break;
2070 		NET_LOCK();
2071 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2072 		if (!error || error == EEXIST)
2073 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2074 		NET_UNLOCK();
2075 		break;
2076 
2077 	case SIOCAIFGROUP:
2078 		if ((error = suser(p)))
2079 			break;
2080 		NET_LOCK();
2081 		error = if_addgroup(ifp, ifgr->ifgr_group);
2082 		if (error == 0) {
2083 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2084 			if (error == ENOTTY)
2085 				error = 0;
2086 		}
2087 		NET_UNLOCK();
2088 		break;
2089 
2090 	case SIOCDIFGROUP:
2091 		if ((error = suser(p)))
2092 			break;
2093 		NET_LOCK();
2094 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2095 		if (error == ENOTTY)
2096 			error = 0;
2097 		if (error == 0)
2098 			error = if_delgroup(ifp, ifgr->ifgr_group);
2099 		NET_UNLOCK();
2100 		break;
2101 
2102 	case SIOCSIFLLADDR:
2103 		if ((error = suser(p)))
2104 			break;
2105 		if ((ifp->if_sadl == NULL) ||
2106 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2107 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2108 			error = EINVAL;
2109 			break;
2110 		}
2111 		NET_LOCK();
2112 		switch (ifp->if_type) {
2113 		case IFT_ETHER:
2114 		case IFT_CARP:
2115 		case IFT_XETHER:
2116 		case IFT_ISO88025:
2117 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2118 			if (error == ENOTTY)
2119 				error = 0;
2120 			if (error == 0)
2121 				error = if_setlladdr(ifp,
2122 				    ifr->ifr_addr.sa_data);
2123 			break;
2124 		default:
2125 			error = ENODEV;
2126 		}
2127 
2128 		if (error == 0)
2129 			ifnewlladdr(ifp);
2130 		NET_UNLOCK();
2131 		break;
2132 
2133 	case SIOCSIFLLPRIO:
2134 		if ((error = suser(p)))
2135 			break;
2136 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2137 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2138 			error = EINVAL;
2139 			break;
2140 		}
2141 		NET_LOCK();
2142 		ifp->if_llprio = ifr->ifr_llprio;
2143 		NET_UNLOCK();
2144 		break;
2145 
2146 	case SIOCSETKALIVE:
2147 	case SIOCDIFPHYADDR:
2148 	case SIOCSLIFPHYADDR:
2149 	case SIOCSLIFPHYRTABLE:
2150 	case SIOCSLIFPHYTTL:
2151 	case SIOCSLIFPHYDF:
2152 	case SIOCSLIFPHYECN:
2153 	case SIOCADDMULTI:
2154 	case SIOCDELMULTI:
2155 	case SIOCSIFMEDIA:
2156 	case SIOCSVNETID:
2157 	case SIOCSVNETFLOWID:
2158 	case SIOCSTXHPRIO:
2159 	case SIOCSIFPAIR:
2160 	case SIOCSIFPARENT:
2161 	case SIOCDIFPARENT:
2162 		if ((error = suser(p)) != 0)
2163 			break;
2164 		/* FALLTHROUGH */
2165 	default:
2166 		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
2167 			(struct mbuf *) cmd, (struct mbuf *) data,
2168 			(struct mbuf *) ifp, p));
2169 		if (error == EOPNOTSUPP) {
2170 			NET_LOCK();
2171 			error = ((*ifp->if_ioctl)(ifp, cmd, data));
2172 			NET_UNLOCK();
2173 		}
2174 		break;
2175 	}
2176 
2177 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
2178 		rtm_ifchg(ifp);
2179 
2180 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2181 		getmicrotime(&ifp->if_lastchange);
2182 
2183 	return (error);
2184 }
2185 
/*
 * Read-only interface ioctls, served under the shared netlock.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Requests that do not resolve a specific interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK();
		error = ifconf(data);
		NET_RUNLOCK();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK();
		error = if_getgroupmembers(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK();
		error = if_getgroupattribs(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK();
		error = if_getgrouplist(data);
		NET_RUNLOCK();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is derived from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Internal-only xflags are hidden from userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only dispatches the commands listed above. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK();

	return (error);
}
2299 
2300 /*
2301  * Return interface configuration
2302  * of system.  List may be used
2303  * in later ioctl's (above) to get
2304  * other information.
2305  */
2306 int
2307 ifconf(caddr_t data)
2308 {
2309 	struct ifconf *ifc = (struct ifconf *)data;
2310 	struct ifnet *ifp;
2311 	struct ifaddr *ifa;
2312 	struct ifreq ifr, *ifrp;
2313 	int space = ifc->ifc_len, error = 0;
2314 
2315 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2316 	if (space == 0) {
2317 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2318 			struct sockaddr *sa;
2319 
2320 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2321 				space += sizeof (ifr);
2322 			else
2323 				TAILQ_FOREACH(ifa,
2324 				    &ifp->if_addrlist, ifa_list) {
2325 					sa = ifa->ifa_addr;
2326 					if (sa->sa_len > sizeof(*sa))
2327 						space += sa->sa_len -
2328 						    sizeof(*sa);
2329 					space += sizeof(ifr);
2330 				}
2331 		}
2332 		ifc->ifc_len = space;
2333 		return (0);
2334 	}
2335 
2336 	ifrp = ifc->ifc_req;
2337 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2338 		if (space < sizeof(ifr))
2339 			break;
2340 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2341 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2342 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2343 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2344 			    sizeof(ifr));
2345 			if (error)
2346 				break;
2347 			space -= sizeof (ifr), ifrp++;
2348 		} else
2349 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2350 				struct sockaddr *sa = ifa->ifa_addr;
2351 
2352 				if (space < sizeof(ifr))
2353 					break;
2354 				if (sa->sa_len <= sizeof(*sa)) {
2355 					ifr.ifr_addr = *sa;
2356 					error = copyout((caddr_t)&ifr,
2357 					    (caddr_t)ifrp, sizeof (ifr));
2358 					ifrp++;
2359 				} else {
2360 					space -= sa->sa_len - sizeof(*sa);
2361 					if (space < sizeof (ifr))
2362 						break;
2363 					error = copyout((caddr_t)&ifr,
2364 					    (caddr_t)ifrp,
2365 					    sizeof(ifr.ifr_name));
2366 					if (error == 0)
2367 						error = copyout((caddr_t)sa,
2368 						    (caddr_t)&ifrp->ifr_addr,
2369 						    sa->sa_len);
2370 					ifrp = (struct ifreq *)(sa->sa_len +
2371 					    (caddr_t)&ifrp->ifr_addr);
2372 				}
2373 				if (error)
2374 					break;
2375 				space -= sizeof (ifr);
2376 			}
2377 	}
2378 	ifc->ifc_len -= space;
2379 	return (error);
2380 }
2381 
/* Allocate the optional counters of ``ifp''; must not already exist. */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2389 
/* Free the counters of ``ifp'' and mark them absent. */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2398 
/*
 * Fill ``data'' with a statistics snapshot for ``ifp'', combining the
 * legacy if_data counters with the optional allocated counters and the
 * per-queue counters.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters));

		/* Fold the allocated counters into the snapshot. */
		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* Add the send queue counters... */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* ...and the receive queue counters. */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2437 
2438 /*
2439  * Dummy functions replaced in ifnet during detach (if protocols decide to
2440  * fiddle with the if during detach.
2441  */
2442 void
2443 if_detached_qstart(struct ifqueue *ifq)
2444 {
2445 	ifq_purge(ifq);
2446 }
2447 
/* Ioctl handler for a detached interface: reject every request. */
int
if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
{
	return ENODEV;
}
2453 
2454 /*
2455  * Create interface group without members
2456  */
2457 struct ifg_group *
2458 if_creategroup(const char *groupname)
2459 {
2460 	struct ifg_group	*ifg;
2461 
2462 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2463 		return (NULL);
2464 
2465 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2466 	ifg->ifg_refcnt = 0;
2467 	ifg->ifg_carp_demoted = 0;
2468 	TAILQ_INIT(&ifg->ifg_members);
2469 #if NPF > 0
2470 	pfi_attach_ifgroup(ifg);
2471 #endif
2472 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2473 
2474 	return (ifg);
2475 }
2476 
2477 /*
2478  * Add a group to an interface
2479  */
2480 int
2481 if_addgroup(struct ifnet *ifp, const char *groupname)
2482 {
2483 	struct ifg_list		*ifgl;
2484 	struct ifg_group	*ifg = NULL;
2485 	struct ifg_member	*ifgm;
2486 
2487 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2488 	    groupname[strlen(groupname) - 1] <= '9')
2489 		return (EINVAL);
2490 
2491 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2492 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2493 			return (EEXIST);
2494 
2495 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2496 		return (ENOMEM);
2497 
2498 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2499 		free(ifgl, M_TEMP, sizeof(*ifgl));
2500 		return (ENOMEM);
2501 	}
2502 
2503 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2504 		if (!strcmp(ifg->ifg_group, groupname))
2505 			break;
2506 
2507 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
2508 		free(ifgl, M_TEMP, sizeof(*ifgl));
2509 		free(ifgm, M_TEMP, sizeof(*ifgm));
2510 		return (ENOMEM);
2511 	}
2512 
2513 	ifg->ifg_refcnt++;
2514 	ifgl->ifgl_group = ifg;
2515 	ifgm->ifgm_ifp = ifp;
2516 
2517 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2518 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2519 
2520 #if NPF > 0
2521 	pfi_group_addmember(groupname, ifp);
2522 #endif
2523 
2524 	return (0);
2525 }
2526 
2527 /*
2528  * Remove a group from an interface
2529  */
2530 int
2531 if_delgroup(struct ifnet *ifp, const char *groupname)
2532 {
2533 	struct ifg_list		*ifgl;
2534 	struct ifg_member	*ifgm;
2535 
2536 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2537 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2538 			break;
2539 	if (ifgl == NULL)
2540 		return (ENOENT);
2541 
2542 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2543 
2544 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2545 		if (ifgm->ifgm_ifp == ifp)
2546 			break;
2547 
2548 	if (ifgm != NULL) {
2549 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2550 		free(ifgm, M_TEMP, sizeof(*ifgm));
2551 	}
2552 
2553 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2554 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2555 #if NPF > 0
2556 		pfi_detach_ifgroup(ifgl->ifgl_group);
2557 #endif
2558 		free(ifgl->ifgl_group, M_TEMP, 0);
2559 	}
2560 
2561 	free(ifgl, M_TEMP, sizeof(*ifgl));
2562 
2563 #if NPF > 0
2564 	pfi_group_change(groupname);
2565 #endif
2566 
2567 	return (0);
2568 }
2569 
2570 /*
2571  * Stores all groups from an interface in memory pointed
2572  * to by data
2573  */
2574 int
2575 if_getgroup(caddr_t data, struct ifnet *ifp)
2576 {
2577 	int			 len, error;
2578 	struct ifg_list		*ifgl;
2579 	struct ifg_req		 ifgrq, *ifgp;
2580 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2581 
2582 	if (ifgr->ifgr_len == 0) {
2583 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2584 			ifgr->ifgr_len += sizeof(struct ifg_req);
2585 		return (0);
2586 	}
2587 
2588 	len = ifgr->ifgr_len;
2589 	ifgp = ifgr->ifgr_groups;
2590 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2591 		if (len < sizeof(ifgrq))
2592 			return (EINVAL);
2593 		bzero(&ifgrq, sizeof ifgrq);
2594 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2595 		    sizeof(ifgrq.ifgrq_group));
2596 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2597 		    sizeof(struct ifg_req))))
2598 			return (error);
2599 		len -= sizeof(ifgrq);
2600 		ifgp++;
2601 	}
2602 
2603 	return (0);
2604 }
2605 
2606 /*
2607  * Stores all members of a group in memory pointed to by data
2608  */
2609 int
2610 if_getgroupmembers(caddr_t data)
2611 {
2612 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2613 	struct ifg_group	*ifg;
2614 	struct ifg_member	*ifgm;
2615 	struct ifg_req		 ifgrq, *ifgp;
2616 	int			 len, error;
2617 
2618 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2619 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2620 			break;
2621 	if (ifg == NULL)
2622 		return (ENOENT);
2623 
2624 	if (ifgr->ifgr_len == 0) {
2625 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2626 			ifgr->ifgr_len += sizeof(ifgrq);
2627 		return (0);
2628 	}
2629 
2630 	len = ifgr->ifgr_len;
2631 	ifgp = ifgr->ifgr_groups;
2632 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2633 		if (len < sizeof(ifgrq))
2634 			return (EINVAL);
2635 		bzero(&ifgrq, sizeof ifgrq);
2636 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2637 		    sizeof(ifgrq.ifgrq_member));
2638 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2639 		    sizeof(struct ifg_req))))
2640 			return (error);
2641 		len -= sizeof(ifgrq);
2642 		ifgp++;
2643 	}
2644 
2645 	return (0);
2646 }
2647 
2648 int
2649 if_getgroupattribs(caddr_t data)
2650 {
2651 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2652 	struct ifg_group	*ifg;
2653 
2654 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2655 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2656 			break;
2657 	if (ifg == NULL)
2658 		return (ENOENT);
2659 
2660 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2661 
2662 	return (0);
2663 }
2664 
2665 int
2666 if_setgroupattribs(caddr_t data)
2667 {
2668 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2669 	struct ifg_group	*ifg;
2670 	struct ifg_member	*ifgm;
2671 	int			 demote;
2672 
2673 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2674 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2675 			break;
2676 	if (ifg == NULL)
2677 		return (ENOENT);
2678 
2679 	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
2680 	if (demote + ifg->ifg_carp_demoted > 0xff ||
2681 	    demote + ifg->ifg_carp_demoted < 0)
2682 		return (EINVAL);
2683 
2684 	ifg->ifg_carp_demoted += demote;
2685 
2686 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2687 		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);
2688 
2689 	return (0);
2690 }
2691 
2692 /*
2693  * Stores all groups in memory pointed to by data
2694  */
2695 int
2696 if_getgrouplist(caddr_t data)
2697 {
2698 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2699 	struct ifg_group	*ifg;
2700 	struct ifg_req		 ifgrq, *ifgp;
2701 	int			 len, error;
2702 
2703 	if (ifgr->ifgr_len == 0) {
2704 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2705 			ifgr->ifgr_len += sizeof(ifgrq);
2706 		return (0);
2707 	}
2708 
2709 	len = ifgr->ifgr_len;
2710 	ifgp = ifgr->ifgr_groups;
2711 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2712 		if (len < sizeof(ifgrq))
2713 			return (EINVAL);
2714 		bzero(&ifgrq, sizeof ifgrq);
2715 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2716                     sizeof(ifgrq.ifgrq_group));
2717 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2718                     sizeof(struct ifg_req))))
2719 			return (error);
2720 		len -= sizeof(ifgrq);
2721 		ifgp++;
2722 	}
2723 
2724 	return (0);
2725 }
2726 
/*
 * Rebuild the egress group membership when a default route changed:
 * only a destination of "any" with a zero/empty mask triggers the
 * rebuild.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		/* 0.0.0.0 with an empty or all-zero mask: default route. */
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		/* Likewise :: for the IPv6 default route. */
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2748 
/*
 * Rebuild the IFG_EGRESS group from scratch: remove every current
 * member, then walk the IPv4 and IPv6 default-route entries of
 * rtable 0 and re-add each interface they point at.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Find the egress group, if it exists, and empty it. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* SAFE iteration: if_delgroup() frees the member entries. */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Add every interface carrying an IPv4 default route. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Likewise for the IPv6 default route (::). */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
2798 
2799 /*
2800  * Set/clear promiscuous mode on interface ifp based on the truth value
2801  * of pswitch.  The calls are reference counted so that only the first
2802  * "on" request actually has an effect, as does the final "off" request.
2803  * Results are undefined if the "off" and "on" requests are not matched.
2804  */
2805 int
2806 ifpromisc(struct ifnet *ifp, int pswitch)
2807 {
2808 	struct ifreq ifr;
2809 	unsigned short oif_flags;
2810 	int oif_pcount, error;
2811 
2812 	oif_flags = ifp->if_flags;
2813 	oif_pcount = ifp->if_pcount;
2814 	if (pswitch) {
2815 		if (ifp->if_pcount++ != 0)
2816 			return (0);
2817 		ifp->if_flags |= IFF_PROMISC;
2818 	} else {
2819 		if (--ifp->if_pcount > 0)
2820 			return (0);
2821 		ifp->if_flags &= ~IFF_PROMISC;
2822 	}
2823 
2824 	if ((ifp->if_flags & IFF_UP) == 0)
2825 		return (0);
2826 
2827 	memset(&ifr, 0, sizeof(ifr));
2828 	ifr.ifr_flags = ifp->if_flags;
2829 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
2830 	if (error) {
2831 		ifp->if_flags = oif_flags;
2832 		ifp->if_pcount = oif_pcount;
2833 	}
2834 
2835 	return (error);
2836 }
2837 
/* Append an address to the interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
2843 
/* Remove an address from the interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
2849 
/*
 * Replace the broadcast address of ifa with sa.  Only a same-length
 * replacement is supported; a mismatch is a programming error.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
2857 
2858 #ifdef DDB
/*
 * Debug helper, callable from ddb>: print every configured
 * IPv4/IPv6 address together with the interface it lives on.
 */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			/* Addresses of other families print nothing here. */
			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
2888 #endif /* DDB */
2889 
/*
 * Push a changed link-layer address down to the driver and, on an
 * IPv6 host, regenerate the link-local address.  Runs at splnet();
 * the interface's original up/down state is restored on return.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/*
	 * Force a SIOCSIFFLAGS with IFF_UP set — presumably so the
	 * driver reinitializes with the new lladdr; confirm per driver.
	 */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		/*
		 * NOTE(review): relies on ia_ifa being the first member
		 * of the in6 address struct, so a NULL return from
		 * in6ifa_ifpforlinklocal() yields a NULL ifa — confirm.
		 */
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			dohooks(ifp->if_addrhooks, 0);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
2936 
/* Tick value at the last net_tick() run, and livelocks seen so far. */
int net_ticks;
u_int net_livelocks;
2939 
/*
 * Once-per-tick timeout: if more than one tick passed since the
 * previous run, this timeout was starved — count it as a livelock.
 */
void
net_tick(void *null)
{
	extern int ticks;

	if (ticks - net_ticks > 1)
		net_livelocks++;

	net_ticks = ticks;

	/* Re-arm ourselves for the next tick. */
	timeout_add(&net_tick_to, 1);
}
2952 
/* Return non-zero while net_tick() is being starved (livelock). */
int
net_livelocked(void)
{
	extern int ticks;

	return (ticks - net_ticks > 1);
}
2960 
2961 void
2962 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
2963 {
2964 	extern int ticks;
2965 
2966 	memset(rxr, 0, sizeof(*rxr));
2967 
2968 	rxr->rxr_adjusted = ticks;
2969 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
2970 	rxr->rxr_hwm = hwm;
2971 }
2972 
2973 static inline void
2974 if_rxr_adjust_cwm(struct if_rxring *rxr)
2975 {
2976 	extern int ticks;
2977 
2978 	if (net_livelocked()) {
2979 		if (rxr->rxr_cwm > rxr->rxr_lwm)
2980 			rxr->rxr_cwm--;
2981 		else
2982 			return;
2983 	} else if (rxr->rxr_alive >= rxr->rxr_lwm)
2984 		return;
2985 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
2986 		rxr->rxr_cwm++;
2987 
2988 	rxr->rxr_adjusted = ticks;
2989 }
2990 
2991 void
2992 if_rxr_livelocked(struct if_rxring *rxr)
2993 {
2994 	extern int ticks;
2995 
2996 	if (ticks - rxr->rxr_adjusted >= 1) {
2997 		if (rxr->rxr_cwm > rxr->rxr_lwm)
2998 			rxr->rxr_cwm--;
2999 
3000 		rxr->rxr_adjusted = ticks;
3001 	}
3002 }
3003 
3004 u_int
3005 if_rxr_get(struct if_rxring *rxr, u_int max)
3006 {
3007 	extern int ticks;
3008 	u_int diff;
3009 
3010 	if (ticks - rxr->rxr_adjusted >= 1) {
3011 		/* we're free to try for an adjustment */
3012 		if_rxr_adjust_cwm(rxr);
3013 	}
3014 
3015 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3016 		return (0);
3017 
3018 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3019 	rxr->rxr_alive += diff;
3020 
3021 	return (diff);
3022 }
3023 
/*
 * Copy up to t rx-ring info records to the user's if_rxrinfo buffer
 * and report the total number available in ifri_total, so userland
 * can retry with a bigger buffer if needed.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* Never copy out more records than the user has room for. */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3046 
3047 int
3048 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3049     struct if_rxring *rxr)
3050 {
3051 	struct if_rxring_info ifr;
3052 
3053 	memset(&ifr, 0, sizeof(ifr));
3054 
3055 	if (name != NULL)
3056 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3057 
3058 	ifr.ifr_size = size;
3059 	ifr.ifr_info = *rxr;
3060 
3061 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3062 }
3063 
3064 /*
3065  * Network stack input queues.
3066  */
3067 
/*
 * Initialize a netisr input queue: an mbuf queue holding at most
 * maxlen packets whose arrival schedules soft interrupt isr.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3074 
3075 int
3076 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3077 {
3078 	int rv;
3079 
3080 	rv = mq_enqueue(&niq->ni_q, m);
3081 	if (rv == 0)
3082 		schednetisr(niq->ni_isr);
3083 	else
3084 		if_congestion();
3085 
3086 	return (rv);
3087 }
3088 
3089 int
3090 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3091 {
3092 	int rv;
3093 
3094 	rv = mq_enlist(&niq->ni_q, ml);
3095 	if (rv == 0)
3096 		schednetisr(niq->ni_isr);
3097 	else
3098 		if_congestion();
3099 
3100 	return (rv);
3101 }
3102 
/* Central panic for address families no caller knows how to handle. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3108 
3109 /*
3110  * XXXSMP This tunable is here to work around the fact that IPsec
3111  * globals aren't ready to be accessed by multiple threads in
3112  * parallel.
3113  */
3114 int		 nettaskqs = NET_TASKQ;
3115 
3116 struct taskq *
3117 net_tq(unsigned int ifindex)
3118 {
3119 	struct taskq *t = NULL;
3120 
3121 	t = nettqmp[ifindex % nettaskqs];
3122 
3123 	return (t);
3124 }
3125