xref: /openbsd-src/sys/net/if.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: if.c,v 1.600 2020/01/24 05:14:51 jsg Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "switch.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/timeout.h>
80 #include <sys/protosw.h>
81 #include <sys/kernel.h>
82 #include <sys/ioctl.h>
83 #include <sys/domain.h>
84 #include <sys/task.h>
85 #include <sys/atomic.h>
86 #include <sys/percpu.h>
87 #include <sys/proc.h>
88 
89 #include <dev/rndvar.h>
90 
91 #include <net/if.h>
92 #include <net/if_dl.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #include <net/netisr.h>
96 
97 #include <netinet/in.h>
98 #include <netinet/if_ether.h>
99 #include <netinet/igmp.h>
100 #ifdef MROUTING
101 #include <netinet/ip_mroute.h>
102 #endif
103 
104 #ifdef INET6
105 #include <netinet6/in6_var.h>
106 #include <netinet6/in6_ifattach.h>
107 #include <netinet6/nd6.h>
108 #include <netinet/ip6.h>
109 #include <netinet6/ip6_var.h>
110 #endif
111 
112 #ifdef MPLS
113 #include <netmpls/mpls.h>
114 #endif
115 
116 #if NBPFILTER > 0
117 #include <net/bpf.h>
118 #endif
119 
120 #if NBRIDGE > 0
121 #include <net/if_bridge.h>
122 #endif
123 
124 #if NCARP > 0
125 #include <netinet/ip_carp.h>
126 #endif
127 
128 #if NPF > 0
129 #include <net/pfvar.h>
130 #endif
131 
132 #include <sys/device.h>
133 
134 void	if_attachsetup(struct ifnet *);
135 void	if_attachdomain(struct ifnet *);
136 void	if_attach_common(struct ifnet *);
137 int	if_createrdomain(int, struct ifnet *);
138 int	if_setrdomain(struct ifnet *, int);
139 void	if_slowtimo(void *);
140 
141 void	if_detached_qstart(struct ifqueue *);
142 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
143 
144 int	ifioctl_get(u_long, caddr_t);
145 int	ifconf(caddr_t);
146 static int
147 	if_sffpage_check(const caddr_t);
148 
149 int	if_getgroup(caddr_t, struct ifnet *);
150 int	if_getgroupmembers(caddr_t);
151 int	if_getgroupattribs(caddr_t);
152 int	if_setgroupattribs(caddr_t);
153 int	if_getgrouplist(caddr_t);
154 
155 void	if_linkstate(struct ifnet *);
156 void	if_linkstate_task(void *);
157 
158 int	if_clone_list(struct if_clonereq *);
159 struct if_clone	*if_clone_lookup(const char *, int *);
160 
161 int	if_group_egress_build(void);
162 
163 void	if_watchdog_task(void *);
164 
165 void	if_netisr(void *);
166 
167 #ifdef DDB
168 void	ifa_print_all(void);
169 #endif
170 
171 void	if_qstart_compat(struct ifqueue *);
172 
173 /*
174  * interface index map
175  *
176  * the kernel maintains a mapping of interface indexes to struct ifnet
177  * pointers.
178  *
179  * the map is an array of struct ifnet pointers prefixed by an if_map
180  * structure. the if_map structure stores the length of its array.
181  *
182  * as interfaces are attached to the system, the map is grown on demand
183  * up to USHRT_MAX entries.
184  *
185  * interface index 0 is reserved and represents no interface. this
186  * supports the use of the interface index as the scope for IPv6 link
187  * local addresses, where scope 0 means no scope has been specified.
188  * it also supports the use of interface index as the unique identifier
189  * for network interfaces in SNMP applications as per RFC2863. therefore
190  * if_get(0) returns NULL.
191  */
192 
193 void if_ifp_dtor(void *, void *);
194 void if_map_dtor(void *, void *);
195 struct ifnet *if_ref(struct ifnet *);
196 
197 /*
198  * struct if_map
199  *
200  * bounded array of ifnet srp pointers used to fetch references of live
201  * interfaces with if_get().
202  */
203 
struct if_map {
	unsigned long		 limit;	/* number of srp slots that follow */
	/* followed by limit ifnet srp pointers */
};
208 
209 /*
210  * struct if_idxmap
211  *
212  * infrastructure to manage updates and accesses to the current if_map.
213  */
214 
struct if_idxmap {
	unsigned int		 serial;	/* next index to hand out; wraps at USHRT_MAX */
	unsigned int		 count;		/* attached interfaces; panic above USHRT_MAX */
	struct srp		 map;		/* the current struct if_map */
};
220 
221 void	if_idxmap_init(unsigned int);
222 void	if_idxmap_insert(struct ifnet *);
223 void	if_idxmap_remove(struct ifnet *);
224 
/* list of all interface groups */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/* registered interface cloners and how many there are */
LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners);
int if_cloners_count;

/* hooks should only be added, deleted, and run from a process context */
struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
void	if_hooks_run(struct task_list *);

struct timeout net_tick_to;	/* drives the periodic net_tick() */
void	net_tick(void *);
int	net_livelocked(void);
int	ifq_congestion;

int		 netisr;	/* bitmask of pending soft interrupts */

#define	NET_TASKQ	1
struct taskq	*nettqmp[NET_TASKQ];	/* softnet taskqs, created in ifinit() */

struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);

/*
 * Serialize socket operations to ensure no new sleeping points
 * are introduced in IP output paths.
 */
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
251 
252 /*
253  * Network interface utility routines.
254  */
255 void
256 ifinit(void)
257 {
258 	unsigned int	i;
259 
260 	/*
261 	 * most machines boot with 4 or 5 interfaces, so size the initial map
262 	 * to accomodate this
263 	 */
264 	if_idxmap_init(8);
265 
266 	timeout_set(&net_tick_to, net_tick, &net_tick_to);
267 
268 	for (i = 0; i < NET_TASKQ; i++) {
269 		nettqmp[i] = taskq_create("softnet", 1, IPL_NET, TASKQ_MPSAFE);
270 		if (nettqmp[i] == NULL)
271 			panic("unable to create network taskq %d", i);
272 	}
273 
274 	net_tick(&net_tick_to);
275 }
276 
/* the current interface index map state; see if_idxmap_init() */
static struct if_idxmap if_idxmap = {
	0,
	0,
	SRP_INITIALIZER()
};

/* srp garbage collectors for map slots and for retired maps */
struct srp_gc if_ifp_gc = SRP_GC_INITIALIZER(if_ifp_dtor, NULL);
struct srp_gc if_map_gc = SRP_GC_INITIALIZER(if_map_dtor, NULL);

/* list of all attached interfaces */
struct ifnet_head ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
287 
/*
 * Allocate and publish the initial interface index map with room for
 * `limit' entries.  Called once from ifinit() before any other CPU
 * can look at the map.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int i;

	if_idxmap.serial = 1; /* skip ifidx 0 so it can return NULL */

	/* the srp slot array lives directly after the if_map header */
	if_map = malloc(sizeof(*if_map) + limit * sizeof(*map),
	    M_IFADDR, M_WAITOK);

	if_map->limit = limit;
	map = (struct srp *)(if_map + 1);
	for (i = 0; i < limit; i++)
		srp_init(&map[i]);

	/* this is called early so there's nothing to race with */
	srp_update_locked(&if_map_gc, &if_idxmap.map, if_map);
}
308 
/*
 * Allocate an interface index for `ifp' and publish the ifnet pointer
 * in the index map, growing the map (doubling its limit) on demand.
 * Sets ifp->if_index as the final, committing step.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if (++if_idxmap.count > USHRT_MAX)
		panic("too many interfaces");

	if_map = srp_get_locked(&if_idxmap.map);
	map = (struct srp *)(if_map + 1);

	/* candidate index; wraps at USHRT_MAX, 0 stays reserved */
	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= if_map->limit) {
		/* grow: build a doubled map, copying the live entries over */
		struct if_map *nif_map;
		struct srp *nmap;
		unsigned int nlimit;
		struct ifnet *nifp;

		nlimit = if_map->limit * 2;
		nif_map = malloc(sizeof(*nif_map) + nlimit * sizeof(*nmap),
		    M_IFADDR, M_WAITOK);
		nmap = (struct srp *)(nif_map + 1);

		nif_map->limit = nlimit;
		for (i = 0; i < if_map->limit; i++) {
			srp_init(&nmap[i]);
			nifp = srp_get_locked(&map[i]);
			if (nifp != NULL) {
				/* the new slot takes its own reference */
				srp_update_locked(&if_ifp_gc, &nmap[i],
				    if_ref(nifp));
			}
		}

		while (i < nlimit) {
			srp_init(&nmap[i]);
			i++;
		}

		/* old map is torn down by if_map_dtor once readers drain */
		srp_update_locked(&if_map_gc, &if_idxmap.map, nif_map);
		if_map = nif_map;
		map = nmap;
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && srp_get_locked(&map[index]) == NULL)
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}

	/* commit */
	ifp->if_index = index;
	srp_update_locked(&if_ifp_gc, &map[index], if_ref(ifp));
}
372 
/*
 * Clear `ifp' from the interface index map and wait until every other
 * holder of a reference has released it, so the caller can safely
 * start tearing the interface down.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct if_map *if_map;
	struct srp *map;
	unsigned int index;

	index = ifp->if_index;

	/* the kernel lock guarantees serialised modifications to if_idxmap */
	KERNEL_ASSERT_LOCKED();

	if_map = srp_get_locked(&if_idxmap.map);
	KASSERT(index < if_map->limit);

	map = (struct srp *)(if_map + 1);
	KASSERT(ifp == (struct ifnet *)srp_get_locked(&map[index]));

	/* clearing the slot drops the map's reference via if_ifp_dtor */
	srp_update_locked(&if_ifp_gc, &map[index], NULL);
	if_idxmap.count--;
	/* end of if_idxmap modifications */

	/* sleep until the last reference is released */
	refcnt_finalize(&ifp->if_refcnt, "ifidxrm");
}
398 
/*
 * srp_gc destructor for an ifnet held in an index map slot: drop the
 * reference the slot owned.
 */
void
if_ifp_dtor(void *null, void *ifp)
{
	if_put(ifp);
}
404 
/*
 * srp_gc destructor for a retired if_map: release every ifnet
 * reference the map still holds, then free the map itself.
 */
void
if_map_dtor(void *null, void *m)
{
	struct if_map *if_map = m;
	struct srp *map = (struct srp *)(if_map + 1);
	unsigned int i;

	/*
	 * dont need to serialize the use of update_locked since this is
	 * the last reference to this map. there's nothing to race against.
	 */
	for (i = 0; i < if_map->limit; i++)
		srp_update_locked(&if_ifp_gc, &map[i], NULL);

	free(if_map, M_IFADDR, sizeof(*if_map) + if_map->limit * sizeof(*map));
}
421 
422 /*
423  * Attach an interface to the
424  * list of "active" interfaces.
425  */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	TAILQ_INIT(&ifp->if_groups);

	/* every interface is a member of the "all" group */
	if_addgroup(ifp, IFG_ALL);

	if_attachdomain(ifp);
#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	/* set up the watchdog timer and run it once to get it going */
	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	/* the tasks are handed the interface index cast to a pointer */
	ifidx = ifp->if_index;

	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
456 
457 /*
458  * Allocate the link level name for the specified interface.  This
459  * is an attachment helper.  It must be called after ifp->if_addrlen
460  * is initialized, which may not be the case when if_attach() is
461  * called.
462  */
463 void
464 if_alloc_sadl(struct ifnet *ifp)
465 {
466 	unsigned int socksize;
467 	int namelen, masklen;
468 	struct sockaddr_dl *sdl;
469 
470 	/*
471 	 * If the interface already has a link name, release it
472 	 * now.  This is useful for interfaces that can change
473 	 * link types, and thus switch link names often.
474 	 */
475 	if_free_sadl(ifp);
476 
477 	namelen = strlen(ifp->if_xname);
478 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
479 	socksize = masklen + ifp->if_addrlen;
480 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
481 	if (socksize < sizeof(*sdl))
482 		socksize = sizeof(*sdl);
483 	socksize = ROUNDUP(socksize);
484 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
485 	sdl->sdl_len = socksize;
486 	sdl->sdl_family = AF_LINK;
487 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
488 	sdl->sdl_nlen = namelen;
489 	sdl->sdl_alen = ifp->if_addrlen;
490 	sdl->sdl_index = ifp->if_index;
491 	sdl->sdl_type = ifp->if_type;
492 	ifp->if_sadl = sdl;
493 }
494 
495 /*
496  * Free the link level name for the specified interface.  This is
497  * a detach helper.  This is called from if_detach() or from
498  * link layer type specific detach functions.
499  */
500 void
501 if_free_sadl(struct ifnet *ifp)
502 {
503 	if (ifp->if_sadl == NULL)
504 		return;
505 
506 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
507 	ifp->if_sadl = NULL;
508 }
509 
/*
 * Give every domain (address family) a chance to attach its
 * per-interface state, stored in ifp->if_afdata[].
 */
void
if_attachdomain(struct ifnet *ifp)
{
	struct domain *dp;
	int i, s;

	s = splnet();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}

	splx(s);
}
528 
/*
 * Like if_attach(), but inserts the interface at the head of the
 * global interface list.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
538 
/*
 * Attach `ifp' to the system: initialise common state and append it
 * to the global interface list.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
548 
549 void
550 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
551 {
552 	struct ifqueue **map;
553 	struct ifqueue *ifq;
554 	int i;
555 
556 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
557 	KASSERT(nqs != 0);
558 
559 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
560 
561 	ifp->if_snd.ifq_softc = NULL;
562 	map[0] = &ifp->if_snd;
563 
564 	for (i = 1; i < nqs; i++) {
565 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
566 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
567 		ifq_init(ifq, ifp, i);
568 		map[i] = ifq;
569 	}
570 
571 	ifp->if_ifqs = map;
572 	ifp->if_nifqs = nqs;
573 }
574 
575 void
576 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
577 {
578 	struct ifiqueue **map;
579 	struct ifiqueue *ifiq;
580 	unsigned int i;
581 
582 	KASSERT(niqs != 0);
583 
584 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
585 
586 	ifp->if_rcv.ifiq_softc = NULL;
587 	map[0] = &ifp->if_rcv;
588 
589 	for (i = 1; i < niqs; i++) {
590 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
591 		ifiq_init(ifiq, ifp, i);
592 		map[i] = ifiq;
593 	}
594 
595 	ifp->if_iqs = map;
596 	ifp->if_niqs = niqs;
597 }
598 
/*
 * Initialisation shared by if_attach() and if_attachhead(): address
 * lists, send/receive queues, hook lists, and defaults for function
 * pointers a driver may leave unset.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);

	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		/* legacy drivers supply if_start; adapt it to the ifq API */
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	ifq_init(&ifp->if_snd, ifp, 0);

	/* interfaces start out with one transmit queue: if_snd itself */
	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	ifiq_init(&ifp->if_rcv, ifp, 0);

	/* likewise a single receive queue: if_rcv */
	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
	ifp->if_llprio = IFQ_DEFPRIO;

	SRPL_INIT(&ifp->if_inputs);
}
644 
/*
 * Switch the ifq_ops (packet scheduler) used by an interface's
 * transmit queues.
 */
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * only switch the ifq_ops on the first ifq on an interface.
	 *
	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq. because the code uses the ifq_ops
	 * on the first ifq (if_snd) to select a queue for an mbuf,
	 * by switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
659 
/*
 * Legacy transmit kick: only valid for drivers still running through
 * the if_qstart_compat shim (non-MPSAFE drivers).
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
/*
 * if_qstart handler installed for drivers not marked IFXF_MPSAFE
 * (see if_attach_common()): bridges from the ifqueue-based stack
 * back to the driver's single if_start routine.
 */
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
687 
/*
 * Hand an outgoing mbuf to `ifp', diverting it to the pf delay queue
 * or to the bridge first when applicable.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
#if NPF > 0
	/* pf may have asked for this packet to be delayed */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* skip the bridge for packets flagged M_PROTO1 */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	/* usually if_enqueue_ifq(), unless the driver overrode it */
	return ((*ifp->if_enqueue)(ifp, m));
}
711 
712 int
713 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
714 {
715 	struct ifqueue *ifq = &ifp->if_snd;
716 	int error;
717 
718 	if (ifp->if_nifqs > 1) {
719 		unsigned int idx;
720 
721 		/*
722 		 * use the operations on the first ifq to pick which of
723 		 * the array gets this mbuf.
724 		 */
725 
726 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
727 		ifq = ifp->if_ifqs[idx];
728 	}
729 
730 	error = ifq_enqueue(ifq, m);
731 	if (error)
732 		return (error);
733 
734 	ifq_start(ifq);
735 
736 	return (0);
737 }
738 
/*
 * Queue a list of received packets on the interface's first receive
 * queue.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
744 
/*
 * Feed a packet back into the local stack as if it had been received
 * on `ifp'.  Used for loopback output and for duplicating packets on
 * SIMPLEX interfaces.  Returns EAFNOSUPPORT (and frees the mbuf) for
 * address families the stack cannot handle.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* make the mbuf look freshly received on this interface */
	m_resethdr(m);
	m->m_flags |= M_LOOP;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* the packet counts as both sent and received */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
796 
797 int
798 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
799 {
800 	struct ifiqueue *ifiq;
801 	unsigned int flow = 0;
802 
803 	m->m_pkthdr.ph_family = af;
804 	m->m_pkthdr.ph_ifidx = ifp->if_index;
805 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
806 
807 	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
808 		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;
809 
810 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
811 
812 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
813 }
814 
/*
 * Per-interface input handler registration.  Handlers hang off
 * ifp->if_inputs, an SRP list, so the packet input path can walk
 * them without blocking while insertions and removals happen under
 * the kernel lock.
 */
struct ifih {
	SRPL_ENTRY(ifih)	  ifih_next;	/* entry on if_inputs */
	int			(*ifih_input)(struct ifnet *, struct mbuf *,
				      void *);
	void			 *ifih_cookie;	/* handler's private argument */
	int			  ifih_refcnt;	/* registrations (kernel locked) */
	struct refcnt		  ifih_srpcnt;	/* SRP read-side references */
};

void	if_ih_ref(void *, void *);
void	if_ih_unref(void *, void *);

struct srpl_rc ifih_rc = SRPL_RC_INITIALIZER(if_ih_ref, if_ih_unref, NULL);
828 
/*
 * Register an input handler on `ifp'.  If the same (input, cookie)
 * pair is already registered only its reference count is bumped;
 * otherwise a new entry is inserted at the head of if_inputs.
 */
void
if_ih_insert(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie) {
			ifih->ifih_refcnt++;
			break;
		}
	}

	if (ifih == NULL) {
		ifih = malloc(sizeof(*ifih), M_DEVBUF, M_WAITOK);

		ifih->ifih_input = input;
		ifih->ifih_cookie = cookie;
		ifih->ifih_refcnt = 1;
		refcnt_init(&ifih->ifih_srpcnt);
		SRPL_INSERT_HEAD_LOCKED(&ifih_rc, &ifp->if_inputs,
		    ifih, ifih_next);
	}
}
856 
/*
 * SRP list reference callback: account for a reader taking hold of
 * the ifih entry.
 */
void
if_ih_ref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_take(&ifih->ifih_srpcnt);
}
864 
/*
 * SRP list unreference callback: drop a reader's hold and wake any
 * thread waiting in refcnt_finalize() (see if_ih_remove()).
 */
void
if_ih_unref(void *null, void *i)
{
	struct ifih *ifih = i;

	refcnt_rele_wake(&ifih->ifih_srpcnt);
}
872 
/*
 * Unregister an input handler.  The entry is only unlinked and freed
 * once the last (input, cookie) registration goes away, and only
 * after all SRP readers have dropped their references.
 */
void
if_ih_remove(struct ifnet *ifp, int (*input)(struct ifnet *, struct mbuf *,
    void *), void *cookie)
{
	struct ifih *ifih;

	/* the kernel lock guarantees serialised modifications to if_inputs */
	KERNEL_ASSERT_LOCKED();

	SRPL_FOREACH_LOCKED(ifih, &ifp->if_inputs, ifih_next) {
		if (ifih->ifih_input == input && ifih->ifih_cookie == cookie)
			break;
	}

	KASSERT(ifih != NULL);

	if (--ifih->ifih_refcnt == 0) {
		SRPL_REMOVE_LOCKED(&ifih_rc, &ifp->if_inputs, ifih,
		    ifih, ifih_next);

		/* wait for readers still walking the list to let go */
		refcnt_finalize(&ifih->ifih_srpcnt, "ifihrm");
		free(ifih, M_DEVBUF, sizeof(*ifih));
	}
}
897 
/*
 * Offer `m' to each registered input handler of `ifp' in turn until
 * one consumes it (returns non-zero); free the packet if nobody does.
 */
static void
if_ih_input(struct ifnet *ifp, struct mbuf *m)
{
	struct ifih *ifih;
	struct srp_ref sr;

	/*
	 * Pass this mbuf to all input handlers of its
	 * interface until it is consumed.
	 */
	SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
		if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
			break;
	}
	SRPL_LEAVE(&sr);

	/* the loop ran off the end of the list: nobody wanted the packet */
	if (ifih == NULL)
		m_freem(m);
}
917 
/*
 * Dequeue and process every packet on `ml', handing each to the
 * interface's input handlers while holding the shared netlock.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* cloned (pseudo) interfaces don't contribute entropy */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml));

	/*
	 * We grab the NET_LOCK() before processing any packet to
	 * ensure there's no contention on the routing table lock.
	 *
	 * Without it we could race with a userland thread to insert
	 * a L2 entry in ip{6,}_output().  Such race would result in
	 * one of the threads sleeping *inside* the IP output path.
	 *
	 * Since we have a NET_LOCK() we also use it to serialize access
	 * to PF globals, pipex globals, unicast and multicast addresses
	 * lists.
	 */
	NET_RLOCK();
	while ((m = ml_dequeue(ml)) != NULL)
		if_ih_input(ifp, m);
	NET_RUNLOCK();
}
946 
/*
 * Input entry point for virtual (pseudo) interfaces: tag `m' with the
 * receiving interface, update input counters, give bpf a copy, then
 * run the input handlers directly instead of going through a receive
 * queue.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means bpf asks for the packet to be dropped */
		if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	if_ih_input(ifp, m);
}
972 
/*
 * Task run on the softnet taskq: drain the pending netisr bits,
 * calling each protocol's legacy soft-interrupt routine (mostly
 * under the kernel lock).  NETISR_PFSYNC is deferred and handled
 * once after the loop.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_RLOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_RUNLOCK();
			yield();
			NET_RLOCK();
		}

		/* claim the bits we are about to service */
		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP)) {
			KERNEL_LOCK();
			arpintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#if NSWITCH > 0
		if (n & (1 << NETISR_SWITCH)) {
			KERNEL_LOCK();
			switchintr();
			KERNEL_UNLOCK();
		}
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX)) {
			KERNEL_LOCK();
			pipexintr();
			KERNEL_UNLOCK();
		}
#endif
		/* remember everything serviced across iterations */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_RUNLOCK();
}
1042 
1043 void
1044 if_hooks_run(struct task_list *hooks)
1045 {
1046 	struct task *t, *nt;
1047 	struct task cursor = { .t_func = NULL };
1048 	void (*func)(void *);
1049 	void *arg;
1050 
1051 	mtx_enter(&if_hooks_mtx);
1052 	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
1053 		while (t->t_func == NULL) { /* skip cursors */
1054 			t = TAILQ_NEXT(t, t_entry);
1055 			if (t == NULL)
1056 				break;
1057 		}
1058 		func = t->t_func;
1059 		arg = t->t_arg;
1060 
1061 		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
1062 		mtx_leave(&if_hooks_mtx);
1063 
1064 		(*func)(arg);
1065 
1066 		mtx_enter(&if_hooks_mtx);
1067 		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
1068 		TAILQ_REMOVE(hooks, &cursor, t_entry);
1069 	}
1070 	mtx_leave(&if_hooks_mtx);
1071 }
1072 
/*
 * Run the detach hooks registered on `ifp' so pseudo-drivers stacked
 * on it can undo their changes before the interface is torn down.
 */
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
1086 
/*
 * Register a task to run when `ifp' is detached.  Hooks are inserted
 * at the head so they run in reverse registration order (see
 * if_deactivate()).
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1094 
/*
 * Remove a previously registered detach hook from `ifp'.
 */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1102 
1103 /*
1104  * Detach an interface from everything in the kernel.  Also deallocate
1105  * private resources.
1106  */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	struct domain *dp;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	ifq_clr_oactive(&ifp->if_snd);

	/* Other CPUs must not have a reference before we start destroying. */
	if_idxmap_remove(ifp);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	/* replace driver entry points with detached stubs */
	ifp->if_qstart = if_detached_qstart;
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* Remove the interface from the list of all interfaces.  */
	TAILQ_REMOVE(&ifnet, ifp, if_list);

	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

	/* let every domain tear down its per-interface state */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);
	splx(s);
	NET_UNLOCK();

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* destroy the send queues; free those not embedded in the ifnet */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* likewise for the receive queues */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1216 
1217 /*
1218  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1219  */
int
if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
{
	struct ifnet *ifp;
	int connected = 0;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return (0);

	/* An interface is trivially connected to itself. */
	if (ifp0->if_index == ifp->if_index)
		connected = 1;

#if NBRIDGE > 0
	/* Members of the same bridge(4) are considered connected. */
	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
		connected = 1;
#endif
#if NCARP > 0
	/* A carp(4) interface is connected to its parent, and vice versa. */
	if ((ifp0->if_type == IFT_CARP && ifp0->if_carpdev == ifp) ||
	    (ifp->if_type == IFT_CARP && ifp->if_carpdev == ifp0))
		connected = 1;
#endif

	if_put(ifp);
	return (connected);
}
1246 
1247 /*
1248  * Create a clone network interface.
1249  */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	/* Parse "name" into a registered cloner plus a unit number. */
	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	/* Refuse to create an interface that already exists. */
	if (ifunit(name) != NULL)
		return (EEXIST);

	ret = (*ifc->ifc_create)(ifc, unit);

	/*
	 * Only continue if the creation succeeded and the new interface
	 * is actually visible by name now.
	 */
	if (ret != 0 || (ifp = ifunit(name)) == NULL)
		return (ret);

	NET_LOCK();
	/* Every clone joins the group named after its cloner. */
	if_addgroup(ifp, ifc->ifc_name);
	/* XXX if_setrdomain() errors are silently ignored here. */
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();

	return (ret);
}
1277 
1278 /*
1279  * Destroy a clone network interface.
1280  */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	ifp = ifunit(name);
	if (ifp == NULL)
		return (ENXIO);

	/* Not every cloner supports destruction. */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	/* Bring the interface down before tearing it apart. */
	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	return (ret);
}
1311 
1312 /*
1313  * Look up a network interface cloner.
1314  */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */

	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	/* Find the cloner whose name matches the non-numeric prefix. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}

	if (ifc == NULL)
		return (NULL);

	/* Parse the decimal unit number, rejecting overflow past INT_MAX. */
	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}
1357 
1358 /*
1359  * Register a network interface cloner.
1360  */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	/* Keep the count in sync for SIOCIFGCLONERS (if_clone_list()). */
	if_cloners_count++;
}
1374 
1375 /*
1376  * Provide list of interface cloners to userspace.
1377  */
int
if_clone_list(struct if_clonereq *ifcr)
{
	char outbuf[IFNAMSIZ], *dst;
	struct if_clone *ifc;
	int count, error = 0;

	if ((dst = ifcr->ifcr_buffer) == NULL) {
		/* Just asking how many there are. */
		ifcr->ifcr_total = if_cloners_count;
		return (0);
	}

	if (ifcr->ifcr_count < 0)
		return (EINVAL);

	/* Copy out at most as many names as the caller's buffer holds. */
	ifcr->ifcr_total = if_cloners_count;
	count = MIN(if_cloners_count, ifcr->ifcr_count);

	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (count == 0)
			break;
		/* Zero first so we never leak stack bytes to userland. */
		bzero(outbuf, sizeof outbuf);
		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
		error = copyout(outbuf, dst, IFNAMSIZ);
		if (error)
			break;
		count--;
		dst += IFNAMSIZ;
	}

	return (error);
}
1411 
1412 /*
1413  * set queue congestion marker
1414  */
void
if_congestion(void)
{
	extern int ticks;

	/* Record the time of the most recent congestion event. */
	ifq_congestion = ticks;
}
1422 
1423 int
1424 if_congested(void)
1425 {
1426 	extern int ticks;
1427 	int diff;
1428 
1429 	diff = ticks - ifq_congestion;
1430 	if (diff < 0) {
1431 		ifq_congestion = ticks - hz;
1432 		return (0);
1433 	}
1434 
1435 	return (diff <= (hz / 100));
1436 }
1437 
/*
 * Sockaddr equality: byte-compare using the FIRST argument's sa_len,
 * so callers must pass the reference address as a1.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1441 
1442 /*
1443  * Locate an interface based on a complete address.
1444  */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	u_int rdomain;

	/* Only interfaces in the same routing domain are candidates. */
	rdomain = rtable_l2(rtableid);
	KERNEL_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;

			/* Exact byte-for-byte sockaddr match. */
			if (equal(addr, ifa->ifa_addr)) {
				KERNEL_UNLOCK();
				return (ifa);
			}
		}
	}
	KERNEL_UNLOCK();
	return (NULL);
}
1471 
1472 /*
1473  * Locate the point to point interface with a given destination address.
1474  */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	rdomain = rtable_l2(rdomain);
	KERNEL_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;
		/* Only point-to-point interfaces carry a peer address. */
		if (ifp->if_flags & IFF_POINTOPOINT) {
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				if (ifa->ifa_addr->sa_family !=
				    addr->sa_family || ifa->ifa_dstaddr == NULL)
					continue;
				/* Match on the remote (destination) side. */
				if (equal(addr, ifa->ifa_dstaddr)) {
					KERNEL_UNLOCK();
					return (ifa);
				}
			}
		}
	}
	KERNEL_UNLOCK();
	return (NULL);
}
1501 
1502 /*
1503  * Find an interface address specific to an interface best matching
1504  * a given address.
1505  */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	/* First address of the right family; fallback if no better match. */
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		/*
		 * Without a netmask (or on point-to-point links) only an
		 * exact match against the local or peer address counts.
		 */
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/*
		 * Otherwise compare addr and ifa_addr under the netmask:
		 * every masked byte must agree for the full mask length.
		 */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);
	}
	return (ifa_maybe);
}
1540 
/* No-op if_rtrequest handler for interfaces without special route needs. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1545 
1546 /*
1547  * Default action when installing a local route on a point-to-point
1548  * interface.
1549  */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		/* Only local routes get the special treatment below. */
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the interface address the route key refers to. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/*
		 * Check that the loopback interface of this routing
		 * domain has an address of the same family configured.
		 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1592 
1593 
1594 /*
1595  * Bring down all interfaces
1596  */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* Push the new flags into the driver as well. */
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1613 
1614 /*
1615  * Mark an interface down and notify protocols of
1616  * the transition.
1617  */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Drop anything still queued for transmission. */
	IFQ_PURGE(&ifp->if_snd);

	if_linkstate(ifp);
}
1629 
1630 /*
1631  * Mark an interface up and notify protocols of
1632  * the transition.
1633  */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	if_linkstate(ifp);
}
1650 
1651 /*
1652  * Notify userland, the routing table and hooks owner of
1653  * a link-state transition.
1654  */
void
if_linkstate_task(void *xifidx)
{
	/* The task argument is the interface index smuggled in a pointer. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	/* The interface may have been detached since the task was queued. */
	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1672 
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Tell routing sockets and the routing table, then run hooks. */
	rtm_ifchg(ifp);
	rt_if_track(ifp);

	if_hooks_run(&ifp->if_linkstatehooks);
}
1683 
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	/* Inserted at the head; run order relative to older hooks depends
	 * on if_hooks_run() -- not visible from here. */
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1691 
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1699 
1700 /*
1701  * Schedule a link state change task.
1702  */
void
if_link_state_change(struct ifnet *ifp)
{
	/* Defer the actual work to if_linkstate_task() on the net taskq. */
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1708 
1709 /*
1710  * Handle interface watchdog timer routine.  Called
1711  * from softclock, we decrement timer (if set) and
1712  * call the appropriate interface routine on expiration.
1713  */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* Fire the watchdog task when the countdown reaches zero. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Re-arm ourselves for the next period. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1727 
void
if_watchdog_task(void *xifidx)
{
	/* The task argument is the interface index smuggled in a pointer. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	/* The interface may have been detached since the task was queued. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1748 
1749 /*
1750  * Map interface name to interface structure pointer.
1751  */
1752 struct ifnet *
1753 ifunit(const char *name)
1754 {
1755 	struct ifnet *ifp;
1756 
1757 	KERNEL_ASSERT_LOCKED();
1758 
1759 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1760 		if (strcmp(ifp->if_xname, name) == 0)
1761 			return (ifp);
1762 	}
1763 	return (NULL);
1764 }
1765 
1766 /*
1767  * Map interface index to interface structure pointer.
1768  */
struct ifnet *
if_get(unsigned int index)
{
	struct srp_ref sr;
	struct if_map *if_map;
	struct srp *map;
	struct ifnet *ifp = NULL;

	/* Enter the SRP section so the index map cannot go away under us. */
	if_map = srp_enter(&sr, &if_idxmap.map);
	if (index < if_map->limit) {
		/* The srp array of entries directly follows the header. */
		map = (struct srp *)(if_map + 1);

		ifp = srp_follow(&sr, &map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Take a reference before leaving the SRP section. */
			if_ref(ifp);
		}
	}
	srp_leave(&sr);

	return (ifp);
}
1791 
/* Take an additional reference; the caller must release it with if_put(). */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1799 
1800 void
1801 if_put(struct ifnet *ifp)
1802 {
1803 	if (ifp == NULL)
1804 		return;
1805 
1806 	refcnt_rele_wake(&ifp->if_refcnt);
1807 }
1808 
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	/* NOTE(review): the cast assumes ifp is embedded as the first
	 * member of struct arpcom -- true for ethernet-like drivers. */
	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	/* Keep the link-level sockaddr copy in sync. */
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1820 
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	/* Make sure the routing table exists and is not already in use. */
	if (!rtable_exists(rdomain) && (error = rtable_add(rdomain)) != 0)
		return (error);
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = ifunit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when ifp itself is that loopback interface. */
	if (error && (ifp != loifp || error != EEXIST))
		return (error);

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;

	return (0);
}
1847 
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* The loopback interface anchoring its rdomain may not move. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1911 
1912 /*
1913  * Interface ioctls.
1914  */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* Requests that do not need (or must not have) an existing ifp. */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_create(ifr->ifr_name, 0);
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		error = if_clone_destroy(ifr->ifr_name);
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		return (error);
	/* All read-only requests are dispatched to ifioctl_get(). */
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		return (ifioctl_get(cmd, data));
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	/* Remember the flags so changes can be detected at the end. */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
		/* Userland may not touch the IFF_CANTCHANGE bits. */
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);

		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error != 0) {
			/* Driver refused: restore the previous flags. */
			ifp->if_flags = oif_flags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			/* IFF_UP toggled: run the up/down transition. */
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		if (ISSET(ifr->ifr_flags, IFXF_AUTOCONF6)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;

#endif	/* INET6 */

#ifdef MPLS
		/* Toggling IFXF_MPLS swaps the output routine in or out. */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		/* Wake-on-LAN: only honoured when the hardware supports it. */
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		if (!error)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			/* Drop the old label reference before re-mapping. */
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			/* ENOTTY just means the driver has no opinion. */
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		/* Only unicast ethernet-sized addresses are acceptable. */
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFMEDIA:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
	case SIOCSWGDPID:
	case SIOCSWSPORTNO:
	case SIOCSWGMAXFLOW:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* Give the protocol a shot first, then the driver. */
		error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL,
			(struct mbuf *) cmd, (struct mbuf *) data,
			(struct mbuf *) ifp, p));
		if (error != EOPNOTSUPP)
			break;
		/* Address-modifying requests still require privilege. */
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	/* Notify routing sockets if any flag changed under this request. */
	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags)
		rtm_ifchg(ifp);

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	return (error);
}
2328 
/*
 * Read-only interface ioctls, taken out of ifioctl() so they can run
 * under the shared (read) net lock.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;
	const char *label;

	/* Requests that are not tied to one particular interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_RLOCK();
		error = ifconf(data);
		NET_RUNLOCK();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_RLOCK();
		error = if_getgroupmembers(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGATTR:
		NET_RLOCK();
		error = if_getgroupattribs(data);
		NET_RUNLOCK();
		return (error);
	case SIOCGIFGLIST:
		NET_RLOCK();
		error = if_getgrouplist(data);
		NET_RUNLOCK();
		return (error);
	}

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);

	NET_RLOCK();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* Synthesize IFF_OACTIVE from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Internal-only bits are hidden from userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid &&
		    (label = rtlabel_id2name(ifp->if_rtlabelid)) != NULL) {
			strlcpy(ifrtlabelbuf, label, RTLABEL_LEN);
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() only dispatches the commands handled above. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_RUNLOCK();

	return (error);
}
2442 
2443 static int
2444 if_sffpage_check(const caddr_t data)
2445 {
2446 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2447 
2448 	switch (sff->sff_addr) {
2449 	case IFSFF_ADDR_EEPROM:
2450 	case IFSFF_ADDR_DDM:
2451 		break;
2452 	default:
2453 		return (EINVAL);
2454 	}
2455 
2456 	return (0);
2457 }
2458 
2459 int
2460 if_txhprio_l2_check(int hdrprio)
2461 {
2462 	switch (hdrprio) {
2463 	case IF_HDRPRIO_PACKET:
2464 		return (0);
2465 	default:
2466 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2467 			return (0);
2468 		break;
2469 	}
2470 
2471 	return (EINVAL);
2472 }
2473 
2474 int
2475 if_txhprio_l3_check(int hdrprio)
2476 {
2477 	switch (hdrprio) {
2478 	case IF_HDRPRIO_PACKET:
2479 	case IF_HDRPRIO_PAYLOAD:
2480 		return (0);
2481 	default:
2482 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2483 			return (0);
2484 		break;
2485 	}
2486 
2487 	return (EINVAL);
2488 }
2489 
2490 int
2491 if_rxhprio_l2_check(int hdrprio)
2492 {
2493 	switch (hdrprio) {
2494 	case IF_HDRPRIO_PACKET:
2495 	case IF_HDRPRIO_OUTER:
2496 		return (0);
2497 	default:
2498 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2499 			return (0);
2500 		break;
2501 	}
2502 
2503 	return (EINVAL);
2504 }
2505 
2506 int
2507 if_rxhprio_l3_check(int hdrprio)
2508 {
2509 	switch (hdrprio) {
2510 	case IF_HDRPRIO_PACKET:
2511 	case IF_HDRPRIO_PAYLOAD:
2512 	case IF_HDRPRIO_OUTER:
2513 		return (0);
2514 	default:
2515 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2516 			return (0);
2517 		break;
2518 	}
2519 
2520 	return (EINVAL);
2521 }
2522 
2523 /*
2524  * Return interface configuration
2525  * of system.  List may be used
2526  * in later ioctl's (above) to get
2527  * other information.
2528  */
2529 int
2530 ifconf(caddr_t data)
2531 {
2532 	struct ifconf *ifc = (struct ifconf *)data;
2533 	struct ifnet *ifp;
2534 	struct ifaddr *ifa;
2535 	struct ifreq ifr, *ifrp;
2536 	int space = ifc->ifc_len, error = 0;
2537 
2538 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2539 	if (space == 0) {
2540 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
2541 			struct sockaddr *sa;
2542 
2543 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2544 				space += sizeof (ifr);
2545 			else
2546 				TAILQ_FOREACH(ifa,
2547 				    &ifp->if_addrlist, ifa_list) {
2548 					sa = ifa->ifa_addr;
2549 					if (sa->sa_len > sizeof(*sa))
2550 						space += sa->sa_len -
2551 						    sizeof(*sa);
2552 					space += sizeof(ifr);
2553 				}
2554 		}
2555 		ifc->ifc_len = space;
2556 		return (0);
2557 	}
2558 
2559 	ifrp = ifc->ifc_req;
2560 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2561 		if (space < sizeof(ifr))
2562 			break;
2563 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2564 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2565 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2566 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2567 			    sizeof(ifr));
2568 			if (error)
2569 				break;
2570 			space -= sizeof (ifr), ifrp++;
2571 		} else
2572 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2573 				struct sockaddr *sa = ifa->ifa_addr;
2574 
2575 				if (space < sizeof(ifr))
2576 					break;
2577 				if (sa->sa_len <= sizeof(*sa)) {
2578 					ifr.ifr_addr = *sa;
2579 					error = copyout((caddr_t)&ifr,
2580 					    (caddr_t)ifrp, sizeof (ifr));
2581 					ifrp++;
2582 				} else {
2583 					space -= sa->sa_len - sizeof(*sa);
2584 					if (space < sizeof (ifr))
2585 						break;
2586 					error = copyout((caddr_t)&ifr,
2587 					    (caddr_t)ifrp,
2588 					    sizeof(ifr.ifr_name));
2589 					if (error == 0)
2590 						error = copyout((caddr_t)sa,
2591 						    (caddr_t)&ifrp->ifr_addr,
2592 						    sa->sa_len);
2593 					ifrp = (struct ifreq *)(sa->sa_len +
2594 					    (caddr_t)&ifrp->ifr_addr);
2595 				}
2596 				if (error)
2597 					break;
2598 				space -= sizeof (ifr);
2599 			}
2600 	}
2601 	ifc->ifc_len -= space;
2602 	return (error);
2603 }
2604 
/*
 * Attach a per-CPU counter set (ifc_ncounters counters) to the
 * interface.  Must be called at most once per interface.
 */
void
if_counters_alloc(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters == NULL);

	ifp->if_counters = counters_alloc(ifc_ncounters);
}
2612 
/*
 * Release the interface's per-CPU counter set and clear the pointer
 * so if_getdata() falls back to the plain if_data statistics.
 */
void
if_counters_free(struct ifnet *ifp)
{
	KASSERT(ifp->if_counters != NULL);

	counters_free(ifp->if_counters, ifc_ncounters);
	ifp->if_counters = NULL;
}
2621 
/*
 * Snapshot the interface statistics into *data: start from the
 * legacy if_data block, fold in the per-CPU counters when attached,
 * then add the per-queue tx and rx statistics.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters));

		/* Per-CPU counters are additive on top of if_data. */
		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* Transmit queue statistics. */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* Receive queue statistics. */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2660 
2661 /*
2662  * Dummy functions replaced in ifnet during detach (if protocols decide to
2663  * fiddle with the if during detach.
2664  */
/* Detached qstart handler: drop anything still queued for transmit. */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2670 
2671 int
2672 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2673 {
2674 	return ENODEV;
2675 }
2676 
2677 /*
2678  * Create interface group without members
2679  */
2680 struct ifg_group *
2681 if_creategroup(const char *groupname)
2682 {
2683 	struct ifg_group	*ifg;
2684 
2685 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2686 		return (NULL);
2687 
2688 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2689 	ifg->ifg_refcnt = 0;
2690 	ifg->ifg_carp_demoted = 0;
2691 	TAILQ_INIT(&ifg->ifg_members);
2692 #if NPF > 0
2693 	pfi_attach_ifgroup(ifg);
2694 #endif
2695 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2696 
2697 	return (ifg);
2698 }
2699 
2700 /*
2701  * Add a group to an interface
2702  */
2703 int
2704 if_addgroup(struct ifnet *ifp, const char *groupname)
2705 {
2706 	struct ifg_list		*ifgl;
2707 	struct ifg_group	*ifg = NULL;
2708 	struct ifg_member	*ifgm;
2709 
2710 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
2711 	    groupname[strlen(groupname) - 1] <= '9')
2712 		return (EINVAL);
2713 
2714 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2715 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2716 			return (EEXIST);
2717 
2718 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2719 		return (ENOMEM);
2720 
2721 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2722 		free(ifgl, M_TEMP, sizeof(*ifgl));
2723 		return (ENOMEM);
2724 	}
2725 
2726 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2727 		if (!strcmp(ifg->ifg_group, groupname))
2728 			break;
2729 
2730 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
2731 		free(ifgl, M_TEMP, sizeof(*ifgl));
2732 		free(ifgm, M_TEMP, sizeof(*ifgm));
2733 		return (ENOMEM);
2734 	}
2735 
2736 	ifg->ifg_refcnt++;
2737 	ifgl->ifgl_group = ifg;
2738 	ifgm->ifgm_ifp = ifp;
2739 
2740 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2741 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2742 
2743 #if NPF > 0
2744 	pfi_group_addmember(groupname, ifp);
2745 #endif
2746 
2747 	return (0);
2748 }
2749 
2750 /*
2751  * Remove a group from an interface
2752  */
2753 int
2754 if_delgroup(struct ifnet *ifp, const char *groupname)
2755 {
2756 	struct ifg_list		*ifgl;
2757 	struct ifg_member	*ifgm;
2758 
2759 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2760 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2761 			break;
2762 	if (ifgl == NULL)
2763 		return (ENOENT);
2764 
2765 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2766 
2767 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2768 		if (ifgm->ifgm_ifp == ifp)
2769 			break;
2770 
2771 	if (ifgm != NULL) {
2772 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2773 		free(ifgm, M_TEMP, sizeof(*ifgm));
2774 	}
2775 
2776 #if NPF > 0
2777 	pfi_group_change(groupname);
2778 #endif
2779 
2780 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2781 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2782 #if NPF > 0
2783 		pfi_detach_ifgroup(ifgl->ifgl_group);
2784 #endif
2785 		free(ifgl->ifgl_group, M_TEMP, 0);
2786 	}
2787 
2788 	free(ifgl, M_TEMP, sizeof(*ifgl));
2789 
2790 	return (0);
2791 }
2792 
2793 /*
2794  * Stores all groups from an interface in memory pointed
2795  * to by data
2796  */
2797 int
2798 if_getgroup(caddr_t data, struct ifnet *ifp)
2799 {
2800 	int			 len, error;
2801 	struct ifg_list		*ifgl;
2802 	struct ifg_req		 ifgrq, *ifgp;
2803 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2804 
2805 	if (ifgr->ifgr_len == 0) {
2806 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2807 			ifgr->ifgr_len += sizeof(struct ifg_req);
2808 		return (0);
2809 	}
2810 
2811 	len = ifgr->ifgr_len;
2812 	ifgp = ifgr->ifgr_groups;
2813 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2814 		if (len < sizeof(ifgrq))
2815 			return (EINVAL);
2816 		bzero(&ifgrq, sizeof ifgrq);
2817 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2818 		    sizeof(ifgrq.ifgrq_group));
2819 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2820 		    sizeof(struct ifg_req))))
2821 			return (error);
2822 		len -= sizeof(ifgrq);
2823 		ifgp++;
2824 	}
2825 
2826 	return (0);
2827 }
2828 
2829 /*
2830  * Stores all members of a group in memory pointed to by data
2831  */
2832 int
2833 if_getgroupmembers(caddr_t data)
2834 {
2835 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2836 	struct ifg_group	*ifg;
2837 	struct ifg_member	*ifgm;
2838 	struct ifg_req		 ifgrq, *ifgp;
2839 	int			 len, error;
2840 
2841 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2842 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2843 			break;
2844 	if (ifg == NULL)
2845 		return (ENOENT);
2846 
2847 	if (ifgr->ifgr_len == 0) {
2848 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2849 			ifgr->ifgr_len += sizeof(ifgrq);
2850 		return (0);
2851 	}
2852 
2853 	len = ifgr->ifgr_len;
2854 	ifgp = ifgr->ifgr_groups;
2855 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2856 		if (len < sizeof(ifgrq))
2857 			return (EINVAL);
2858 		bzero(&ifgrq, sizeof ifgrq);
2859 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2860 		    sizeof(ifgrq.ifgrq_member));
2861 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2862 		    sizeof(struct ifg_req))))
2863 			return (error);
2864 		len -= sizeof(ifgrq);
2865 		ifgp++;
2866 	}
2867 
2868 	return (0);
2869 }
2870 
2871 int
2872 if_getgroupattribs(caddr_t data)
2873 {
2874 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2875 	struct ifg_group	*ifg;
2876 
2877 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2878 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2879 			break;
2880 	if (ifg == NULL)
2881 		return (ENOENT);
2882 
2883 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2884 
2885 	return (0);
2886 }
2887 
/*
 * SIOCSIFGATTR: apply a carp demotion delta to a group and notify
 * every member interface via its ioctl handler.  Returns ENOENT for
 * an unknown group, EINVAL when the delta would leave the counter
 * outside 0..255.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* The accumulated demotion counter must stay within 0..0xff. */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* Let each member react to the new demotion level. */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
2914 
2915 /*
2916  * Stores all groups in memory pointed to by data
2917  */
2918 int
2919 if_getgrouplist(caddr_t data)
2920 {
2921 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2922 	struct ifg_group	*ifg;
2923 	struct ifg_req		 ifgrq, *ifgp;
2924 	int			 len, error;
2925 
2926 	if (ifgr->ifgr_len == 0) {
2927 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2928 			ifgr->ifgr_len += sizeof(ifgrq);
2929 		return (0);
2930 	}
2931 
2932 	len = ifgr->ifgr_len;
2933 	ifgp = ifgr->ifgr_groups;
2934 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
2935 		if (len < sizeof(ifgrq))
2936 			return (EINVAL);
2937 		bzero(&ifgrq, sizeof ifgrq);
2938 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
2939 		    sizeof(ifgrq.ifgrq_group));
2940 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2941 		    sizeof(struct ifg_req))))
2942 			return (error);
2943 		len -= sizeof(ifgrq);
2944 		ifgp++;
2945 	}
2946 
2947 	return (0);
2948 }
2949 
/*
 * Route change hook: when a default route (all-zeros destination and
 * wildcard mask) is added or removed, rebuild the egress group.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		/* sa_len == 0 counts as a wildcard mask. */
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
2971 
/*
 * Rebuild the IFG_EGRESS group from scratch: drop every current
 * member, then re-add each interface that carries a default route
 * (IPv4 and, when INET6 is configured, IPv6) in routing table 0.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* _SAFE because if_delgroup() may free the member record. */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk all IPv4 default routes. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Walk all IPv6 default routes. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3021 
3022 /*
3023  * Set/clear promiscuous mode on interface ifp based on the truth value
3024  * of pswitch.  The calls are reference counted so that only the first
3025  * "on" request actually has an effect, as does the final "off" request.
3026  * Results are undefined if the "off" and "on" requests are not matched.
3027  */
3028 int
3029 ifpromisc(struct ifnet *ifp, int pswitch)
3030 {
3031 	struct ifreq ifr;
3032 	unsigned short oif_flags;
3033 	int oif_pcount, error;
3034 
3035 	oif_flags = ifp->if_flags;
3036 	oif_pcount = ifp->if_pcount;
3037 	if (pswitch) {
3038 		if (ifp->if_pcount++ != 0)
3039 			return (0);
3040 		ifp->if_flags |= IFF_PROMISC;
3041 	} else {
3042 		if (--ifp->if_pcount > 0)
3043 			return (0);
3044 		ifp->if_flags &= ~IFF_PROMISC;
3045 	}
3046 
3047 	if ((ifp->if_flags & IFF_UP) == 0)
3048 		return (0);
3049 
3050 	memset(&ifr, 0, sizeof(ifr));
3051 	ifr.ifr_flags = ifp->if_flags;
3052 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
3053 	if (error) {
3054 		ifp->if_flags = oif_flags;
3055 		ifp->if_pcount = oif_pcount;
3056 	}
3057 
3058 	return (error);
3059 }
3060 
/* Append an address to the interface's address list. */
void
ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
}
3066 
/* Remove an address from the interface's address list. */
void
ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
{
	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
}
3072 
/*
 * Overwrite an address's broadcast address in place.  Only supports
 * replacement sockaddrs of the same length; anything else panics.
 */
void
ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
{
	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
		panic("ifa_update_broadaddr does not support dynamic length");
	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
}
3080 
#ifdef DDB
/*
 * Debug function, can be called from ddb>: print every configured
 * INET/INET6 interface address together with its interface name.
 */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			char addr[INET6_ADDRSTRLEN];

			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    addr, sizeof(addr)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &(satosin6(ifa->ifa_addr))->sin6_addr,
				    addr, sizeof(addr)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
3112 
/*
 * Handle a change of the interface's link-layer address: cycle the
 * interface through SIOCSIFFLAGS (presumably so the driver picks up
 * the new address — the driver sees the flags ioctl while up), and
 * regenerate the IPv6 link-local address derived from it.  Runs at
 * splnet() and restores the original up/down state before returning.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;
	int s;

	s = splnet();
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* ...then bring it (back) up so the driver reinitializes. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 *
	 * NOTE(review): the NULL check below only works if ia_ifa is
	 * the first member of the in6_ifaddr returned by
	 * in6ifa_ifpforlinklocal() — confirm against netinet6.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	splx(s);
}
3159 
/* Register a task to run when the interface's addresses change. */
void
if_addrhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3167 
/* Unregister a previously added address-change task. */
void
if_addrhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
3175 
/* Run all of the interface's address-change hooks. */
void
if_addrhooks_run(struct ifnet *ifp)
{
	if_hooks_run(&ifp->if_addrhooks);
}
3181 
int net_ticks;		/* tick count recorded at the last net_tick() run */
u_int net_livelocks;	/* number of detected network livelocks */
3184 
/*
 * Periodic timeout, rearmed every tick.  If more than one tick has
 * elapsed since the previous run, the timeout was delayed; count
 * that as a livelock.
 */
void
net_tick(void *null)
{
	extern int ticks;

	if (ticks - net_ticks > 1)
		net_livelocks++;

	net_ticks = ticks;

	timeout_add(&net_tick_to, 1);
}
3197 
3198 int
3199 net_livelocked(void)
3200 {
3201 	extern int ticks;
3202 
3203 	return (ticks - net_ticks > 1);
3204 }
3205 
3206 void
3207 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3208 {
3209 	extern int ticks;
3210 
3211 	memset(rxr, 0, sizeof(*rxr));
3212 
3213 	rxr->rxr_adjusted = ticks;
3214 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3215 	rxr->rxr_hwm = hwm;
3216 }
3217 
/*
 * Adjust the current watermark: shrink towards the low watermark
 * while the system is livelocked, grow towards the high watermark
 * while the ring is running below the low watermark.
 */
static inline void
if_rxr_adjust_cwm(struct if_rxring *rxr)
{
	extern int ticks;

	if (net_livelocked()) {
		if (rxr->rxr_cwm > rxr->rxr_lwm)
			rxr->rxr_cwm--;
		else
			return;	/* already at the floor; nothing changed */
	} else if (rxr->rxr_alive >= rxr->rxr_lwm)
		return;
	else if (rxr->rxr_cwm < rxr->rxr_hwm)
		rxr->rxr_cwm++;

	/* Record when we last changed (or tried to change) the watermark. */
	rxr->rxr_adjusted = ticks;
}
3235 
3236 void
3237 if_rxr_livelocked(struct if_rxring *rxr)
3238 {
3239 	extern int ticks;
3240 
3241 	if (ticks - rxr->rxr_adjusted >= 1) {
3242 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3243 			rxr->rxr_cwm--;
3244 
3245 		rxr->rxr_adjusted = ticks;
3246 	}
3247 }
3248 
3249 u_int
3250 if_rxr_get(struct if_rxring *rxr, u_int max)
3251 {
3252 	extern int ticks;
3253 	u_int diff;
3254 
3255 	if (ticks - rxr->rxr_adjusted >= 1) {
3256 		/* we're free to try for an adjustment */
3257 		if_rxr_adjust_cwm(rxr);
3258 	}
3259 
3260 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3261 		return (0);
3262 
3263 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3264 	rxr->rxr_alive += diff;
3265 
3266 	return (diff);
3267 }
3268 
/*
 * Copy up to t rx ring descriptions from e out to the user-supplied
 * if_rxrinfo.  The total count t is always reported back in
 * ifri_total so callers can size a retry.
 */
int
if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
{
	struct if_rxrinfo kifri;
	int error;
	u_int n;

	error = copyin(uifri, &kifri, sizeof(kifri));
	if (error)
		return (error);

	/* Never write more entries than the user provided room for. */
	n = min(t, kifri.ifri_total);
	kifri.ifri_total = t;

	if (n > 0) {
		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
		if (error)
			return (error);
	}

	return (copyout(&kifri, uifri, sizeof(kifri)));
}
3291 
3292 int
3293 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3294     struct if_rxring *rxr)
3295 {
3296 	struct if_rxring_info ifr;
3297 
3298 	memset(&ifr, 0, sizeof(ifr));
3299 
3300 	if (name != NULL)
3301 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3302 
3303 	ifr.ifr_size = size;
3304 	ifr.ifr_info = *rxr;
3305 
3306 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3307 }
3308 
3309 /*
3310  * Network stack input queues.
3311  */
3312 
/*
 * Initialize a network stack input queue: an mbuf queue protected at
 * IPL_NET plus the netisr to schedule when packets are queued.
 */
void
niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
{
	mq_init(&niq->ni_q, maxlen, IPL_NET);
	niq->ni_isr = isr;
}
3319 
3320 int
3321 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3322 {
3323 	int rv;
3324 
3325 	rv = mq_enqueue(&niq->ni_q, m);
3326 	if (rv == 0)
3327 		schednetisr(niq->ni_isr);
3328 	else
3329 		if_congestion();
3330 
3331 	return (rv);
3332 }
3333 
3334 int
3335 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3336 {
3337 	int rv;
3338 
3339 	rv = mq_enlist(&niq->ni_q, ml);
3340 	if (rv == 0)
3341 		schednetisr(niq->ni_isr);
3342 	else
3343 		if_congestion();
3344 
3345 	return (rv);
3346 }
3347 
/* Fatal catch-all for address families a code path does not support. */
__dead void
unhandled_af(int af)
{
	panic("unhandled af %d", af);
}
3353 
3354 /*
3355  * XXXSMP This tunable is here to work around the fact that IPsec
3356  * globals aren't ready to be accessed by multiple threads in
3357  * parallel.
3358  */
3359 int		 nettaskqs = NET_TASKQ;
3360 
3361 struct taskq *
3362 net_tq(unsigned int ifindex)
3363 {
3364 	struct taskq *t = NULL;
3365 
3366 	t = nettqmp[ifindex % nettaskqs];
3367 
3368 	return (t);
3369 }
3370