xref: /openbsd-src/sys/net/if.c (revision fc405d53b73a2d73393cb97f684863d17b583e38)
1 /*	$OpenBSD: if.c,v 1.697 2023/05/16 14:32:54 jan Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "if_wg.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/timeout.h>
80 #include <sys/protosw.h>
81 #include <sys/kernel.h>
82 #include <sys/ioctl.h>
83 #include <sys/domain.h>
84 #include <sys/task.h>
85 #include <sys/atomic.h>
86 #include <sys/percpu.h>
87 #include <sys/proc.h>
88 #include <sys/stdint.h>	/* uintptr_t */
89 #include <sys/rwlock.h>
90 #include <sys/smr.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include "vlan.h"
99 #if NVLAN > 0
100 #include <net/if_vlan_var.h>
101 #endif
102 
103 #include <netinet/in.h>
104 #include <netinet/if_ether.h>
105 #include <netinet/igmp.h>
106 #ifdef MROUTING
107 #include <netinet/ip_mroute.h>
108 #endif
109 
110 #ifdef INET6
111 #include <netinet6/in6_var.h>
112 #include <netinet6/in6_ifattach.h>
113 #include <netinet6/nd6.h>
114 #include <netinet/ip6.h>
115 #include <netinet6/ip6_var.h>
116 #endif
117 
118 #ifdef MPLS
119 #include <netmpls/mpls.h>
120 #endif
121 
122 #if NBPFILTER > 0
123 #include <net/bpf.h>
124 #endif
125 
126 #if NBRIDGE > 0
127 #include <net/if_bridge.h>
128 #endif
129 
130 #if NCARP > 0
131 #include <netinet/ip_carp.h>
132 #endif
133 
134 #if NPF > 0
135 #include <net/pfvar.h>
136 #endif
137 
138 #include <sys/device.h>
139 
140 void	if_attachsetup(struct ifnet *);
141 void	if_attach_common(struct ifnet *);
142 void	if_remove(struct ifnet *);
143 int	if_createrdomain(int, struct ifnet *);
144 int	if_setrdomain(struct ifnet *, int);
145 void	if_slowtimo(void *);
146 
147 void	if_detached_qstart(struct ifqueue *);
148 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
149 
150 int	ifioctl_get(u_long, caddr_t);
151 int	ifconf(caddr_t);
152 static int
153 	if_sffpage_check(const caddr_t);
154 
155 int	if_getgroup(caddr_t, struct ifnet *);
156 int	if_getgroupmembers(caddr_t);
157 int	if_getgroupattribs(caddr_t);
158 int	if_setgroupattribs(caddr_t);
159 int	if_getgrouplist(caddr_t);
160 
161 void	if_linkstate(struct ifnet *);
162 void	if_linkstate_task(void *);
163 
164 int	if_clone_list(struct if_clonereq *);
165 struct if_clone	*if_clone_lookup(const char *, int *);
166 
167 int	if_group_egress_build(void);
168 
169 void	if_watchdog_task(void *);
170 
171 void	if_netisr(void *);
172 
173 #ifdef DDB
174 void	ifa_print_all(void);
175 #endif
176 
177 void	if_qstart_compat(struct ifqueue *);
178 
179 /*
180  * interface index map
181  *
182  * the kernel maintains a mapping of interface indexes to struct ifnet
183  * pointers.
184  *
 * the map is an array of struct ifnet pointers. slot 0 of the array is
 * reserved to store the length of the array (see if_idxmap_limit()).
187  *
188  * as interfaces are attached to the system, the map is grown on demand
189  * up to USHRT_MAX entries.
190  *
191  * interface index 0 is reserved and represents no interface. this
192  * supports the use of the interface index as the scope for IPv6 link
193  * local addresses, where scope 0 means no scope has been specified.
194  * it also supports the use of interface index as the unique identifier
195  * for network interfaces in SNMP applications as per RFC2863. therefore
196  * if_get(0) returns NULL.
197  */
198 
199 struct ifnet *if_ref(struct ifnet *);
200 
201 /*
202  * struct if_idxmap
203  *
204  * infrastructure to manage updates and accesses to the current if_map.
205  *
206  * interface index 0 is special and represents "no interface", so we
207  * use the 0th slot in map to store the length of the array.
208  */
209 
struct if_idxmap {
	unsigned int		  serial;	/* [lock] next candidate index, wraps at USHRT_MAX */
	unsigned int		  count;	/* [lock] number of allocated indices */
	struct ifnet		**map;		/* SMR protected */
	struct rwlock		  lock;		/* serializes map/usedidx updates */
	unsigned char		 *usedidx;	/* bitmap of indices in use */
};
217 
/*
 * Deferred destructor for a replaced if_map.  When the map grows, the
 * old usedidx bitmap is recycled as this structure so the old map can
 * be released via smr_call() once all SMR readers are done (see
 * if_idxmap_alloc() and if_idxmap_free()).
 */
struct if_idxmap_dtor {
	struct smr_entry	  smr;
	struct ifnet		**map;	/* the superseded if_map to free */
};
222 
223 void	if_idxmap_init(unsigned int);
224 void	if_idxmap_free(void *);
225 void	if_idxmap_alloc(struct ifnet *);
226 void	if_idxmap_insert(struct ifnet *);
227 void	if_idxmap_remove(struct ifnet *);
228 
229 TAILQ_HEAD(, ifg_group) ifg_head =
230     TAILQ_HEAD_INITIALIZER(ifg_head);	/* [N] list of interface groups */
231 
232 LIST_HEAD(, if_clone) if_cloners =
233     LIST_HEAD_INITIALIZER(if_cloners);	/* [I] list of clonable interfaces */
234 int if_cloners_count;	/* [I] number of clonable interfaces */
235 
236 struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonelk");
237 
238 /* hooks should only be added, deleted, and run from a process context */
239 struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
240 void	if_hooks_run(struct task_list *);
241 
242 int	ifq_congestion;
243 
244 int		 netisr;
245 
struct softnet {
	char		 sn_name[16];	/* taskq name, "softnetN"; set in ifinit() */
	struct taskq	*sn_taskq;	/* single-threaded IPL_NET taskq */
};
250 
251 #define	NET_TASKQ	4
252 struct softnet	softnets[NET_TASKQ];
253 
254 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
255 
256 /*
257  * Serialize socket operations to ensure no new sleeping points
258  * are introduced in IP output paths.
259  */
260 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
261 
262 /*
263  * Network interface utility routines.
264  */
265 void
266 ifinit(void)
267 {
268 	unsigned int	i;
269 
270 	/*
271 	 * most machines boot with 4 or 5 interfaces, so size the initial map
272 	 * to accommodate this
273 	 */
274 	if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
275 
276 	for (i = 0; i < NET_TASKQ; i++) {
277 		struct softnet *sn = &softnets[i];
278 		snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
279 		sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
280 		    TASKQ_MPSAFE);
281 		if (sn->sn_taskq == NULL)
282 			panic("unable to create network taskq %d", i);
283 	}
284 }
285 
286 static struct if_idxmap if_idxmap;
287 
288 /*
289  * XXXSMP: For `ifnetlist' modification both kernel and net locks
290  * should be taken. For read-only access only one lock of them required.
291  */
292 struct ifnet_head ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);
293 
294 static inline unsigned int
295 if_idxmap_limit(struct ifnet **if_map)
296 {
297 	return ((uintptr_t)if_map[0]);
298 }
299 
/*
 * Allocation size of the usedidx bitmap for a map with `limit' slots.
 * The bitmap is later recycled as a struct if_idxmap_dtor when the map
 * grows (see if_idxmap_alloc()), so it must be at least that large.
 */
static inline size_t
if_idxmap_usedidx_size(unsigned int limit)
{
	return (max(howmany(limit, NBBY), sizeof(struct if_idxmap_dtor)));
}
305 
/*
 * Set up the global interface index map with room for `limit' entries.
 * Called once from ifinit(), before any interface attaches.
 */
void
if_idxmap_init(unsigned int limit)
{
	struct ifnet **if_map;

	rw_init(&if_idxmap.lock, "idxmaplk");
	if_idxmap.serial = 1; /* skip ifidx 0 */

	if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
	    M_WAITOK | M_ZERO);

	/* slot 0 stores the array length; see if_idxmap_limit() */
	if_map[0] = (struct ifnet *)(uintptr_t)limit;

	if_idxmap.usedidx = malloc(if_idxmap_usedidx_size(limit),
	    M_IFADDR, M_WAITOK | M_ZERO);
	setbit(if_idxmap.usedidx, 0); /* blacklist ifidx 0 */

	/* this is called early so there's nothing to race with */
	SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);
}
326 
/*
 * Reserve an interface index for `ifp' and initialise its refcount.
 * Grows the index map (doubling it) when the candidate index falls
 * beyond the current limit.  The index is only reserved in the usedidx
 * bitmap here; the ifp is published later by if_idxmap_insert().
 */
void
if_idxmap_alloc(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int limit;
	unsigned int index, i;

	refcnt_init(&ifp->if_refcnt);

	rw_enter_write(&if_idxmap.lock);

	if (++if_idxmap.count >= USHRT_MAX)
		panic("too many interfaces");

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
	limit = if_idxmap_limit(if_map);

	index = if_idxmap.serial++ & USHRT_MAX;

	if (index >= limit) {
		/* grow the map: build a new doubled array and swap it in */
		struct if_idxmap_dtor *dtor;
		struct ifnet **oif_map;
		unsigned int olimit;
		unsigned char *nusedidx;

		oif_map = if_map;
		olimit = limit;

		limit = olimit * 2;
		if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
		    M_WAITOK | M_ZERO);
		if_map[0] = (struct ifnet *)(uintptr_t)limit;

		/* copy the live entries, taking a reference for the new map */
		for (i = 1; i < olimit; i++) {
			struct ifnet *oifp = SMR_PTR_GET_LOCKED(&oif_map[i]);
			if (oifp == NULL)
				continue;

			/*
			 * nif_map isn't visible yet, so don't need
			 * SMR_PTR_SET_LOCKED and its membar.
			 */
			if_map[i] = if_ref(oifp);
		}

		nusedidx = malloc(if_idxmap_usedidx_size(limit),
		    M_IFADDR, M_WAITOK | M_ZERO);
		memcpy(nusedidx, if_idxmap.usedidx, howmany(olimit, NBBY));

		/* use the old usedidx bitmap as an smr_entry for the if_map */
		dtor = (struct if_idxmap_dtor *)if_idxmap.usedidx;
		if_idxmap.usedidx = nusedidx;

		SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);

		/* free the old map once SMR readers have moved on */
		dtor->map = oif_map;
		smr_init(&dtor->smr);
		smr_call(&dtor->smr, if_idxmap_free, dtor);
	}

	/* pick the next free index */
	for (i = 0; i < USHRT_MAX; i++) {
		if (index != 0 && isclr(if_idxmap.usedidx, index))
			break;

		index = if_idxmap.serial++ & USHRT_MAX;
	}
	KASSERT(index != 0 && index < limit);
	KASSERT(isclr(if_idxmap.usedidx, index));

	setbit(if_idxmap.usedidx, index);
	ifp->if_index = index;

	rw_exit_write(&if_idxmap.lock);
}
402 
403 void
404 if_idxmap_free(void *arg)
405 {
406 	struct if_idxmap_dtor *dtor = arg;
407 	struct ifnet **oif_map = dtor->map;
408 	unsigned int olimit = if_idxmap_limit(oif_map);
409 	unsigned int i;
410 
411 	for (i = 1; i < olimit; i++)
412 		if_put(oif_map[i]);
413 
414 	free(oif_map, M_IFADDR, olimit * sizeof(*oif_map));
415 	free(dtor, M_IFADDR, if_idxmap_usedidx_size(olimit));
416 }
417 
/*
 * Publish `ifp' in the index map slot reserved earlier by
 * if_idxmap_alloc().  After this, if_get(ifp->if_index) can find it.
 */
void
if_idxmap_insert(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERTMSG(index != 0 && index < if_idxmap_limit(if_map),
	    "%s(%p) index %u vs limit %u", ifp->if_xname, ifp, index,
	    if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == NULL);
	KASSERT(isset(if_idxmap.usedidx, index));

	/* commit; the map takes its own reference on ifp */
	SMR_PTR_SET_LOCKED(&if_map[index], if_ref(ifp));

	rw_exit_write(&if_idxmap.lock);
}
439 
/*
 * Remove `ifp' from the index map and release the map's reference.
 * The smr_barrier() ensures no SMR reader can still be dereferencing
 * the slot before if_put() drops the reference.
 */
void
if_idxmap_remove(struct ifnet *ifp)
{
	struct ifnet **if_map;
	unsigned int index = ifp->if_index;

	rw_enter_write(&if_idxmap.lock);

	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);

	KASSERT(index != 0 && index < if_idxmap_limit(if_map));
	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == ifp);
	KASSERT(isset(if_idxmap.usedidx, index));

	SMR_PTR_SET_LOCKED(&if_map[index], NULL);

	if_idxmap.count--;
	clrbit(if_idxmap.usedidx, index);
	/* end of if_idxmap modifications */

	rw_exit_write(&if_idxmap.lock);

	/* wait for readers, then drop the reference the map held */
	smr_barrier();
	if_put(ifp);
}
465 
466 /*
467  * Attach an interface to the
468  * list of "active" interfaces.
469  */
void
if_attachsetup(struct ifnet *ifp)
{
	unsigned long ifidx;

	NET_ASSERT_LOCKED();

	/* every interface belongs to the "all" group */
	if_addgroup(ifp, IFG_ALL);

#ifdef INET6
	nd6_ifattach(ifp);
#endif

#if NPF > 0
	pfi_attach_ifnet(ifp);
#endif

	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
	if_slowtimo(ifp);

	/* make the interface visible via if_get() */
	if_idxmap_insert(ifp);
	KASSERT(if_get(0) == NULL);

	ifidx = ifp->if_index;

	/* tasks carry the index (not the pointer) as their argument */
	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);

	/* Announce the interface. */
	rtm_ifannounce(ifp, IFAN_ARRIVAL);
}
501 
502 /*
503  * Allocate the link level name for the specified interface.  This
504  * is an attachment helper.  It must be called after ifp->if_addrlen
505  * is initialized, which may not be the case when if_attach() is
506  * called.
507  */
void
if_alloc_sadl(struct ifnet *ifp)
{
	unsigned int socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;

	/*
	 * If the interface already has a link name, release it
	 * now.  This is useful for interfaces that can change
	 * link types, and thus switch link names often.
	 */
	if_free_sadl(ifp);

	/* name plus the link-layer address determine the sockaddr size */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* round up to the platform's long-word alignment */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_alen = ifp->if_addrlen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_sadl = sdl;
}
539 
540 /*
541  * Free the link level name for the specified interface.  This is
542  * a detach helper.  This is called from if_detach() or from
543  * link layer type specific detach functions.
544  */
545 void
546 if_free_sadl(struct ifnet *ifp)
547 {
548 	if (ifp->if_sadl == NULL)
549 		return;
550 
551 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
552 	ifp->if_sadl = NULL;
553 }
554 
/*
 * Like if_attach(), but insert at the head of the global interface
 * list instead of the tail.
 */
void
if_attachhead(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_HEAD(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
564 
/*
 * Attach an interface: common initialisation, append it to the global
 * interface list and make it visible to the stack.
 */
void
if_attach(struct ifnet *ifp)
{
	if_attach_common(ifp);
	NET_LOCK();
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_list);
	if_attachsetup(ifp);
	NET_UNLOCK();
}
574 
575 void
576 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
577 {
578 	struct ifqueue **map;
579 	struct ifqueue *ifq;
580 	int i;
581 
582 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
583 	KASSERT(nqs != 0);
584 
585 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
586 
587 	ifp->if_snd.ifq_softc = NULL;
588 	map[0] = &ifp->if_snd;
589 
590 	for (i = 1; i < nqs; i++) {
591 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
592 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
593 		ifq_init(ifq, ifp, i);
594 		map[i] = ifq;
595 	}
596 
597 	ifp->if_ifqs = map;
598 	ifp->if_nifqs = nqs;
599 }
600 
601 void
602 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
603 {
604 	struct ifiqueue **map;
605 	struct ifiqueue *ifiq;
606 	unsigned int i;
607 
608 	KASSERT(niqs != 0);
609 
610 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
611 
612 	ifp->if_rcv.ifiq_softc = NULL;
613 	map[0] = &ifp->if_rcv;
614 
615 	for (i = 1; i < niqs; i++) {
616 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
617 		ifiq_init(ifiq, ifp, i);
618 		map[i] = ifiq;
619 	}
620 
621 	ifp->if_iqs = map;
622 	ifp->if_niqs = niqs;
623 }
624 
/*
 * Driver-independent attach work: initialise the interface's lists,
 * queues and default method pointers, and allocate its index.
 */
void
if_attach_common(struct ifnet *ifp)
{
	KASSERT(ifp->if_ioctl != NULL);

	TAILQ_INIT(&ifp->if_addrlist);
	TAILQ_INIT(&ifp->if_maddrlist);
	TAILQ_INIT(&ifp->if_groups);

	/* legacy drivers get the if_start compatibility shim */
	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
		KASSERTMSG(ifp->if_qstart == NULL,
		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
		ifp->if_qstart = if_qstart_compat;
	} else {
		KASSERTMSG(ifp->if_start == NULL,
		    "%s: if_start set with MPSAFE set", ifp->if_xname);
		KASSERTMSG(ifp->if_qstart != NULL,
		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
	}

	if_idxmap_alloc(ifp);

	/* a single transmit queue by default; see if_attach_queues() */
	ifq_init(&ifp->if_snd, ifp, 0);

	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
	ifp->if_nifqs = 1;
	if (ifp->if_txmit == 0)
		ifp->if_txmit = IF_TXMIT_DEFAULT;

	/* a single input queue by default; see if_attach_iqueues() */
	ifiq_init(&ifp->if_rcv, ifp, 0);

	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
	ifp->if_niqs = 1;

	TAILQ_INIT(&ifp->if_addrhooks);
	TAILQ_INIT(&ifp->if_linkstatehooks);
	TAILQ_INIT(&ifp->if_detachhooks);

	/* fill in defaults for methods the driver did not provide */
	if (ifp->if_rtrequest == NULL)
		ifp->if_rtrequest = if_rtrequest_dummy;
	if (ifp->if_enqueue == NULL)
		ifp->if_enqueue = if_enqueue_ifq;
#if NBPFILTER > 0
	if (ifp->if_bpf_mtap == NULL)
		ifp->if_bpf_mtap = bpf_mtap_ether;
#endif
	ifp->if_llprio = IFQ_DEFPRIO;
}
675 
void
if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
{
	/*
	 * Only switch the ifq_ops on the first ifq on an interface.
	 *
	 * The only ifq_ops we provide are priq and hfsc, and hfsc only
	 * works on a single ifq.  Because the code uses the ifq_ops on
	 * the first ifq (if_snd) to select a queue for an mbuf, by
	 * switching only the first one we change both the algorithm
	 * and force the routing of all new packets to it.
	 */
	ifq_attach(&ifp->if_snd, newops, args);
}
690 
/*
 * Legacy transmit entry point.  Only valid when the compatibility
 * qstart shim is installed, i.e. the interface is not IFXF_MPSAFE
 * (see if_attach_common()).
 */
void
if_start(struct ifnet *ifp)
{
	KASSERT(ifp->if_qstart == if_qstart_compat);
	if_qstart_compat(&ifp->if_snd);
}
void
if_qstart_compat(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	int s;

	/*
	 * the stack assumes that an interface can have multiple
	 * transmit rings, but a lot of drivers are still written
	 * so that interfaces and send rings have a 1:1 mapping.
	 * this provides compatibility between the stack and the older
	 * drivers by translating from the only queue they have
	 * (ifp->if_snd) back to the interface and calling if_start.
	 */

	/* legacy if_start routines expect the kernel lock and splnet */
	KERNEL_LOCK();
	s = splnet();
	(*ifp->if_start)(ifp);
	splx(s);
	KERNEL_UNLOCK();
}
718 
/*
 * Hand an outgoing packet to the interface's enqueue method, after
 * giving pf and bridge(4) a chance to intercept it.  Consumes the
 * mbuf in all cases; returns 0 or an errno from the chosen path.
 */
int
if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);

#if NPF > 0
	/* packets delayed by a pf "delay" rule take a detour */
	if (m->m_pkthdr.pf.delay > 0)
		return (pf_delay_pkt(m, ifp->if_index));
#endif

#if NBRIDGE > 0
	/* M_PROTO1 marks packets the bridge already processed */
	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
		int error;

		error = bridge_enqueue(ifp, m);
		return (error);
	}
#endif

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif	/* NPF > 0 */

	return ((*ifp->if_enqueue)(ifp, m));
}
744 
745 int
746 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
747 {
748 	struct ifqueue *ifq = &ifp->if_snd;
749 	int error;
750 
751 	if (ifp->if_nifqs > 1) {
752 		unsigned int idx;
753 
754 		/*
755 		 * use the operations on the first ifq to pick which of
756 		 * the array gets this mbuf.
757 		 */
758 
759 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
760 		ifq = ifp->if_ifqs[idx];
761 	}
762 
763 	error = ifq_enqueue(ifq, m);
764 	if (error)
765 		return (error);
766 
767 	ifq_start(ifq);
768 
769 	return (0);
770 }
771 
/*
 * Queue received packets on the interface's first input queue for
 * processing by the softnet threads.
 */
void
if_input(struct ifnet *ifp, struct mbuf_list *ml)
{
	ifiq_input(&ifp->if_rcv, ml);
}
777 
/*
 * Loop a packet back into the local input path for address family
 * `af'.  Consumes the mbuf; returns 0, or EAFNOSUPPORT (after freeing
 * the mbuf) for unsupported families.
 */
int
if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
{
	int keepflags;

#if NBPFILTER > 0
	/*
	 * Only send packets to bpf if they are destined to local
	 * addresses.
	 *
	 * if_input_local() is also called for SIMPLEX interfaces to
	 * duplicate packets for local use.  But don't dup them to bpf.
	 */
	if (ifp->if_flags & IFF_LOOPBACK) {
		caddr_t if_bpf = ifp->if_bpf;

		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
	}
#endif
	/* preserve broadcast/multicast markings across the header reset */
	keepflags = m->m_flags & (M_BCAST|M_MCAST);
	m_resethdr(m);
	m->m_flags |= M_LOOP | keepflags;
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	/* the packet counts as both sent and received on this interface */
	ifp->if_opackets++;
	ifp->if_obytes += m->m_pkthdr.len;

	ifp->if_ipackets++;
	ifp->if_ibytes += m->m_pkthdr.len;

	switch (af) {
	case AF_INET:
		ipv4_input(ifp, m);
		break;
#ifdef INET6
	case AF_INET6:
		ipv6_input(ifp, m);
		break;
#endif /* INET6 */
#ifdef MPLS
	case AF_MPLS:
		mpls_input(ifp, m);
		break;
#endif /* MPLS */
	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		m_freem(m);
		return (EAFNOSUPPORT);
	}

	return (0);
}
832 
833 int
834 if_output_ml(struct ifnet *ifp, struct mbuf_list *ml,
835     struct sockaddr *dst, struct rtentry *rt)
836 {
837 	struct mbuf *m;
838 	int error = 0;
839 
840 	while ((m = ml_dequeue(ml)) != NULL) {
841 		error = ifp->if_output(ifp, m, dst, rt);
842 		if (error)
843 			break;
844 	}
845 	if (error)
846 		ml_purge(ml);
847 
848 	return error;
849 }
850 
/*
 * Drain `mq' out the interface and keep the caller's byte/packet
 * accumulator `*total' in sync with what actually left the queue.
 */
int
if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
    struct sockaddr *dst, struct rtentry *rt)
{
	struct mbuf_list ml;
	unsigned int len;
	int error;

	mq_delist(mq, &ml);
	len = ml_len(&ml);
	error = if_output_ml(ifp, &ml, dst, rt);

	/* XXXSMP we also discard if other CPU enqueues */
	if (mq_len(mq) > 0) {
		/* mbuf is back in queue. Discard. */
		atomic_sub_int(total, len + mq_purge(mq));
	} else
		atomic_sub_int(total, len);

	return error;
}
872 
873 int
874 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
875 {
876 	struct ifiqueue *ifiq;
877 	unsigned int flow = 0;
878 
879 	m->m_pkthdr.ph_family = af;
880 	m->m_pkthdr.ph_ifidx = ifp->if_index;
881 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
882 
883 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
884 		flow = m->m_pkthdr.ph_flowid;
885 
886 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
887 
888 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
889 }
890 
/*
 * Run the interface's input routine over every packet on `ml' under
 * the shared netlock.  Called from the softnet threads.
 */
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
	struct mbuf *m;

	if (ml_empty(ml))
		return;

	/* feed packet arrival timing into the entropy pool */
	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

	/*
	 * We grab the shared netlock for packet processing in the softnet
	 * threads.  Packets can regrab the exclusive lock via queues.
	 * ioctl, sysctl, and socket syscall may use shared lock if access is
	 * read only or MP safe.  Usually they hold the exclusive net lock.
	 */

	NET_LOCK_SHARED();
	while ((m = ml_dequeue(ml)) != NULL)
		(*ifp->if_input)(ifp, m);
	NET_UNLOCK_SHARED();
}
914 
/*
 * Input path for virtual interfaces: account the packet, run it past
 * pf and bpf, then pass it to the interface's input routine (or drop
 * it when the interface is in monitor mode).  Consumes the mbuf.
 */
void
if_vinput(struct ifnet *ifp, struct mbuf *m)
{
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

	counters_pkt(ifp->if_counters,
	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		/* a non-zero return means bpf consumed/rejected the packet */
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			m_freem(m);
			return;
		}
	}
#endif

	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR)))
		(*ifp->if_input)(ifp, m);
	else
		m_freem(m);
}
947 
/*
 * Task that dispatches pending legacy netisr soft interrupts.  Bits
 * set in the global `netisr' word select which protocol handlers run.
 */
void
if_netisr(void *unused)
{
	int n, t = 0;

	NET_LOCK();

	while ((n = netisr) != 0) {
		/* Like sched_pause() but with a rwlock dance. */
		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
			NET_UNLOCK();
			yield();
			NET_LOCK();
		}

		atomic_clearbits_int(&netisr, n);

#if NETHER > 0
		if (n & (1 << NETISR_ARP))
			arpintr();
#endif
		if (n & (1 << NETISR_IP))
			ipintr();
#ifdef INET6
		if (n & (1 << NETISR_IPV6))
			ip6intr();
#endif
#if NPPP > 0
		if (n & (1 << NETISR_PPP)) {
			KERNEL_LOCK();
			pppintr();
			KERNEL_UNLOCK();
		}
#endif
#if NBRIDGE > 0
		if (n & (1 << NETISR_BRIDGE))
			bridgeintr();
#endif
#ifdef PIPEX
		if (n & (1 << NETISR_PIPEX))
			pipexintr();
#endif
#if NPPPOE > 0
		if (n & (1 << NETISR_PPPOE)) {
			KERNEL_LOCK();
			pppoeintr();
			KERNEL_UNLOCK();
		}
#endif
		/* accumulate all bits seen so pfsync can run once below */
		t |= n;
	}

#if NPFSYNC > 0
	if (t & (1 << NETISR_PFSYNC)) {
		KERNEL_LOCK();
		pfsyncintr();
		KERNEL_UNLOCK();
	}
#endif

	NET_UNLOCK();
}
1010 
/*
 * Execute every task on `hooks' without holding if_hooks_mtx across
 * the callbacks.  A cursor task (t_func == NULL) is inserted after the
 * current entry before the mutex is dropped, so iteration can resume
 * safely even if the list is modified while the hook runs.
 */
void
if_hooks_run(struct task_list *hooks)
{
	struct task *t, *nt;
	struct task cursor = { .t_func = NULL };
	void (*func)(void *);
	void *arg;

	mtx_enter(&if_hooks_mtx);
	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
		if (t->t_func == NULL) { /* skip cursors */
			nt = TAILQ_NEXT(t, t_entry);
			continue;
		}
		func = t->t_func;
		arg = t->t_arg;

		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
		mtx_leave(&if_hooks_mtx);

		/* run the hook unlocked; it may add/remove hooks itself */
		(*func)(arg);

		mtx_enter(&if_hooks_mtx);
		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
		TAILQ_REMOVE(hooks, &cursor, t_entry);
	}
	mtx_leave(&if_hooks_mtx);
}
1039 
/*
 * Make the interface unreachable: unlink it from the global list and
 * the index map, then wait for all outstanding references to drain.
 */
void
if_remove(struct ifnet *ifp)
{
	/* Remove the interface from the list of all interfaces. */
	NET_LOCK();
	TAILQ_REMOVE(&ifnetlist, ifp, if_list);
	NET_UNLOCK();

	/* Remove the interface from the interface index map. */
	if_idxmap_remove(ifp);

	/* Sleep until the last reference is released. */
	refcnt_finalize(&ifp->if_refcnt, "ifrm");
}
1054 
void
if_deactivate(struct ifnet *ifp)
{
	/*
	 * Call detach hooks from head to tail.  To make sure detach
	 * hooks are executed in the reverse order they were added, all
	 * the hooks have to be added to the head!
	 */

	NET_LOCK();
	if_hooks_run(&ifp->if_detachhooks);
	NET_UNLOCK();
}
1068 
/*
 * Register a detach hook.  Inserted at the head so that hooks run in
 * reverse registration order (see if_deactivate()).
 */
void
if_detachhook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1076 
/* Unregister a previously added detach hook. */
void
if_detachhook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1084 
1085 /*
1086  * Detach an interface from everything in the kernel.  Also deallocate
1087  * private resources.
1088  */
void
if_detach(struct ifnet *ifp)
{
	struct ifaddr *ifa;
	struct ifg_list *ifg;
	int i, s;

	/* Undo pseudo-driver changes. */
	if_deactivate(ifp);

	/* Other CPUs must not have a reference before we start destroying. */
	if_remove(ifp);

	/* from here on, start attempts hit the detached stub */
	ifp->if_qstart = if_detached_qstart;

	/* Wait until the start routines finished. */
	ifq_barrier(&ifp->if_snd);
	ifq_clr_oactive(&ifp->if_snd);

#if NBPFILTER > 0
	bpfdetach(ifp);
#endif

	NET_LOCK();
	s = splnet();
	ifp->if_ioctl = if_detached_ioctl;
	ifp->if_watchdog = NULL;

	/* Remove the watchdog timeout & task */
	timeout_del(&ifp->if_slowtimo);
	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);

	/* Remove the link state task */
	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);

	/* tear the interface out of the protocol layers */
	rti_delete(ifp);
#if NETHER > 0 && defined(NFSCLIENT)
	if (ifp->if_index == revarp_ifidx)
		revarp_ifidx = 0;
#endif
#ifdef MROUTING
	vif_delete(ifp);
#endif
	in_ifdetach(ifp);
#ifdef INET6
	in6_ifdetach(ifp);
#endif
#if NPF > 0
	pfi_detach_ifnet(ifp);
#endif

	/* leave every remaining interface group (at least IFG_ALL) */
	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup(ifp, ifg->ifgl_group->ifg_group);

	if_free_sadl(ifp);

	/* We should not have any address left at this point. */
	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
#ifdef DIAGNOSTIC
		printf("%s: address list non empty\n", ifp->if_xname);
#endif
		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
			ifa_del(ifp, ifa);
			ifa->ifa_ifp = NULL;
			ifafree(ifa);
		}
	}

	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));

#ifdef INET6
	nd6_ifdetach(ifp);
#endif
	splx(s);
	NET_UNLOCK();

	/* Announce that the interface is gone. */
	rtm_ifannounce(ifp, IFAN_DEPARTURE);

	if (ifp->if_counters != NULL)
		if_counters_free(ifp);

	/* destroy all transmit queues; only queues 1..n were malloc'd */
	for (i = 0; i < ifp->if_nifqs; i++)
		ifq_destroy(ifp->if_ifqs[i]);
	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
		for (i = 1; i < ifp->if_nifqs; i++) {
			free(ifp->if_ifqs[i], M_DEVBUF,
			    sizeof(struct ifqueue));
		}
		free(ifp->if_ifqs, M_DEVBUF,
		    sizeof(struct ifqueue *) * ifp->if_nifqs);
	}

	/* same for the input queues */
	for (i = 0; i < ifp->if_niqs; i++)
		ifiq_destroy(ifp->if_iqs[i]);
	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
		for (i = 1; i < ifp->if_niqs; i++) {
			free(ifp->if_iqs[i], M_DEVBUF,
			    sizeof(struct ifiqueue));
		}
		free(ifp->if_iqs, M_DEVBUF,
		    sizeof(struct ifiqueue *) * ifp->if_niqs);
	}
}
1195 
1196 /*
1197  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1198  */
1199 int
1200 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1201 {
1202 	struct ifnet *ifp;
1203 	int connected = 0;
1204 
1205 	ifp = if_get(ifidx);
1206 	if (ifp == NULL)
1207 		return (0);
1208 
1209 	if (ifp0->if_index == ifp->if_index)
1210 		connected = 1;
1211 
1212 #if NBRIDGE > 0
1213 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1214 		connected = 1;
1215 #endif
1216 #if NCARP > 0
1217 	if ((ifp0->if_type == IFT_CARP &&
1218 	    ifp0->if_carpdevidx == ifp->if_index) ||
1219 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1220 		connected = 1;
1221 #endif
1222 
1223 	if_put(ifp);
1224 	return (connected);
1225 }
1226 
/*
 * Create a clone network interface.
 */
int
if_clone_create(const char *name, int rdomain)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int unit, ret;

	/* Split ``name'' into cloner name and unit number. */
	ifc = if_clone_lookup(name, &unit);
	if (ifc == NULL)
		return (EINVAL);

	rw_enter_write(&if_cloners_lock);

	/* Fail if an interface with that exact name already exists. */
	if ((ifp = if_unit(name)) != NULL) {
		ret = EEXIST;
		goto unlock;
	}

	ret = (*ifc->ifc_create)(ifc, unit);

	if (ret != 0 || (ifp = if_unit(name)) == NULL)
		goto unlock;

	NET_LOCK();
	/* Every clone becomes a member of its cloner's group. */
	if_addgroup(ifp, ifc->ifc_name);
	if (rdomain != 0)
		if_setrdomain(ifp, rdomain);
	NET_UNLOCK();
unlock:
	rw_exit_write(&if_cloners_lock);
	if_put(ifp);	/* if_put(NULL) is a no-op */

	return (ret);
}
1264 
/*
 * Destroy a clone network interface.
 */
int
if_clone_destroy(const char *name)
{
	struct if_clone *ifc;
	struct ifnet *ifp;
	int ret;

	ifc = if_clone_lookup(name, NULL);
	if (ifc == NULL)
		return (EINVAL);

	/* Cloners without a destroy handler cannot be destroyed. */
	if (ifc->ifc_destroy == NULL)
		return (EOPNOTSUPP);

	rw_enter_write(&if_cloners_lock);

	/* Look the interface up by name; no reference is taken here. */
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (strcmp(ifp->if_xname, name) == 0)
			break;
	}
	if (ifp == NULL) {
		rw_exit_write(&if_cloners_lock);
		return (ENXIO);
	}

	/* Take the interface down before tearing it apart. */
	NET_LOCK();
	if (ifp->if_flags & IFF_UP) {
		int s;
		s = splnet();
		if_down(ifp);
		splx(s);
	}
	NET_UNLOCK();
	ret = (*ifc->ifc_destroy)(ifp);

	rw_exit_write(&if_cloners_lock);

	return (ret);
}
1307 
/*
 * Look up a network interface cloner.
 *
 * Splits ``name'' (e.g. "vlan5") into cloner name and unit number and
 * stores the unit in *unitp when unitp is not NULL.  Returns the
 * matching cloner, or NULL when the name cannot be parsed or no
 * registered cloner matches.
 */
struct if_clone *
if_clone_lookup(const char *name, int *unitp)
{
	struct if_clone *ifc;
	const char *cp;
	int unit;

	/* separate interface name from unit */
	for (cp = name;
	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
	    cp++)
		continue;

	if (cp == name || cp - name == IFNAMSIZ || !*cp)
		return (NULL);	/* No name or unit number */

	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
		return (NULL);	/* unit number 0 padded */

	/* Match the name prefix against the registered cloners. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (strlen(ifc->ifc_name) == cp - name &&
		    !strncmp(name, ifc->ifc_name, cp - name))
			break;
	}

	if (ifc == NULL)
		return (NULL);

	/* Parse the unit, rejecting non-digits and int overflow. */
	unit = 0;
	while (cp - name < IFNAMSIZ && *cp) {
		if (*cp < '0' || *cp > '9' ||
		    unit > (INT_MAX - (*cp - '0')) / 10) {
			/* Bogus unit number. */
			return (NULL);
		}
		unit = (unit * 10) + (*cp++ - '0');
	}

	if (unitp != NULL)
		*unitp = unit;
	return (ifc);
}
1353 
/*
 * Register a network interface cloner.
 */
void
if_clone_attach(struct if_clone *ifc)
{
	/*
	 * we are called at kernel boot by main(), when pseudo devices are
	 * being attached. The main() is the only guy which may alter the
	 * if_cloners. While system is running and main() is done with
	 * initialization, the if_cloners becomes immutable.
	 */
	KASSERT(pdevinit_done == 0);
	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
	/* Keep the count in sync for SIOCIFGCLONERS (if_clone_list()). */
	if_cloners_count++;
}
1370 
/*
 * Provide list of interface cloners to userspace.
 */
int
if_clone_list(struct if_clonereq *ifcr)
{
	char outbuf[IFNAMSIZ], *dst;
	struct if_clone *ifc;
	int count, error = 0;

	if ((dst = ifcr->ifcr_buffer) == NULL) {
		/* Just asking how many there are. */
		ifcr->ifcr_total = if_cloners_count;
		return (0);
	}

	if (ifcr->ifcr_count < 0)
		return (EINVAL);

	/* Report the total even when the user buffer holds fewer slots. */
	ifcr->ifcr_total = if_cloners_count;
	count = MIN(if_cloners_count, ifcr->ifcr_count);

	/* Copy out one zero-padded IFNAMSIZ slot per cloner. */
	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
		if (count == 0)
			break;
		bzero(outbuf, sizeof outbuf);
		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
		error = copyout(outbuf, dst, IFNAMSIZ);
		if (error)
			break;
		count--;
		dst += IFNAMSIZ;
	}

	return (error);
}
1407 
/*
 * set queue congestion marker
 */
void
if_congestion(void)
{
	extern int ticks;

	/* Record when congestion was last observed. */
	ifq_congestion = ticks;
}
1418 
/*
 * Returns non-zero if the congestion marker was set within the last
 * hz/100 ticks.
 */
int
if_congested(void)
{
	extern int ticks;
	int diff;

	diff = ticks - ifq_congestion;
	if (diff < 0) {
		/* The tick counter wrapped: push the marker into the past. */
		ifq_congestion = ticks - hz;
		return (0);
	}

	return (diff <= (hz / 100));
}
1433 
/* Compare two sockaddrs for byte equality, using a1's sa_len. */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1437 
/*
 * Locate an interface based on a complete address.
 */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	u_int rdomain;

	NET_ASSERT_LOCKED();

	/* Only interfaces in the routing domain of ``rtableid'' match. */
	rdomain = rtable_l2(rtableid);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;

			if (equal(addr, ifa->ifa_addr)) {
				return (ifa);
			}
		}
	}
	return (NULL);
}
1466 
/*
 * Locate the point to point interface with a given destination address.
 */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	NET_ASSERT_LOCKED();

	rdomain = rtable_l2(rdomain);
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if (ifp->if_rdomain != rdomain)
			continue;
		/* Only point-to-point interfaces carry a dstaddr. */
		if (ifp->if_flags & IFF_POINTOPOINT) {
			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
				if (ifa->ifa_addr->sa_family !=
				    addr->sa_family || ifa->ifa_dstaddr == NULL)
					continue;
				if (equal(addr, ifa->ifa_dstaddr)) {
					return (ifa);
				}
			}
		}
	}
	return (NULL);
}
1495 
/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;	/* first AF match: fallback */
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
			/* No mask: only an exact (or dstaddr) match will do. */
			if (equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		/* Compare addr and ifa_addr bytewise under ifa_netmask. */
		cp = addr->sa_data;
		cp2 = ifa->ifa_addr->sa_data;
		cp3 = ifa->ifa_netmask->sa_data;
		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
		for (; cp3 < cplim; cp3++)
			if ((*cp++ ^ *cp2++) & *cp3)
				break;
		if (cp3 == cplim)
			return (ifa);	/* all masked bytes matched */
	}
	return (ifa_maybe);
}
1534 
/* No-op rtrequest handler for interfaces that need no special action. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1539 
/*
 * Default action when installing a local route on a point-to-point
 * interface.
 */
void
p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* Find the address the local route was installed for. */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/*
		 * Only clear RTF_LLINFO below if the rdomain's loopback
		 * interface has an address of the same family.
		 */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1586 
/*
 * bpf tap routine for point-to-point interfaces: hand the packet to
 * bpf tagged with its address family.  Returns 0 when bpf is not
 * compiled in.
 */
int
p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
{
#if NBPFILTER > 0
	return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
#else
	return (0);
#endif
}
1596 
1597 void
1598 p2p_input(struct ifnet *ifp, struct mbuf *m)
1599 {
1600 	void (*input)(struct ifnet *, struct mbuf *);
1601 
1602 	switch (m->m_pkthdr.ph_family) {
1603 	case AF_INET:
1604 		input = ipv4_input;
1605 		break;
1606 #ifdef INET6
1607 	case AF_INET6:
1608 		input = ipv6_input;
1609 		break;
1610 #endif
1611 #ifdef MPLS
1612 	case AF_MPLS:
1613 		input = mpls_input;
1614 		break;
1615 #endif
1616 	default:
1617 		m_freem(m);
1618 		return;
1619 	}
1620 
1621 	(*input)(ifp, m);
1622 }
1623 
/*
 * Bring down all interfaces
 */
void
if_downall(void)
{
	struct ifreq ifrq;	/* XXX only partly built */
	struct ifnet *ifp;

	NET_LOCK();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		if ((ifp->if_flags & IFF_UP) == 0)
			continue;
		if_down(ifp);
		/* Tell the driver its flags changed. */
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
	NET_UNLOCK();
}
1643 
/*
 * Mark an interface down and notify protocols of
 * the transition.
 */
void
if_down(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	/* Drop all pending output. */
	ifq_purge(&ifp->if_snd);

	if_linkstate(ifp);
}
1659 
/*
 * Mark an interface up and notify protocols of
 * the transition.
 */
void
if_up(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET6
	/* Userland expects the kernel to set ::1 on default lo(4). */
	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
		in6_ifattach(ifp);
#endif

	if_linkstate(ifp);
}
1680 
/*
 * Notify userland, the routing table and hooks owner of
 * a link-state transition.
 */
void
if_linkstate_task(void *xifidx)
{
	/* The task carries an index, not a pointer: the interface may
	 * be detached before the task runs; if_get() handles that. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;

	KERNEL_LOCK();
	NET_LOCK();

	ifp = if_get(ifidx);
	if (ifp != NULL)
		if_linkstate(ifp);
	if_put(ifp);

	NET_UNLOCK();
	KERNEL_UNLOCK();
}
1702 
/* Propagate a link-state change of ``ifp'' to all interested parties. */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	/* Notify routing socket listeners and update attached routes. */
	rtm_ifchg(ifp);
	rt_if_track(ifp);

	/* Run the tasks registered via if_linkstatehook_add(). */
	if_hooks_run(&ifp->if_linkstatehooks);
}
1713 
/* Register a task to run on link-state changes of ``ifp''. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1721 
/* Unregister a link-state task previously added with _add(). */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1729 
/*
 * Schedule a link state change task.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	/* Defer the actual notification to the interface's net taskq. */
	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
}
1738 
/*
 * Handle interface watchdog timer routine.  Called
 * from softclock, we decrement timer (if set) and
 * call the appropriate interface routine on expiration.
 */
void
if_slowtimo(void *arg)
{
	struct ifnet *ifp = arg;
	int s = splnet();

	if (ifp->if_watchdog) {
		/* Schedule the watchdog task once the timer hits zero. */
		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
		/* Re-arm while a watchdog routine is installed. */
		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
	}
	splx(s);
}
1757 
/*
 * Task running the driver's watchdog routine.  The task argument is
 * the interface index, so the interface may already be detached by
 * the time this runs; if_get() handles that case.
 */
void
if_watchdog_task(void *xifidx)
{
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1778 
1779 /*
1780  * Map interface name to interface structure pointer.
1781  */
1782 struct ifnet *
1783 if_unit(const char *name)
1784 {
1785 	struct ifnet *ifp;
1786 
1787 	KERNEL_ASSERT_LOCKED();
1788 
1789 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1790 		if (strcmp(ifp->if_xname, name) == 0) {
1791 			if_ref(ifp);
1792 			return (ifp);
1793 		}
1794 	}
1795 
1796 	return (NULL);
1797 }
1798 
/*
 * Map interface index to interface structure pointer.
 *
 * Takes a reference on the returned interface; the caller must
 * release it with if_put().  Returns NULL for index 0 or when no
 * interface is attached at that index.
 */
struct ifnet *
if_get(unsigned int index)
{
	struct ifnet **if_map;
	struct ifnet *ifp = NULL;

	if (index == 0)
		return (NULL);

	/* The index map is read under SMR instead of taking a lock. */
	smr_read_enter();
	if_map = SMR_PTR_GET(&if_idxmap.map);
	if (index < if_idxmap_limit(if_map)) {
		ifp = SMR_PTR_GET(&if_map[index]);
		if (ifp != NULL) {
			KASSERT(ifp->if_index == index);
			/* Reference taken inside the SMR section, so the
			 * interface cannot go away underneath us. */
			if_ref(ifp);
		}
	}
	smr_read_leave();

	return (ifp);
}
1824 
/* Take a reference on ``ifp''; returns ``ifp'' for convenience. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1832 
/* Release a reference on ``ifp''; NULL is accepted as a no-op. */
void
if_put(struct ifnet *ifp)
{
	if (ifp == NULL)
		return;

	refcnt_rele_wake(&ifp->if_refcnt);
}
1841 
/*
 * Set the link-layer address of ``ifp'' to the ETHER_ADDR_LEN bytes
 * at ``lladdr''.
 *
 * NOTE(review): the cast assumes ``ifp'' is the first member of a
 * struct arpcom, i.e. an Ethernet-like interface -- callers must
 * guarantee this (see the if_type checks in ifioctl()).
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1853 
/*
 * Create routing domain ``rdomain'', including its loopback interface
 * lo<rdomain>.  Returns EEXIST when the rdomain cannot be set up
 * because the routing table is already in use.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if ((error = rtable_add(rdomain)) != 0)
		return (error);
	/* A routing table that already has routes cannot become an rdomain. */
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	/* EEXIST is fine when ``ifp'' itself is that loopback interface. */
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1883 
/*
 * Move ``ifp'' into routing domain ``rdomain''.
 *
 * When the rdomain actually changes, existing addresses and
 * per-protocol state are torn down and the interface is briefly
 * brought down and back up so protocols re-initialize.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* The loopback interface anchoring an rdomain may not leave it. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1947 
/*
 * Interface ioctls.
 *
 * Handles the modifying interface ioctls; read-only commands are
 * dispatched to ifioctl_get().  Most commands require root via
 * suser() before touching the interface.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct if_afreq *ifar = (struct if_afreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int s, error = 0, oif_xflags;
	size_t bytesdone;
	unsigned short oif_flags;

	/* Commands that do not operate on a single existing interface. */
	switch (cmd) {
	case SIOCIFCREATE:
		if ((error = suser(p)) != 0)
			return (error);
		KERNEL_LOCK();
		error = if_clone_create(ifr->ifr_name, 0);
		KERNEL_UNLOCK();
		return (error);
	case SIOCIFDESTROY:
		if ((error = suser(p)) != 0)
			return (error);
		KERNEL_LOCK();
		error = if_clone_destroy(ifr->ifr_name);
		KERNEL_UNLOCK();
		return (error);
	case SIOCSIFGATTR:
		if ((error = suser(p)) != 0)
			return (error);
		KERNEL_LOCK();
		NET_LOCK();
		error = if_setgroupattribs(data);
		NET_UNLOCK();
		KERNEL_UNLOCK();
		return (error);
	case SIOCGIFCONF:
	case SIOCIFGCLONERS:
	case SIOCGIFGMEMB:
	case SIOCGIFGATTR:
	case SIOCGIFGLIST:
	case SIOCGIFFLAGS:
	case SIOCGIFXFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHARDMTU:
	case SIOCGIFDATA:
	case SIOCGIFDESCR:
	case SIOCGIFRTLABEL:
	case SIOCGIFPRIORITY:
	case SIOCGIFRDOMAIN:
	case SIOCGIFGROUP:
	case SIOCGIFLLPRIO:
		/* Read-only commands take the shared netlock only. */
		error = ifioctl_get(cmd, data);
		return (error);
	}

	KERNEL_LOCK();

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL) {
		KERNEL_UNLOCK();
		return (ENXIO);
	}
	/* Remember the old flags so changes can be detected below. */
	oif_flags = ifp->if_flags;
	oif_xflags = ifp->if_xflags;

	switch (cmd) {
	case SIOCIFAFATTACH:
	case SIOCIFAFDETACH:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		switch (ifar->ifar_af) {
		case AF_INET:
			/* attach is a noop for AF_INET */
			if (cmd == SIOCIFAFDETACH)
				in_ifdetach(ifp);
			break;
#ifdef INET6
		case AF_INET6:
			if (cmd == SIOCIFAFATTACH)
				error = in6_ifattach(ifp);
			else
				in6_ifdetach(ifp);
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
		}
		NET_UNLOCK();
		break;

	case SIOCSIFXFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
#ifdef INET6
		/* Turning on any autoconf6 flag attaches IPv6 first. */
		if ((ISSET(ifr->ifr_flags, IFXF_AUTOCONF6) ||
		    ISSET(ifr->ifr_flags, IFXF_AUTOCONF6TEMP)) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6) &&
		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)) {
			error = in6_ifattach(ifp);
			if (error != 0) {
				NET_UNLOCK();
				break;
			}
		}

		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags |= IFXF_INET6_NOSOII;

		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
			ifp->if_xflags &= ~IFXF_INET6_NOSOII;

#endif	/* INET6 */

#ifdef MPLS
		/* Toggling MPLS swaps mpls_output in or out as if_output. */
		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags |= IFXF_MPLS;
			ifp->if_ll_output = ifp->if_output;
			ifp->if_output = mpls_output;
			splx(s);
		}
		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
			s = splnet();
			ifp->if_xflags &= ~IFXF_MPLS;
			ifp->if_output = ifp->if_ll_output;
			ifp->if_ll_output = NULL;
			splx(s);
		}
#endif	/* MPLS */

#ifndef SMALL_KERNEL
		if (ifp->if_capabilities & IFCAP_WOL) {
			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags |= IFXF_WOL;
				error = ifp->if_wol(ifp, 1);
				splx(s);
			}
			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
				s = splnet();
				ifp->if_xflags &= ~IFXF_WOL;
				error = ifp->if_wol(ifp, 0);
				splx(s);
			}
		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
			ifr->ifr_flags &= ~IFXF_WOL;
			error = ENOTSUP;
		}
#endif
		if (ISSET(ifr->ifr_flags, IFXF_LRO) !=
		    ISSET(ifp->if_xflags, IFXF_LRO))
			error = ifsetlro(ifp, ISSET(ifr->ifr_flags, IFXF_LRO));

		if (error == 0)
			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
				(ifr->ifr_flags & ~IFXF_CANTCHANGE);

		/*
		 * Enabling an autoconf flag on a downed interface
		 * implicitly brings it up via the SIOCSIFFLAGS path.
		 */
		if (!ISSET(ifp->if_flags, IFF_UP) &&
		    ((!ISSET(oif_xflags, IFXF_AUTOCONF4) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF4)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6)) ||
		    (!ISSET(oif_xflags, IFXF_AUTOCONF6TEMP) &&
		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)))) {
			ifr->ifr_flags = ifp->if_flags | IFF_UP;
			goto forceup;
		}

		NET_UNLOCK();
		break;

	case SIOCSIFFLAGS:
		if ((error = suser(p)) != 0)
			break;

		NET_LOCK();
forceup:
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(ifr->ifr_flags & ~IFF_CANTCHANGE);
		error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, data);
		if (error != 0) {
			/* Driver refused: restore the previous flags. */
			ifp->if_flags = oif_flags;
			if (cmd == SIOCSIFXFLAGS)
				ifp->if_xflags = oif_xflags;
		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
			s = splnet();
			if (ISSET(ifp->if_flags, IFF_UP))
				if_up(ifp);
			else
				if_down(ifp);
			splx(s);
		}
		NET_UNLOCK();
		break;

	case SIOCSIFMETRIC:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		ifp->if_metric = ifr->ifr_metric;
		NET_UNLOCK();
		break;

	case SIOCSIFMTU:
		if ((error = suser(p)) != 0)
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		NET_UNLOCK();
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFDESCR:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifdescrbuf,
		    IFDESCRSIZE, &bytesdone);
		if (error == 0) {
			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
		}
		break;

	case SIOCSIFRTLABEL:
		if ((error = suser(p)) != 0)
			break;
		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
		    RTLABEL_LEN, &bytesdone);
		if (error == 0) {
			rtlabel_unref(ifp->if_rtlabelid);
			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
		}
		break;

	case SIOCSIFPRIORITY:
		if ((error = suser(p)) != 0)
			break;
		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
			error = EINVAL;
			break;
		}
		ifp->if_priority = ifr->ifr_metric;
		break;

	case SIOCSIFRDOMAIN:
		if ((error = suser(p)) != 0)
			break;
		/* EEXIST means the rdomain is already there: keep going. */
		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
		if (!error || error == EEXIST) {
			NET_LOCK();
			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
			NET_UNLOCK();
		}
		break;

	case SIOCAIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = if_addgroup(ifp, ifgr->ifgr_group);
		if (error == 0) {
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
		}
		NET_UNLOCK();
		break;

	case SIOCDIFGROUP:
		if ((error = suser(p)))
			break;
		NET_LOCK();
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == ENOTTY)
			error = 0;
		if (error == 0)
			error = if_delgroup(ifp, ifgr->ifgr_group);
		NET_UNLOCK();
		break;

	case SIOCSIFLLADDR:
		if ((error = suser(p)))
			break;
		/* Reject non-Ethernet-sized and multicast addresses. */
		if ((ifp->if_sadl == NULL) ||
		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		switch (ifp->if_type) {
		case IFT_ETHER:
		case IFT_CARP:
		case IFT_XETHER:
		case IFT_ISO88025:
			error = (*ifp->if_ioctl)(ifp, cmd, data);
			if (error == ENOTTY)
				error = 0;
			if (error == 0)
				error = if_setlladdr(ifp,
				    ifr->ifr_addr.sa_data);
			break;
		default:
			error = ENODEV;
		}

		if (error == 0)
			ifnewlladdr(ifp);
		NET_UNLOCK();
		if (error == 0)
			rtm_ifchg(ifp);
		break;

	case SIOCSIFLLPRIO:
		if ((error = suser(p)))
			break;
		if (ifr->ifr_llprio < IFQ_MINPRIO ||
		    ifr->ifr_llprio > IFQ_MAXPRIO) {
			error = EINVAL;
			break;
		}
		NET_LOCK();
		ifp->if_llprio = ifr->ifr_llprio;
		NET_UNLOCK();
		break;

	case SIOCGIFSFFPAGE:
		error = suser(p);
		if (error != 0)
			break;

		error = if_sffpage_check(data);
		if (error != 0)
			break;

		/* don't take NET_LOCK because i2c reads take a long time */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSIFMEDIA:
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		/* net lock is not needed */
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		break;

	case SIOCSETKALIVE:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
	case SIOCSLIFPHYTTL:
	case SIOCSLIFPHYDF:
	case SIOCSLIFPHYECN:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSTXHPRIO:
	case SIOCSRXHPRIO:
	case SIOCSIFPAIR:
	case SIOCSIFPARENT:
	case SIOCDIFPARENT:
	case SIOCSETMPWCFG:
	case SIOCSETLABEL:
	case SIOCDELLABEL:
	case SIOCSPWE3CTRLWORD:
	case SIOCSPWE3FAT:
	case SIOCSPWE3NEIGHBOR:
	case SIOCDPWE3NEIGHBOR:
#if NBRIDGE > 0
	case SIOCBRDGADD:
	case SIOCBRDGDEL:
	case SIOCBRDGSIFFLGS:
	case SIOCBRDGSCACHE:
	case SIOCBRDGADDS:
	case SIOCBRDGDELS:
	case SIOCBRDGSADDR:
	case SIOCBRDGSTO:
	case SIOCBRDGDADDR:
	case SIOCBRDGFLUSH:
	case SIOCBRDGADDL:
	case SIOCBRDGSIFPROT:
	case SIOCBRDGARL:
	case SIOCBRDGFRL:
	case SIOCBRDGSPRI:
	case SIOCBRDGSHT:
	case SIOCBRDGSFD:
	case SIOCBRDGSMA:
	case SIOCBRDGSIFPRIO:
	case SIOCBRDGSIFCOST:
	case SIOCBRDGSTXHC:
	case SIOCBRDGSPROTO:
#endif
		if ((error = suser(p)) != 0)
			break;
		/* FALLTHROUGH */
	default:
		/* Give the protocol's usrreq control hook the first shot. */
		error = pru_control(so, cmd, data, ifp);
		if (error != EOPNOTSUPP)
			break;
		/* Address-changing commands still require root. */
		switch (cmd) {
		case SIOCAIFADDR:
		case SIOCDIFADDR:
		case SIOCSIFADDR:
		case SIOCSIFNETMASK:
		case SIOCSIFDSTADDR:
		case SIOCSIFBRDADDR:
#ifdef INET6
		case SIOCAIFADDR_IN6:
		case SIOCDIFADDR_IN6:
#endif
			error = suser(p);
			break;
		default:
			error = 0;
			break;
		}
		if (error)
			break;
		NET_LOCK();
		error = ((*ifp->if_ioctl)(ifp, cmd, data));
		NET_UNLOCK();
		break;
	}

	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags) {
		/* if_up() and if_down() already sent an update, skip here */
		if (((oif_flags ^ ifp->if_flags) & IFF_UP) == 0)
			rtm_ifchg(ifp);
	}

	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
		getmicrotime(&ifp->if_lastchange);

	KERNEL_UNLOCK();

	if_put(ifp);

	return (error);
}
2407 
/*
 * Read-only interface ioctls, split out of ifioctl().  Only the
 * commands ifioctl() forwards here may appear; anything else is a
 * programming error and panics.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;

	/* Commands that do not reference a single interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_LOCK_SHARED();
		error = ifconf(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_LOCK_SHARED();
		error = if_getgroupmembers(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGATTR:
		NET_LOCK_SHARED();
		error = if_getgroupattribs(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGLIST:
		NET_LOCK_SHARED();
		error = if_getgrouplist(data);
		NET_UNLOCK_SHARED();
		return (error);
	}

	KERNEL_LOCK();

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL) {
		KERNEL_UNLOCK();
		return (ENXIO);
	}

	NET_LOCK_SHARED();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* Report OACTIVE from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Internal flags are not exposed to userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid && rtlabel_id2name(ifp->if_rtlabelid,
		    ifrtlabelbuf, RTLABEL_LEN) != NULL) {
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* ifioctl() must only forward the commands listed above. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_UNLOCK_SHARED();

	KERNEL_UNLOCK();

	if_put(ifp);

	return (error);
}
2527 
2528 static int
2529 if_sffpage_check(const caddr_t data)
2530 {
2531 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2532 
2533 	switch (sff->sff_addr) {
2534 	case IFSFF_ADDR_EEPROM:
2535 	case IFSFF_ADDR_DDM:
2536 		break;
2537 	default:
2538 		return (EINVAL);
2539 	}
2540 
2541 	return (0);
2542 }
2543 
2544 int
2545 if_txhprio_l2_check(int hdrprio)
2546 {
2547 	switch (hdrprio) {
2548 	case IF_HDRPRIO_PACKET:
2549 		return (0);
2550 	default:
2551 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2552 			return (0);
2553 		break;
2554 	}
2555 
2556 	return (EINVAL);
2557 }
2558 
2559 int
2560 if_txhprio_l3_check(int hdrprio)
2561 {
2562 	switch (hdrprio) {
2563 	case IF_HDRPRIO_PACKET:
2564 	case IF_HDRPRIO_PAYLOAD:
2565 		return (0);
2566 	default:
2567 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2568 			return (0);
2569 		break;
2570 	}
2571 
2572 	return (EINVAL);
2573 }
2574 
2575 int
2576 if_rxhprio_l2_check(int hdrprio)
2577 {
2578 	switch (hdrprio) {
2579 	case IF_HDRPRIO_PACKET:
2580 	case IF_HDRPRIO_OUTER:
2581 		return (0);
2582 	default:
2583 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2584 			return (0);
2585 		break;
2586 	}
2587 
2588 	return (EINVAL);
2589 }
2590 
2591 int
2592 if_rxhprio_l3_check(int hdrprio)
2593 {
2594 	switch (hdrprio) {
2595 	case IF_HDRPRIO_PACKET:
2596 	case IF_HDRPRIO_PAYLOAD:
2597 	case IF_HDRPRIO_OUTER:
2598 		return (0);
2599 	default:
2600 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2601 			return (0);
2602 		break;
2603 	}
2604 
2605 	return (EINVAL);
2606 }
2607 
2608 /*
2609  * Return interface configuration
2610  * of system.  List may be used
2611  * in later ioctl's (above) to get
2612  * other information.
2613  */
2614 int
2615 ifconf(caddr_t data)
2616 {
2617 	struct ifconf *ifc = (struct ifconf *)data;
2618 	struct ifnet *ifp;
2619 	struct ifaddr *ifa;
2620 	struct ifreq ifr, *ifrp;
2621 	int space = ifc->ifc_len, error = 0;
2622 
2623 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2624 	if (space == 0) {
2625 		TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2626 			struct sockaddr *sa;
2627 
2628 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2629 				space += sizeof (ifr);
2630 			else
2631 				TAILQ_FOREACH(ifa,
2632 				    &ifp->if_addrlist, ifa_list) {
2633 					sa = ifa->ifa_addr;
2634 					if (sa->sa_len > sizeof(*sa))
2635 						space += sa->sa_len -
2636 						    sizeof(*sa);
2637 					space += sizeof(ifr);
2638 				}
2639 		}
2640 		ifc->ifc_len = space;
2641 		return (0);
2642 	}
2643 
2644 	ifrp = ifc->ifc_req;
2645 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2646 		if (space < sizeof(ifr))
2647 			break;
2648 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2649 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2650 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2651 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2652 			    sizeof(ifr));
2653 			if (error)
2654 				break;
2655 			space -= sizeof (ifr), ifrp++;
2656 		} else
2657 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2658 				struct sockaddr *sa = ifa->ifa_addr;
2659 
2660 				if (space < sizeof(ifr))
2661 					break;
2662 				if (sa->sa_len <= sizeof(*sa)) {
2663 					ifr.ifr_addr = *sa;
2664 					error = copyout((caddr_t)&ifr,
2665 					    (caddr_t)ifrp, sizeof (ifr));
2666 					ifrp++;
2667 				} else {
2668 					space -= sa->sa_len - sizeof(*sa);
2669 					if (space < sizeof (ifr))
2670 						break;
2671 					error = copyout((caddr_t)&ifr,
2672 					    (caddr_t)ifrp,
2673 					    sizeof(ifr.ifr_name));
2674 					if (error == 0)
2675 						error = copyout((caddr_t)sa,
2676 						    (caddr_t)&ifrp->ifr_addr,
2677 						    sa->sa_len);
2678 					ifrp = (struct ifreq *)(sa->sa_len +
2679 					    (caddr_t)&ifrp->ifr_addr);
2680 				}
2681 				if (error)
2682 					break;
2683 				space -= sizeof (ifr);
2684 			}
2685 	}
2686 	ifc->ifc_len -= space;
2687 	return (error);
2688 }
2689 
2690 void
2691 if_counters_alloc(struct ifnet *ifp)
2692 {
2693 	KASSERT(ifp->if_counters == NULL);
2694 
2695 	ifp->if_counters = counters_alloc(ifc_ncounters);
2696 }
2697 
2698 void
2699 if_counters_free(struct ifnet *ifp)
2700 {
2701 	KASSERT(ifp->if_counters != NULL);
2702 
2703 	counters_free(ifp->if_counters, ifc_ncounters);
2704 	ifp->if_counters = NULL;
2705 }
2706 
2707 void
2708 if_getdata(struct ifnet *ifp, struct if_data *data)
2709 {
2710 	unsigned int i;
2711 
2712 	*data = ifp->if_data;
2713 
2714 	if (ifp->if_counters != NULL) {
2715 		uint64_t counters[ifc_ncounters];
2716 
2717 		counters_read(ifp->if_counters, counters, nitems(counters));
2718 
2719 		data->ifi_ipackets += counters[ifc_ipackets];
2720 		data->ifi_ierrors += counters[ifc_ierrors];
2721 		data->ifi_opackets += counters[ifc_opackets];
2722 		data->ifi_oerrors += counters[ifc_oerrors];
2723 		data->ifi_collisions += counters[ifc_collisions];
2724 		data->ifi_ibytes += counters[ifc_ibytes];
2725 		data->ifi_obytes += counters[ifc_obytes];
2726 		data->ifi_imcasts += counters[ifc_imcasts];
2727 		data->ifi_omcasts += counters[ifc_omcasts];
2728 		data->ifi_iqdrops += counters[ifc_iqdrops];
2729 		data->ifi_oqdrops += counters[ifc_oqdrops];
2730 		data->ifi_noproto += counters[ifc_noproto];
2731 	}
2732 
2733 	for (i = 0; i < ifp->if_nifqs; i++) {
2734 		struct ifqueue *ifq = ifp->if_ifqs[i];
2735 
2736 		ifq_add_data(ifq, data);
2737 	}
2738 
2739 	for (i = 0; i < ifp->if_niqs; i++) {
2740 		struct ifiqueue *ifiq = ifp->if_iqs[i];
2741 
2742 		ifiq_add_data(ifiq, data);
2743 	}
2744 }
2745 
2746 /*
2747  * Dummy functions replaced in ifnet during detach (if protocols decide to
2748  * fiddle with the if during detach.
2749  */
void
if_detached_qstart(struct ifqueue *ifq)
{
	/* The interface is gone; drop anything still queued. */
	ifq_purge(ifq);
}
2755 
2756 int
2757 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2758 {
2759 	return ENODEV;
2760 }
2761 
2762 /*
2763  * Create interface group without members
2764  */
2765 struct ifg_group *
2766 if_creategroup(const char *groupname)
2767 {
2768 	struct ifg_group	*ifg;
2769 
2770 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2771 		return (NULL);
2772 
2773 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2774 	ifg->ifg_refcnt = 1;
2775 	ifg->ifg_carp_demoted = 0;
2776 	TAILQ_INIT(&ifg->ifg_members);
2777 #if NPF > 0
2778 	pfi_attach_ifgroup(ifg);
2779 #endif
2780 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2781 
2782 	return (ifg);
2783 }
2784 
2785 /*
2786  * Add a group to an interface
2787  */
2788 int
2789 if_addgroup(struct ifnet *ifp, const char *groupname)
2790 {
2791 	struct ifg_list		*ifgl;
2792 	struct ifg_group	*ifg = NULL;
2793 	struct ifg_member	*ifgm;
2794 	size_t			 namelen;
2795 
2796 	namelen = strlen(groupname);
2797 	if (namelen == 0 || namelen >= IFNAMSIZ ||
2798 	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
2799 		return (EINVAL);
2800 
2801 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2802 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2803 			return (EEXIST);
2804 
2805 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2806 		return (ENOMEM);
2807 
2808 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2809 		free(ifgl, M_TEMP, sizeof(*ifgl));
2810 		return (ENOMEM);
2811 	}
2812 
2813 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2814 		if (!strcmp(ifg->ifg_group, groupname))
2815 			break;
2816 
2817 	if (ifg == NULL) {
2818 		ifg = if_creategroup(groupname);
2819 		if (ifg == NULL) {
2820 			free(ifgl, M_TEMP, sizeof(*ifgl));
2821 			free(ifgm, M_TEMP, sizeof(*ifgm));
2822 			return (ENOMEM);
2823 		}
2824 	} else
2825 		ifg->ifg_refcnt++;
2826 	KASSERT(ifg->ifg_refcnt != 0);
2827 
2828 	ifgl->ifgl_group = ifg;
2829 	ifgm->ifgm_ifp = ifp;
2830 
2831 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2832 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2833 
2834 #if NPF > 0
2835 	pfi_group_addmember(groupname);
2836 #endif
2837 
2838 	return (0);
2839 }
2840 
2841 /*
2842  * Remove a group from an interface
2843  */
2844 int
2845 if_delgroup(struct ifnet *ifp, const char *groupname)
2846 {
2847 	struct ifg_list		*ifgl;
2848 	struct ifg_member	*ifgm;
2849 
2850 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2851 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2852 			break;
2853 	if (ifgl == NULL)
2854 		return (ENOENT);
2855 
2856 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2857 
2858 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2859 		if (ifgm->ifgm_ifp == ifp)
2860 			break;
2861 
2862 	if (ifgm != NULL) {
2863 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2864 		free(ifgm, M_TEMP, sizeof(*ifgm));
2865 	}
2866 
2867 #if NPF > 0
2868 	pfi_group_delmember(groupname);
2869 #endif
2870 
2871 	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
2872 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2873 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2874 #if NPF > 0
2875 		pfi_detach_ifgroup(ifgl->ifgl_group);
2876 #endif
2877 		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
2878 	}
2879 
2880 	free(ifgl, M_TEMP, sizeof(*ifgl));
2881 
2882 	return (0);
2883 }
2884 
2885 /*
2886  * Stores all groups from an interface in memory pointed
2887  * to by data
2888  */
2889 int
2890 if_getgroup(caddr_t data, struct ifnet *ifp)
2891 {
2892 	int			 len, error;
2893 	struct ifg_list		*ifgl;
2894 	struct ifg_req		 ifgrq, *ifgp;
2895 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2896 
2897 	if (ifgr->ifgr_len == 0) {
2898 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2899 			ifgr->ifgr_len += sizeof(struct ifg_req);
2900 		return (0);
2901 	}
2902 
2903 	len = ifgr->ifgr_len;
2904 	ifgp = ifgr->ifgr_groups;
2905 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2906 		if (len < sizeof(ifgrq))
2907 			return (EINVAL);
2908 		bzero(&ifgrq, sizeof ifgrq);
2909 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2910 		    sizeof(ifgrq.ifgrq_group));
2911 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2912 		    sizeof(struct ifg_req))))
2913 			return (error);
2914 		len -= sizeof(ifgrq);
2915 		ifgp++;
2916 	}
2917 
2918 	return (0);
2919 }
2920 
2921 /*
2922  * Stores all members of a group in memory pointed to by data
2923  */
2924 int
2925 if_getgroupmembers(caddr_t data)
2926 {
2927 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2928 	struct ifg_group	*ifg;
2929 	struct ifg_member	*ifgm;
2930 	struct ifg_req		 ifgrq, *ifgp;
2931 	int			 len, error;
2932 
2933 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2934 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2935 			break;
2936 	if (ifg == NULL)
2937 		return (ENOENT);
2938 
2939 	if (ifgr->ifgr_len == 0) {
2940 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2941 			ifgr->ifgr_len += sizeof(ifgrq);
2942 		return (0);
2943 	}
2944 
2945 	len = ifgr->ifgr_len;
2946 	ifgp = ifgr->ifgr_groups;
2947 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2948 		if (len < sizeof(ifgrq))
2949 			return (EINVAL);
2950 		bzero(&ifgrq, sizeof ifgrq);
2951 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2952 		    sizeof(ifgrq.ifgrq_member));
2953 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2954 		    sizeof(struct ifg_req))))
2955 			return (error);
2956 		len -= sizeof(ifgrq);
2957 		ifgp++;
2958 	}
2959 
2960 	return (0);
2961 }
2962 
2963 int
2964 if_getgroupattribs(caddr_t data)
2965 {
2966 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2967 	struct ifg_group	*ifg;
2968 
2969 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2970 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2971 			break;
2972 	if (ifg == NULL)
2973 		return (ENOENT);
2974 
2975 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2976 
2977 	return (0);
2978 }
2979 
2980 int
2981 if_setgroupattribs(caddr_t data)
2982 {
2983 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2984 	struct ifg_group	*ifg;
2985 	struct ifg_member	*ifgm;
2986 	int			 demote;
2987 
2988 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2989 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2990 			break;
2991 	if (ifg == NULL)
2992 		return (ENOENT);
2993 
2994 	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
2995 	if (demote + ifg->ifg_carp_demoted > 0xff ||
2996 	    demote + ifg->ifg_carp_demoted < 0)
2997 		return (EINVAL);
2998 
2999 	ifg->ifg_carp_demoted += demote;
3000 
3001 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
3002 		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);
3003 
3004 	return (0);
3005 }
3006 
3007 /*
3008  * Stores all groups in memory pointed to by data
3009  */
3010 int
3011 if_getgrouplist(caddr_t data)
3012 {
3013 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
3014 	struct ifg_group	*ifg;
3015 	struct ifg_req		 ifgrq, *ifgp;
3016 	int			 len, error;
3017 
3018 	if (ifgr->ifgr_len == 0) {
3019 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
3020 			ifgr->ifgr_len += sizeof(ifgrq);
3021 		return (0);
3022 	}
3023 
3024 	len = ifgr->ifgr_len;
3025 	ifgp = ifgr->ifgr_groups;
3026 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
3027 		if (len < sizeof(ifgrq))
3028 			return (EINVAL);
3029 		bzero(&ifgrq, sizeof ifgrq);
3030 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
3031 		    sizeof(ifgrq.ifgrq_group));
3032 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
3033 		    sizeof(struct ifg_req))))
3034 			return (error);
3035 		len -= sizeof(ifgrq);
3036 		ifgp++;
3037 	}
3038 
3039 	return (0);
3040 }
3041 
3042 void
3043 if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
3044 {
3045 	switch (dst->sa_family) {
3046 	case AF_INET:
3047 		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
3048 		    mask && (mask->sa_len == 0 ||
3049 		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
3050 			if_group_egress_build();
3051 		break;
3052 #ifdef INET6
3053 	case AF_INET6:
3054 		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
3055 		    &in6addr_any) && mask && (mask->sa_len == 0 ||
3056 		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
3057 		    &in6addr_any)))
3058 			if_group_egress_build();
3059 		break;
3060 #endif
3061 	}
3062 }
3063 
/*
 * Rebuild the "egress" interface group from scratch: remove every
 * current member, then re-add each interface that an IPv4 (and, with
 * INET6, IPv6) default route in routing table 0 points at.
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	/* Empty the group; _SAFE because if_delgroup() frees members. */
	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* An all-zero sockaddr_in is the IPv4 default route key. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		/* walk all multipath routes sharing this key */
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3113 
3114 /*
3115  * Set/clear promiscuous mode on interface ifp based on the truth value
3116  * of pswitch.  The calls are reference counted so that only the first
3117  * "on" request actually has an effect, as does the final "off" request.
3118  * Results are undefined if the "off" and "on" requests are not matched.
3119  */
3120 int
3121 ifpromisc(struct ifnet *ifp, int pswitch)
3122 {
3123 	struct ifreq ifr;
3124 	unsigned short oif_flags;
3125 	int oif_pcount, error;
3126 
3127 	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */
3128 
3129 	oif_flags = ifp->if_flags;
3130 	oif_pcount = ifp->if_pcount;
3131 	if (pswitch) {
3132 		if (ifp->if_pcount++ != 0)
3133 			return (0);
3134 		ifp->if_flags |= IFF_PROMISC;
3135 	} else {
3136 		if (--ifp->if_pcount > 0)
3137 			return (0);
3138 		ifp->if_flags &= ~IFF_PROMISC;
3139 	}
3140 
3141 	if ((ifp->if_flags & IFF_UP) == 0)
3142 		return (0);
3143 
3144 	memset(&ifr, 0, sizeof(ifr));
3145 	ifr.ifr_flags = ifp->if_flags;
3146 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
3147 	if (error) {
3148 		ifp->if_flags = oif_flags;
3149 		ifp->if_pcount = oif_pcount;
3150 	}
3151 
3152 	return (error);
3153 }
3154 
3155 /* Set/clear LRO flag and restart interface if needed. */
3156 int
3157 ifsetlro(struct ifnet *ifp, int on)
3158 {
3159 	struct ifreq ifrq;
3160 	int error = 0;
3161 	int s = splnet();
3162 
3163 	if (!ISSET(ifp->if_capabilities, IFCAP_LRO)) {
3164 		error = ENOTSUP;
3165 		goto out;
3166 	}
3167 
3168 	NET_ASSERT_LOCKED();	/* for ioctl */
3169 	KERNEL_ASSERT_LOCKED();	/* for if_flags */
3170 
3171 	if (on && !ISSET(ifp->if_xflags, IFXF_LRO)) {
3172 		if (ether_brport_isset(ifp)) {
3173 			error = EBUSY;
3174 			goto out;
3175 		}
3176 		SET(ifp->if_xflags, IFXF_LRO);
3177 	} else if (!on && ISSET(ifp->if_xflags, IFXF_LRO))
3178 		CLR(ifp->if_xflags, IFXF_LRO);
3179 	else
3180 		goto out;
3181 
3182 	/* restart interface */
3183 	if (ISSET(ifp->if_flags, IFF_UP)) {
3184 		/* go down for a moment... */
3185 		CLR(ifp->if_flags, IFF_UP);
3186 		ifrq.ifr_flags = ifp->if_flags;
3187 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3188 
3189 		/* ... and up again */
3190 		SET(ifp->if_flags, IFF_UP);
3191 		ifrq.ifr_flags = ifp->if_flags;
3192 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
3193 	}
3194  out:
3195 	splx(s);
3196 
3197 	return error;
3198 }
3199 
3200 void
3201 ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
3202 {
3203 	NET_ASSERT_LOCKED_EXCLUSIVE();
3204 	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
3205 }
3206 
3207 void
3208 ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
3209 {
3210 	NET_ASSERT_LOCKED_EXCLUSIVE();
3211 	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
3212 }
3213 
3214 void
3215 ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
3216 {
3217 	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
3218 		panic("ifa_update_broadaddr does not support dynamic length");
3219 	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
3220 }
3221 
#ifdef DDB
/* debug function, can be called from ddb> */
void
ifa_print_all(void)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	char buf[INET6_ADDRSTRLEN];

	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			switch (ifa->ifa_addr->sa_family) {
			case AF_INET:
				printf("%s", inet_ntop(AF_INET,
				    &satosin(ifa->ifa_addr)->sin_addr,
				    buf, sizeof(buf)));
				break;
#ifdef INET6
			case AF_INET6:
				printf("%s", inet_ntop(AF_INET6,
				    &satosin6(ifa->ifa_addr)->sin6_addr,
				    buf, sizeof(buf)));
				break;
#endif
			}
			printf(" on %s\n", ifp->if_xname);
		}
	}
}
#endif /* DDB */
3253 
/*
 * Propagate a changed link-layer address to the driver and the IPv6
 * stack.  The interface is forced up briefly (SIOCSIFFLAGS) so the
 * driver reprograms the hardware address, and the IPv6 link-local
 * address is regenerated from the new lladdr.  The original IFF_UP
 * state is restored afterwards.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;

	NET_ASSERT_LOCKED();	/* for ioctl and in6 */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring up so the driver picks up the new lladdr. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
}
3300 
3301 void
3302 if_addrhook_add(struct ifnet *ifp, struct task *t)
3303 {
3304 	mtx_enter(&if_hooks_mtx);
3305 	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
3306 	mtx_leave(&if_hooks_mtx);
3307 }
3308 
3309 void
3310 if_addrhook_del(struct ifnet *ifp, struct task *t)
3311 {
3312 	mtx_enter(&if_hooks_mtx);
3313 	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
3314 	mtx_leave(&if_hooks_mtx);
3315 }
3316 
3317 void
3318 if_addrhooks_run(struct ifnet *ifp)
3319 {
3320 	if_hooks_run(&ifp->if_addrhooks);
3321 }
3322 
3323 void
3324 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3325 {
3326 	extern int ticks;
3327 
3328 	memset(rxr, 0, sizeof(*rxr));
3329 
3330 	rxr->rxr_adjusted = ticks;
3331 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3332 	rxr->rxr_hwm = hwm;
3333 }
3334 
3335 static inline void
3336 if_rxr_adjust_cwm(struct if_rxring *rxr)
3337 {
3338 	extern int ticks;
3339 
3340 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3341 		return;
3342 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3343 		rxr->rxr_cwm++;
3344 
3345 	rxr->rxr_adjusted = ticks;
3346 }
3347 
3348 void
3349 if_rxr_livelocked(struct if_rxring *rxr)
3350 {
3351 	extern int ticks;
3352 
3353 	if (ticks - rxr->rxr_adjusted >= 1) {
3354 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3355 			rxr->rxr_cwm--;
3356 
3357 		rxr->rxr_adjusted = ticks;
3358 	}
3359 }
3360 
3361 u_int
3362 if_rxr_get(struct if_rxring *rxr, u_int max)
3363 {
3364 	extern int ticks;
3365 	u_int diff;
3366 
3367 	if (ticks - rxr->rxr_adjusted >= 1) {
3368 		/* we're free to try for an adjustment */
3369 		if_rxr_adjust_cwm(rxr);
3370 	}
3371 
3372 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3373 		return (0);
3374 
3375 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3376 	rxr->rxr_alive += diff;
3377 
3378 	return (diff);
3379 }
3380 
3381 int
3382 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
3383 {
3384 	struct if_rxrinfo kifri;
3385 	int error;
3386 	u_int n;
3387 
3388 	error = copyin(uifri, &kifri, sizeof(kifri));
3389 	if (error)
3390 		return (error);
3391 
3392 	n = min(t, kifri.ifri_total);
3393 	kifri.ifri_total = t;
3394 
3395 	if (n > 0) {
3396 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
3397 		if (error)
3398 			return (error);
3399 	}
3400 
3401 	return (copyout(&kifri, uifri, sizeof(kifri)));
3402 }
3403 
3404 int
3405 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3406     struct if_rxring *rxr)
3407 {
3408 	struct if_rxring_info ifr;
3409 
3410 	memset(&ifr, 0, sizeof(ifr));
3411 
3412 	if (name != NULL)
3413 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3414 
3415 	ifr.ifr_size = size;
3416 	ifr.ifr_info = *rxr;
3417 
3418 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3419 }
3420 
3421 /*
3422  * Network stack input queues.
3423  */
3424 
3425 void
3426 niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
3427 {
3428 	mq_init(&niq->ni_q, maxlen, IPL_NET);
3429 	niq->ni_isr = isr;
3430 }
3431 
3432 int
3433 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3434 {
3435 	int rv;
3436 
3437 	rv = mq_enqueue(&niq->ni_q, m);
3438 	if (rv == 0)
3439 		schednetisr(niq->ni_isr);
3440 	else
3441 		if_congestion();
3442 
3443 	return (rv);
3444 }
3445 
3446 int
3447 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3448 {
3449 	int rv;
3450 
3451 	rv = mq_enlist(&niq->ni_q, ml);
3452 	if (rv == 0)
3453 		schednetisr(niq->ni_isr);
3454 	else
3455 		if_congestion();
3456 
3457 	return (rv);
3458 }
3459 
3460 __dead void
3461 unhandled_af(int af)
3462 {
3463 	panic("unhandled af %d", af);
3464 }
3465 
3466 struct taskq *
3467 net_tq(unsigned int ifindex)
3468 {
3469 	struct softnet *sn;
3470 	static int nettaskqs;
3471 
3472 	if (nettaskqs == 0)
3473 		nettaskqs = min(NET_TASKQ, ncpus);
3474 
3475 	sn = &softnets[ifindex % nettaskqs];
3476 
3477 	return (sn->sn_taskq);
3478 }
3479