xref: /openbsd-src/sys/net/if.c (revision ff0e7be1ebbcc809ea8ad2b6dafe215824da9e46)
1 /*	$OpenBSD: if.c,v 1.699 2023/06/05 11:35:46 bluhm Exp $	*/
2 /*	$NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)if.c	8.3 (Berkeley) 1/4/94
62  */
63 
64 #include "bpfilter.h"
65 #include "bridge.h"
66 #include "carp.h"
67 #include "ether.h"
68 #include "pf.h"
69 #include "pfsync.h"
70 #include "ppp.h"
71 #include "pppoe.h"
72 #include "if_wg.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/timeout.h>
80 #include <sys/protosw.h>
81 #include <sys/kernel.h>
82 #include <sys/ioctl.h>
83 #include <sys/domain.h>
84 #include <sys/task.h>
85 #include <sys/atomic.h>
86 #include <sys/percpu.h>
87 #include <sys/proc.h>
88 #include <sys/stdint.h>	/* uintptr_t */
89 #include <sys/rwlock.h>
90 #include <sys/smr.h>
91 
92 #include <net/if.h>
93 #include <net/if_dl.h>
94 #include <net/if_types.h>
95 #include <net/route.h>
96 #include <net/netisr.h>
97 
98 #include "vlan.h"
99 #if NVLAN > 0
100 #include <net/if_vlan_var.h>
101 #endif
102 
103 #include <netinet/in.h>
104 #include <netinet/if_ether.h>
105 #include <netinet/igmp.h>
106 #ifdef MROUTING
107 #include <netinet/ip_mroute.h>
108 #endif
109 
110 #ifdef INET6
111 #include <netinet6/in6_var.h>
112 #include <netinet6/in6_ifattach.h>
113 #include <netinet6/nd6.h>
114 #include <netinet/ip6.h>
115 #include <netinet6/ip6_var.h>
116 #endif
117 
118 #ifdef MPLS
119 #include <netmpls/mpls.h>
120 #endif
121 
122 #if NBPFILTER > 0
123 #include <net/bpf.h>
124 #endif
125 
126 #if NBRIDGE > 0
127 #include <net/if_bridge.h>
128 #endif
129 
130 #if NCARP > 0
131 #include <netinet/ip_carp.h>
132 #endif
133 
134 #if NPF > 0
135 #include <net/pfvar.h>
136 #endif
137 
138 #include <sys/device.h>
139 
140 void	if_attachsetup(struct ifnet *);
141 void	if_attach_common(struct ifnet *);
142 void	if_remove(struct ifnet *);
143 int	if_createrdomain(int, struct ifnet *);
144 int	if_setrdomain(struct ifnet *, int);
145 void	if_slowtimo(void *);
146 
147 void	if_detached_qstart(struct ifqueue *);
148 int	if_detached_ioctl(struct ifnet *, u_long, caddr_t);
149 
150 int	ifioctl_get(u_long, caddr_t);
151 int	ifconf(caddr_t);
152 static int
153 	if_sffpage_check(const caddr_t);
154 
155 int	if_getgroup(caddr_t, struct ifnet *);
156 int	if_getgroupmembers(caddr_t);
157 int	if_getgroupattribs(caddr_t);
158 int	if_setgroupattribs(caddr_t);
159 int	if_getgrouplist(caddr_t);
160 
161 void	if_linkstate(struct ifnet *);
162 void	if_linkstate_task(void *);
163 
164 int	if_clone_list(struct if_clonereq *);
165 struct if_clone	*if_clone_lookup(const char *, int *);
166 
167 int	if_group_egress_build(void);
168 
169 void	if_watchdog_task(void *);
170 
171 void	if_netisr(void *);
172 
173 #ifdef DDB
174 void	ifa_print_all(void);
175 #endif
176 
177 void	if_qstart_compat(struct ifqueue *);
178 
179 /*
180  * interface index map
181  *
182  * the kernel maintains a mapping of interface indexes to struct ifnet
183  * pointers.
184  *
185  * the map is an array of struct ifnet pointers prefixed by an if_map
186  * structure. the if_map structure stores the length of its array.
187  *
188  * as interfaces are attached to the system, the map is grown on demand
189  * up to USHRT_MAX entries.
190  *
191  * interface index 0 is reserved and represents no interface. this
192  * supports the use of the interface index as the scope for IPv6 link
193  * local addresses, where scope 0 means no scope has been specified.
194  * it also supports the use of interface index as the unique identifier
195  * for network interfaces in SNMP applications as per RFC2863. therefore
196  * if_get(0) returns NULL.
197  */
198 
199 struct ifnet *if_ref(struct ifnet *);
200 
201 /*
202  * struct if_idxmap
203  *
204  * infrastructure to manage updates and accesses to the current if_map.
205  *
206  * interface index 0 is special and represents "no interface", so we
207  * use the 0th slot in map to store the length of the array.
208  */
209 
210 struct if_idxmap {
	/* next candidate ifindex; starts at 1 so index 0 is never handed out */
211 	unsigned int		  serial;
	/* number of attached interfaces; if_idxmap_alloc() panics at USHRT_MAX */
212 	unsigned int		  count;
213 	struct ifnet		**map;		/* SMR protected */
214 	struct rwlock		  lock;
215 	unsigned char		 *usedidx;	/* bitmap of indices in use */
216 };
217 
/*
 * Deferred-free record for a retired if_map.  The old usedidx bitmap is
 * recycled as this structure in if_idxmap_alloc(), which is why
 * if_idxmap_usedidx_size() never returns less than sizeof(struct
 * if_idxmap_dtor).  Released from the SMR callback if_idxmap_free().
 */
218 struct if_idxmap_dtor {
219 	struct smr_entry	  smr;
220 	struct ifnet		**map;		/* retired map to release */
221 };
222 
223 void	if_idxmap_init(unsigned int);
224 void	if_idxmap_free(void *);
225 void	if_idxmap_alloc(struct ifnet *);
226 void	if_idxmap_insert(struct ifnet *);
227 void	if_idxmap_remove(struct ifnet *);
228 
229 TAILQ_HEAD(, ifg_group) ifg_head =
230     TAILQ_HEAD_INITIALIZER(ifg_head);	/* [N] list of interface groups */
231 
232 LIST_HEAD(, if_clone) if_cloners =
233     LIST_HEAD_INITIALIZER(if_cloners);	/* [I] list of clonable interfaces */
234 int if_cloners_count;	/* [I] number of clonable interfaces */
235 
236 struct rwlock if_cloners_lock = RWLOCK_INITIALIZER("clonelk");
237 
238 /* hooks should only be added, deleted, and run from a process context */
239 struct mutex if_hooks_mtx = MUTEX_INITIALIZER(IPL_NONE);
240 void	if_hooks_run(struct task_list *);
241 
242 int	ifq_congestion;
243 
244 int		 netisr;
245 
246 struct softnet {
247 	char		 sn_name[16];	/* taskq name, "softnet%u" (see ifinit()) */
248 	struct taskq	*sn_taskq;	/* single-thread IPL_NET MPSAFE taskq */
249 };
250 
251 #define	NET_TASKQ	4
252 struct softnet	softnets[NET_TASKQ];
253 
254 struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
255 
256 /*
257  * Serialize socket operations to ensure no new sleeping points
258  * are introduced in IP output paths.
259  */
260 struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
261 
262 /*
263  * Network interface utility routines.
264  */
/*
 * ifinit: one-time boot initialization of the interface layer.
 * Sizes the initial interface index map and creates the NET_TASKQ
 * softnet task queues; panics if a taskq cannot be created.
 */
265 void
266 ifinit(void)
267 {
268 	unsigned int	i;
269 
270 	/*
271 	 * most machines boot with 4 or 5 interfaces, so size the initial map
272 	 * to accommodate this
273 	 */
274 	if_idxmap_init(8); /* 8 is a nice power of 2 for malloc */
275 
276 	for (i = 0; i < NET_TASKQ; i++) {
277 		struct softnet *sn = &softnets[i];
		/* one thread per softnet, running MP-safe at IPL_NET */
278 		snprintf(sn->sn_name, sizeof(sn->sn_name), "softnet%u", i);
279 		sn->sn_taskq = taskq_create(sn->sn_name, 1, IPL_NET,
280 		    TASKQ_MPSAFE);
281 		if (sn->sn_taskq == NULL)
282 			panic("unable to create network taskq %d", i);
283 	}
284 }
285 
286 static struct if_idxmap if_idxmap;
287 
288 /*
289  * XXXSMP: For `ifnetlist' modification both kernel and net locks
290  * should be taken. For read-only access only one lock of them required.
291  */
292 struct ifnet_head ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);
293 
/*
 * Return the capacity of an if_map array.  Slot 0 can never hold an
 * interface (ifindex 0 is reserved), so it stores the array length
 * as an integer disguised as a pointer (see if_idxmap_init/alloc).
 */
294 static inline unsigned int
295 if_idxmap_limit(struct ifnet **if_map)
296 {
297 	return ((uintptr_t)if_map[0]);
298 }
299 
/*
 * Size of the usedidx bitmap for `limit' indices.  Never smaller than
 * struct if_idxmap_dtor because the retired bitmap is reused as that
 * structure when the map grows (see if_idxmap_alloc()).
 */
300 static inline size_t
301 if_idxmap_usedidx_size(unsigned int limit)
302 {
303 	return (max(howmany(limit, NBBY), sizeof(struct if_idxmap_dtor)));
304 }
305 
/*
 * Set up the global if_idxmap: the rwlock, the initial SMR-published
 * map of `limit' slots, and the used-index bitmap.  Called once from
 * ifinit() before any interface can attach.
 */
306 void
307 if_idxmap_init(unsigned int limit)
308 {
309 	struct ifnet **if_map;
310 
311 	rw_init(&if_idxmap.lock, "idxmaplk");
312 	if_idxmap.serial = 1; /* skip ifidx 0 */
313 
314 	if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
315 	    M_WAITOK | M_ZERO);
316 
	/* slot 0 stores the array length; see if_idxmap_limit() */
317 	if_map[0] = (struct ifnet *)(uintptr_t)limit;
318 
319 	if_idxmap.usedidx = malloc(if_idxmap_usedidx_size(limit),
320 	    M_IFADDR, M_WAITOK | M_ZERO);
321 	setbit(if_idxmap.usedidx, 0); /* blacklist ifidx 0 */
322 
323 	/* this is called early so there's nothing to race with */
324 	SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);
325 }
326 
/*
 * Reserve an interface index for `ifp' (stored in ifp->if_index) and
 * initialize its refcount.  Grows the SMR-published map by doubling
 * when the candidate index is beyond the current limit; the old map
 * is released via smr_call() once no reader can still see it.  The
 * interface is NOT yet visible in the map -- that happens later in
 * if_idxmap_insert().
 */
327 void
328 if_idxmap_alloc(struct ifnet *ifp)
329 {
330 	struct ifnet **if_map;
331 	unsigned int limit;
332 	unsigned int index, i;
333 
334 	refcnt_init(&ifp->if_refcnt);
335 
336 	rw_enter_write(&if_idxmap.lock);
337 
338 	if (++if_idxmap.count >= USHRT_MAX)
339 		panic("too many interfaces");
340 
341 	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
342 	limit = if_idxmap_limit(if_map);
343 
	/* indices wrap at USHRT_MAX so they fit a 16 bit ifindex */
344 	index = if_idxmap.serial++ & USHRT_MAX;
345 
346 	if (index >= limit) {
347 		struct if_idxmap_dtor *dtor;
348 		struct ifnet **oif_map;
349 		unsigned int olimit;
350 		unsigned char *nusedidx;
351 
352 		oif_map = if_map;
353 		olimit = limit;
354 
		/* double the map and take a reference on every copied entry */
355 		limit = olimit * 2;
356 		if_map = mallocarray(limit, sizeof(*if_map), M_IFADDR,
357 		    M_WAITOK | M_ZERO);
358 		if_map[0] = (struct ifnet *)(uintptr_t)limit;
359 
360 		for (i = 1; i < olimit; i++) {
361 			struct ifnet *oifp = SMR_PTR_GET_LOCKED(&oif_map[i]);
362 			if (oifp == NULL)
363 				continue;
364 
365 			/*
366 			 * nif_map isn't visible yet, so don't need
367 			 * SMR_PTR_SET_LOCKED and its membar.
368 			 */
369 			if_map[i] = if_ref(oifp);
370 		}
371 
372 		nusedidx = malloc(if_idxmap_usedidx_size(limit),
373 		    M_IFADDR, M_WAITOK | M_ZERO);
374 		memcpy(nusedidx, if_idxmap.usedidx, howmany(olimit, NBBY));
375 
376 		/* use the old usedidx bitmap as an smr_entry for the if_map */
377 		dtor = (struct if_idxmap_dtor *)if_idxmap.usedidx;
378 		if_idxmap.usedidx = nusedidx;
379 
		/* publish the new map; readers may still be using oif_map */
380 		SMR_PTR_SET_LOCKED(&if_idxmap.map, if_map);
381 
382 		dtor->map = oif_map;
383 		smr_init(&dtor->smr);
384 		smr_call(&dtor->smr, if_idxmap_free, dtor);
385 	}
386 
387 	/* pick the next free index */
388 	for (i = 0; i < USHRT_MAX; i++) {
389 		if (index != 0 && isclr(if_idxmap.usedidx, index))
390 			break;
391 
392 		index = if_idxmap.serial++ & USHRT_MAX;
393 	}
394 	KASSERT(index != 0 && index < limit);
395 	KASSERT(isclr(if_idxmap.usedidx, index));
396 
397 	setbit(if_idxmap.usedidx, index);
398 	ifp->if_index = index;
399 
400 	rw_exit_write(&if_idxmap.lock);
401 }
402 
/*
 * SMR callback releasing a retired if_map: drop the references taken
 * when its entries were copied into the replacement map, then free
 * the array and the dtor record (the recycled old usedidx bitmap).
 */
403 void
404 if_idxmap_free(void *arg)
405 {
406 	struct if_idxmap_dtor *dtor = arg;
407 	struct ifnet **oif_map = dtor->map;
408 	unsigned int olimit = if_idxmap_limit(oif_map);
409 	unsigned int i;
410 
411 	for (i = 1; i < olimit; i++)
412 		if_put(oif_map[i]);
413 
414 	free(oif_map, M_IFADDR, olimit * sizeof(*oif_map));
415 	free(dtor, M_IFADDR, if_idxmap_usedidx_size(olimit));
416 }
417 
/*
 * Publish `ifp' in the index map at the slot reserved earlier by
 * if_idxmap_alloc(), taking a reference that is held for as long as
 * the interface is visible.  After this, if_get(ifp->if_index) works.
 */
418 void
419 if_idxmap_insert(struct ifnet *ifp)
420 {
421 	struct ifnet **if_map;
422 	unsigned int index = ifp->if_index;
423 
424 	rw_enter_write(&if_idxmap.lock);
425 
426 	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
427 
428 	KASSERTMSG(index != 0 && index < if_idxmap_limit(if_map),
429 	    "%s(%p) index %u vs limit %u", ifp->if_xname, ifp, index,
430 	    if_idxmap_limit(if_map));
431 	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == NULL);
432 	KASSERT(isset(if_idxmap.usedidx, index));
433 
434 	/* commit */
435 	SMR_PTR_SET_LOCKED(&if_map[index], if_ref(ifp));
436 
437 	rw_exit_write(&if_idxmap.lock);
438 }
439 
/*
 * Unpublish `ifp' from the index map and release its index.  The
 * smr_barrier() after dropping the lock makes sure no SMR reader can
 * still observe the old map entry before the map's reference is put.
 */
440 void
441 if_idxmap_remove(struct ifnet *ifp)
442 {
443 	struct ifnet **if_map;
444 	unsigned int index = ifp->if_index;
445 
446 	rw_enter_write(&if_idxmap.lock);
447 
448 	if_map = SMR_PTR_GET_LOCKED(&if_idxmap.map);
449 
450 	KASSERT(index != 0 && index < if_idxmap_limit(if_map));
451 	KASSERT(SMR_PTR_GET_LOCKED(&if_map[index]) == ifp);
452 	KASSERT(isset(if_idxmap.usedidx, index));
453 
454 	SMR_PTR_SET_LOCKED(&if_map[index], NULL);
455 
456 	if_idxmap.count--;
457 	clrbit(if_idxmap.usedidx, index);
458 	/* end of if_idxmap modifications */
459 
460 	rw_exit_write(&if_idxmap.lock);
461 
	/* wait for readers, then drop the map's reference */
462 	smr_barrier();
463 	if_put(ifp);
464 }
465 
466 /*
467  * Attach an interface to the
468  * list of "active" interfaces.
469  */
469 /*
 * Final, net-locked phase of interface attach: group membership,
 * per-AF attach, timers/tasks, publication in the index map, and the
 * routing-socket arrival announcement.
 */
470 void
471 if_attachsetup(struct ifnet *ifp)
472 {
473 	unsigned long ifidx;
474 
475 	NET_ASSERT_LOCKED();
476 
477 	if_addgroup(ifp, IFG_ALL);
478 
479 #ifdef INET6
480 	nd6_ifattach(ifp);
481 #endif
482 
483 #if NPF > 0
484 	pfi_attach_ifnet(ifp);
485 #endif
486 
487 	timeout_set(&ifp->if_slowtimo, if_slowtimo, ifp);
488 	if_slowtimo(ifp);
489 
490 	if_idxmap_insert(ifp);
491 	KASSERT(if_get(0) == NULL);
492 
493 	ifidx = ifp->if_index;
494 
	/*
	 * The deferred tasks carry the index, not the pointer --
	 * NOTE(review): presumably so a task that runs late must
	 * re-lookup the interface instead of touching a freed ifp;
	 * confirm against if_watchdog_task()/if_linkstate_task().
	 */
495 	task_set(&ifp->if_watchdogtask, if_watchdog_task, (void *)ifidx);
496 	task_set(&ifp->if_linkstatetask, if_linkstate_task, (void *)ifidx);
497 
498 	/* Announce the interface. */
499 	rtm_ifannounce(ifp, IFAN_ARRIVAL);
500 }
501 
502 /*
503  * Allocate the link level name for the specified interface.  This
504  * is an attachment helper.  It must be called after ifp->if_addrlen
505  * is initialized, which may not be the case when if_attach() is
506  * called.
507  */
508 void
509 if_alloc_sadl(struct ifnet *ifp)
510 {
511 	unsigned int socksize;
512 	int namelen, masklen;
513 	struct sockaddr_dl *sdl;
514 
515 	/*
516 	 * If the interface already has a link name, release it
517 	 * now.  This is useful for interfaces that can change
518 	 * link types, and thus switch link names often.
519 	 */
520 	if_free_sadl(ifp);
521 
	/* name bytes precede the link-layer address in sdl_data */
522 	namelen = strlen(ifp->if_xname);
523 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
524 	socksize = masklen + ifp->if_addrlen;
/* round up to a multiple of sizeof(long) */
525 #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
526 	if (socksize < sizeof(*sdl))
527 		socksize = sizeof(*sdl);
528 	socksize = ROUNDUP(socksize);
529 	sdl = malloc(socksize, M_IFADDR, M_WAITOK|M_ZERO);
530 	sdl->sdl_len = socksize;
531 	sdl->sdl_family = AF_LINK;
532 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
533 	sdl->sdl_nlen = namelen;
534 	sdl->sdl_alen = ifp->if_addrlen;
535 	sdl->sdl_index = ifp->if_index;
536 	sdl->sdl_type = ifp->if_type;
537 	ifp->if_sadl = sdl;
538 }
539 
540 /*
541  * Free the link level name for the specified interface.  This is
542  * a detach helper.  This is called from if_detach() or from
543  * link layer type specific detach functions.
544  */
545 void
546 if_free_sadl(struct ifnet *ifp)
547 {
548 	if (ifp->if_sadl == NULL)
549 		return;
550 
551 	free(ifp->if_sadl, M_IFADDR, ifp->if_sadl->sdl_len);
552 	ifp->if_sadl = NULL;
553 }
554 
/*
 * Attach `ifp' at the HEAD of the global interface list.  Identical
 * to if_attach() except for the insertion position.
 */
555 void
556 if_attachhead(struct ifnet *ifp)
557 {
558 	if_attach_common(ifp);
559 	NET_LOCK();
560 	TAILQ_INSERT_HEAD(&ifnetlist, ifp, if_list);
561 	if_attachsetup(ifp);
562 	NET_UNLOCK();
563 }
564 
/*
 * Attach `ifp' at the TAIL of the global interface list: lock-free
 * common setup first, then list insertion and the net-locked phase.
 */
565 void
566 if_attach(struct ifnet *ifp)
567 {
568 	if_attach_common(ifp);
569 	NET_LOCK();
570 	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_list);
571 	if_attachsetup(ifp);
572 	NET_UNLOCK();
573 }
574 
575 void
576 if_attach_queues(struct ifnet *ifp, unsigned int nqs)
577 {
578 	struct ifqueue **map;
579 	struct ifqueue *ifq;
580 	int i;
581 
582 	KASSERT(ifp->if_ifqs == ifp->if_snd.ifq_ifqs);
583 	KASSERT(nqs != 0);
584 
585 	map = mallocarray(sizeof(*map), nqs, M_DEVBUF, M_WAITOK);
586 
587 	ifp->if_snd.ifq_softc = NULL;
588 	map[0] = &ifp->if_snd;
589 
590 	for (i = 1; i < nqs; i++) {
591 		ifq = malloc(sizeof(*ifq), M_DEVBUF, M_WAITOK|M_ZERO);
592 		ifq_set_maxlen(ifq, ifp->if_snd.ifq_maxlen);
593 		ifq_init(ifq, ifp, i);
594 		map[i] = ifq;
595 	}
596 
597 	ifp->if_ifqs = map;
598 	ifp->if_nifqs = nqs;
599 }
600 
601 void
602 if_attach_iqueues(struct ifnet *ifp, unsigned int niqs)
603 {
604 	struct ifiqueue **map;
605 	struct ifiqueue *ifiq;
606 	unsigned int i;
607 
608 	KASSERT(niqs != 0);
609 
610 	map = mallocarray(niqs, sizeof(*map), M_DEVBUF, M_WAITOK);
611 
612 	ifp->if_rcv.ifiq_softc = NULL;
613 	map[0] = &ifp->if_rcv;
614 
615 	for (i = 1; i < niqs; i++) {
616 		ifiq = malloc(sizeof(*ifiq), M_DEVBUF, M_WAITOK|M_ZERO);
617 		ifiq_init(ifiq, ifp, i);
618 		map[i] = ifiq;
619 	}
620 
621 	ifp->if_iqs = map;
622 	ifp->if_niqs = niqs;
623 }
624 
/*
 * Lock-free part of interface attach: list heads, queue defaults,
 * MPSAFE start-routine policing, index allocation, and default
 * function pointers.  Runs before the interface is globally visible.
 */
625 void
626 if_attach_common(struct ifnet *ifp)
627 {
628 	KASSERT(ifp->if_ioctl != NULL);
629 
630 	TAILQ_INIT(&ifp->if_addrlist);
631 	TAILQ_INIT(&ifp->if_maddrlist);
632 	TAILQ_INIT(&ifp->if_groups);
633 
	/*
	 * non-MPSAFE drivers provide if_start and get the compat
	 * wrapper; MPSAFE drivers must provide if_qstart directly.
	 */
634 	if (!ISSET(ifp->if_xflags, IFXF_MPSAFE)) {
635 		KASSERTMSG(ifp->if_qstart == NULL,
636 		    "%s: if_qstart set without MPSAFE set", ifp->if_xname);
637 		ifp->if_qstart = if_qstart_compat;
638 	} else {
639 		KASSERTMSG(ifp->if_start == NULL,
640 		    "%s: if_start set with MPSAFE set", ifp->if_xname);
641 		KASSERTMSG(ifp->if_qstart != NULL,
642 		    "%s: if_qstart not set with MPSAFE set", ifp->if_xname);
643 	}
644 
645 	if_idxmap_alloc(ifp);
646 
	/* start with a single send queue and a single receive queue */
647 	ifq_init(&ifp->if_snd, ifp, 0);
648 
649 	ifp->if_snd.ifq_ifqs[0] = &ifp->if_snd;
650 	ifp->if_ifqs = ifp->if_snd.ifq_ifqs;
651 	ifp->if_nifqs = 1;
652 	if (ifp->if_txmit == 0)
653 		ifp->if_txmit = IF_TXMIT_DEFAULT;
654 
655 	ifiq_init(&ifp->if_rcv, ifp, 0);
656 
657 	ifp->if_rcv.ifiq_ifiqs[0] = &ifp->if_rcv;
658 	ifp->if_iqs = ifp->if_rcv.ifiq_ifiqs;
659 	ifp->if_niqs = 1;
660 
661 	TAILQ_INIT(&ifp->if_addrhooks);
662 	TAILQ_INIT(&ifp->if_linkstatehooks);
663 	TAILQ_INIT(&ifp->if_detachhooks);
664 
665 	if (ifp->if_rtrequest == NULL)
666 		ifp->if_rtrequest = if_rtrequest_dummy;
667 	if (ifp->if_enqueue == NULL)
668 		ifp->if_enqueue = if_enqueue_ifq;
669 #if NBPFILTER > 0
670 	if (ifp->if_bpf_mtap == NULL)
671 		ifp->if_bpf_mtap = bpf_mtap_ether;
672 #endif
673 	ifp->if_llprio = IFQ_DEFPRIO;
674 }
675 
676 void
677 if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
678 {
679 	/*
680 	 * only switch the ifq_ops on the first ifq on an interface.
681 	 *
682 	 * the only ifq_ops we provide are priq and hfsc, and hfsc only
683 	 * works on a single ifq. because the code uses the ifq_ops
684 	 * on the first ifq (if_snd) to select a queue for an mbuf,
685 	 * by switching only the first one we change both the algorithm
686 	 * and force the routing of all new packets to it.
687 	 */
688 	ifq_attach(&ifp->if_snd, newops, args);
689 }
690 
691 void
692 if_start(struct ifnet *ifp)
693 {
694 	KASSERT(ifp->if_qstart == if_qstart_compat);
695 	if_qstart_compat(&ifp->if_snd);
696 }
697 void
698 if_qstart_compat(struct ifqueue *ifq)
699 {
700 	struct ifnet *ifp = ifq->ifq_if;
701 	int s;
702 
703 	/*
704 	 * the stack assumes that an interface can have multiple
705 	 * transmit rings, but a lot of drivers are still written
706 	 * so that interfaces and send rings have a 1:1 mapping.
707 	 * this provides compatibility between the stack and the older
708 	 * drivers by translating from the only queue they have
709 	 * (ifp->if_snd) back to the interface and calling if_start.
710 	 */
711 
	/* legacy drivers expect the kernel lock and splnet protection */
712 	KERNEL_LOCK();
713 	s = splnet();
714 	(*ifp->if_start)(ifp);
715 	splx(s);
716 	KERNEL_UNLOCK();
717 }
718 
/*
 * Hand an outgoing packet to the interface, after the optional pf
 * delay and bridge diversions.  Returns 0 or an errno; on error the
 * mbuf has been consumed by the lower layer.
 */
719 int
720 if_enqueue(struct ifnet *ifp, struct mbuf *m)
721 {
722 	CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP);
723 
724 #if NPF > 0
	/* pf may have asked for this packet to be delayed */
725 	if (m->m_pkthdr.pf.delay > 0)
726 		return (pf_delay_pkt(m, ifp->if_index));
727 #endif
728 
729 #if NBRIDGE > 0
	/* divert to the bridge unless the packet came from it (M_PROTO1) */
730 	if (ifp->if_bridgeidx && (m->m_flags & M_PROTO1) == 0) {
731 		int error;
732 
733 		error = bridge_enqueue(ifp, m);
734 		return (error);
735 	}
736 #endif
737 
738 #if NPF > 0
739 	pf_pkt_addr_changed(m);
740 #endif	/* NPF > 0 */
741 
742 	return ((*ifp->if_enqueue)(ifp, m));
743 }
744 
745 int
746 if_enqueue_ifq(struct ifnet *ifp, struct mbuf *m)
747 {
748 	struct ifqueue *ifq = &ifp->if_snd;
749 	int error;
750 
751 	if (ifp->if_nifqs > 1) {
752 		unsigned int idx;
753 
754 		/*
755 		 * use the operations on the first ifq to pick which of
756 		 * the array gets this mbuf.
757 		 */
758 
759 		idx = ifq_idx(&ifp->if_snd, ifp->if_nifqs, m);
760 		ifq = ifp->if_ifqs[idx];
761 	}
762 
763 	error = ifq_enqueue(ifq, m);
764 	if (error)
765 		return (error);
766 
767 	ifq_start(ifq);
768 
769 	return (0);
770 }
771 
/*
 * Driver entry point for received packets: feed the whole mbuf list
 * into the interface's first input queue.
 */
772 void
773 if_input(struct ifnet *ifp, struct mbuf_list *ml)
774 {
775 	ifiq_input(&ifp->if_rcv, ml);
776 }
777 
/*
 * Loop a locally destined packet back into the input path of `ifp'
 * for address family `af'.  Resets the packet header while keeping
 * broadcast/multicast flags and outgoing checksum state, marks those
 * checksums as already verified for the local delivery, and updates
 * both output and input counters.  Consumes the mbuf; returns 0 or
 * EAFNOSUPPORT for an unhandled family.
 */
778 int
779 if_input_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
780 {
781 	int keepflags, keepcksum;
782 
783 #if NBPFILTER > 0
784 	/*
785 	 * Only send packets to bpf if they are destined to local
786 	 * addresses.
787 	 *
788 	 * if_input_local() is also called for SIMPLEX interfaces to
789 	 * duplicate packets for local use.  But don't dup them to bpf.
790 	 */
791 	if (ifp->if_flags & IFF_LOOPBACK) {
792 		caddr_t if_bpf = ifp->if_bpf;
793 
794 		if (if_bpf)
795 			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
796 	}
797 #endif
798 	keepflags = m->m_flags & (M_BCAST|M_MCAST);
799 	/*
800 	 * Preserve outgoing checksum flags, in case the packet is
801 	 * forwarded to another interface.  Then the checksum, which
802 	 * is now incorrect, will be calculated before sending.
803 	 */
804 	keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT |
805 	    M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT);
806 	m_resethdr(m);
807 	m->m_flags |= M_LOOP | keepflags;
808 	m->m_pkthdr.csum_flags = keepcksum;
809 	m->m_pkthdr.ph_ifidx = ifp->if_index;
810 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
811 
	/* we produced the packet, so its checksums are known good locally */
812 	if (ISSET(keepcksum, M_TCP_CSUM_OUT))
813 		m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
814 	if (ISSET(keepcksum, M_UDP_CSUM_OUT))
815 		m->m_pkthdr.csum_flags |= M_UDP_CSUM_IN_OK;
816 	if (ISSET(keepcksum, M_ICMP_CSUM_OUT))
817 		m->m_pkthdr.csum_flags |= M_ICMP_CSUM_IN_OK;
818 
	/* the packet counts as both sent and received */
819 	ifp->if_opackets++;
820 	ifp->if_obytes += m->m_pkthdr.len;
821 
822 	ifp->if_ipackets++;
823 	ifp->if_ibytes += m->m_pkthdr.len;
824 
825 	switch (af) {
826 	case AF_INET:
827 		if (ISSET(keepcksum, M_IPV4_CSUM_OUT))
828 			m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
829 		ipv4_input(ifp, m);
830 		break;
831 #ifdef INET6
832 	case AF_INET6:
833 		ipv6_input(ifp, m);
834 		break;
835 #endif /* INET6 */
836 #ifdef MPLS
837 	case AF_MPLS:
838 		mpls_input(ifp, m);
839 		break;
840 #endif /* MPLS */
841 	default:
842 		printf("%s: can't handle af%d\n", ifp->if_xname, af);
843 		m_freem(m);
844 		return (EAFNOSUPPORT);
845 	}
846 
847 	return (0);
848 }
849 
850 int
851 if_output_ml(struct ifnet *ifp, struct mbuf_list *ml,
852     struct sockaddr *dst, struct rtentry *rt)
853 {
854 	struct mbuf *m;
855 	int error = 0;
856 
857 	while ((m = ml_dequeue(ml)) != NULL) {
858 		error = ifp->if_output(ifp, m, dst, rt);
859 		if (error)
860 			break;
861 	}
862 	if (error)
863 		ml_purge(ml);
864 
865 	return error;
866 }
867 
/*
 * Drain `mq' out through the interface and keep the shared byte/packet
 * accounting in `*total' balanced: everything delisted here is
 * subtracted, including anything purged after a concurrent re-enqueue.
 */
868 int
869 if_output_mq(struct ifnet *ifp, struct mbuf_queue *mq, unsigned int *total,
870     struct sockaddr *dst, struct rtentry *rt)
871 {
872 	struct mbuf_list ml;
873 	unsigned int len;
874 	int error;
875 
876 	mq_delist(mq, &ml);
877 	len = ml_len(&ml);
878 	error = if_output_ml(ifp, &ml, dst, rt);
879 
880 	/* XXXSMP we also discard if other CPU enqueues */
881 	if (mq_len(mq) > 0) {
882 		/* mbuf is back in queue. Discard. */
883 		atomic_sub_int(total, len + mq_purge(mq));
884 	} else
885 		atomic_sub_int(total, len);
886 
887 	return error;
888 }
889 
890 int
891 if_output_local(struct ifnet *ifp, struct mbuf *m, sa_family_t af)
892 {
893 	struct ifiqueue *ifiq;
894 	unsigned int flow = 0;
895 
896 	m->m_pkthdr.ph_family = af;
897 	m->m_pkthdr.ph_ifidx = ifp->if_index;
898 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
899 
900 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
901 		flow = m->m_pkthdr.ph_flowid;
902 
903 	ifiq = ifp->if_iqs[flow % ifp->if_niqs];
904 
905 	return (ifiq_enqueue(ifiq, m) == 0 ? 0 : ENOBUFS);
906 }
907 
/*
 * Run the protocol input routine for every packet on `ml' under the
 * shared netlock.  Non-cloned (hardware) interfaces also stir the
 * random pool with packet arrival data.
 */
908 void
909 if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
910 {
911 	struct mbuf *m;
912 
913 	if (ml_empty(ml))
914 		return;
915 
916 	if (!ISSET(ifp->if_xflags, IFXF_CLONED))
917 		enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));
918 
919 	/*
920 	 * We grab the shared netlock for packet processing in the softnet
921 	 * threads.  Packets can regrab the exclusive lock via queues.
922 	 * ioctl, sysctl, and socket syscall may use shared lock if access is
923 	 * read only or MP safe.  Usually they hold the exclusive net lock.
924 	 */
925 
926 	NET_LOCK_SHARED();
927 	while ((m = ml_dequeue(ml)) != NULL)
928 		(*ifp->if_input)(ifp, m);
929 	NET_UNLOCK_SHARED();
930 }
931 
/*
 * Input path for virtual/pseudo interfaces delivering a single mbuf:
 * stamp the packet with this interface, count it, offer it to bpf,
 * and pass it to if_input unless the interface is in monitor mode.
 * Always consumes the mbuf.
 */
932 void
933 if_vinput(struct ifnet *ifp, struct mbuf *m)
934 {
935 #if NBPFILTER > 0
936 	caddr_t if_bpf;
937 #endif
938 
939 	m->m_pkthdr.ph_ifidx = ifp->if_index;
940 	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
941 
942 	counters_pkt(ifp->if_counters,
943 	    ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
944 
945 #if NPF > 0
946 	pf_pkt_addr_changed(m);
947 #endif
948 
949 #if NBPFILTER > 0
950 	if_bpf = ifp->if_bpf;
951 	if (if_bpf) {
		/* a non-zero return means bpf filtered the packet out */
952 		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
953 			m_freem(m);
954 			return;
955 		}
956 	}
957 #endif
958 
959 	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR)))
960 		(*ifp->if_input)(ifp, m);
961 	else
962 		m_freem(m);
963 }
964 
/*
 * Task draining the legacy netisr soft-interrupt bits under the
 * exclusive netlock.  Loops until no bits remain set, yielding to the
 * scheduler between rounds when asked; kernel-locked handlers (ppp,
 * pppoe, pfsync) take the kernel lock around their intr routine.
 * PFSYNC is deliberately handled once, after the loop.
 */
965 void
966 if_netisr(void *unused)
967 {
968 	int n, t = 0;
969 
970 	NET_LOCK();
971 
972 	while ((n = netisr) != 0) {
973 		/* Like sched_pause() but with a rwlock dance. */
974 		if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD) {
975 			NET_UNLOCK();
976 			yield();
977 			NET_LOCK();
978 		}
979 
980 		atomic_clearbits_int(&netisr, n);
981 
982 #if NETHER > 0
983 		if (n & (1 << NETISR_ARP))
984 			arpintr();
985 #endif
986 		if (n & (1 << NETISR_IP))
987 			ipintr();
988 #ifdef INET6
989 		if (n & (1 << NETISR_IPV6))
990 			ip6intr();
991 #endif
992 #if NPPP > 0
993 		if (n & (1 << NETISR_PPP)) {
994 			KERNEL_LOCK();
995 			pppintr();
996 			KERNEL_UNLOCK();
997 		}
998 #endif
999 #if NBRIDGE > 0
1000 		if (n & (1 << NETISR_BRIDGE))
1001 			bridgeintr();
1002 #endif
1003 #ifdef PIPEX
1004 		if (n & (1 << NETISR_PIPEX))
1005 			pipexintr();
1006 #endif
1007 #if NPPPOE > 0
1008 		if (n & (1 << NETISR_PPPOE)) {
1009 			KERNEL_LOCK();
1010 			pppoeintr();
1011 			KERNEL_UNLOCK();
1012 		}
1013 #endif
		/* remember which bits were seen for the post-loop handlers */
1014 		t |= n;
1015 	}
1016 
1017 #if NPFSYNC > 0
1018 	if (t & (1 << NETISR_PFSYNC)) {
1019 		KERNEL_LOCK();
1020 		pfsyncintr();
1021 		KERNEL_UNLOCK();
1022 	}
1023 #endif
1024 
1025 	NET_UNLOCK();
1026 }
1027 
/*
 * Run every hook on `hooks', dropping if_hooks_mtx around each call
 * so hooks may sleep or modify the list.  A stack-allocated cursor
 * (t_func == NULL) marks our position across the unlock; other
 * concurrent runners' cursors are skipped.
 */
1028 void
1029 if_hooks_run(struct task_list *hooks)
1030 {
1031 	struct task *t, *nt;
1032 	struct task cursor = { .t_func = NULL };
1033 	void (*func)(void *);
1034 	void *arg;
1035 
1036 	mtx_enter(&if_hooks_mtx);
1037 	for (t = TAILQ_FIRST(hooks); t != NULL; t = nt) {
1038 		if (t->t_func == NULL) { /* skip cursors */
1039 			nt = TAILQ_NEXT(t, t_entry);
1040 			continue;
1041 		}
1042 		func = t->t_func;
1043 		arg = t->t_arg;
1044 
		/* keep our place while the mutex is dropped for the call */
1045 		TAILQ_INSERT_AFTER(hooks, t, &cursor, t_entry);
1046 		mtx_leave(&if_hooks_mtx);
1047 
1048 		(*func)(arg);
1049 
1050 		mtx_enter(&if_hooks_mtx);
1051 		nt = TAILQ_NEXT(&cursor, t_entry); /* avoid _Q_INVALIDATE */
1052 		TAILQ_REMOVE(hooks, &cursor, t_entry);
1053 	}
1054 	mtx_leave(&if_hooks_mtx);
1055 }
1056 
/*
 * Make `ifp' unreachable: unlink it from the global list and index
 * map, then sleep until every outstanding if_ref() has been put.
 * After this returns no other CPU can be using the interface.
 */
1057 void
1058 if_remove(struct ifnet *ifp)
1059 {
1060 	/* Remove the interface from the list of all interfaces. */
1061 	NET_LOCK();
1062 	TAILQ_REMOVE(&ifnetlist, ifp, if_list);
1063 	NET_UNLOCK();
1064 
1065 	/* Remove the interface from the interface index map. */
1066 	if_idxmap_remove(ifp);
1067 
1068 	/* Sleep until the last reference is released. */
1069 	refcnt_finalize(&ifp->if_refcnt, "ifrm");
1070 }
1071 
/*
 * Run the interface's detach hooks under the exclusive netlock so
 * pseudo-drivers stacked on `ifp' can undo their changes first.
 */
1072 void
1073 if_deactivate(struct ifnet *ifp)
1074 {
1075 	/*
1076 	 * Call detach hooks from head to tail.  To make sure detach
1077 	 * hooks are executed in the reverse order they were added, all
1078 	 * the hooks have to be added to the head!
1079 	 */
1080 
1081 	NET_LOCK();
1082 	if_hooks_run(&ifp->if_detachhooks);
1083 	NET_UNLOCK();
1084 }
1085 
/*
 * Register a detach hook.  Inserted at the HEAD so that
 * if_deactivate() runs hooks in reverse registration order.
 */
1086 void
1087 if_detachhook_add(struct ifnet *ifp, struct task *t)
1088 {
1089 	mtx_enter(&if_hooks_mtx);
1090 	TAILQ_INSERT_HEAD(&ifp->if_detachhooks, t, t_entry);
1091 	mtx_leave(&if_hooks_mtx);
1092 }
1093 
/* Unregister a previously added detach hook. */
1094 void
1095 if_detachhook_del(struct ifnet *ifp, struct task *t)
1096 {
1097 	mtx_enter(&if_hooks_mtx);
1098 	TAILQ_REMOVE(&ifp->if_detachhooks, t, t_entry);
1099 	mtx_leave(&if_hooks_mtx);
1100 }
1101 
1102 /*
1103  * Detach an interface from everything in the kernel.  Also deallocate
1104  * private resources.
1105  */
1106 void
1107 if_detach(struct ifnet *ifp)
1108 {
1109 	struct ifaddr *ifa;
1110 	struct ifg_list *ifg;
1111 	int i, s;
1112 
1113 	/* Undo pseudo-driver changes. */
1114 	if_deactivate(ifp);
1115 
1116 	/* Other CPUs must not have a reference before we start destroying. */
1117 	if_remove(ifp);
1118 
	/* from here on, starting the queues only counts drops */
1119 	ifp->if_qstart = if_detached_qstart;
1120 
1121 	/* Wait until the start routines finished. */
1122 	ifq_barrier(&ifp->if_snd);
1123 	ifq_clr_oactive(&ifp->if_snd);
1124 
1125 #if NBPFILTER > 0
1126 	bpfdetach(ifp);
1127 #endif
1128 
1129 	NET_LOCK();
1130 	s = splnet();
1131 	ifp->if_ioctl = if_detached_ioctl;
1132 	ifp->if_watchdog = NULL;
1133 
1134 	/* Remove the watchdog timeout & task */
1135 	timeout_del(&ifp->if_slowtimo);
1136 	task_del(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1137 
1138 	/* Remove the link state task */
1139 	task_del(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1140 
	/* tear down per-protocol state that references this interface */
1141 	rti_delete(ifp);
1142 #if NETHER > 0 && defined(NFSCLIENT)
1143 	if (ifp->if_index == revarp_ifidx)
1144 		revarp_ifidx = 0;
1145 #endif
1146 #ifdef MROUTING
1147 	vif_delete(ifp);
1148 #endif
1149 	in_ifdetach(ifp);
1150 #ifdef INET6
1151 	in6_ifdetach(ifp);
1152 #endif
1153 #if NPF > 0
1154 	pfi_detach_ifnet(ifp);
1155 #endif
1156 
1157 	while ((ifg = TAILQ_FIRST(&ifp->if_groups)) != NULL)
1158 		if_delgroup(ifp, ifg->ifgl_group->ifg_group);
1159 
1160 	if_free_sadl(ifp);
1161 
1162 	/* We should not have any address left at this point. */
1163 	if (!TAILQ_EMPTY(&ifp->if_addrlist)) {
1164 #ifdef DIAGNOSTIC
1165 		printf("%s: address list non empty\n", ifp->if_xname);
1166 #endif
1167 		while ((ifa = TAILQ_FIRST(&ifp->if_addrlist)) != NULL) {
1168 			ifa_del(ifp, ifa);
1169 			ifa->ifa_ifp = NULL;
1170 			ifafree(ifa);
1171 		}
1172 	}
1173 
1174 	KASSERT(TAILQ_EMPTY(&ifp->if_addrhooks));
1175 	KASSERT(TAILQ_EMPTY(&ifp->if_linkstatehooks));
1176 	KASSERT(TAILQ_EMPTY(&ifp->if_detachhooks));
1177 
1178 #ifdef INET6
1179 	nd6_ifdetach(ifp);
1180 #endif
1181 	splx(s);
1182 	NET_UNLOCK();
1183 
1184 	/* Announce that the interface is gone. */
1185 	rtm_ifannounce(ifp, IFAN_DEPARTURE);
1186 
1187 	if (ifp->if_counters != NULL)
1188 		if_counters_free(ifp);
1189 
	/* destroy all send queues; slot 0 is embedded, the rest malloc'd */
1190 	for (i = 0; i < ifp->if_nifqs; i++)
1191 		ifq_destroy(ifp->if_ifqs[i]);
1192 	if (ifp->if_ifqs != ifp->if_snd.ifq_ifqs) {
1193 		for (i = 1; i < ifp->if_nifqs; i++) {
1194 			free(ifp->if_ifqs[i], M_DEVBUF,
1195 			    sizeof(struct ifqueue));
1196 		}
1197 		free(ifp->if_ifqs, M_DEVBUF,
1198 		    sizeof(struct ifqueue *) * ifp->if_nifqs);
1199 	}
1200 
	/* likewise for the input queues */
1201 	for (i = 0; i < ifp->if_niqs; i++)
1202 		ifiq_destroy(ifp->if_iqs[i]);
1203 	if (ifp->if_iqs != ifp->if_rcv.ifiq_ifiqs) {
1204 		for (i = 1; i < ifp->if_niqs; i++) {
1205 			free(ifp->if_iqs[i], M_DEVBUF,
1206 			    sizeof(struct ifiqueue));
1207 		}
1208 		free(ifp->if_iqs, M_DEVBUF,
1209 		    sizeof(struct ifiqueue *) * ifp->if_niqs);
1210 	}
1211 }
1212 
1213 /*
1214  * Returns true if ``ifp0'' is connected to the interface with index ``ifidx''.
1215  */
1216 int
1217 if_isconnected(const struct ifnet *ifp0, unsigned int ifidx)
1218 {
1219 	struct ifnet *ifp;
1220 	int connected = 0;
1221 
1222 	ifp = if_get(ifidx);
1223 	if (ifp == NULL)
1224 		return (0);
1225 
1226 	if (ifp0->if_index == ifp->if_index)
1227 		connected = 1;
1228 
1229 #if NBRIDGE > 0
1230 	if (ifp0->if_bridgeidx != 0 && ifp0->if_bridgeidx == ifp->if_bridgeidx)
1231 		connected = 1;
1232 #endif
1233 #if NCARP > 0
1234 	if ((ifp0->if_type == IFT_CARP &&
1235 	    ifp0->if_carpdevidx == ifp->if_index) ||
1236 	    (ifp->if_type == IFT_CARP && ifp->if_carpdevidx == ifp0->if_index))
1237 		connected = 1;
1238 #endif
1239 
1240 	if_put(ifp);
1241 	return (connected);
1242 }
1243 
1244 /*
1245  * Create a clone network interface.
1246  */
1247 int
1248 if_clone_create(const char *name, int rdomain)
1249 {
1250 	struct if_clone *ifc;
1251 	struct ifnet *ifp;
1252 	int unit, ret;
1253 
1254 	ifc = if_clone_lookup(name, &unit);
1255 	if (ifc == NULL)
1256 		return (EINVAL);
1257 
1258 	rw_enter_write(&if_cloners_lock);
1259 
1260 	if ((ifp = if_unit(name)) != NULL) {
1261 		ret = EEXIST;
1262 		goto unlock;
1263 	}
1264 
1265 	ret = (*ifc->ifc_create)(ifc, unit);
1266 
1267 	if (ret != 0 || (ifp = if_unit(name)) == NULL)
1268 		goto unlock;
1269 
1270 	NET_LOCK();
1271 	if_addgroup(ifp, ifc->ifc_name);
1272 	if (rdomain != 0)
1273 		if_setrdomain(ifp, rdomain);
1274 	NET_UNLOCK();
1275 unlock:
1276 	rw_exit_write(&if_cloners_lock);
1277 	if_put(ifp);
1278 
1279 	return (ret);
1280 }
1281 
1282 /*
1283  * Destroy a clone network interface.
1284  */
1285 int
1286 if_clone_destroy(const char *name)
1287 {
1288 	struct if_clone *ifc;
1289 	struct ifnet *ifp;
1290 	int ret;
1291 
1292 	ifc = if_clone_lookup(name, NULL);
1293 	if (ifc == NULL)
1294 		return (EINVAL);
1295 
1296 	if (ifc->ifc_destroy == NULL)
1297 		return (EOPNOTSUPP);
1298 
1299 	rw_enter_write(&if_cloners_lock);
1300 
1301 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1302 		if (strcmp(ifp->if_xname, name) == 0)
1303 			break;
1304 	}
1305 	if (ifp == NULL) {
1306 		rw_exit_write(&if_cloners_lock);
1307 		return (ENXIO);
1308 	}
1309 
1310 	NET_LOCK();
1311 	if (ifp->if_flags & IFF_UP) {
1312 		int s;
1313 		s = splnet();
1314 		if_down(ifp);
1315 		splx(s);
1316 	}
1317 	NET_UNLOCK();
1318 	ret = (*ifc->ifc_destroy)(ifp);
1319 
1320 	rw_exit_write(&if_cloners_lock);
1321 
1322 	return (ret);
1323 }
1324 
1325 /*
1326  * Look up a network interface cloner.
1327  */
1328 struct if_clone *
1329 if_clone_lookup(const char *name, int *unitp)
1330 {
1331 	struct if_clone *ifc;
1332 	const char *cp;
1333 	int unit;
1334 
1335 	/* separate interface name from unit */
1336 	for (cp = name;
1337 	    cp - name < IFNAMSIZ && *cp && (*cp < '0' || *cp > '9');
1338 	    cp++)
1339 		continue;
1340 
1341 	if (cp == name || cp - name == IFNAMSIZ || !*cp)
1342 		return (NULL);	/* No name or unit number */
1343 
1344 	if (cp - name < IFNAMSIZ-1 && *cp == '0' && cp[1] != '\0')
1345 		return (NULL);	/* unit number 0 padded */
1346 
1347 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1348 		if (strlen(ifc->ifc_name) == cp - name &&
1349 		    !strncmp(name, ifc->ifc_name, cp - name))
1350 			break;
1351 	}
1352 
1353 	if (ifc == NULL)
1354 		return (NULL);
1355 
1356 	unit = 0;
1357 	while (cp - name < IFNAMSIZ && *cp) {
1358 		if (*cp < '0' || *cp > '9' ||
1359 		    unit > (INT_MAX - (*cp - '0')) / 10) {
1360 			/* Bogus unit number. */
1361 			return (NULL);
1362 		}
1363 		unit = (unit * 10) + (*cp++ - '0');
1364 	}
1365 
1366 	if (unitp != NULL)
1367 		*unitp = unit;
1368 	return (ifc);
1369 }
1370 
1371 /*
1372  * Register a network interface cloner.
1373  */
1374 void
1375 if_clone_attach(struct if_clone *ifc)
1376 {
1377 	/*
1378 	 * we are called at kernel boot by main(), when pseudo devices are
1379 	 * being attached. The main() is the only guy which may alter the
1380 	 * if_cloners. While system is running and main() is done with
1381 	 * initialization, the if_cloners becomes immutable.
1382 	 */
1383 	KASSERT(pdevinit_done == 0);
1384 	LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list);
1385 	if_cloners_count++;
1386 }
1387 
1388 /*
1389  * Provide list of interface cloners to userspace.
1390  */
1391 int
1392 if_clone_list(struct if_clonereq *ifcr)
1393 {
1394 	char outbuf[IFNAMSIZ], *dst;
1395 	struct if_clone *ifc;
1396 	int count, error = 0;
1397 
1398 	if ((dst = ifcr->ifcr_buffer) == NULL) {
1399 		/* Just asking how many there are. */
1400 		ifcr->ifcr_total = if_cloners_count;
1401 		return (0);
1402 	}
1403 
1404 	if (ifcr->ifcr_count < 0)
1405 		return (EINVAL);
1406 
1407 	ifcr->ifcr_total = if_cloners_count;
1408 	count = MIN(if_cloners_count, ifcr->ifcr_count);
1409 
1410 	LIST_FOREACH(ifc, &if_cloners, ifc_list) {
1411 		if (count == 0)
1412 			break;
1413 		bzero(outbuf, sizeof outbuf);
1414 		strlcpy(outbuf, ifc->ifc_name, IFNAMSIZ);
1415 		error = copyout(outbuf, dst, IFNAMSIZ);
1416 		if (error)
1417 			break;
1418 		count--;
1419 		dst += IFNAMSIZ;
1420 	}
1421 
1422 	return (error);
1423 }
1424 
1425 /*
1426  * set queue congestion marker
1427  */
1428 void
1429 if_congestion(void)
1430 {
1431 	extern int ticks;
1432 
1433 	ifq_congestion = ticks;
1434 }
1435 
1436 int
1437 if_congested(void)
1438 {
1439 	extern int ticks;
1440 	int diff;
1441 
1442 	diff = ticks - ifq_congestion;
1443 	if (diff < 0) {
1444 		ifq_congestion = ticks - hz;
1445 		return (0);
1446 	}
1447 
1448 	return (diff <= (hz / 100));
1449 }
1450 
/*
 * Exact sockaddr comparison; the number of bytes compared is taken
 * from the first argument's sa_len.
 */
#define	equal(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2),	\
	(a1)->sa_len) == 0)
1454 
1455 /*
1456  * Locate an interface based on a complete address.
1457  */
1458 struct ifaddr *
1459 ifa_ifwithaddr(struct sockaddr *addr, u_int rtableid)
1460 {
1461 	struct ifnet *ifp;
1462 	struct ifaddr *ifa;
1463 	u_int rdomain;
1464 
1465 	NET_ASSERT_LOCKED();
1466 
1467 	rdomain = rtable_l2(rtableid);
1468 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1469 		if (ifp->if_rdomain != rdomain)
1470 			continue;
1471 
1472 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1473 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1474 				continue;
1475 
1476 			if (equal(addr, ifa->ifa_addr)) {
1477 				return (ifa);
1478 			}
1479 		}
1480 	}
1481 	return (NULL);
1482 }
1483 
1484 /*
1485  * Locate the point to point interface with a given destination address.
1486  */
1487 struct ifaddr *
1488 ifa_ifwithdstaddr(struct sockaddr *addr, u_int rdomain)
1489 {
1490 	struct ifnet *ifp;
1491 	struct ifaddr *ifa;
1492 
1493 	NET_ASSERT_LOCKED();
1494 
1495 	rdomain = rtable_l2(rdomain);
1496 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1497 		if (ifp->if_rdomain != rdomain)
1498 			continue;
1499 		if (ifp->if_flags & IFF_POINTOPOINT) {
1500 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1501 				if (ifa->ifa_addr->sa_family !=
1502 				    addr->sa_family || ifa->ifa_dstaddr == NULL)
1503 					continue;
1504 				if (equal(addr, ifa->ifa_dstaddr)) {
1505 					return (ifa);
1506 				}
1507 			}
1508 		}
1509 	}
1510 	return (NULL);
1511 }
1512 
1513 /*
1514  * Find an interface address specific to an interface best matching
1515  * a given address.
1516  */
1517 struct ifaddr *
1518 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1519 {
1520 	struct ifaddr *ifa;
1521 	char *cp, *cp2, *cp3;
1522 	char *cplim;
1523 	struct ifaddr *ifa_maybe = NULL;
1524 	u_int af = addr->sa_family;
1525 
1526 	if (af >= AF_MAX)
1527 		return (NULL);
1528 	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1529 		if (ifa->ifa_addr->sa_family != af)
1530 			continue;
1531 		if (ifa_maybe == NULL)
1532 			ifa_maybe = ifa;
1533 		if (ifa->ifa_netmask == 0 || ifp->if_flags & IFF_POINTOPOINT) {
1534 			if (equal(addr, ifa->ifa_addr) ||
1535 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
1536 				return (ifa);
1537 			continue;
1538 		}
1539 		cp = addr->sa_data;
1540 		cp2 = ifa->ifa_addr->sa_data;
1541 		cp3 = ifa->ifa_netmask->sa_data;
1542 		cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1543 		for (; cp3 < cplim; cp3++)
1544 			if ((*cp++ ^ *cp2++) & *cp3)
1545 				break;
1546 		if (cp3 == cplim)
1547 			return (ifa);
1548 	}
1549 	return (ifa_maybe);
1550 }
1551 
/* Placeholder if_rtrequest handler that ignores all route requests. */
void
if_rtrequest_dummy(struct ifnet *ifp, int req, struct rtentry *rt)
{
}
1556 
1557 /*
1558  * Default action when installing a local route on a point-to-point
1559  * interface.
1560  */
1561 void
1562 p2p_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
1563 {
1564 	struct ifnet *lo0ifp;
1565 	struct ifaddr *ifa, *lo0ifa;
1566 
1567 	switch (req) {
1568 	case RTM_ADD:
1569 		if (!ISSET(rt->rt_flags, RTF_LOCAL))
1570 			break;
1571 
1572 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1573 			if (memcmp(rt_key(rt), ifa->ifa_addr,
1574 			    rt_key(rt)->sa_len) == 0)
1575 				break;
1576 		}
1577 
1578 		if (ifa == NULL)
1579 			break;
1580 
1581 		KASSERT(ifa == rt->rt_ifa);
1582 
1583 		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
1584 		KASSERT(lo0ifp != NULL);
1585 		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
1586 			if (lo0ifa->ifa_addr->sa_family ==
1587 			    ifa->ifa_addr->sa_family)
1588 				break;
1589 		}
1590 		if_put(lo0ifp);
1591 
1592 		if (lo0ifa == NULL)
1593 			break;
1594 
1595 		rt->rt_flags &= ~RTF_LLINFO;
1596 		break;
1597 	case RTM_DELETE:
1598 	case RTM_RESOLVE:
1599 	default:
1600 		break;
1601 	}
1602 }
1603 
/*
 * bpf tap for point-to-point interfaces: hand the packet to bpf
 * together with the address family recorded in the mbuf header.
 */
int
p2p_bpf_mtap(caddr_t if_bpf, const struct mbuf *m, u_int dir)
{
#if NBPFILTER > 0
	return (bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, m, dir));
#else
	/* No bpf compiled into this kernel: report success. */
	return (0);
#endif
}
1613 
1614 void
1615 p2p_input(struct ifnet *ifp, struct mbuf *m)
1616 {
1617 	void (*input)(struct ifnet *, struct mbuf *);
1618 
1619 	switch (m->m_pkthdr.ph_family) {
1620 	case AF_INET:
1621 		input = ipv4_input;
1622 		break;
1623 #ifdef INET6
1624 	case AF_INET6:
1625 		input = ipv6_input;
1626 		break;
1627 #endif
1628 #ifdef MPLS
1629 	case AF_MPLS:
1630 		input = mpls_input;
1631 		break;
1632 #endif
1633 	default:
1634 		m_freem(m);
1635 		return;
1636 	}
1637 
1638 	(*input)(ifp, m);
1639 }
1640 
1641 /*
1642  * Bring down all interfaces
1643  */
1644 void
1645 if_downall(void)
1646 {
1647 	struct ifreq ifrq;	/* XXX only partly built */
1648 	struct ifnet *ifp;
1649 
1650 	NET_LOCK();
1651 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1652 		if ((ifp->if_flags & IFF_UP) == 0)
1653 			continue;
1654 		if_down(ifp);
1655 		ifrq.ifr_flags = ifp->if_flags;
1656 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
1657 	}
1658 	NET_UNLOCK();
1659 }
1660 
1661 /*
1662  * Mark an interface down and notify protocols of
1663  * the transition.
1664  */
1665 void
1666 if_down(struct ifnet *ifp)
1667 {
1668 	NET_ASSERT_LOCKED();
1669 
1670 	ifp->if_flags &= ~IFF_UP;
1671 	getmicrotime(&ifp->if_lastchange);
1672 	ifq_purge(&ifp->if_snd);
1673 
1674 	if_linkstate(ifp);
1675 }
1676 
1677 /*
1678  * Mark an interface up and notify protocols of
1679  * the transition.
1680  */
1681 void
1682 if_up(struct ifnet *ifp)
1683 {
1684 	NET_ASSERT_LOCKED();
1685 
1686 	ifp->if_flags |= IFF_UP;
1687 	getmicrotime(&ifp->if_lastchange);
1688 
1689 #ifdef INET6
1690 	/* Userland expects the kernel to set ::1 on default lo(4). */
1691 	if (ifp->if_index == rtable_loindex(ifp->if_rdomain))
1692 		in6_ifattach(ifp);
1693 #endif
1694 
1695 	if_linkstate(ifp);
1696 }
1697 
1698 /*
1699  * Notify userland, the routing table and hooks owner of
1700  * a link-state transition.
1701  */
1702 void
1703 if_linkstate_task(void *xifidx)
1704 {
1705 	unsigned int ifidx = (unsigned long)xifidx;
1706 	struct ifnet *ifp;
1707 
1708 	KERNEL_LOCK();
1709 	NET_LOCK();
1710 
1711 	ifp = if_get(ifidx);
1712 	if (ifp != NULL)
1713 		if_linkstate(ifp);
1714 	if_put(ifp);
1715 
1716 	NET_UNLOCK();
1717 	KERNEL_UNLOCK();
1718 }
1719 
/* Report a link-state change: routing socket, routing table, hooks. */
void
if_linkstate(struct ifnet *ifp)
{
	NET_ASSERT_LOCKED();

	rtm_ifchg(ifp);		/* announce on the routing socket */
	rt_if_track(ifp);

	if_hooks_run(&ifp->if_linkstatehooks);
}
1730 
/* Register a task to run on link-state transitions. */
void
if_linkstatehook_add(struct ifnet *ifp, struct task *t)
{
	/* Inserted at the head like the other hook lists (if_hooks_mtx). */
	mtx_enter(&if_hooks_mtx);
	TAILQ_INSERT_HEAD(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1738 
/* Remove a previously registered link-state hook. */
void
if_linkstatehook_del(struct ifnet *ifp, struct task *t)
{
	mtx_enter(&if_hooks_mtx);
	TAILQ_REMOVE(&ifp->if_linkstatehooks, t, t_entry);
	mtx_leave(&if_hooks_mtx);
}
1746 
1747 /*
1748  * Schedule a link state change task.
1749  */
1750 void
1751 if_link_state_change(struct ifnet *ifp)
1752 {
1753 	task_add(net_tq(ifp->if_index), &ifp->if_linkstatetask);
1754 }
1755 
1756 /*
1757  * Handle interface watchdog timer routine.  Called
1758  * from softclock, we decrement timer (if set) and
1759  * call the appropriate interface routine on expiration.
1760  */
1761 void
1762 if_slowtimo(void *arg)
1763 {
1764 	struct ifnet *ifp = arg;
1765 	int s = splnet();
1766 
1767 	if (ifp->if_watchdog) {
1768 		if (ifp->if_timer > 0 && --ifp->if_timer == 0)
1769 			task_add(net_tq(ifp->if_index), &ifp->if_watchdogtask);
1770 		timeout_add_sec(&ifp->if_slowtimo, IFNET_SLOWTIMO);
1771 	}
1772 	splx(s);
1773 }
1774 
/* Task that invokes the driver's watchdog; scheduled by if_slowtimo(). */
void
if_watchdog_task(void *xifidx)
{
	/* The interface index travels in the task argument. */
	unsigned int ifidx = (unsigned long)xifidx;
	struct ifnet *ifp;
	int s;

	/* The interface may be gone by the time the task runs. */
	ifp = if_get(ifidx);
	if (ifp == NULL)
		return;

	KERNEL_LOCK();
	s = splnet();
	if (ifp->if_watchdog)
		(*ifp->if_watchdog)(ifp);
	splx(s);
	KERNEL_UNLOCK();

	if_put(ifp);
}
1795 
1796 /*
1797  * Map interface name to interface structure pointer.
1798  */
1799 struct ifnet *
1800 if_unit(const char *name)
1801 {
1802 	struct ifnet *ifp;
1803 
1804 	KERNEL_ASSERT_LOCKED();
1805 
1806 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
1807 		if (strcmp(ifp->if_xname, name) == 0) {
1808 			if_ref(ifp);
1809 			return (ifp);
1810 		}
1811 	}
1812 
1813 	return (NULL);
1814 }
1815 
1816 /*
1817  * Map interface index to interface structure pointer.
1818  */
1819 struct ifnet *
1820 if_get(unsigned int index)
1821 {
1822 	struct ifnet **if_map;
1823 	struct ifnet *ifp = NULL;
1824 
1825 	if (index == 0)
1826 		return (NULL);
1827 
1828 	smr_read_enter();
1829 	if_map = SMR_PTR_GET(&if_idxmap.map);
1830 	if (index < if_idxmap_limit(if_map)) {
1831 		ifp = SMR_PTR_GET(&if_map[index]);
1832 		if (ifp != NULL) {
1833 			KASSERT(ifp->if_index == index);
1834 			if_ref(ifp);
1835 		}
1836 	}
1837 	smr_read_leave();
1838 
1839 	return (ifp);
1840 }
1841 
/* Take a reference; returns its argument for call-chaining. */
struct ifnet *
if_ref(struct ifnet *ifp)
{
	refcnt_take(&ifp->if_refcnt);

	return (ifp);
}
1849 
1850 void
1851 if_put(struct ifnet *ifp)
1852 {
1853 	if (ifp == NULL)
1854 		return;
1855 
1856 	refcnt_rele_wake(&ifp->if_refcnt);
1857 }
1858 
/*
 * Set the link-level address of an interface.  Returns EINVAL if the
 * interface has no link-level address to replace.
 * NOTE(review): assumes the ifnet is embedded in a struct arpcom
 * (ethernet-like) — callers restrict the if_type accordingly.
 */
int
if_setlladdr(struct ifnet *ifp, const uint8_t *lladdr)
{
	if (ifp->if_sadl == NULL)
		return (EINVAL);

	/* Update both the arpcom copy and the link-level sockaddr. */
	memcpy(((struct arpcom *)ifp)->ac_enaddr, lladdr, ETHER_ADDR_LEN);
	memcpy(LLADDR(ifp->if_sadl), lladdr, ETHER_ADDR_LEN);

	return (0);
}
1870 
/*
 * Create routing domain ``rdomain'' together with its loopback
 * interface lo<rdomain>.  ``ifp'' is the interface being moved into
 * the domain; it matters only for the EEXIST special case below.
 */
int
if_createrdomain(int rdomain, struct ifnet *ifp)
{
	int error;
	struct ifnet *loifp;
	char loifname[IFNAMSIZ];
	unsigned int unit = rdomain;

	if ((error = rtable_add(rdomain)) != 0)
		return (error);
	/* Refuse a routing table that already contains routes. */
	if (!rtable_empty(rdomain))
		return (EEXIST);

	/* Create rdomain including its loopback if with unit == rdomain */
	snprintf(loifname, sizeof(loifname), "lo%u", unit);
	error = if_clone_create(loifname, 0);
	if ((loifp = if_unit(loifname)) == NULL)
		return (ENXIO);
	/*
	 * EEXIST from the clone create is tolerated when ``ifp''
	 * itself is that loopback interface.
	 */
	if (error && (ifp != loifp || error != EEXIST)) {
		if_put(loifp);
		return (error);
	}

	/* Bind the loopback to the new domain and record it. */
	rtable_l2set(rdomain, rdomain, loifp->if_index);
	loifp->if_rdomain = rdomain;
	if_put(loifp);

	return (0);
}
1900 
/*
 * Move an interface into routing domain ``rdomain''.  Returns 0 or an
 * errno; all validation happens before any state is torn down.
 */
int
if_setrdomain(struct ifnet *ifp, int rdomain)
{
	struct ifreq ifr;
	int error, up = 0, s;

	if (rdomain < 0 || rdomain > RT_TABLEID_MAX)
		return (EINVAL);

	/* An rdomain's own loopback interface may not leave it. */
	if (rdomain != ifp->if_rdomain &&
	    (ifp->if_flags & IFF_LOOPBACK) &&
	    (ifp->if_index == rtable_loindex(ifp->if_rdomain)))
		return (EPERM);

	if (!rtable_exists(rdomain))
		return (ESRCH);

	/* make sure that the routing table is a real rdomain */
	if (rdomain != rtable_l2(rdomain))
		return (EINVAL);

	if (rdomain != ifp->if_rdomain) {
		s = splnet();
		/*
		 * We are tearing down the world.
		 * Take down the IF so:
		 * 1. everything that cares gets a message
		 * 2. the automagic IPv6 bits are recreated
		 */
		if (ifp->if_flags & IFF_UP) {
			up = 1;
			if_down(ifp);
		}
		rti_delete(ifp);
#ifdef MROUTING
		vif_delete(ifp);
#endif
		in_ifdetach(ifp);
#ifdef INET6
		in6_ifdetach(ifp);
#endif
		splx(s);
	}

	/* Let devices like enc(4) or mpe(4) know about the change */
	ifr.ifr_rdomainid = rdomain;
	if ((error = (*ifp->if_ioctl)(ifp, SIOCSIFRDOMAIN,
	    (caddr_t)&ifr)) != ENOTTY)
		return (error);
	error = 0;

	/* Add interface to the specified rdomain */
	ifp->if_rdomain = rdomain;

	/* If we took down the IF, bring it back */
	if (up) {
		s = splnet();
		if_up(ifp);
		splx(s);
	}

	return (0);
}
1964 
1965 /*
1966  * Interface ioctls.
1967  */
1968 int
1969 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p)
1970 {
1971 	struct ifnet *ifp;
1972 	struct ifreq *ifr = (struct ifreq *)data;
1973 	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
1974 	struct if_afreq *ifar = (struct if_afreq *)data;
1975 	char ifdescrbuf[IFDESCRSIZE];
1976 	char ifrtlabelbuf[RTLABEL_LEN];
1977 	int s, error = 0, oif_xflags;
1978 	size_t bytesdone;
1979 	unsigned short oif_flags;
1980 
1981 	switch (cmd) {
1982 	case SIOCIFCREATE:
1983 		if ((error = suser(p)) != 0)
1984 			return (error);
1985 		KERNEL_LOCK();
1986 		error = if_clone_create(ifr->ifr_name, 0);
1987 		KERNEL_UNLOCK();
1988 		return (error);
1989 	case SIOCIFDESTROY:
1990 		if ((error = suser(p)) != 0)
1991 			return (error);
1992 		KERNEL_LOCK();
1993 		error = if_clone_destroy(ifr->ifr_name);
1994 		KERNEL_UNLOCK();
1995 		return (error);
1996 	case SIOCSIFGATTR:
1997 		if ((error = suser(p)) != 0)
1998 			return (error);
1999 		KERNEL_LOCK();
2000 		NET_LOCK();
2001 		error = if_setgroupattribs(data);
2002 		NET_UNLOCK();
2003 		KERNEL_UNLOCK();
2004 		return (error);
2005 	case SIOCGIFCONF:
2006 	case SIOCIFGCLONERS:
2007 	case SIOCGIFGMEMB:
2008 	case SIOCGIFGATTR:
2009 	case SIOCGIFGLIST:
2010 	case SIOCGIFFLAGS:
2011 	case SIOCGIFXFLAGS:
2012 	case SIOCGIFMETRIC:
2013 	case SIOCGIFMTU:
2014 	case SIOCGIFHARDMTU:
2015 	case SIOCGIFDATA:
2016 	case SIOCGIFDESCR:
2017 	case SIOCGIFRTLABEL:
2018 	case SIOCGIFPRIORITY:
2019 	case SIOCGIFRDOMAIN:
2020 	case SIOCGIFGROUP:
2021 	case SIOCGIFLLPRIO:
2022 		error = ifioctl_get(cmd, data);
2023 		return (error);
2024 	}
2025 
2026 	KERNEL_LOCK();
2027 
2028 	ifp = if_unit(ifr->ifr_name);
2029 	if (ifp == NULL) {
2030 		KERNEL_UNLOCK();
2031 		return (ENXIO);
2032 	}
2033 	oif_flags = ifp->if_flags;
2034 	oif_xflags = ifp->if_xflags;
2035 
2036 	switch (cmd) {
2037 	case SIOCIFAFATTACH:
2038 	case SIOCIFAFDETACH:
2039 		if ((error = suser(p)) != 0)
2040 			break;
2041 		NET_LOCK();
2042 		switch (ifar->ifar_af) {
2043 		case AF_INET:
2044 			/* attach is a noop for AF_INET */
2045 			if (cmd == SIOCIFAFDETACH)
2046 				in_ifdetach(ifp);
2047 			break;
2048 #ifdef INET6
2049 		case AF_INET6:
2050 			if (cmd == SIOCIFAFATTACH)
2051 				error = in6_ifattach(ifp);
2052 			else
2053 				in6_ifdetach(ifp);
2054 			break;
2055 #endif /* INET6 */
2056 		default:
2057 			error = EAFNOSUPPORT;
2058 		}
2059 		NET_UNLOCK();
2060 		break;
2061 
2062 	case SIOCSIFXFLAGS:
2063 		if ((error = suser(p)) != 0)
2064 			break;
2065 
2066 		NET_LOCK();
2067 #ifdef INET6
2068 		if ((ISSET(ifr->ifr_flags, IFXF_AUTOCONF6) ||
2069 		    ISSET(ifr->ifr_flags, IFXF_AUTOCONF6TEMP)) &&
2070 		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6) &&
2071 		    !ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)) {
2072 			error = in6_ifattach(ifp);
2073 			if (error != 0) {
2074 				NET_UNLOCK();
2075 				break;
2076 			}
2077 		}
2078 
2079 		if (ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
2080 		    !ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
2081 			ifp->if_xflags |= IFXF_INET6_NOSOII;
2082 
2083 		if (!ISSET(ifr->ifr_flags, IFXF_INET6_NOSOII) &&
2084 		    ISSET(ifp->if_xflags, IFXF_INET6_NOSOII))
2085 			ifp->if_xflags &= ~IFXF_INET6_NOSOII;
2086 
2087 #endif	/* INET6 */
2088 
2089 #ifdef MPLS
2090 		if (ISSET(ifr->ifr_flags, IFXF_MPLS) &&
2091 		    !ISSET(ifp->if_xflags, IFXF_MPLS)) {
2092 			s = splnet();
2093 			ifp->if_xflags |= IFXF_MPLS;
2094 			ifp->if_ll_output = ifp->if_output;
2095 			ifp->if_output = mpls_output;
2096 			splx(s);
2097 		}
2098 		if (ISSET(ifp->if_xflags, IFXF_MPLS) &&
2099 		    !ISSET(ifr->ifr_flags, IFXF_MPLS)) {
2100 			s = splnet();
2101 			ifp->if_xflags &= ~IFXF_MPLS;
2102 			ifp->if_output = ifp->if_ll_output;
2103 			ifp->if_ll_output = NULL;
2104 			splx(s);
2105 		}
2106 #endif	/* MPLS */
2107 
2108 #ifndef SMALL_KERNEL
2109 		if (ifp->if_capabilities & IFCAP_WOL) {
2110 			if (ISSET(ifr->ifr_flags, IFXF_WOL) &&
2111 			    !ISSET(ifp->if_xflags, IFXF_WOL)) {
2112 				s = splnet();
2113 				ifp->if_xflags |= IFXF_WOL;
2114 				error = ifp->if_wol(ifp, 1);
2115 				splx(s);
2116 			}
2117 			if (ISSET(ifp->if_xflags, IFXF_WOL) &&
2118 			    !ISSET(ifr->ifr_flags, IFXF_WOL)) {
2119 				s = splnet();
2120 				ifp->if_xflags &= ~IFXF_WOL;
2121 				error = ifp->if_wol(ifp, 0);
2122 				splx(s);
2123 			}
2124 		} else if (ISSET(ifr->ifr_flags, IFXF_WOL)) {
2125 			ifr->ifr_flags &= ~IFXF_WOL;
2126 			error = ENOTSUP;
2127 		}
2128 #endif
2129 		if (ISSET(ifr->ifr_flags, IFXF_LRO) !=
2130 		    ISSET(ifp->if_xflags, IFXF_LRO))
2131 			error = ifsetlro(ifp, ISSET(ifr->ifr_flags, IFXF_LRO));
2132 
2133 		if (error == 0)
2134 			ifp->if_xflags = (ifp->if_xflags & IFXF_CANTCHANGE) |
2135 				(ifr->ifr_flags & ~IFXF_CANTCHANGE);
2136 
2137 		if (!ISSET(ifp->if_flags, IFF_UP) &&
2138 		    ((!ISSET(oif_xflags, IFXF_AUTOCONF4) &&
2139 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF4)) ||
2140 		    (!ISSET(oif_xflags, IFXF_AUTOCONF6) &&
2141 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6)) ||
2142 		    (!ISSET(oif_xflags, IFXF_AUTOCONF6TEMP) &&
2143 		    ISSET(ifp->if_xflags, IFXF_AUTOCONF6TEMP)))) {
2144 			ifr->ifr_flags = ifp->if_flags | IFF_UP;
2145 			goto forceup;
2146 		}
2147 
2148 		NET_UNLOCK();
2149 		break;
2150 
2151 	case SIOCSIFFLAGS:
2152 		if ((error = suser(p)) != 0)
2153 			break;
2154 
2155 		NET_LOCK();
2156 forceup:
2157 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
2158 			(ifr->ifr_flags & ~IFF_CANTCHANGE);
2159 		error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, data);
2160 		if (error != 0) {
2161 			ifp->if_flags = oif_flags;
2162 			if (cmd == SIOCSIFXFLAGS)
2163 				ifp->if_xflags = oif_xflags;
2164 		} else if (ISSET(oif_flags ^ ifp->if_flags, IFF_UP)) {
2165 			s = splnet();
2166 			if (ISSET(ifp->if_flags, IFF_UP))
2167 				if_up(ifp);
2168 			else
2169 				if_down(ifp);
2170 			splx(s);
2171 		}
2172 		NET_UNLOCK();
2173 		break;
2174 
2175 	case SIOCSIFMETRIC:
2176 		if ((error = suser(p)) != 0)
2177 			break;
2178 		NET_LOCK();
2179 		ifp->if_metric = ifr->ifr_metric;
2180 		NET_UNLOCK();
2181 		break;
2182 
2183 	case SIOCSIFMTU:
2184 		if ((error = suser(p)) != 0)
2185 			break;
2186 		NET_LOCK();
2187 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2188 		NET_UNLOCK();
2189 		if (error == 0)
2190 			rtm_ifchg(ifp);
2191 		break;
2192 
2193 	case SIOCSIFDESCR:
2194 		if ((error = suser(p)) != 0)
2195 			break;
2196 		error = copyinstr(ifr->ifr_data, ifdescrbuf,
2197 		    IFDESCRSIZE, &bytesdone);
2198 		if (error == 0) {
2199 			(void)memset(ifp->if_description, 0, IFDESCRSIZE);
2200 			strlcpy(ifp->if_description, ifdescrbuf, IFDESCRSIZE);
2201 		}
2202 		break;
2203 
2204 	case SIOCSIFRTLABEL:
2205 		if ((error = suser(p)) != 0)
2206 			break;
2207 		error = copyinstr(ifr->ifr_data, ifrtlabelbuf,
2208 		    RTLABEL_LEN, &bytesdone);
2209 		if (error == 0) {
2210 			rtlabel_unref(ifp->if_rtlabelid);
2211 			ifp->if_rtlabelid = rtlabel_name2id(ifrtlabelbuf);
2212 		}
2213 		break;
2214 
2215 	case SIOCSIFPRIORITY:
2216 		if ((error = suser(p)) != 0)
2217 			break;
2218 		if (ifr->ifr_metric < 0 || ifr->ifr_metric > 15) {
2219 			error = EINVAL;
2220 			break;
2221 		}
2222 		ifp->if_priority = ifr->ifr_metric;
2223 		break;
2224 
2225 	case SIOCSIFRDOMAIN:
2226 		if ((error = suser(p)) != 0)
2227 			break;
2228 		error = if_createrdomain(ifr->ifr_rdomainid, ifp);
2229 		if (!error || error == EEXIST) {
2230 			NET_LOCK();
2231 			error = if_setrdomain(ifp, ifr->ifr_rdomainid);
2232 			NET_UNLOCK();
2233 		}
2234 		break;
2235 
2236 	case SIOCAIFGROUP:
2237 		if ((error = suser(p)))
2238 			break;
2239 		NET_LOCK();
2240 		error = if_addgroup(ifp, ifgr->ifgr_group);
2241 		if (error == 0) {
2242 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2243 			if (error == ENOTTY)
2244 				error = 0;
2245 		}
2246 		NET_UNLOCK();
2247 		break;
2248 
2249 	case SIOCDIFGROUP:
2250 		if ((error = suser(p)))
2251 			break;
2252 		NET_LOCK();
2253 		error = (*ifp->if_ioctl)(ifp, cmd, data);
2254 		if (error == ENOTTY)
2255 			error = 0;
2256 		if (error == 0)
2257 			error = if_delgroup(ifp, ifgr->ifgr_group);
2258 		NET_UNLOCK();
2259 		break;
2260 
2261 	case SIOCSIFLLADDR:
2262 		if ((error = suser(p)))
2263 			break;
2264 		if ((ifp->if_sadl == NULL) ||
2265 		    (ifr->ifr_addr.sa_len != ETHER_ADDR_LEN) ||
2266 		    (ETHER_IS_MULTICAST(ifr->ifr_addr.sa_data))) {
2267 			error = EINVAL;
2268 			break;
2269 		}
2270 		NET_LOCK();
2271 		switch (ifp->if_type) {
2272 		case IFT_ETHER:
2273 		case IFT_CARP:
2274 		case IFT_XETHER:
2275 		case IFT_ISO88025:
2276 			error = (*ifp->if_ioctl)(ifp, cmd, data);
2277 			if (error == ENOTTY)
2278 				error = 0;
2279 			if (error == 0)
2280 				error = if_setlladdr(ifp,
2281 				    ifr->ifr_addr.sa_data);
2282 			break;
2283 		default:
2284 			error = ENODEV;
2285 		}
2286 
2287 		if (error == 0)
2288 			ifnewlladdr(ifp);
2289 		NET_UNLOCK();
2290 		if (error == 0)
2291 			rtm_ifchg(ifp);
2292 		break;
2293 
2294 	case SIOCSIFLLPRIO:
2295 		if ((error = suser(p)))
2296 			break;
2297 		if (ifr->ifr_llprio < IFQ_MINPRIO ||
2298 		    ifr->ifr_llprio > IFQ_MAXPRIO) {
2299 			error = EINVAL;
2300 			break;
2301 		}
2302 		NET_LOCK();
2303 		ifp->if_llprio = ifr->ifr_llprio;
2304 		NET_UNLOCK();
2305 		break;
2306 
2307 	case SIOCGIFSFFPAGE:
2308 		error = suser(p);
2309 		if (error != 0)
2310 			break;
2311 
2312 		error = if_sffpage_check(data);
2313 		if (error != 0)
2314 			break;
2315 
2316 		/* don't take NET_LOCK because i2c reads take a long time */
2317 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2318 		break;
2319 
2320 	case SIOCSIFMEDIA:
2321 		if ((error = suser(p)) != 0)
2322 			break;
2323 		/* FALLTHROUGH */
2324 	case SIOCGIFMEDIA:
2325 		/* net lock is not needed */
2326 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2327 		break;
2328 
2329 	case SIOCSETKALIVE:
2330 	case SIOCDIFPHYADDR:
2331 	case SIOCSLIFPHYADDR:
2332 	case SIOCSLIFPHYRTABLE:
2333 	case SIOCSLIFPHYTTL:
2334 	case SIOCSLIFPHYDF:
2335 	case SIOCSLIFPHYECN:
2336 	case SIOCADDMULTI:
2337 	case SIOCDELMULTI:
2338 	case SIOCSVNETID:
2339 	case SIOCDVNETID:
2340 	case SIOCSVNETFLOWID:
2341 	case SIOCSTXHPRIO:
2342 	case SIOCSRXHPRIO:
2343 	case SIOCSIFPAIR:
2344 	case SIOCSIFPARENT:
2345 	case SIOCDIFPARENT:
2346 	case SIOCSETMPWCFG:
2347 	case SIOCSETLABEL:
2348 	case SIOCDELLABEL:
2349 	case SIOCSPWE3CTRLWORD:
2350 	case SIOCSPWE3FAT:
2351 	case SIOCSPWE3NEIGHBOR:
2352 	case SIOCDPWE3NEIGHBOR:
2353 #if NBRIDGE > 0
2354 	case SIOCBRDGADD:
2355 	case SIOCBRDGDEL:
2356 	case SIOCBRDGSIFFLGS:
2357 	case SIOCBRDGSCACHE:
2358 	case SIOCBRDGADDS:
2359 	case SIOCBRDGDELS:
2360 	case SIOCBRDGSADDR:
2361 	case SIOCBRDGSTO:
2362 	case SIOCBRDGDADDR:
2363 	case SIOCBRDGFLUSH:
2364 	case SIOCBRDGADDL:
2365 	case SIOCBRDGSIFPROT:
2366 	case SIOCBRDGARL:
2367 	case SIOCBRDGFRL:
2368 	case SIOCBRDGSPRI:
2369 	case SIOCBRDGSHT:
2370 	case SIOCBRDGSFD:
2371 	case SIOCBRDGSMA:
2372 	case SIOCBRDGSIFPRIO:
2373 	case SIOCBRDGSIFCOST:
2374 	case SIOCBRDGSTXHC:
2375 	case SIOCBRDGSPROTO:
2376 #endif
2377 		if ((error = suser(p)) != 0)
2378 			break;
2379 		/* FALLTHROUGH */
2380 	default:
2381 		error = pru_control(so, cmd, data, ifp);
2382 		if (error != EOPNOTSUPP)
2383 			break;
2384 		switch (cmd) {
2385 		case SIOCAIFADDR:
2386 		case SIOCDIFADDR:
2387 		case SIOCSIFADDR:
2388 		case SIOCSIFNETMASK:
2389 		case SIOCSIFDSTADDR:
2390 		case SIOCSIFBRDADDR:
2391 #ifdef INET6
2392 		case SIOCAIFADDR_IN6:
2393 		case SIOCDIFADDR_IN6:
2394 #endif
2395 			error = suser(p);
2396 			break;
2397 		default:
2398 			error = 0;
2399 			break;
2400 		}
2401 		if (error)
2402 			break;
2403 		NET_LOCK();
2404 		error = ((*ifp->if_ioctl)(ifp, cmd, data));
2405 		NET_UNLOCK();
2406 		break;
2407 	}
2408 
2409 	if (oif_flags != ifp->if_flags || oif_xflags != ifp->if_xflags) {
2410 		/* if_up() and if_down() already sent an update, skip here */
2411 		if (((oif_flags ^ ifp->if_flags) & IFF_UP) == 0)
2412 			rtm_ifchg(ifp);
2413 	}
2414 
2415 	if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0)
2416 		getmicrotime(&ifp->if_lastchange);
2417 
2418 	KERNEL_UNLOCK();
2419 
2420 	if_put(ifp);
2421 
2422 	return (error);
2423 }
2424 
/*
 * Handle read-only ("get") interface ioctls.
 *
 * Global requests (interface list, cloners, group queries) are served
 * first, under the shared net lock where needed.  Per-interface
 * requests look the interface up by name and copy a single field out
 * to the caller-supplied ifreq.
 *
 * Returns 0 or an errno; ENXIO if the named interface does not exist.
 */
int
ifioctl_get(u_long cmd, caddr_t data)
{
	struct ifnet *ifp;
	struct ifreq *ifr = (struct ifreq *)data;
	char ifdescrbuf[IFDESCRSIZE];
	char ifrtlabelbuf[RTLABEL_LEN];
	int error = 0;
	size_t bytesdone;

	/* Requests that are not tied to a single interface. */
	switch(cmd) {
	case SIOCGIFCONF:
		NET_LOCK_SHARED();
		error = ifconf(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		return (error);
	case SIOCGIFGMEMB:
		NET_LOCK_SHARED();
		error = if_getgroupmembers(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGATTR:
		NET_LOCK_SHARED();
		error = if_getgroupattribs(data);
		NET_UNLOCK_SHARED();
		return (error);
	case SIOCGIFGLIST:
		NET_LOCK_SHARED();
		error = if_getgrouplist(data);
		NET_UNLOCK_SHARED();
		return (error);
	}

	/*
	 * NOTE(review): kernel lock presumably still required around
	 * if_unit() and the fields read below -- confirm before relaxing.
	 */
	KERNEL_LOCK();

	ifp = if_unit(ifr->ifr_name);
	if (ifp == NULL) {
		KERNEL_UNLOCK();
		return (ENXIO);
	}

	NET_LOCK_SHARED();

	switch(cmd) {
	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		/* IFF_OACTIVE is synthesized from the send queue state. */
		if (ifq_is_oactive(&ifp->if_snd))
			ifr->ifr_flags |= IFF_OACTIVE;
		break;

	case SIOCGIFXFLAGS:
		/* Kernel-internal extended flags are hidden from userland. */
		ifr->ifr_flags = ifp->if_xflags & ~(IFXF_MPSAFE|IFXF_CLONED);
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFHARDMTU:
		ifr->ifr_hardmtu = ifp->if_hardmtu;
		break;

	case SIOCGIFDATA: {
		/* Aggregate if_data, per-cpu counters and queue stats. */
		struct if_data ifdata;
		if_getdata(ifp, &ifdata);
		error = copyout(&ifdata, ifr->ifr_data, sizeof(ifdata));
		break;
	}

	case SIOCGIFDESCR:
		/* Copy via a stack buffer so copyoutstr sees a stable string. */
		strlcpy(ifdescrbuf, ifp->if_description, IFDESCRSIZE);
		error = copyoutstr(ifdescrbuf, ifr->ifr_data, IFDESCRSIZE,
		    &bytesdone);
		break;

	case SIOCGIFRTLABEL:
		if (ifp->if_rtlabelid && rtlabel_id2name(ifp->if_rtlabelid,
		    ifrtlabelbuf, RTLABEL_LEN) != NULL) {
			error = copyoutstr(ifrtlabelbuf, ifr->ifr_data,
			    RTLABEL_LEN, &bytesdone);
		} else
			error = ENOENT;
		break;

	case SIOCGIFPRIORITY:
		ifr->ifr_metric = ifp->if_priority;
		break;

	case SIOCGIFRDOMAIN:
		ifr->ifr_rdomainid = ifp->if_rdomain;
		break;

	case SIOCGIFGROUP:
		error = if_getgroup(data, ifp);
		break;

	case SIOCGIFLLPRIO:
		ifr->ifr_llprio = ifp->if_llprio;
		break;

	default:
		/* The dispatcher routed a cmd here that we do not handle. */
		panic("invalid ioctl %lu", cmd);
	}

	NET_UNLOCK_SHARED();

	KERNEL_UNLOCK();

	if_put(ifp);

	return (error);
}
2544 
2545 static int
2546 if_sffpage_check(const caddr_t data)
2547 {
2548 	const struct if_sffpage *sff = (const struct if_sffpage *)data;
2549 
2550 	switch (sff->sff_addr) {
2551 	case IFSFF_ADDR_EEPROM:
2552 	case IFSFF_ADDR_DDM:
2553 		break;
2554 	default:
2555 		return (EINVAL);
2556 	}
2557 
2558 	return (0);
2559 }
2560 
2561 int
2562 if_txhprio_l2_check(int hdrprio)
2563 {
2564 	switch (hdrprio) {
2565 	case IF_HDRPRIO_PACKET:
2566 		return (0);
2567 	default:
2568 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2569 			return (0);
2570 		break;
2571 	}
2572 
2573 	return (EINVAL);
2574 }
2575 
2576 int
2577 if_txhprio_l3_check(int hdrprio)
2578 {
2579 	switch (hdrprio) {
2580 	case IF_HDRPRIO_PACKET:
2581 	case IF_HDRPRIO_PAYLOAD:
2582 		return (0);
2583 	default:
2584 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2585 			return (0);
2586 		break;
2587 	}
2588 
2589 	return (EINVAL);
2590 }
2591 
2592 int
2593 if_rxhprio_l2_check(int hdrprio)
2594 {
2595 	switch (hdrprio) {
2596 	case IF_HDRPRIO_PACKET:
2597 	case IF_HDRPRIO_OUTER:
2598 		return (0);
2599 	default:
2600 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2601 			return (0);
2602 		break;
2603 	}
2604 
2605 	return (EINVAL);
2606 }
2607 
2608 int
2609 if_rxhprio_l3_check(int hdrprio)
2610 {
2611 	switch (hdrprio) {
2612 	case IF_HDRPRIO_PACKET:
2613 	case IF_HDRPRIO_PAYLOAD:
2614 	case IF_HDRPRIO_OUTER:
2615 		return (0);
2616 	default:
2617 		if (hdrprio >= IF_HDRPRIO_MIN && hdrprio <= IF_HDRPRIO_MAX)
2618 			return (0);
2619 		break;
2620 	}
2621 
2622 	return (EINVAL);
2623 }
2624 
2625 /*
2626  * Return interface configuration
2627  * of system.  List may be used
2628  * in later ioctl's (above) to get
2629  * other information.
2630  */
2631 int
2632 ifconf(caddr_t data)
2633 {
2634 	struct ifconf *ifc = (struct ifconf *)data;
2635 	struct ifnet *ifp;
2636 	struct ifaddr *ifa;
2637 	struct ifreq ifr, *ifrp;
2638 	int space = ifc->ifc_len, error = 0;
2639 
2640 	/* If ifc->ifc_len is 0, fill it in with the needed size and return. */
2641 	if (space == 0) {
2642 		TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2643 			struct sockaddr *sa;
2644 
2645 			if (TAILQ_EMPTY(&ifp->if_addrlist))
2646 				space += sizeof (ifr);
2647 			else
2648 				TAILQ_FOREACH(ifa,
2649 				    &ifp->if_addrlist, ifa_list) {
2650 					sa = ifa->ifa_addr;
2651 					if (sa->sa_len > sizeof(*sa))
2652 						space += sa->sa_len -
2653 						    sizeof(*sa);
2654 					space += sizeof(ifr);
2655 				}
2656 		}
2657 		ifc->ifc_len = space;
2658 		return (0);
2659 	}
2660 
2661 	ifrp = ifc->ifc_req;
2662 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2663 		if (space < sizeof(ifr))
2664 			break;
2665 		bcopy(ifp->if_xname, ifr.ifr_name, IFNAMSIZ);
2666 		if (TAILQ_EMPTY(&ifp->if_addrlist)) {
2667 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
2668 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
2669 			    sizeof(ifr));
2670 			if (error)
2671 				break;
2672 			space -= sizeof (ifr), ifrp++;
2673 		} else
2674 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2675 				struct sockaddr *sa = ifa->ifa_addr;
2676 
2677 				if (space < sizeof(ifr))
2678 					break;
2679 				if (sa->sa_len <= sizeof(*sa)) {
2680 					ifr.ifr_addr = *sa;
2681 					error = copyout((caddr_t)&ifr,
2682 					    (caddr_t)ifrp, sizeof (ifr));
2683 					ifrp++;
2684 				} else {
2685 					space -= sa->sa_len - sizeof(*sa);
2686 					if (space < sizeof (ifr))
2687 						break;
2688 					error = copyout((caddr_t)&ifr,
2689 					    (caddr_t)ifrp,
2690 					    sizeof(ifr.ifr_name));
2691 					if (error == 0)
2692 						error = copyout((caddr_t)sa,
2693 						    (caddr_t)&ifrp->ifr_addr,
2694 						    sa->sa_len);
2695 					ifrp = (struct ifreq *)(sa->sa_len +
2696 					    (caddr_t)&ifrp->ifr_addr);
2697 				}
2698 				if (error)
2699 					break;
2700 				space -= sizeof (ifr);
2701 			}
2702 	}
2703 	ifc->ifc_len -= space;
2704 	return (error);
2705 }
2706 
2707 void
2708 if_counters_alloc(struct ifnet *ifp)
2709 {
2710 	KASSERT(ifp->if_counters == NULL);
2711 
2712 	ifp->if_counters = counters_alloc(ifc_ncounters);
2713 }
2714 
2715 void
2716 if_counters_free(struct ifnet *ifp)
2717 {
2718 	KASSERT(ifp->if_counters != NULL);
2719 
2720 	counters_free(ifp->if_counters, ifc_ncounters);
2721 	ifp->if_counters = NULL;
2722 }
2723 
/*
 * Snapshot the interface statistics into *data.
 *
 * Starts from the if_data embedded in the ifnet, then folds in the
 * per-cpu counters (when allocated) and the per-queue transmit and
 * receive statistics.
 */
void
if_getdata(struct ifnet *ifp, struct if_data *data)
{
	unsigned int i;

	*data = ifp->if_data;

	if (ifp->if_counters != NULL) {
		uint64_t counters[ifc_ncounters];

		counters_read(ifp->if_counters, counters, nitems(counters));

		/* Per-cpu counters are added on top of the if_data values. */
		data->ifi_ipackets += counters[ifc_ipackets];
		data->ifi_ierrors += counters[ifc_ierrors];
		data->ifi_opackets += counters[ifc_opackets];
		data->ifi_oerrors += counters[ifc_oerrors];
		data->ifi_collisions += counters[ifc_collisions];
		data->ifi_ibytes += counters[ifc_ibytes];
		data->ifi_obytes += counters[ifc_obytes];
		data->ifi_imcasts += counters[ifc_imcasts];
		data->ifi_omcasts += counters[ifc_omcasts];
		data->ifi_iqdrops += counters[ifc_iqdrops];
		data->ifi_oqdrops += counters[ifc_oqdrops];
		data->ifi_noproto += counters[ifc_noproto];
	}

	/* Fold in the per-transmit-queue statistics. */
	for (i = 0; i < ifp->if_nifqs; i++) {
		struct ifqueue *ifq = ifp->if_ifqs[i];

		ifq_add_data(ifq, data);
	}

	/* Fold in the per-receive-queue statistics. */
	for (i = 0; i < ifp->if_niqs; i++) {
		struct ifiqueue *ifiq = ifp->if_iqs[i];

		ifiq_add_data(ifiq, data);
	}
}
2762 
2763 /*
2764  * Dummy functions replaced in ifnet during detach (if protocols decide to
2765  * fiddle with the if during detach.
2766  */
/*
 * Start routine installed while the interface is torn down: anything
 * still queued is simply dropped.
 */
void
if_detached_qstart(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2772 
2773 int
2774 if_detached_ioctl(struct ifnet *ifp, u_long a, caddr_t b)
2775 {
2776 	return ENODEV;
2777 }
2778 
2779 /*
2780  * Create interface group without members
2781  */
2782 struct ifg_group *
2783 if_creategroup(const char *groupname)
2784 {
2785 	struct ifg_group	*ifg;
2786 
2787 	if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
2788 		return (NULL);
2789 
2790 	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
2791 	ifg->ifg_refcnt = 1;
2792 	ifg->ifg_carp_demoted = 0;
2793 	TAILQ_INIT(&ifg->ifg_members);
2794 #if NPF > 0
2795 	pfi_attach_ifgroup(ifg);
2796 #endif
2797 	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
2798 
2799 	return (ifg);
2800 }
2801 
2802 /*
2803  * Add a group to an interface
2804  */
2805 int
2806 if_addgroup(struct ifnet *ifp, const char *groupname)
2807 {
2808 	struct ifg_list		*ifgl;
2809 	struct ifg_group	*ifg = NULL;
2810 	struct ifg_member	*ifgm;
2811 	size_t			 namelen;
2812 
2813 	namelen = strlen(groupname);
2814 	if (namelen == 0 || namelen >= IFNAMSIZ ||
2815 	    (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
2816 		return (EINVAL);
2817 
2818 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2819 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2820 			return (EEXIST);
2821 
2822 	if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
2823 		return (ENOMEM);
2824 
2825 	if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
2826 		free(ifgl, M_TEMP, sizeof(*ifgl));
2827 		return (ENOMEM);
2828 	}
2829 
2830 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2831 		if (!strcmp(ifg->ifg_group, groupname))
2832 			break;
2833 
2834 	if (ifg == NULL) {
2835 		ifg = if_creategroup(groupname);
2836 		if (ifg == NULL) {
2837 			free(ifgl, M_TEMP, sizeof(*ifgl));
2838 			free(ifgm, M_TEMP, sizeof(*ifgm));
2839 			return (ENOMEM);
2840 		}
2841 	} else
2842 		ifg->ifg_refcnt++;
2843 	KASSERT(ifg->ifg_refcnt != 0);
2844 
2845 	ifgl->ifgl_group = ifg;
2846 	ifgm->ifgm_ifp = ifp;
2847 
2848 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
2849 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
2850 
2851 #if NPF > 0
2852 	pfi_group_addmember(groupname);
2853 #endif
2854 
2855 	return (0);
2856 }
2857 
2858 /*
2859  * Remove a group from an interface
2860  */
2861 int
2862 if_delgroup(struct ifnet *ifp, const char *groupname)
2863 {
2864 	struct ifg_list		*ifgl;
2865 	struct ifg_member	*ifgm;
2866 
2867 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2868 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
2869 			break;
2870 	if (ifgl == NULL)
2871 		return (ENOENT);
2872 
2873 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
2874 
2875 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
2876 		if (ifgm->ifgm_ifp == ifp)
2877 			break;
2878 
2879 	if (ifgm != NULL) {
2880 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
2881 		free(ifgm, M_TEMP, sizeof(*ifgm));
2882 	}
2883 
2884 #if NPF > 0
2885 	pfi_group_delmember(groupname);
2886 #endif
2887 
2888 	KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
2889 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
2890 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
2891 #if NPF > 0
2892 		pfi_detach_ifgroup(ifgl->ifgl_group);
2893 #endif
2894 		free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
2895 	}
2896 
2897 	free(ifgl, M_TEMP, sizeof(*ifgl));
2898 
2899 	return (0);
2900 }
2901 
2902 /*
2903  * Stores all groups from an interface in memory pointed
2904  * to by data
2905  */
2906 int
2907 if_getgroup(caddr_t data, struct ifnet *ifp)
2908 {
2909 	int			 len, error;
2910 	struct ifg_list		*ifgl;
2911 	struct ifg_req		 ifgrq, *ifgp;
2912 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2913 
2914 	if (ifgr->ifgr_len == 0) {
2915 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2916 			ifgr->ifgr_len += sizeof(struct ifg_req);
2917 		return (0);
2918 	}
2919 
2920 	len = ifgr->ifgr_len;
2921 	ifgp = ifgr->ifgr_groups;
2922 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2923 		if (len < sizeof(ifgrq))
2924 			return (EINVAL);
2925 		bzero(&ifgrq, sizeof ifgrq);
2926 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
2927 		    sizeof(ifgrq.ifgrq_group));
2928 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2929 		    sizeof(struct ifg_req))))
2930 			return (error);
2931 		len -= sizeof(ifgrq);
2932 		ifgp++;
2933 	}
2934 
2935 	return (0);
2936 }
2937 
2938 /*
2939  * Stores all members of a group in memory pointed to by data
2940  */
2941 int
2942 if_getgroupmembers(caddr_t data)
2943 {
2944 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2945 	struct ifg_group	*ifg;
2946 	struct ifg_member	*ifgm;
2947 	struct ifg_req		 ifgrq, *ifgp;
2948 	int			 len, error;
2949 
2950 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2951 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2952 			break;
2953 	if (ifg == NULL)
2954 		return (ENOENT);
2955 
2956 	if (ifgr->ifgr_len == 0) {
2957 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
2958 			ifgr->ifgr_len += sizeof(ifgrq);
2959 		return (0);
2960 	}
2961 
2962 	len = ifgr->ifgr_len;
2963 	ifgp = ifgr->ifgr_groups;
2964 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
2965 		if (len < sizeof(ifgrq))
2966 			return (EINVAL);
2967 		bzero(&ifgrq, sizeof ifgrq);
2968 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
2969 		    sizeof(ifgrq.ifgrq_member));
2970 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
2971 		    sizeof(struct ifg_req))))
2972 			return (error);
2973 		len -= sizeof(ifgrq);
2974 		ifgp++;
2975 	}
2976 
2977 	return (0);
2978 }
2979 
2980 int
2981 if_getgroupattribs(caddr_t data)
2982 {
2983 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
2984 	struct ifg_group	*ifg;
2985 
2986 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
2987 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
2988 			break;
2989 	if (ifg == NULL)
2990 		return (ENOENT);
2991 
2992 	ifgr->ifgr_attrib.ifg_carp_demoted = ifg->ifg_carp_demoted;
2993 
2994 	return (0);
2995 }
2996 
/*
 * Apply a carp demotion delta to the group named in ifgr_name and
 * notify every member interface via its SIOCSIFGATTR ioctl.
 */
int
if_setgroupattribs(caddr_t data)
{
	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm;
	int			 demote;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/*
	 * "demote" is a signed delta; the resulting counter must stay
	 * within 0..0xff.
	 */
	demote = ifgr->ifgr_attrib.ifg_carp_demoted;
	if (demote + ifg->ifg_carp_demoted > 0xff ||
	    demote + ifg->ifg_carp_demoted < 0)
		return (EINVAL);

	ifg->ifg_carp_demoted += demote;

	/* Let each member interface react to the new demotion value. */
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		ifgm->ifgm_ifp->if_ioctl(ifgm->ifgm_ifp, SIOCSIFGATTR, data);

	return (0);
}
3023 
3024 /*
3025  * Stores all groups in memory pointed to by data
3026  */
3027 int
3028 if_getgrouplist(caddr_t data)
3029 {
3030 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
3031 	struct ifg_group	*ifg;
3032 	struct ifg_req		 ifgrq, *ifgp;
3033 	int			 len, error;
3034 
3035 	if (ifgr->ifgr_len == 0) {
3036 		TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
3037 			ifgr->ifgr_len += sizeof(ifgrq);
3038 		return (0);
3039 	}
3040 
3041 	len = ifgr->ifgr_len;
3042 	ifgp = ifgr->ifgr_groups;
3043 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
3044 		if (len < sizeof(ifgrq))
3045 			return (EINVAL);
3046 		bzero(&ifgrq, sizeof ifgrq);
3047 		strlcpy(ifgrq.ifgrq_group, ifg->ifg_group,
3048 		    sizeof(ifgrq.ifgrq_group));
3049 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
3050 		    sizeof(struct ifg_req))))
3051 			return (error);
3052 		len -= sizeof(ifgrq);
3053 		ifgp++;
3054 	}
3055 
3056 	return (0);
3057 }
3058 
/*
 * Called on route changes: when the changed route is a default route
 * (wildcard destination with an empty or wildcard mask), rebuild the
 * "egress" interface group.
 */
void
if_group_routechange(struct sockaddr *dst, struct sockaddr *mask)
{
	switch (dst->sa_family) {
	case AF_INET:
		if (satosin(dst)->sin_addr.s_addr == INADDR_ANY &&
		    mask && (mask->sa_len == 0 ||
		    satosin(mask)->sin_addr.s_addr == INADDR_ANY))
			if_group_egress_build();
		break;
#ifdef INET6
	case AF_INET6:
		if (IN6_ARE_ADDR_EQUAL(&(satosin6(dst))->sin6_addr,
		    &in6addr_any) && mask && (mask->sa_len == 0 ||
		    IN6_ARE_ADDR_EQUAL(&(satosin6(mask))->sin6_addr,
		    &in6addr_any)))
			if_group_egress_build();
		break;
#endif
	}
}
3080 
/*
 * Rebuild the "egress" interface group from scratch: empty it, then
 * re-add every interface that carries a default route in routing
 * table 0 (IPv4 and, when built, IPv6).
 */
int
if_group_egress_build(void)
{
	struct ifnet		*ifp;
	struct ifg_group	*ifg;
	struct ifg_member	*ifgm, *next;
	struct sockaddr_in	 sa_in;
#ifdef INET6
	struct sockaddr_in6	 sa_in6;
#endif
	struct rtentry		*rt;

	/* Remove all current members of the egress group, if any. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, IFG_EGRESS))
			break;

	if (ifg != NULL)
		TAILQ_FOREACH_SAFE(ifgm, &ifg->ifg_members, ifgm_next, next)
			if_delgroup(ifgm->ifgm_ifp, IFG_EGRESS);

	/* Walk every IPv4 default route and re-add its interface. */
	bzero(&sa_in, sizeof(sa_in));
	sa_in.sin_len = sizeof(sa_in);
	sa_in.sin_family = AF_INET;
	rt = rtable_lookup(0, sintosa(&sa_in), sintosa(&sa_in), NULL, RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}

#ifdef INET6
	/* Same for every IPv6 default route. */
	bcopy(&sa6_any, &sa_in6, sizeof(sa_in6));
	rt = rtable_lookup(0, sin6tosa(&sa_in6), sin6tosa(&sa_in6), NULL,
	    RTP_ANY);
	while (rt != NULL) {
		ifp = if_get(rt->rt_ifidx);
		if (ifp != NULL) {
			if_addgroup(ifp, IFG_EGRESS);
			if_put(ifp);
		}
		rt = rtable_iterate(rt);
	}
#endif /* INET6 */

	return (0);
}
3130 
3131 /*
3132  * Set/clear promiscuous mode on interface ifp based on the truth value
3133  * of pswitch.  The calls are reference counted so that only the first
3134  * "on" request actually has an effect, as does the final "off" request.
3135  * Results are undefined if the "off" and "on" requests are not matched.
3136  */
3137 int
3138 ifpromisc(struct ifnet *ifp, int pswitch)
3139 {
3140 	struct ifreq ifr;
3141 	unsigned short oif_flags;
3142 	int oif_pcount, error;
3143 
3144 	NET_ASSERT_LOCKED(); /* modifying if_flags and if_pcount */
3145 
3146 	oif_flags = ifp->if_flags;
3147 	oif_pcount = ifp->if_pcount;
3148 	if (pswitch) {
3149 		if (ifp->if_pcount++ != 0)
3150 			return (0);
3151 		ifp->if_flags |= IFF_PROMISC;
3152 	} else {
3153 		if (--ifp->if_pcount > 0)
3154 			return (0);
3155 		ifp->if_flags &= ~IFF_PROMISC;
3156 	}
3157 
3158 	if ((ifp->if_flags & IFF_UP) == 0)
3159 		return (0);
3160 
3161 	memset(&ifr, 0, sizeof(ifr));
3162 	ifr.ifr_flags = ifp->if_flags;
3163 	error = ((*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
3164 	if (error) {
3165 		ifp->if_flags = oif_flags;
3166 		ifp->if_pcount = oif_pcount;
3167 	}
3168 
3169 	return (error);
3170 }
3171 
/* Set/clear LRO flag and restart interface if needed. */
int
ifsetlro(struct ifnet *ifp, int on)
{
	struct ifreq ifrq;
	int error = 0;
	int s = splnet();

	/* The hardware must support LRO at all. */
	if (!ISSET(ifp->if_capabilities, IFCAP_LRO)) {
		error = ENOTSUP;
		goto out;
	}

	NET_ASSERT_LOCKED();	/* for ioctl */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	if (on && !ISSET(ifp->if_xflags, IFXF_LRO)) {
		/* Refuse while the port is enslaved to a bridge. */
		if (ether_brport_isset(ifp)) {
			error = EBUSY;
			goto out;
		}
		SET(ifp->if_xflags, IFXF_LRO);
	} else if (!on && ISSET(ifp->if_xflags, IFXF_LRO))
		CLR(ifp->if_xflags, IFXF_LRO);
	else
		goto out;	/* no change requested */

	/* restart interface */
	if (ISSET(ifp->if_flags, IFF_UP)) {
		/* go down for a moment... */
		CLR(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

		/* ... and up again */
		SET(ifp->if_flags, IFF_UP);
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
 out:
	splx(s);

	return error;
}
3216 
3217 void
3218 ifa_add(struct ifnet *ifp, struct ifaddr *ifa)
3219 {
3220 	NET_ASSERT_LOCKED_EXCLUSIVE();
3221 	TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list);
3222 }
3223 
3224 void
3225 ifa_del(struct ifnet *ifp, struct ifaddr *ifa)
3226 {
3227 	NET_ASSERT_LOCKED_EXCLUSIVE();
3228 	TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list);
3229 }
3230 
3231 void
3232 ifa_update_broadaddr(struct ifnet *ifp, struct ifaddr *ifa, struct sockaddr *sa)
3233 {
3234 	if (ifa->ifa_broadaddr->sa_len != sa->sa_len)
3235 		panic("ifa_update_broadaddr does not support dynamic length");
3236 	bcopy(sa, ifa->ifa_broadaddr, sa->sa_len);
3237 }
3238 
3239 #ifdef DDB
3240 /* debug function, can be called from ddb> */
3241 void
3242 ifa_print_all(void)
3243 {
3244 	struct ifnet *ifp;
3245 	struct ifaddr *ifa;
3246 
3247 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
3248 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
3249 			char addr[INET6_ADDRSTRLEN];
3250 
3251 			switch (ifa->ifa_addr->sa_family) {
3252 			case AF_INET:
3253 				printf("%s", inet_ntop(AF_INET,
3254 				    &satosin(ifa->ifa_addr)->sin_addr,
3255 				    addr, sizeof(addr)));
3256 				break;
3257 #ifdef INET6
3258 			case AF_INET6:
3259 				printf("%s", inet_ntop(AF_INET6,
3260 				    &(satosin6(ifa->ifa_addr))->sin6_addr,
3261 				    addr, sizeof(addr)));
3262 				break;
3263 #endif
3264 			}
3265 			printf(" on %s\n", ifp->if_xname);
3266 		}
3267 	}
3268 }
3269 #endif /* DDB */
3270 
/*
 * React to a new link-layer address: bounce the interface through
 * SIOCSIFFLAGS so the driver picks up the new address, and regenerate
 * the IPv6 link-local address that is derived from it.
 */
void
ifnewlladdr(struct ifnet *ifp)
{
#ifdef INET6
	struct ifaddr *ifa;
#endif
	struct ifreq ifrq;
	short up;

	NET_ASSERT_LOCKED();	/* for ioctl and in6 */
	KERNEL_ASSERT_LOCKED();	/* for if_flags */

	/* Remember whether the interface was up so we can restore it. */
	up = ifp->if_flags & IFF_UP;

	if (up) {
		/* go down for a moment... */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}

	/* Bring it up so the driver reprograms the hardware. */
	ifp->if_flags |= IFF_UP;
	ifrq.ifr_flags = ifp->if_flags;
	(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);

#ifdef INET6
	/*
	 * Update the link-local address.  Don't do it if we're
	 * a router to avoid confusing hosts on the network.
	 */
	if (!ip6_forwarding) {
		/*
		 * NOTE(review): the member access on a possibly NULL
		 * in6ifa_ifpforlinklocal() result relies on ia_ifa being
		 * the first member so that "ifa" stays NULL -- confirm.
		 */
		ifa = &in6ifa_ifpforlinklocal(ifp, 0)->ia_ifa;
		if (ifa) {
			in6_purgeaddr(ifa);
			if_hooks_run(&ifp->if_addrhooks);
			in6_ifattach(ifp);
		}
	}
#endif
	if (!up) {
		/* go back down */
		ifp->if_flags &= ~IFF_UP;
		ifrq.ifr_flags = ifp->if_flags;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifrq);
	}
}
3317 
3318 void
3319 if_addrhook_add(struct ifnet *ifp, struct task *t)
3320 {
3321 	mtx_enter(&if_hooks_mtx);
3322 	TAILQ_INSERT_TAIL(&ifp->if_addrhooks, t, t_entry);
3323 	mtx_leave(&if_hooks_mtx);
3324 }
3325 
3326 void
3327 if_addrhook_del(struct ifnet *ifp, struct task *t)
3328 {
3329 	mtx_enter(&if_hooks_mtx);
3330 	TAILQ_REMOVE(&ifp->if_addrhooks, t, t_entry);
3331 	mtx_leave(&if_hooks_mtx);
3332 }
3333 
3334 void
3335 if_addrhooks_run(struct ifnet *ifp)
3336 {
3337 	if_hooks_run(&ifp->if_addrhooks);
3338 }
3339 
3340 void
3341 if_rxr_init(struct if_rxring *rxr, u_int lwm, u_int hwm)
3342 {
3343 	extern int ticks;
3344 
3345 	memset(rxr, 0, sizeof(*rxr));
3346 
3347 	rxr->rxr_adjusted = ticks;
3348 	rxr->rxr_cwm = rxr->rxr_lwm = lwm;
3349 	rxr->rxr_hwm = hwm;
3350 }
3351 
3352 static inline void
3353 if_rxr_adjust_cwm(struct if_rxring *rxr)
3354 {
3355 	extern int ticks;
3356 
3357 	if (rxr->rxr_alive >= rxr->rxr_lwm)
3358 		return;
3359 	else if (rxr->rxr_cwm < rxr->rxr_hwm)
3360 		rxr->rxr_cwm++;
3361 
3362 	rxr->rxr_adjusted = ticks;
3363 }
3364 
3365 void
3366 if_rxr_livelocked(struct if_rxring *rxr)
3367 {
3368 	extern int ticks;
3369 
3370 	if (ticks - rxr->rxr_adjusted >= 1) {
3371 		if (rxr->rxr_cwm > rxr->rxr_lwm)
3372 			rxr->rxr_cwm--;
3373 
3374 		rxr->rxr_adjusted = ticks;
3375 	}
3376 }
3377 
3378 u_int
3379 if_rxr_get(struct if_rxring *rxr, u_int max)
3380 {
3381 	extern int ticks;
3382 	u_int diff;
3383 
3384 	if (ticks - rxr->rxr_adjusted >= 1) {
3385 		/* we're free to try for an adjustment */
3386 		if_rxr_adjust_cwm(rxr);
3387 	}
3388 
3389 	if (rxr->rxr_alive >= rxr->rxr_cwm)
3390 		return (0);
3391 
3392 	diff = min(rxr->rxr_cwm - rxr->rxr_alive, max);
3393 	rxr->rxr_alive += diff;
3394 
3395 	return (diff);
3396 }
3397 
3398 int
3399 if_rxr_info_ioctl(struct if_rxrinfo *uifri, u_int t, struct if_rxring_info *e)
3400 {
3401 	struct if_rxrinfo kifri;
3402 	int error;
3403 	u_int n;
3404 
3405 	error = copyin(uifri, &kifri, sizeof(kifri));
3406 	if (error)
3407 		return (error);
3408 
3409 	n = min(t, kifri.ifri_total);
3410 	kifri.ifri_total = t;
3411 
3412 	if (n > 0) {
3413 		error = copyout(e, kifri.ifri_entries, sizeof(*e) * n);
3414 		if (error)
3415 			return (error);
3416 	}
3417 
3418 	return (copyout(&kifri, uifri, sizeof(kifri)));
3419 }
3420 
3421 int
3422 if_rxr_ioctl(struct if_rxrinfo *ifri, const char *name, u_int size,
3423     struct if_rxring *rxr)
3424 {
3425 	struct if_rxring_info ifr;
3426 
3427 	memset(&ifr, 0, sizeof(ifr));
3428 
3429 	if (name != NULL)
3430 		strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
3431 
3432 	ifr.ifr_size = size;
3433 	ifr.ifr_info = *rxr;
3434 
3435 	return (if_rxr_info_ioctl(ifri, 1, &ifr));
3436 }
3437 
3438 /*
3439  * Network stack input queues.
3440  */
3441 
3442 void
3443 niq_init(struct niqueue *niq, u_int maxlen, u_int isr)
3444 {
3445 	mq_init(&niq->ni_q, maxlen, IPL_NET);
3446 	niq->ni_isr = isr;
3447 }
3448 
3449 int
3450 niq_enqueue(struct niqueue *niq, struct mbuf *m)
3451 {
3452 	int rv;
3453 
3454 	rv = mq_enqueue(&niq->ni_q, m);
3455 	if (rv == 0)
3456 		schednetisr(niq->ni_isr);
3457 	else
3458 		if_congestion();
3459 
3460 	return (rv);
3461 }
3462 
3463 int
3464 niq_enlist(struct niqueue *niq, struct mbuf_list *ml)
3465 {
3466 	int rv;
3467 
3468 	rv = mq_enlist(&niq->ni_q, ml);
3469 	if (rv == 0)
3470 		schednetisr(niq->ni_isr);
3471 	else
3472 		if_congestion();
3473 
3474 	return (rv);
3475 }
3476 
3477 __dead void
3478 unhandled_af(int af)
3479 {
3480 	panic("unhandled af %d", af);
3481 }
3482 
/*
 * Map an interface index to one of the softnet task queues.
 */
struct taskq *
net_tq(unsigned int ifindex)
{
	struct softnet *sn;
	static int nettaskqs;

	/*
	 * Lazily clamp the number of usable task queues to the number
	 * of cpus.  NOTE(review): unlocked lazy initialization -- racing
	 * callers would all store the same value, which looks benign,
	 * but confirm.
	 */
	if (nettaskqs == 0)
		nettaskqs = min(NET_TASKQ, ncpus);

	/* Spread interfaces across the task queues by index. */
	sn = &softnets[ifindex % nettaskqs];

	return (sn->sn_taskq);
}
3496 
/*
 * Wait until every softnet task queue has run the work queued ahead
 * of this call: a refcounted wakeup task is queued on each taskq and
 * the caller sleeps (under "wmesg") until all of them have executed.
 */
void
net_tq_barriers(const char *wmesg)
{
	struct task barriers[NET_TASKQ];
	struct refcnt r = REFCNT_INITIALIZER();
	int i;

	for (i = 0; i < nitems(barriers); i++) {
		/* Each task drops one reference when its taskq runs it. */
		task_set(&barriers[i], (void (*)(void *))refcnt_rele_wake, &r);
		refcnt_take(&r);
		task_add(softnets[i].sn_taskq, &barriers[i]);
	}

	/* Sleep until every barrier task has released its reference. */
	refcnt_finalize(&r, wmesg);
}
3512