xref: /dflybsd-src/sys/net/if.c (revision 5e8a14a38da718561403517e00d2ab76880c681f)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)if.c	8.3 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
35  */
36 
37 #include "opt_compat.h"
38 #include "opt_inet6.h"
39 #include "opt_inet.h"
40 #include "opt_ifpoll.h"
41 
42 #include <sys/param.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/systm.h>
46 #include <sys/proc.h>
47 #include <sys/priv.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/socketops.h>
52 #include <sys/protosw.h>
53 #include <sys/kernel.h>
54 #include <sys/ktr.h>
55 #include <sys/mutex.h>
56 #include <sys/sockio.h>
57 #include <sys/syslog.h>
58 #include <sys/sysctl.h>
59 #include <sys/domain.h>
60 #include <sys/thread.h>
61 #include <sys/serialize.h>
62 #include <sys/bus.h>
63 
64 #include <sys/thread2.h>
65 #include <sys/msgport2.h>
66 #include <sys/mutex2.h>
67 
68 #include <net/if.h>
69 #include <net/if_arp.h>
70 #include <net/if_dl.h>
71 #include <net/if_types.h>
72 #include <net/if_var.h>
73 #include <net/ifq_var.h>
74 #include <net/radix.h>
75 #include <net/route.h>
76 #include <net/if_clone.h>
77 #include <net/netisr.h>
78 #include <net/netmsg2.h>
79 
80 #include <machine/atomic.h>
81 #include <machine/stdarg.h>
82 #include <machine/smp.h>
83 
84 #if defined(INET) || defined(INET6)
85 /*XXX*/
86 #include <netinet/in.h>
87 #include <netinet/in_var.h>
88 #include <netinet/if_ether.h>
89 #ifdef INET6
90 #include <netinet6/in6_var.h>
91 #include <netinet6/in6_ifattach.h>
92 #endif
93 #endif
94 
95 #if defined(COMPAT_43)
96 #include <emulation/43bsd/43bsd_socket.h>
97 #endif /* COMPAT_43 */
98 
/*
 * Network message that carries an interface address together with the
 * interface it belongs to.  The code that sends and handles this message
 * is not in the visible part of this file.
 */
struct netmsg_ifaddr {
	struct netmsg_base base;	/* common netmsg header (first member) */
	struct ifaddr	*ifa;
	struct ifnet	*ifp;
	int		tail;		/* NOTE(review): presumably tail vs. head insertion -- confirm */
};
105 
/*
 * Forward declarations of file-local routines.  ifinit(), ifnetinit() and
 * if_attachdomain() are registered as SYSINIT hooks in this file.
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);
static void	ifnetinit(void *);
static void	if_slowtimo(void *);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	if_rtdel(struct radix_node *, void *);
117 
#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related header files.
 * Should this be generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif
125 
/* sysctl nodes "link" (under _net) and "generic" (under _net_link) */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

/* Boot-time hooks; the handlers are ifinit() and ifnetinit(). */
SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
/* Must be after netisr_init */
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)

/* NOTE(review): presumably indexed by interface type -- users are not visible here */
static  if_com_alloc_t *if_com_alloc[256];
static  if_com_free_t *if_com_free[256];

/* malloc(9) types used by the interface code */
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");

/* Send queue length ifinit() applies when a driver left ifq_maxlen at 0 */
int			ifqmaxlen = IFQ_MAXLEN;
/* List of attached interfaces, linked through if_link */
struct ifnethead	ifnet = TAILQ_HEAD_INITIALIZER(ifnet);

/* Initialized by ifinit() */
struct callout		if_slowtimo_timer;

/* Incremented by if_attach(); if_detach() lowers it past trailing empty slots */
int			if_index = 0;
/* Interface index -> ifnet table, allocated and grown by if_attach() */
struct ifnet		**ifindex2ifnet = NULL;
static struct thread	ifnet_threads[MAXCPU];
148 
/*
 * KTR trace points "enqueue" and "dequeue" for send queues; each records
 * the ifaltq pointer.  Logged through logifq(name, ifq).
 */
#define IFQ_KTR_STRING		"ifq=%p"
#define IFQ_KTR_ARGS	struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ			KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)

/*
 * KTR trace points for if_start handling (run, sched, avoid,
 * contend_sched, chase_sched); each records the ifnet pointer.
 * Logged through logifstart(name, ifp).
 */
#define IF_START_KTR_STRING	"ifp=%p"
#define IF_START_KTR_ARGS	struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START		KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
#define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)
176 
/*
 * List of all interface groups.  if_creategroup() appends to it and
 * if_delgroup() removes a group once its reference count drops to zero.
 */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
178 
179 /*
180  * Network interface utility routines.
181  *
182  * Routines with ifa_ifwith* names take sockaddr *'s as
183  * parameters.
184  */
185 /* ARGSUSED*/
186 void
187 ifinit(void *dummy)
188 {
189 	struct ifnet *ifp;
190 
191 	callout_init(&if_slowtimo_timer);
192 
193 	crit_enter();
194 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
195 		if (ifp->if_snd.ifq_maxlen == 0) {
196 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
197 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
198 		}
199 	}
200 	crit_exit();
201 
202 	if_slowtimo(0);
203 }
204 
205 static int
206 if_start_cpuid(struct ifnet *ifp)
207 {
208 	return ifp->if_cpuid;
209 }
210 
211 #ifdef IFPOLL_ENABLE
212 static int
213 if_start_cpuid_npoll(struct ifnet *ifp)
214 {
215 	int poll_cpuid = ifp->if_npoll_cpuid;
216 
217 	if (poll_cpuid >= 0)
218 		return poll_cpuid;
219 	else
220 		return ifp->if_cpuid;
221 }
222 #endif
223 
224 static void
225 if_start_ipifunc(void *arg)
226 {
227 	struct ifnet *ifp = arg;
228 	struct lwkt_msg *lmsg = &ifp->if_start_nmsg[mycpuid].lmsg;
229 
230 	crit_enter();
231 	if (lmsg->ms_flags & MSGF_DONE)
232 		lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
233 	crit_exit();
234 }
235 
236 /*
237  * Schedule ifnet.if_start on ifnet's CPU
238  */
239 static void
240 if_start_schedule(struct ifnet *ifp)
241 {
242 	int cpu;
243 
244 	cpu = ifp->if_start_cpuid(ifp);
245 	if (cpu != mycpuid)
246 		lwkt_send_ipiq(globaldata_find(cpu), if_start_ipifunc, ifp);
247 	else
248 	if_start_ipifunc(ifp);
249 }
250 
/*
 * Decide whether ifnet.if_start has to be scheduled again.
 *
 * ifq      send queue of the interface
 * running  non-zero when the caller saw the interface with IFF_RUNNING
 *          set and IFF_OACTIVE clear after its last if_start call
 *
 * Returns 1 if if_start should be scheduled again.  Returns 0 after
 * clearing ifq->altq_started under the ALTQ lock otherwise.
 *
 * NOTE:
 * This function will release ifnet.if_start interlock,
 * if ifnet.if_start does not need to be scheduled
 */
static __inline int
if_start_need_schedule(struct ifaltq *ifq, int running)
{
	/* Unlocked pre-check; the decision is re-evaluated under the lock. */
	if (!running || ifq_is_empty(ifq)
#ifdef ALTQ
	    || ifq->altq_tbr != NULL
#endif
	) {
		ALTQ_LOCK(ifq);
		/*
		 * ifnet.if_start interlock is released, if:
		 * 1) Hardware can not take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (IFF_OACTIVE)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    when hardware queue is ready
		 * 2) There is no packet in the ifnet.if_snd.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    TBR callout will call ifnet.if_start
		 */
		if (!running || !ifq_data_ready(ifq)) {
			ifq->altq_started = 0;
			ALTQ_UNLOCK(ifq);
			return 0;
		}
		ALTQ_UNLOCK(ifq);
	}
	return 1;
}
289 
/*
 * Handler of the per-CPU if_start netmsgs that if_attach() initializes.
 *
 * The message is replied to first.  If the current CPU is not the one
 * reported by ifp->if_start_cpuid(), the request is rescheduled on that
 * CPU while the interface is up.  Otherwise, if the interface is up,
 * ifp->if_start() is called under the TX serializer unless IFF_OACTIVE is
 * set.  In the end the message is sent again to this CPU's netisr port if
 * if_start_need_schedule() reports that more work is pending.
 */
static void
if_start_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifnet *ifp = lmsg->u.ms_resultp;	/* stored by if_attach() */
	struct ifaltq *ifq = &ifp->if_snd;
	int running = 0;

	crit_enter();
	lwkt_replymsg(lmsg, 0);	/* reply ASAP */
	crit_exit();

	if (mycpuid != ifp->if_start_cpuid(ifp)) {
		/*
		 * If the ifnet is still up, we need to
		 * chase its CPU change.
		 */
		if (ifp->if_flags & IFF_UP) {
			logifstart(chase_sched, ifp);
			if_start_schedule(ifp);
			return;
		} else {
			/* Interface is down; running stays 0 at the check below */
			goto check;
		}
	}

	if (ifp->if_flags & IFF_UP) {
		ifnet_serialize_tx(ifp); /* XXX try? */
		if ((ifp->if_flags & IFF_OACTIVE) == 0) {
			logifstart(run, ifp);
			ifp->if_start(ifp);
			/* Still running and not output-active after if_start */
			if ((ifp->if_flags &
			(IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
				running = 1;
		}
		ifnet_deserialize_tx(ifp);
	}
check:
	if (if_start_need_schedule(ifq, running)) {
		crit_enter();
		if (lmsg->ms_flags & MSGF_DONE)	{ /* XXX necessary? */
			logifstart(sched, ifp);
			lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
		}
		crit_exit();
	}
}
337 
338 /* Device driver ifnet.if_start helper function */
339 void
340 if_devstart(struct ifnet *ifp)
341 {
342 	struct ifaltq *ifq = &ifp->if_snd;
343 	int running = 0;
344 
345 	ASSERT_IFNET_SERIALIZED_TX(ifp);
346 
347 	ALTQ_LOCK(ifq);
348 	if (ifq->altq_started || !ifq_data_ready(ifq)) {
349 		logifstart(avoid, ifp);
350 		ALTQ_UNLOCK(ifq);
351 		return;
352 	}
353 	ifq->altq_started = 1;
354 	ALTQ_UNLOCK(ifq);
355 
356 	logifstart(run, ifp);
357 	ifp->if_start(ifp);
358 
359 	if ((ifp->if_flags & (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
360 		running = 1;
361 
362 	if (if_start_need_schedule(ifq, running)) {
363 		/*
364 		 * More data need to be transmitted, ifnet.if_start is
365 		 * scheduled on ifnet's CPU, and we keep going.
366 		 * NOTE: ifnet.if_start interlock is not released.
367 		 */
368 		logifstart(sched, ifp);
369 		if_start_schedule(ifp);
370 	}
371 }
372 
373 static void
374 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
375 {
376 	lwkt_serialize_enter(ifp->if_serializer);
377 }
378 
379 static void
380 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
381 {
382 	lwkt_serialize_exit(ifp->if_serializer);
383 }
384 
385 static int
386 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
387 {
388 	return lwkt_serialize_try(ifp->if_serializer);
389 }
390 
391 #ifdef INVARIANTS
392 static void
393 if_default_serialize_assert(struct ifnet *ifp,
394 			    enum ifnet_serialize slz __unused,
395 			    boolean_t serialized)
396 {
397 	if (serialized)
398 		ASSERT_SERIALIZED(ifp->if_serializer);
399 	else
400 		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
401 }
402 #endif
403 
/*
 * Attach an interface to the list of "active" interfaces.
 *
 * ifp         interface to attach.  The driver either presets all of the
 *             if_serialize, if_deserialize, if_tryserialize and
 *             if_serialize_assert methods or none of them.
 * serializer  serializer for the default serialize methods; must be NULL
 *             when the driver supplies its own methods.  If NULL and the
 *             defaults are used, the serializer embedded in the ifnet is
 *             initialized and used.
 *
 * The serializer is optional.  If non-NULL access to the interface
 * may be MPSAFE.
 *
 * Besides the serializer setup this routine allocates the per-CPU
 * if_start messages and address list heads, assigns the next interface
 * index, creates the AF_LINK address (name based sockaddr_dl plus
 * netmask), resets the send queue, attaches the domains if any are
 * registered, and announces the arrival.  if_ioctl_mtx is held from its
 * initialization until the function returns.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	int i;

	/* Current capacity of ifindex2ifnet[]; doubled whenever it grows */
	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	ifp->if_start_cpuid = if_start_cpuid;
	ifp->if_cpuid = 0;

#ifdef IFPOLL_ENABLE
	/* Device is not in polling mode by default */
	ifp->if_npoll_cpuid = -1;
	if (ifp->if_npoll != NULL)
		ifp->if_start_cpuid = if_start_cpuid_npoll;
#endif

	/*
	 * One if_start message per CPU; each is handled by
	 * if_start_dispatch() and carries the ifnet in ms_resultp.
	 */
	ifp->if_start_nmsg = kmalloc(ncpus * sizeof(*ifp->if_start_nmsg),
				     M_LWKTMSG, M_WAITOK);
	for (i = 0; i < ncpus; ++i) {
		netmsg_init(&ifp->if_start_nmsg[i], NULL, &netisr_adone_rport,
			    0, if_start_dispatch);
		ifp->if_start_nmsg[i].lmsg.u.ms_resultp = ifp;
	}

	mtx_init(&ifp->if_ioctl_mtx);
	mtx_lock(&ifp->if_ioctl_mtx);

	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
	ifp->if_index = ++if_index;

	/*
	 * XXX -
	 * The old code would work if the interface passed a pre-existing
	 * chain of ifaddrs to this code.  We don't trust our callers to
	 * properly initialize the tailq, however, so we no longer allow
	 * this unlikely case.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_prefixhead);
	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		if_indexlim <<= 1;

		/* grow ifindex2ifnet */
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet) {
			/* n/2 bytes is the size of the old table */
			bcopy(ifindex2ifnet, q, n/2);
			kfree(ifindex2ifnet, M_IFADDR);
		}
		ifindex2ifnet = q;
	}

	ifindex2ifnet[if_index] = ifp;

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
/* Round up to the next multiple of sizeof(long) */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP
	/* The ifaddr is followed by two sockaddr_dl: address and netmask */
	ifasize = sizeof(struct ifaddr) + 2 * socksize;
	ifa = ifa_create(ifasize, M_WAITOK);
	sdl = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	/* Second sockaddr_dl: netmask with 0xff over the name bytes only */
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Reset the send queue; only the ALTQF_CANTCHANGE flag bits survive */
	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;
	ifq->altq_started = 0;
	ifq->altq_prepended = NULL;
	ALTQ_LOCK_INIT(ifq);
	ifq_set_classic(ifq);

	/* Only attach domain data here if any domain is registered */
	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

	mtx_unlock(&ifp->if_ioctl_mtx);
}
574 
575 static void
576 if_attachdomain(void *dummy)
577 {
578 	struct ifnet *ifp;
579 
580 	crit_enter();
581 	TAILQ_FOREACH(ifp, &ifnet, if_list)
582 		if_attachdomain1(ifp);
583 	crit_exit();
584 }
585 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
586 	if_attachdomain, NULL);
587 
588 static void
589 if_attachdomain1(struct ifnet *ifp)
590 {
591 	struct domain *dp;
592 
593 	crit_enter();
594 
595 	/* address family dependent data region */
596 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
597 	SLIST_FOREACH(dp, &domains, dom_next)
598 		if (dp->dom_ifattach)
599 			ifp->if_afdata[dp->dom_family] =
600 				(*dp->dom_ifattach)(ifp);
601 	crit_exit();
602 }
603 
/*
 * Purge all addresses whose type is _not_ AF_LINK
 *
 * Walks the current CPU's address list of ifp.  AF_INET addresses are
 * deleted through in_control(SIOCDIFADDR); if that fails the address is
 * unlinked and destroyed like any other family.  AF_INET6 addresses are
 * handed to in6_purgeaddr().  Everything else is unlinked with
 * ifa_ifunlink() and freed with ifa_destroy().
 */
void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct ifaddr_container *ifac, *next;

	/* _MUTABLE variant: entries are removed while iterating */
	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
			      ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			/* On failure fall through to the generic removal below */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
				       NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}
}
661 
/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 *
 * The detach event handlers are invoked first.  Everything else runs
 * inside a critical section: polling is deregistered, the interface is
 * brought down, ALTQ is disabled/detached, all addresses are purged,
 * the IPv4/IPv6 state is torn down, routes that use the interface are
 * deleted from every CPU's routing tables, the departure is announced,
 * the per-domain data is released, the index slot is cleared, and the
 * interface is unlinked from the ifnet list.  Finally the per-CPU
 * address list heads and if_start messages are freed.
 */
void
if_detach(struct ifnet *ifp)
{
	struct radix_node_head	*rnh;
	int i;
	int cpu, origcpu;
	struct domain *dp;

	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	/* Only the AF_LINK address may be left at this point */
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
			("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
			("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface
	 * Unfortunately the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 */
	origcpu = mycpuid;
	for (cpu = 0; cpu < ncpus; cpu++) {
		/* Walk each CPU's tables while running on that CPU */
		lwkt_migratecpu(cpu);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[cpu][i]) == NULL)
				continue;
			rnh->rnh_walktree(rnh, if_rtdel, ifp);
		}
	}
	lwkt_migratecpu(origcpu);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
				ifp->if_afdata[dp->dom_family]);

	/*
	 * Remove interface from ifindex2ifnet[] and maybe decrement if_index.
	 * Slots below if_index may stay NULL when a lower-indexed interface
	 * is detached before a higher-indexed one.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	TAILQ_REMOVE(&ifnet, ifp, if_link);
	kfree(ifp->if_addrheads, M_IFADDR);
	kfree(ifp->if_start_nmsg, M_LWKTMSG);
	crit_exit();
}
767 
768 /*
769  * Create interface group without members
770  */
771 struct ifg_group *
772 if_creategroup(const char *groupname)
773 {
774         struct ifg_group        *ifg = NULL;
775 
776         if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
777             M_TEMP, M_NOWAIT)) == NULL)
778                 return (NULL);
779 
780         strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
781         ifg->ifg_refcnt = 0;
782         ifg->ifg_carp_demoted = 0;
783         TAILQ_INIT(&ifg->ifg_members);
784 #if NPF > 0
785         pfi_attach_ifgroup(ifg);
786 #endif
787         TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
788 
789         return (ifg);
790 }
791 
792 /*
793  * Add a group to an interface
794  */
795 int
796 if_addgroup(struct ifnet *ifp, const char *groupname)
797 {
798 	struct ifg_list		*ifgl;
799 	struct ifg_group	*ifg = NULL;
800 	struct ifg_member	*ifgm;
801 
802 	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
803 	    groupname[strlen(groupname) - 1] <= '9')
804 		return (EINVAL);
805 
806 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
807 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
808 			return (EEXIST);
809 
810 	if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
811 		return (ENOMEM);
812 
813 	if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
814 		kfree(ifgl, M_TEMP);
815 		return (ENOMEM);
816 	}
817 
818 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
819 		if (!strcmp(ifg->ifg_group, groupname))
820 			break;
821 
822 	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
823 		kfree(ifgl, M_TEMP);
824 		kfree(ifgm, M_TEMP);
825 		return (ENOMEM);
826 	}
827 
828 	ifg->ifg_refcnt++;
829 	ifgl->ifgl_group = ifg;
830 	ifgm->ifgm_ifp = ifp;
831 
832 	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
833 	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
834 
835 #if NPF > 0
836 	pfi_group_change(groupname);
837 #endif
838 
839 	return (0);
840 }
841 
842 /*
843  * Remove a group from an interface
844  */
845 int
846 if_delgroup(struct ifnet *ifp, const char *groupname)
847 {
848 	struct ifg_list		*ifgl;
849 	struct ifg_member	*ifgm;
850 
851 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
852 		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
853 			break;
854 	if (ifgl == NULL)
855 		return (ENOENT);
856 
857 	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
858 
859 	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
860 		if (ifgm->ifgm_ifp == ifp)
861 			break;
862 
863 	if (ifgm != NULL) {
864 		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
865 		kfree(ifgm, M_TEMP);
866 	}
867 
868 	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
869 		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
870 #if NPF > 0
871 		pfi_detach_ifgroup(ifgl->ifgl_group);
872 #endif
873 		kfree(ifgl->ifgl_group, M_TEMP);
874 	}
875 
876 	kfree(ifgl, M_TEMP);
877 
878 #if NPF > 0
879 	pfi_group_change(groupname);
880 #endif
881 
882 	return (0);
883 }
884 
885 /*
886  * Stores all groups from an interface in memory pointed
887  * to by data
888  */
889 int
890 if_getgroup(caddr_t data, struct ifnet *ifp)
891 {
892 	int			 len, error;
893 	struct ifg_list		*ifgl;
894 	struct ifg_req		 ifgrq, *ifgp;
895 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
896 
897 	if (ifgr->ifgr_len == 0) {
898 		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
899 			ifgr->ifgr_len += sizeof(struct ifg_req);
900 		return (0);
901 	}
902 
903 	len = ifgr->ifgr_len;
904 	ifgp = ifgr->ifgr_groups;
905 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
906 		if (len < sizeof(ifgrq))
907 			return (EINVAL);
908 		bzero(&ifgrq, sizeof ifgrq);
909 		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
910 		    sizeof(ifgrq.ifgrq_group));
911 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
912 		    sizeof(struct ifg_req))))
913 			return (error);
914 		len -= sizeof(ifgrq);
915 		ifgp++;
916 	}
917 
918 	return (0);
919 }
920 
921 /*
922  * Stores all members of a group in memory pointed to by data
923  */
924 int
925 if_getgroupmembers(caddr_t data)
926 {
927 	struct ifgroupreq	*ifgr = (struct ifgroupreq *)data;
928 	struct ifg_group	*ifg;
929 	struct ifg_member	*ifgm;
930 	struct ifg_req		 ifgrq, *ifgp;
931 	int			 len, error;
932 
933 	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
934 		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
935 			break;
936 	if (ifg == NULL)
937 		return (ENOENT);
938 
939 	if (ifgr->ifgr_len == 0) {
940 		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
941 			ifgr->ifgr_len += sizeof(ifgrq);
942 		return (0);
943 	}
944 
945 	len = ifgr->ifgr_len;
946 	ifgp = ifgr->ifgr_groups;
947 	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
948 		if (len < sizeof(ifgrq))
949 			return (EINVAL);
950 		bzero(&ifgrq, sizeof ifgrq);
951 		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
952 		    sizeof(ifgrq.ifgrq_member));
953 		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
954 		    sizeof(struct ifg_req))))
955 			return (error);
956 		len -= sizeof(ifgrq);
957 		ifgp++;
958 	}
959 
960 	return (0);
961 }
962 
963 /*
964  * Delete Routes for a Network Interface
965  *
966  * Called for each routing entry via the rnh->rnh_walktree() call above
967  * to delete all route entries referencing a detaching network interface.
968  *
969  * Arguments:
970  *	rn	pointer to node in the routing table
971  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
972  *
973  * Returns:
974  *	0	successful
975  *	errno	failed - reason indicated
976  *
977  */
978 static int
979 if_rtdel(struct radix_node *rn, void *arg)
980 {
981 	struct rtentry	*rt = (struct rtentry *)rn;
982 	struct ifnet	*ifp = arg;
983 	int		err;
984 
985 	if (rt->rt_ifp == ifp) {
986 
987 		/*
988 		 * Protect (sorta) against walktree recursion problems
989 		 * with cloned routes
990 		 */
991 		if (!(rt->rt_flags & RTF_UP))
992 			return (0);
993 
994 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
995 				rt_mask(rt), rt->rt_flags,
996 				NULL);
997 		if (err) {
998 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
999 		}
1000 	}
1001 
1002 	return (0);
1003 }
1004 
1005 /*
1006  * Locate an interface based on a complete address.
1007  */
1008 struct ifaddr *
1009 ifa_ifwithaddr(struct sockaddr *addr)
1010 {
1011 	struct ifnet *ifp;
1012 
1013 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1014 		struct ifaddr_container *ifac;
1015 
1016 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1017 			struct ifaddr *ifa = ifac->ifa;
1018 
1019 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1020 				continue;
1021 			if (sa_equal(addr, ifa->ifa_addr))
1022 				return (ifa);
1023 			if ((ifp->if_flags & IFF_BROADCAST) &&
1024 			    ifa->ifa_broadaddr &&
1025 			    /* IPv6 doesn't have broadcast */
1026 			    ifa->ifa_broadaddr->sa_len != 0 &&
1027 			    sa_equal(ifa->ifa_broadaddr, addr))
1028 				return (ifa);
1029 		}
1030 	}
1031 	return (NULL);
1032 }
1033 /*
1034  * Locate the point to point interface with a given destination address.
1035  */
1036 struct ifaddr *
1037 ifa_ifwithdstaddr(struct sockaddr *addr)
1038 {
1039 	struct ifnet *ifp;
1040 
1041 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1042 		struct ifaddr_container *ifac;
1043 
1044 		if (!(ifp->if_flags & IFF_POINTOPOINT))
1045 			continue;
1046 
1047 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1048 			struct ifaddr *ifa = ifac->ifa;
1049 
1050 			if (ifa->ifa_addr->sa_family != addr->sa_family)
1051 				continue;
1052 			if (ifa->ifa_dstaddr &&
1053 			    sa_equal(addr, ifa->ifa_dstaddr))
1054 				return (ifa);
1055 		}
1056 	}
1057 	return (NULL);
1058 }
1059 
1060 /*
1061  * Find an interface on a specific network.  If many, choice
1062  * is most specific found.
1063  */
1064 struct ifaddr *
1065 ifa_ifwithnet(struct sockaddr *addr)
1066 {
1067 	struct ifnet *ifp;
1068 	struct ifaddr *ifa_maybe = NULL;
1069 	u_int af = addr->sa_family;
1070 	char *addr_data = addr->sa_data, *cplim;
1071 
1072 	/*
1073 	 * AF_LINK addresses can be looked up directly by their index number,
1074 	 * so do that if we can.
1075 	 */
1076 	if (af == AF_LINK) {
1077 		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
1078 
1079 		if (sdl->sdl_index && sdl->sdl_index <= if_index)
1080 			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
1081 	}
1082 
1083 	/*
1084 	 * Scan though each interface, looking for ones that have
1085 	 * addresses in this address family.
1086 	 */
1087 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1088 		struct ifaddr_container *ifac;
1089 
1090 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1091 			struct ifaddr *ifa = ifac->ifa;
1092 			char *cp, *cp2, *cp3;
1093 
1094 			if (ifa->ifa_addr->sa_family != af)
1095 next:				continue;
1096 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
1097 				/*
1098 				 * This is a bit broken as it doesn't
1099 				 * take into account that the remote end may
1100 				 * be a single node in the network we are
1101 				 * looking for.
1102 				 * The trouble is that we don't know the
1103 				 * netmask for the remote end.
1104 				 */
1105 				if (ifa->ifa_dstaddr != NULL &&
1106 				    sa_equal(addr, ifa->ifa_dstaddr))
1107 					return (ifa);
1108 			} else {
1109 				/*
1110 				 * if we have a special address handler,
1111 				 * then use it instead of the generic one.
1112 				 */
1113 				if (ifa->ifa_claim_addr) {
1114 					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
1115 						return (ifa);
1116 					} else {
1117 						continue;
1118 					}
1119 				}
1120 
1121 				/*
1122 				 * Scan all the bits in the ifa's address.
1123 				 * If a bit dissagrees with what we are
1124 				 * looking for, mask it with the netmask
1125 				 * to see if it really matters.
1126 				 * (A byte at a time)
1127 				 */
1128 				if (ifa->ifa_netmask == 0)
1129 					continue;
1130 				cp = addr_data;
1131 				cp2 = ifa->ifa_addr->sa_data;
1132 				cp3 = ifa->ifa_netmask->sa_data;
1133 				cplim = ifa->ifa_netmask->sa_len +
1134 					(char *)ifa->ifa_netmask;
1135 				while (cp3 < cplim)
1136 					if ((*cp++ ^ *cp2++) & *cp3++)
1137 						goto next; /* next address! */
1138 				/*
1139 				 * If the netmask of what we just found
1140 				 * is more specific than what we had before
1141 				 * (if we had one) then remember the new one
1142 				 * before continuing to search
1143 				 * for an even better one.
1144 				 */
1145 				if (ifa_maybe == NULL ||
1146 				    rn_refines((char *)ifa->ifa_netmask,
1147 					       (char *)ifa_maybe->ifa_netmask))
1148 					ifa_maybe = ifa;
1149 			}
1150 		}
1151 	}
1152 	return (ifa_maybe);
1153 }
1154 
1155 /*
1156  * Find an interface address specific to an interface best matching
1157  * a given address.
1158  */
1159 struct ifaddr *
1160 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
1161 {
1162 	struct ifaddr_container *ifac;
1163 	char *cp, *cp2, *cp3;
1164 	char *cplim;
1165 	struct ifaddr *ifa_maybe = NULL;
1166 	u_int af = addr->sa_family;
1167 
1168 	if (af >= AF_MAX)
1169 		return (0);
1170 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1171 		struct ifaddr *ifa = ifac->ifa;
1172 
1173 		if (ifa->ifa_addr->sa_family != af)
1174 			continue;
1175 		if (ifa_maybe == NULL)
1176 			ifa_maybe = ifa;
1177 		if (ifa->ifa_netmask == NULL) {
1178 			if (sa_equal(addr, ifa->ifa_addr) ||
1179 			    (ifa->ifa_dstaddr != NULL &&
1180 			     sa_equal(addr, ifa->ifa_dstaddr)))
1181 				return (ifa);
1182 			continue;
1183 		}
1184 		if (ifp->if_flags & IFF_POINTOPOINT) {
1185 			if (sa_equal(addr, ifa->ifa_dstaddr))
1186 				return (ifa);
1187 		} else {
1188 			cp = addr->sa_data;
1189 			cp2 = ifa->ifa_addr->sa_data;
1190 			cp3 = ifa->ifa_netmask->sa_data;
1191 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
1192 			for (; cp3 < cplim; cp3++)
1193 				if ((*cp++ ^ *cp2++) & *cp3)
1194 					break;
1195 			if (cp3 == cplim)
1196 				return (ifa);
1197 		}
1198 	}
1199 	return (ifa_maybe);
1200 }
1201 
1202 /*
1203  * Default action when installing a route with a Link Level gateway.
1204  * Lookup an appropriate real ifa to point to.
1205  * This should be moved to /sys/net/link.c eventually.
1206  */
1207 static void
1208 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
1209 {
1210 	struct ifaddr *ifa;
1211 	struct sockaddr *dst;
1212 	struct ifnet *ifp;
1213 
1214 	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
1215 	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
1216 		return;
1217 	ifa = ifaof_ifpforaddr(dst, ifp);
1218 	if (ifa != NULL) {
1219 		IFAFREE(rt->rt_ifa);
1220 		IFAREF(ifa);
1221 		rt->rt_ifa = ifa;
1222 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
1223 			ifa->ifa_rtrequest(cmd, rt, info);
1224 	}
1225 }
1226 
1227 /*
1228  * Mark an interface down and notify protocols of
1229  * the transition.
1230  * NOTE: must be called at splnet or eqivalent.
1231  */
1232 void
1233 if_unroute(struct ifnet *ifp, int flag, int fam)
1234 {
1235 	struct ifaddr_container *ifac;
1236 
1237 	ifp->if_flags &= ~flag;
1238 	getmicrotime(&ifp->if_lastchange);
1239 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1240 		struct ifaddr *ifa = ifac->ifa;
1241 
1242 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1243 			kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
1244 	}
1245 	ifq_purge(&ifp->if_snd);
1246 	rt_ifmsg(ifp);
1247 }
1248 
1249 /*
1250  * Mark an interface up and notify protocols of
1251  * the transition.
1252  * NOTE: must be called at splnet or eqivalent.
1253  */
1254 void
1255 if_route(struct ifnet *ifp, int flag, int fam)
1256 {
1257 	struct ifaddr_container *ifac;
1258 
1259 	ifq_purge(&ifp->if_snd);
1260 	ifp->if_flags |= flag;
1261 	getmicrotime(&ifp->if_lastchange);
1262 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1263 		struct ifaddr *ifa = ifac->ifa;
1264 
1265 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
1266 			kpfctlinput(PRC_IFUP, ifa->ifa_addr);
1267 	}
1268 	rt_ifmsg(ifp);
1269 #ifdef INET6
1270 	in6_if_up(ifp);
1271 #endif
1272 }
1273 
1274 /*
1275  * Mark an interface down and notify protocols of the transition.  An
1276  * interface going down is also considered to be a synchronizing event.
1277  * We must ensure that all packet processing related to the interface
1278  * has completed before we return so e.g. the caller can free the ifnet
1279  * structure that the mbufs may be referencing.
1280  *
1281  * NOTE: must be called at splnet or eqivalent.
1282  */
1283 void
1284 if_down(struct ifnet *ifp)
1285 {
1286 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
1287 	netmsg_service_sync();
1288 }
1289 
1290 /*
1291  * Mark an interface up and notify protocols of
1292  * the transition.
1293  * NOTE: must be called at splnet or eqivalent.
1294  */
1295 void
1296 if_up(struct ifnet *ifp)
1297 {
1298 	if_route(ifp, IFF_UP, AF_UNSPEC);
1299 }
1300 
1301 /*
1302  * Process a link state change.
1303  * NOTE: must be called at splsoftnet or equivalent.
1304  */
1305 void
1306 if_link_state_change(struct ifnet *ifp)
1307 {
1308 	int link_state = ifp->if_link_state;
1309 
1310 	rt_ifmsg(ifp);
1311 	devctl_notify("IFNET", ifp->if_xname,
1312 	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
1313 }
1314 
1315 /*
1316  * Handle interface watchdog timer routines.  Called
1317  * from softclock, we decrement timers (if set) and
1318  * call the appropriate interface routine on expiration.
1319  */
1320 static void
1321 if_slowtimo(void *arg)
1322 {
1323 	struct ifnet *ifp;
1324 
1325 	crit_enter();
1326 
1327 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1328 		if (ifp->if_timer == 0 || --ifp->if_timer)
1329 			continue;
1330 		if (ifp->if_watchdog) {
1331 			if (ifnet_tryserialize_all(ifp)) {
1332 				(*ifp->if_watchdog)(ifp);
1333 				ifnet_deserialize_all(ifp);
1334 			} else {
1335 				/* try again next timeout */
1336 				++ifp->if_timer;
1337 			}
1338 		}
1339 	}
1340 
1341 	crit_exit();
1342 
1343 	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
1344 }
1345 
1346 /*
1347  * Map interface name to
1348  * interface structure pointer.
1349  */
1350 struct ifnet *
1351 ifunit(const char *name)
1352 {
1353 	struct ifnet *ifp;
1354 
1355 	/*
1356 	 * Search all the interfaces for this name/number
1357 	 */
1358 
1359 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1360 		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
1361 			break;
1362 	}
1363 	return (ifp);
1364 }
1365 
1366 
1367 /*
1368  * Map interface name in a sockaddr_dl to
1369  * interface structure pointer.
1370  */
1371 struct ifnet *
1372 if_withname(struct sockaddr *sa)
1373 {
1374 	char ifname[IFNAMSIZ+1];
1375 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;
1376 
1377 	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
1378 	     (sdl->sdl_nlen > IFNAMSIZ) )
1379 		return NULL;
1380 
1381 	/*
1382 	 * ifunit wants a null-terminated name.  It may not be null-terminated
1383 	 * in the sockaddr.  We don't want to change the caller's sockaddr,
1384 	 * and there might not be room to put the trailing null anyway, so we
1385 	 * make a local copy that we know we can null terminate safely.
1386 	 */
1387 
1388 	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
1389 	ifname[sdl->sdl_nlen] = '\0';
1390 	return ifunit(ifname);
1391 }
1392 
1393 
1394 /*
1395  * Interface ioctls.
1396  */
1397 int
1398 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
1399 {
1400 	struct ifnet *ifp;
1401 	struct ifreq *ifr;
1402 	struct ifstat *ifs;
1403 	int error;
1404 	short oif_flags;
1405 	int new_flags;
1406 #ifdef COMPAT_43
1407 	int ocmd;
1408 #endif
1409 	size_t namelen, onamelen;
1410 	char new_name[IFNAMSIZ];
1411 	struct ifaddr *ifa;
1412 	struct sockaddr_dl *sdl;
1413 
1414 	switch (cmd) {
1415 	case SIOCGIFCONF:
1416 	case OSIOCGIFCONF:
1417 		return (ifconf(cmd, data, cred));
1418 	default:
1419 		break;
1420 	}
1421 
1422 	ifr = (struct ifreq *)data;
1423 
1424 	switch (cmd) {
1425 	case SIOCIFCREATE:
1426 	case SIOCIFCREATE2:
1427 		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1428 			return (error);
1429 		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
1430 		    	cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
1431 	case SIOCIFDESTROY:
1432 		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
1433 			return (error);
1434 		return (if_clone_destroy(ifr->ifr_name));
1435 	case SIOCIFGCLONERS:
1436 		return (if_clone_list((struct if_clonereq *)data));
1437 	default:
1438 		break;
1439 	}
1440 
1441 	/*
1442 	 * Nominal ioctl through interface, lookup the ifp and obtain a
1443 	 * lock to serialize the ifconfig ioctl operation.
1444 	 */
1445 	ifp = ifunit(ifr->ifr_name);
1446 	if (ifp == NULL)
1447 		return (ENXIO);
1448 	error = 0;
1449 	mtx_lock(&ifp->if_ioctl_mtx);
1450 
1451 	switch (cmd) {
1452 	case SIOCGIFINDEX:
1453 		ifr->ifr_index = ifp->if_index;
1454 		break;
1455 
1456 	case SIOCGIFFLAGS:
1457 		ifr->ifr_flags = ifp->if_flags;
1458 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1459 		break;
1460 
1461 	case SIOCGIFCAP:
1462 		ifr->ifr_reqcap = ifp->if_capabilities;
1463 		ifr->ifr_curcap = ifp->if_capenable;
1464 		break;
1465 
1466 	case SIOCGIFMETRIC:
1467 		ifr->ifr_metric = ifp->if_metric;
1468 		break;
1469 
1470 	case SIOCGIFMTU:
1471 		ifr->ifr_mtu = ifp->if_mtu;
1472 		break;
1473 
1474 	case SIOCGIFDATA:
1475 		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
1476 				sizeof(ifp->if_data));
1477 		break;
1478 
1479 	case SIOCGIFPHYS:
1480 		ifr->ifr_phys = ifp->if_physical;
1481 		break;
1482 
1483 	case SIOCGIFPOLLCPU:
1484 		ifr->ifr_pollcpu = -1;
1485 		break;
1486 
1487 	case SIOCSIFPOLLCPU:
1488 		break;
1489 
1490 	case SIOCSIFFLAGS:
1491 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1492 		if (error)
1493 			break;
1494 		new_flags = (ifr->ifr_flags & 0xffff) |
1495 		    (ifr->ifr_flagshigh << 16);
1496 		if (ifp->if_flags & IFF_SMART) {
1497 			/* Smart drivers twiddle their own routes */
1498 		} else if (ifp->if_flags & IFF_UP &&
1499 		    (new_flags & IFF_UP) == 0) {
1500 			crit_enter();
1501 			if_down(ifp);
1502 			crit_exit();
1503 		} else if (new_flags & IFF_UP &&
1504 		    (ifp->if_flags & IFF_UP) == 0) {
1505 			crit_enter();
1506 			if_up(ifp);
1507 			crit_exit();
1508 		}
1509 
1510 #ifdef IFPOLL_ENABLE
1511 		if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
1512 			if (new_flags & IFF_NPOLLING)
1513 				ifpoll_register(ifp);
1514 			else
1515 				ifpoll_deregister(ifp);
1516 		}
1517 #endif
1518 
1519 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1520 			(new_flags &~ IFF_CANTCHANGE);
1521 		if (new_flags & IFF_PPROMISC) {
1522 			/* Permanently promiscuous mode requested */
1523 			ifp->if_flags |= IFF_PROMISC;
1524 		} else if (ifp->if_pcount == 0) {
1525 			ifp->if_flags &= ~IFF_PROMISC;
1526 		}
1527 		if (ifp->if_ioctl) {
1528 			ifnet_serialize_all(ifp);
1529 			ifp->if_ioctl(ifp, cmd, data, cred);
1530 			ifnet_deserialize_all(ifp);
1531 		}
1532 		getmicrotime(&ifp->if_lastchange);
1533 		break;
1534 
1535 	case SIOCSIFCAP:
1536 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1537 		if (error)
1538 			break;
1539 		if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
1540 			error = EINVAL;
1541 			break;
1542 		}
1543 		ifnet_serialize_all(ifp);
1544 		ifp->if_ioctl(ifp, cmd, data, cred);
1545 		ifnet_deserialize_all(ifp);
1546 		break;
1547 
1548 	case SIOCSIFNAME:
1549 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1550 		if (error)
1551 			break;
1552 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1553 		if (error)
1554 			break;
1555 		if (new_name[0] == '\0') {
1556 			error = EINVAL;
1557 			break;
1558 		}
1559 		if (ifunit(new_name) != NULL) {
1560 			error = EEXIST;
1561 			break;
1562 		}
1563 
1564 		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
1565 
1566 		/* Announce the departure of the interface. */
1567 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1568 
1569 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1570 		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
1571 		/* XXX IFA_LOCK(ifa); */
1572 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1573 		namelen = strlen(new_name);
1574 		onamelen = sdl->sdl_nlen;
1575 		/*
1576 		 * Move the address if needed.  This is safe because we
1577 		 * allocate space for a name of length IFNAMSIZ when we
1578 		 * create this in if_attach().
1579 		 */
1580 		if (namelen != onamelen) {
1581 			bcopy(sdl->sdl_data + onamelen,
1582 			    sdl->sdl_data + namelen, sdl->sdl_alen);
1583 		}
1584 		bcopy(new_name, sdl->sdl_data, namelen);
1585 		sdl->sdl_nlen = namelen;
1586 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1587 		bzero(sdl->sdl_data, onamelen);
1588 		while (namelen != 0)
1589 			sdl->sdl_data[--namelen] = 0xff;
1590 		/* XXX IFA_UNLOCK(ifa) */
1591 
1592 		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
1593 
1594 		/* Announce the return of the interface. */
1595 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1596 		break;
1597 
1598 	case SIOCSIFMETRIC:
1599 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1600 		if (error)
1601 			break;
1602 		ifp->if_metric = ifr->ifr_metric;
1603 		getmicrotime(&ifp->if_lastchange);
1604 		break;
1605 
1606 	case SIOCSIFPHYS:
1607 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1608 		if (error)
1609 			break;
1610 		if (ifp->if_ioctl == NULL) {
1611 		        error = EOPNOTSUPP;
1612 			break;
1613 		}
1614 		ifnet_serialize_all(ifp);
1615 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1616 		ifnet_deserialize_all(ifp);
1617 		if (error == 0)
1618 			getmicrotime(&ifp->if_lastchange);
1619 		break;
1620 
1621 	case SIOCSIFMTU:
1622 	{
1623 		u_long oldmtu = ifp->if_mtu;
1624 
1625 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1626 		if (error)
1627 			break;
1628 		if (ifp->if_ioctl == NULL) {
1629 			error = EOPNOTSUPP;
1630 			break;
1631 		}
1632 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
1633 			error = EINVAL;
1634 			break;
1635 		}
1636 		ifnet_serialize_all(ifp);
1637 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1638 		ifnet_deserialize_all(ifp);
1639 		if (error == 0) {
1640 			getmicrotime(&ifp->if_lastchange);
1641 			rt_ifmsg(ifp);
1642 		}
1643 		/*
1644 		 * If the link MTU changed, do network layer specific procedure.
1645 		 */
1646 		if (ifp->if_mtu != oldmtu) {
1647 #ifdef INET6
1648 			nd6_setmtu(ifp);
1649 #endif
1650 		}
1651 		break;
1652 	}
1653 
1654 	case SIOCADDMULTI:
1655 	case SIOCDELMULTI:
1656 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1657 		if (error)
1658 			break;
1659 
1660 		/* Don't allow group membership on non-multicast interfaces. */
1661 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1662 			error = EOPNOTSUPP;
1663 			break;
1664 		}
1665 
1666 		/* Don't let users screw up protocols' entries. */
1667 		if (ifr->ifr_addr.sa_family != AF_LINK) {
1668 			error = EINVAL;
1669 			break;
1670 		}
1671 
1672 		if (cmd == SIOCADDMULTI) {
1673 			struct ifmultiaddr *ifma;
1674 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1675 		} else {
1676 			error = if_delmulti(ifp, &ifr->ifr_addr);
1677 		}
1678 		if (error == 0)
1679 			getmicrotime(&ifp->if_lastchange);
1680 		break;
1681 
1682 	case SIOCSIFPHYADDR:
1683 	case SIOCDIFPHYADDR:
1684 #ifdef INET6
1685 	case SIOCSIFPHYADDR_IN6:
1686 #endif
1687 	case SIOCSLIFPHYADDR:
1688         case SIOCSIFMEDIA:
1689 	case SIOCSIFGENERIC:
1690 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1691 		if (error)
1692 			break;
1693 		if (ifp->if_ioctl == 0) {
1694 			error = EOPNOTSUPP;
1695 			break;
1696 		}
1697 		ifnet_serialize_all(ifp);
1698 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1699 		ifnet_deserialize_all(ifp);
1700 		if (error == 0)
1701 			getmicrotime(&ifp->if_lastchange);
1702 		break;
1703 
1704 	case SIOCGIFSTATUS:
1705 		ifs = (struct ifstat *)data;
1706 		ifs->ascii[0] = '\0';
1707 		/* fall through */
1708 	case SIOCGIFPSRCADDR:
1709 	case SIOCGIFPDSTADDR:
1710 	case SIOCGLIFPHYADDR:
1711 	case SIOCGIFMEDIA:
1712 	case SIOCGIFGENERIC:
1713 		if (ifp->if_ioctl == NULL) {
1714 			error = EOPNOTSUPP;
1715 			break;
1716 		}
1717 		ifnet_serialize_all(ifp);
1718 		error = ifp->if_ioctl(ifp, cmd, data, cred);
1719 		ifnet_deserialize_all(ifp);
1720 		break;
1721 
1722 	case SIOCSIFLLADDR:
1723 		error = priv_check_cred(cred, PRIV_ROOT, 0);
1724 		if (error)
1725 			break;
1726 		error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
1727 				     ifr->ifr_addr.sa_len);
1728 		EVENTHANDLER_INVOKE(iflladdr_event, ifp);
1729 		break;
1730 
1731 	default:
1732 		oif_flags = ifp->if_flags;
1733 		if (so->so_proto == 0) {
1734 			error = EOPNOTSUPP;
1735 			break;
1736 		}
1737 #ifndef COMPAT_43
1738 		error = so_pru_control_direct(so, cmd, data, ifp);
1739 #else
1740 		ocmd = cmd;
1741 
1742 		switch (cmd) {
1743 		case SIOCSIFDSTADDR:
1744 		case SIOCSIFADDR:
1745 		case SIOCSIFBRDADDR:
1746 		case SIOCSIFNETMASK:
1747 #if BYTE_ORDER != BIG_ENDIAN
1748 			if (ifr->ifr_addr.sa_family == 0 &&
1749 			    ifr->ifr_addr.sa_len < 16) {
1750 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1751 				ifr->ifr_addr.sa_len = 16;
1752 			}
1753 #else
1754 			if (ifr->ifr_addr.sa_len == 0)
1755 				ifr->ifr_addr.sa_len = 16;
1756 #endif
1757 			break;
1758 		case OSIOCGIFADDR:
1759 			cmd = SIOCGIFADDR;
1760 			break;
1761 		case OSIOCGIFDSTADDR:
1762 			cmd = SIOCGIFDSTADDR;
1763 			break;
1764 		case OSIOCGIFBRDADDR:
1765 			cmd = SIOCGIFBRDADDR;
1766 			break;
1767 		case OSIOCGIFNETMASK:
1768 			cmd = SIOCGIFNETMASK;
1769 			break;
1770 		default:
1771 			break;
1772 		}
1773 
1774 		error = so_pru_control_direct(so, cmd, data, ifp);
1775 
1776 		switch (ocmd) {
1777 		case OSIOCGIFADDR:
1778 		case OSIOCGIFDSTADDR:
1779 		case OSIOCGIFBRDADDR:
1780 		case OSIOCGIFNETMASK:
1781 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1782 			break;
1783 		}
1784 #endif /* COMPAT_43 */
1785 
1786 		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1787 #ifdef INET6
1788 			DELAY(100);/* XXX: temporary workaround for fxp issue*/
1789 			if (ifp->if_flags & IFF_UP) {
1790 				crit_enter();
1791 				in6_if_up(ifp);
1792 				crit_exit();
1793 			}
1794 #endif
1795 		}
1796 		break;
1797 	}
1798 
1799 	mtx_unlock(&ifp->if_ioctl_mtx);
1800 	return (error);
1801 }
1802 
1803 /*
1804  * Set/clear promiscuous mode on interface ifp based on the truth value
1805  * of pswitch.  The calls are reference counted so that only the first
1806  * "on" request actually has an effect, as does the final "off" request.
1807  * Results are undefined if the "off" and "on" requests are not matched.
1808  */
1809 int
1810 ifpromisc(struct ifnet *ifp, int pswitch)
1811 {
1812 	struct ifreq ifr;
1813 	int error;
1814 	int oldflags;
1815 
1816 	oldflags = ifp->if_flags;
1817 	if (ifp->if_flags & IFF_PPROMISC) {
1818 		/* Do nothing if device is in permanently promiscuous mode */
1819 		ifp->if_pcount += pswitch ? 1 : -1;
1820 		return (0);
1821 	}
1822 	if (pswitch) {
1823 		/*
1824 		 * If the device is not configured up, we cannot put it in
1825 		 * promiscuous mode.
1826 		 */
1827 		if ((ifp->if_flags & IFF_UP) == 0)
1828 			return (ENETDOWN);
1829 		if (ifp->if_pcount++ != 0)
1830 			return (0);
1831 		ifp->if_flags |= IFF_PROMISC;
1832 		log(LOG_INFO, "%s: promiscuous mode enabled\n",
1833 		    ifp->if_xname);
1834 	} else {
1835 		if (--ifp->if_pcount > 0)
1836 			return (0);
1837 		ifp->if_flags &= ~IFF_PROMISC;
1838 		log(LOG_INFO, "%s: promiscuous mode disabled\n",
1839 		    ifp->if_xname);
1840 	}
1841 	ifr.ifr_flags = ifp->if_flags;
1842 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1843 	ifnet_serialize_all(ifp);
1844 	error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
1845 	ifnet_deserialize_all(ifp);
1846 	if (error == 0)
1847 		rt_ifmsg(ifp);
1848 	else
1849 		ifp->if_flags = oldflags;
1850 	return error;
1851 }
1852 
1853 /*
1854  * Return interface configuration
1855  * of system.  List may be used
1856  * in later ioctl's (above) to get
1857  * other information.
1858  */
1859 static int
1860 ifconf(u_long cmd, caddr_t data, struct ucred *cred)
1861 {
1862 	struct ifconf *ifc = (struct ifconf *)data;
1863 	struct ifnet *ifp;
1864 	struct sockaddr *sa;
1865 	struct ifreq ifr, *ifrp;
1866 	int space = ifc->ifc_len, error = 0;
1867 
1868 	ifrp = ifc->ifc_req;
1869 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1870 		struct ifaddr_container *ifac;
1871 		int addrs;
1872 
1873 		if (space <= sizeof ifr)
1874 			break;
1875 
1876 		/*
1877 		 * Zero the stack declared structure first to prevent
1878 		 * memory disclosure.
1879 		 */
1880 		bzero(&ifr, sizeof(ifr));
1881 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1882 		    >= sizeof(ifr.ifr_name)) {
1883 			error = ENAMETOOLONG;
1884 			break;
1885 		}
1886 
1887 		addrs = 0;
1888 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
1889 			struct ifaddr *ifa = ifac->ifa;
1890 
1891 			if (space <= sizeof ifr)
1892 				break;
1893 			sa = ifa->ifa_addr;
1894 			if (cred->cr_prison &&
1895 			    prison_if(cred, sa))
1896 				continue;
1897 			addrs++;
1898 #ifdef COMPAT_43
1899 			if (cmd == OSIOCGIFCONF) {
1900 				struct osockaddr *osa =
1901 					 (struct osockaddr *)&ifr.ifr_addr;
1902 				ifr.ifr_addr = *sa;
1903 				osa->sa_family = sa->sa_family;
1904 				error = copyout(&ifr, ifrp, sizeof ifr);
1905 				ifrp++;
1906 			} else
1907 #endif
1908 			if (sa->sa_len <= sizeof(*sa)) {
1909 				ifr.ifr_addr = *sa;
1910 				error = copyout(&ifr, ifrp, sizeof ifr);
1911 				ifrp++;
1912 			} else {
1913 				if (space < (sizeof ifr) + sa->sa_len -
1914 					    sizeof(*sa))
1915 					break;
1916 				space -= sa->sa_len - sizeof(*sa);
1917 				error = copyout(&ifr, ifrp,
1918 						sizeof ifr.ifr_name);
1919 				if (error == 0)
1920 					error = copyout(sa, &ifrp->ifr_addr,
1921 							sa->sa_len);
1922 				ifrp = (struct ifreq *)
1923 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1924 			}
1925 			if (error)
1926 				break;
1927 			space -= sizeof ifr;
1928 		}
1929 		if (error)
1930 			break;
1931 		if (!addrs) {
1932 			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
1933 			error = copyout(&ifr, ifrp, sizeof ifr);
1934 			if (error)
1935 				break;
1936 			space -= sizeof ifr;
1937 			ifrp++;
1938 		}
1939 	}
1940 	ifc->ifc_len -= space;
1941 	return (error);
1942 }
1943 
1944 /*
1945  * Just like if_promisc(), but for all-multicast-reception mode.
1946  */
1947 int
1948 if_allmulti(struct ifnet *ifp, int onswitch)
1949 {
1950 	int error = 0;
1951 	struct ifreq ifr;
1952 
1953 	crit_enter();
1954 
1955 	if (onswitch) {
1956 		if (ifp->if_amcount++ == 0) {
1957 			ifp->if_flags |= IFF_ALLMULTI;
1958 			ifr.ifr_flags = ifp->if_flags;
1959 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1960 			ifnet_serialize_all(ifp);
1961 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1962 					      NULL);
1963 			ifnet_deserialize_all(ifp);
1964 		}
1965 	} else {
1966 		if (ifp->if_amcount > 1) {
1967 			ifp->if_amcount--;
1968 		} else {
1969 			ifp->if_amcount = 0;
1970 			ifp->if_flags &= ~IFF_ALLMULTI;
1971 			ifr.ifr_flags = ifp->if_flags;
1972 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1973 			ifnet_serialize_all(ifp);
1974 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
1975 					      NULL);
1976 			ifnet_deserialize_all(ifp);
1977 		}
1978 	}
1979 
1980 	crit_exit();
1981 
1982 	if (error == 0)
1983 		rt_ifmsg(ifp);
1984 	return error;
1985 }
1986 
1987 /*
1988  * Add a multicast listenership to the interface in question.
1989  * The link layer provides a routine which converts
1990  */
1991 int
1992 if_addmulti(
1993 	struct ifnet *ifp,	/* interface to manipulate */
1994 	struct sockaddr *sa,	/* address to add */
1995 	struct ifmultiaddr **retifma)
1996 {
1997 	struct sockaddr *llsa, *dupsa;
1998 	int error;
1999 	struct ifmultiaddr *ifma;
2000 
2001 	/*
2002 	 * If the matching multicast address already exists
2003 	 * then don't add a new one, just add a reference
2004 	 */
2005 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2006 		if (sa_equal(sa, ifma->ifma_addr)) {
2007 			ifma->ifma_refcount++;
2008 			if (retifma)
2009 				*retifma = ifma;
2010 			return 0;
2011 		}
2012 	}
2013 
2014 	/*
2015 	 * Give the link layer a chance to accept/reject it, and also
2016 	 * find out which AF_LINK address this maps to, if it isn't one
2017 	 * already.
2018 	 */
2019 	if (ifp->if_resolvemulti) {
2020 		ifnet_serialize_all(ifp);
2021 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
2022 		ifnet_deserialize_all(ifp);
2023 		if (error)
2024 			return error;
2025 	} else {
2026 		llsa = NULL;
2027 	}
2028 
2029 	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2030 	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
2031 	bcopy(sa, dupsa, sa->sa_len);
2032 
2033 	ifma->ifma_addr = dupsa;
2034 	ifma->ifma_lladdr = llsa;
2035 	ifma->ifma_ifp = ifp;
2036 	ifma->ifma_refcount = 1;
2037 	ifma->ifma_protospec = 0;
2038 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
2039 
2040 	/*
2041 	 * Some network interfaces can scan the address list at
2042 	 * interrupt time; lock them out.
2043 	 */
2044 	crit_enter();
2045 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2046 	crit_exit();
2047 	if (retifma)
2048 		*retifma = ifma;
2049 
2050 	if (llsa != NULL) {
2051 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2052 			if (sa_equal(ifma->ifma_addr, llsa))
2053 				break;
2054 		}
2055 		if (ifma) {
2056 			ifma->ifma_refcount++;
2057 		} else {
2058 			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
2059 			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
2060 			bcopy(llsa, dupsa, llsa->sa_len);
2061 			ifma->ifma_addr = dupsa;
2062 			ifma->ifma_ifp = ifp;
2063 			ifma->ifma_refcount = 1;
2064 			crit_enter();
2065 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
2066 			crit_exit();
2067 		}
2068 	}
2069 	/*
2070 	 * We are certain we have added something, so call down to the
2071 	 * interface to let them know about it.
2072 	 */
2073 	crit_enter();
2074 	ifnet_serialize_all(ifp);
2075 	if (ifp->if_ioctl)
2076 		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
2077 	ifnet_deserialize_all(ifp);
2078 	crit_exit();
2079 
2080 	return 0;
2081 }
2082 
2083 /*
2084  * Remove a reference to a multicast address on this interface.  Yell
2085  * if the request does not match an existing membership.
2086  */
2087 int
2088 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
2089 {
2090 	struct ifmultiaddr *ifma;
2091 
2092 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2093 		if (sa_equal(sa, ifma->ifma_addr))
2094 			break;
2095 	if (ifma == NULL)
2096 		return ENOENT;
2097 
2098 	if (ifma->ifma_refcount > 1) {
2099 		ifma->ifma_refcount--;
2100 		return 0;
2101 	}
2102 
2103 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
2104 	sa = ifma->ifma_lladdr;
2105 	crit_enter();
2106 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2107 	/*
2108 	 * Make sure the interface driver is notified
2109 	 * in the case of a link layer mcast group being left.
2110 	 */
2111 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
2112 		ifnet_serialize_all(ifp);
2113 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2114 		ifnet_deserialize_all(ifp);
2115 	}
2116 	crit_exit();
2117 	kfree(ifma->ifma_addr, M_IFMADDR);
2118 	kfree(ifma, M_IFMADDR);
2119 	if (sa == NULL)
2120 		return 0;
2121 
2122 	/*
2123 	 * Now look for the link-layer address which corresponds to
2124 	 * this network address.  It had been squirreled away in
2125 	 * ifma->ifma_lladdr for this purpose (so we don't have
2126 	 * to call ifp->if_resolvemulti() again), and we saved that
2127 	 * value in sa above.  If some nasty deleted the
2128 	 * link-layer address out from underneath us, we can deal because
2129 	 * the address we stored was is not the same as the one which was
2130 	 * in the record for the link-layer address.  (So we don't complain
2131 	 * in that case.)
2132 	 */
2133 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2134 		if (sa_equal(sa, ifma->ifma_addr))
2135 			break;
2136 	if (ifma == NULL)
2137 		return 0;
2138 
2139 	if (ifma->ifma_refcount > 1) {
2140 		ifma->ifma_refcount--;
2141 		return 0;
2142 	}
2143 
2144 	crit_enter();
2145 	ifnet_serialize_all(ifp);
2146 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
2147 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
2148 	ifnet_deserialize_all(ifp);
2149 	crit_exit();
2150 	kfree(ifma->ifma_addr, M_IFMADDR);
2151 	kfree(sa, M_IFMADDR);
2152 	kfree(ifma, M_IFMADDR);
2153 
2154 	return 0;
2155 }
2156 
2157 /*
2158  * Delete all multicast group membership for an interface.
2159  * Should be used to quickly flush all multicast filters.
2160  */
2161 void
2162 if_delallmulti(struct ifnet *ifp)
2163 {
2164 	struct ifmultiaddr *ifma;
2165 	struct ifmultiaddr *next;
2166 
2167 	TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
2168 		if_delmulti(ifp, ifma->ifma_addr);
2169 }
2170 
2171 
2172 /*
2173  * Set the link layer address on an interface.
2174  *
2175  * At this time we only support certain types of interfaces,
2176  * and we don't allow the length of the address to change.
2177  */
2178 int
2179 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
2180 {
2181 	struct sockaddr_dl *sdl;
2182 	struct ifreq ifr;
2183 
2184 	sdl = IF_LLSOCKADDR(ifp);
2185 	if (sdl == NULL)
2186 		return (EINVAL);
2187 	if (len != sdl->sdl_alen)	/* don't allow length to change */
2188 		return (EINVAL);
2189 	switch (ifp->if_type) {
2190 	case IFT_ETHER:			/* these types use struct arpcom */
2191 	case IFT_XETHER:
2192 	case IFT_L2VLAN:
2193 		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
2194 		bcopy(lladdr, LLADDR(sdl), len);
2195 		break;
2196 	default:
2197 		return (ENODEV);
2198 	}
2199 	/*
2200 	 * If the interface is already up, we need
2201 	 * to re-init it in order to reprogram its
2202 	 * address filter.
2203 	 */
2204 	ifnet_serialize_all(ifp);
2205 	if ((ifp->if_flags & IFF_UP) != 0) {
2206 #ifdef INET
2207 		struct ifaddr_container *ifac;
2208 #endif
2209 
2210 		ifp->if_flags &= ~IFF_UP;
2211 		ifr.ifr_flags = ifp->if_flags;
2212 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2213 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2214 			      NULL);
2215 		ifp->if_flags |= IFF_UP;
2216 		ifr.ifr_flags = ifp->if_flags;
2217 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
2218 		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
2219 				 NULL);
2220 #ifdef INET
2221 		/*
2222 		 * Also send gratuitous ARPs to notify other nodes about
2223 		 * the address change.
2224 		 */
2225 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2226 			struct ifaddr *ifa = ifac->ifa;
2227 
2228 			if (ifa->ifa_addr != NULL &&
2229 			    ifa->ifa_addr->sa_family == AF_INET)
2230 				arp_gratuitous(ifp, ifa);
2231 		}
2232 #endif
2233 	}
2234 	ifnet_deserialize_all(ifp);
2235 	return (0);
2236 }
2237 
2238 struct ifmultiaddr *
2239 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
2240 {
2241 	struct ifmultiaddr *ifma;
2242 
2243 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
2244 		if (sa_equal(ifma->ifma_addr, sa))
2245 			break;
2246 
2247 	return ifma;
2248 }
2249 
2250 /*
2251  * This function locates the first real ethernet MAC from a network
2252  * card and loads it into node, returning 0 on success or ENOENT if
2253  * no suitable interfaces were found.  It is used by the uuid code to
2254  * generate a unique 6-byte number.
2255  */
2256 int
2257 if_getanyethermac(uint16_t *node, int minlen)
2258 {
2259 	struct ifnet *ifp;
2260 	struct sockaddr_dl *sdl;
2261 
2262 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
2263 		if (ifp->if_type != IFT_ETHER)
2264 			continue;
2265 		sdl = IF_LLSOCKADDR(ifp);
2266 		if (sdl->sdl_alen < minlen)
2267 			continue;
2268 		bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
2269 		      minlen);
2270 		return(0);
2271 	}
2272 	return (ENOENT);
2273 }
2274 
2275 /*
2276  * The name argument must be a pointer to storage which will last as
2277  * long as the interface does.  For physical devices, the result of
2278  * device_get_name(dev) is a good choice and for pseudo-devices a
2279  * static string works well.
2280  */
2281 void
2282 if_initname(struct ifnet *ifp, const char *name, int unit)
2283 {
2284 	ifp->if_dname = name;
2285 	ifp->if_dunit = unit;
2286 	if (unit != IF_DUNIT_NONE)
2287 		ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
2288 	else
2289 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
2290 }
2291 
2292 int
2293 if_printf(struct ifnet *ifp, const char *fmt, ...)
2294 {
2295 	__va_list ap;
2296 	int retval;
2297 
2298 	retval = kprintf("%s: ", ifp->if_xname);
2299 	__va_start(ap, fmt);
2300 	retval += kvprintf(fmt, ap);
2301 	__va_end(ap);
2302 	return (retval);
2303 }
2304 
2305 struct ifnet *
2306 if_alloc(uint8_t type)
2307 {
2308         struct ifnet *ifp;
2309 	size_t size;
2310 
2311 	/*
2312 	 * XXX temporary hack until arpcom is setup in if_l2com
2313 	 */
2314 	if (type == IFT_ETHER)
2315 		size = sizeof(struct arpcom);
2316 	else
2317 		size = sizeof(struct ifnet);
2318 
2319 	ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);
2320 
2321 	ifp->if_type = type;
2322 
2323 	if (if_com_alloc[type] != NULL) {
2324 		ifp->if_l2com = if_com_alloc[type](type, ifp);
2325 		if (ifp->if_l2com == NULL) {
2326 			kfree(ifp, M_IFNET);
2327 			return (NULL);
2328 		}
2329 	}
2330 	return (ifp);
2331 }
2332 
2333 void
2334 if_free(struct ifnet *ifp)
2335 {
2336 	kfree(ifp, M_IFNET);
2337 }
2338 
2339 void
2340 ifq_set_classic(struct ifaltq *ifq)
2341 {
2342 	ifq->altq_enqueue = ifq_classic_enqueue;
2343 	ifq->altq_dequeue = ifq_classic_dequeue;
2344 	ifq->altq_request = ifq_classic_request;
2345 }
2346 
2347 int
2348 ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
2349 		    struct altq_pktattr *pa __unused)
2350 {
2351 	logifq(enqueue, ifq);
2352 	if (IF_QFULL(ifq)) {
2353 		m_freem(m);
2354 		return(ENOBUFS);
2355 	} else {
2356 		IF_ENQUEUE(ifq, m);
2357 		return(0);
2358 	}
2359 }
2360 
2361 struct mbuf *
2362 ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
2363 {
2364 	struct mbuf *m;
2365 
2366 	switch (op) {
2367 	case ALTDQ_POLL:
2368 		IF_POLL(ifq, m);
2369 		break;
2370 	case ALTDQ_REMOVE:
2371 		logifq(dequeue, ifq);
2372 		IF_DEQUEUE(ifq, m);
2373 		break;
2374 	default:
2375 		panic("unsupported ALTQ dequeue op: %d", op);
2376 	}
2377 	KKASSERT(mpolled == NULL || mpolled == m);
2378 	return(m);
2379 }
2380 
2381 int
2382 ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
2383 {
2384 	switch (req) {
2385 	case ALTRQ_PURGE:
2386 		IF_DRAIN(ifq);
2387 		break;
2388 	default:
2389 		panic("unsupported ALTQ request: %d", req);
2390 	}
2391 	return(0);
2392 }
2393 
2394 int
2395 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
2396 {
2397 	struct ifaltq *ifq = &ifp->if_snd;
2398 	int running = 0, error, start = 0;
2399 
2400 	ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);
2401 
2402 	ALTQ_LOCK(ifq);
2403 	error = ifq_enqueue_locked(ifq, m, pa);
2404 	if (error) {
2405 		ALTQ_UNLOCK(ifq);
2406 		return error;
2407 	}
2408 	if (!ifq->altq_started) {
2409 		/*
2410 		 * Hold the interlock of ifnet.if_start
2411 		 */
2412 		ifq->altq_started = 1;
2413 		start = 1;
2414 	}
2415 	ALTQ_UNLOCK(ifq);
2416 
2417 	ifp->if_obytes += m->m_pkthdr.len;
2418 	if (m->m_flags & M_MCAST)
2419 		ifp->if_omcasts++;
2420 
2421 	if (!start) {
2422 		logifstart(avoid, ifp);
2423 		return 0;
2424 	}
2425 
2426 	/*
2427 	 * Try to do direct ifnet.if_start first, if there is
2428 	 * contention on ifnet's serializer, ifnet.if_start will
2429 	 * be scheduled on ifnet's CPU.
2430 	 */
2431 	if (!ifnet_tryserialize_tx(ifp)) {
2432 		/*
2433 		 * ifnet serializer contention happened,
2434 		 * ifnet.if_start is scheduled on ifnet's
2435 		 * CPU, and we keep going.
2436 		 */
2437 		logifstart(contend_sched, ifp);
2438 		if_start_schedule(ifp);
2439 		return 0;
2440 	}
2441 
2442 	if ((ifp->if_flags & IFF_OACTIVE) == 0) {
2443 		logifstart(run, ifp);
2444 		ifp->if_start(ifp);
2445 		if ((ifp->if_flags &
2446 		     (IFF_OACTIVE | IFF_RUNNING)) == IFF_RUNNING)
2447 			running = 1;
2448 	}
2449 
2450 	ifnet_deserialize_tx(ifp);
2451 
2452 	if (if_start_need_schedule(ifq, running)) {
2453 		/*
2454 		 * More data need to be transmitted, ifnet.if_start is
2455 		 * scheduled on ifnet's CPU, and we keep going.
2456 		 * NOTE: ifnet.if_start interlock is not released.
2457 		 */
2458 		logifstart(sched, ifp);
2459 		if_start_schedule(ifp);
2460 	}
2461 	return 0;
2462 }
2463 
2464 void *
2465 ifa_create(int size, int flags)
2466 {
2467 	struct ifaddr *ifa;
2468 	int i;
2469 
2470 	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));
2471 
2472 	ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
2473 	if (ifa == NULL)
2474 		return NULL;
2475 
2476 	ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
2477 				      M_IFADDR, M_WAITOK | M_ZERO);
2478 	ifa->ifa_ncnt = ncpus;
2479 	for (i = 0; i < ncpus; ++i) {
2480 		struct ifaddr_container *ifac = &ifa->ifa_containers[i];
2481 
2482 		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
2483 		ifac->ifa = ifa;
2484 		ifac->ifa_refcnt = 1;
2485 	}
2486 #ifdef IFADDR_DEBUG
2487 	kprintf("alloc ifa %p %d\n", ifa, size);
2488 #endif
2489 	return ifa;
2490 }
2491 
2492 void
2493 ifac_free(struct ifaddr_container *ifac, int cpu_id)
2494 {
2495 	struct ifaddr *ifa = ifac->ifa;
2496 
2497 	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
2498 	KKASSERT(ifac->ifa_refcnt == 0);
2499 	KASSERT(ifac->ifa_listmask == 0,
2500 		("ifa is still on %#x lists", ifac->ifa_listmask));
2501 
2502 	ifac->ifa_magic = IFA_CONTAINER_DEAD;
2503 
2504 #ifdef IFADDR_DEBUG_VERBOSE
2505 	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
2506 #endif
2507 
2508 	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
2509 		("invalid # of ifac, %d", ifa->ifa_ncnt));
2510 	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
2511 #ifdef IFADDR_DEBUG
2512 		kprintf("free ifa %p\n", ifa);
2513 #endif
2514 		kfree(ifa->ifa_containers, M_IFADDR);
2515 		kfree(ifa, M_IFADDR);
2516 	}
2517 }
2518 
2519 static void
2520 ifa_iflink_dispatch(netmsg_t nmsg)
2521 {
2522 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2523 	struct ifaddr *ifa = msg->ifa;
2524 	struct ifnet *ifp = msg->ifp;
2525 	int cpu = mycpuid;
2526 	struct ifaddr_container *ifac;
2527 
2528 	crit_enter();
2529 
2530 	ifac = &ifa->ifa_containers[cpu];
2531 	ASSERT_IFAC_VALID(ifac);
2532 	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
2533 		("ifaddr is on if_addrheads"));
2534 
2535 	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
2536 	if (msg->tail)
2537 		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
2538 	else
2539 		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);
2540 
2541 	crit_exit();
2542 
2543 	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2544 }
2545 
2546 void
2547 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
2548 {
2549 	struct netmsg_ifaddr msg;
2550 
2551 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2552 		    0, ifa_iflink_dispatch);
2553 	msg.ifa = ifa;
2554 	msg.ifp = ifp;
2555 	msg.tail = tail;
2556 
2557 	ifa_domsg(&msg.base.lmsg, 0);
2558 }
2559 
2560 static void
2561 ifa_ifunlink_dispatch(netmsg_t nmsg)
2562 {
2563 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2564 	struct ifaddr *ifa = msg->ifa;
2565 	struct ifnet *ifp = msg->ifp;
2566 	int cpu = mycpuid;
2567 	struct ifaddr_container *ifac;
2568 
2569 	crit_enter();
2570 
2571 	ifac = &ifa->ifa_containers[cpu];
2572 	ASSERT_IFAC_VALID(ifac);
2573 	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
2574 		("ifaddr is not on if_addrhead"));
2575 
2576 	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
2577 	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;
2578 
2579 	crit_exit();
2580 
2581 	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
2582 }
2583 
2584 void
2585 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
2586 {
2587 	struct netmsg_ifaddr msg;
2588 
2589 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2590 		    0, ifa_ifunlink_dispatch);
2591 	msg.ifa = ifa;
2592 	msg.ifp = ifp;
2593 
2594 	ifa_domsg(&msg.base.lmsg, 0);
2595 }
2596 
2597 static void
2598 ifa_destroy_dispatch(netmsg_t nmsg)
2599 {
2600 	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
2601 
2602 	IFAFREE(msg->ifa);
2603 	ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
2604 }
2605 
2606 void
2607 ifa_destroy(struct ifaddr *ifa)
2608 {
2609 	struct netmsg_ifaddr msg;
2610 
2611 	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
2612 		    0, ifa_destroy_dispatch);
2613 	msg.ifa = ifa;
2614 
2615 	ifa_domsg(&msg.base.lmsg, 0);
2616 }
2617 
2618 struct lwkt_port *
2619 ifnet_portfn(int cpu)
2620 {
2621 	return &ifnet_threads[cpu].td_msgport;
2622 }
2623 
2624 void
2625 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
2626 {
2627 	KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);
2628 
2629 	if (next_cpu < ncpus)
2630 		lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
2631 	else
2632 		lwkt_replymsg(lmsg, 0);
2633 }
2634 
2635 int
2636 ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
2637 {
2638 	KKASSERT(cpu < ncpus);
2639 	return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
2640 }
2641 
2642 void
2643 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
2644 {
2645 	KKASSERT(cpu < ncpus);
2646 	lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
2647 }
2648 
2649 /*
2650  * Generic netmsg service loop.  Some protocols may roll their own but all
2651  * must do the basic command dispatch function call done here.
2652  */
2653 static void
2654 ifnet_service_loop(void *arg __unused)
2655 {
2656 	netmsg_t msg;
2657 
2658 	while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
2659 		KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
2660 		msg->base.nm_dispatch(msg);
2661 	}
2662 }
2663 
2664 static void
2665 ifnetinit(void *dummy __unused)
2666 {
2667 	int i;
2668 
2669 	for (i = 0; i < ncpus; ++i) {
2670 		struct thread *thr = &ifnet_threads[i];
2671 
2672 		lwkt_create(ifnet_service_loop, NULL, NULL,
2673 			    thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
2674 			    i, "ifnet %d", i);
2675 		netmsg_service_port_init(&thr->td_msgport);
2676 		lwkt_schedule(thr);
2677 	}
2678 }
2679 
2680 struct ifnet *
2681 ifnet_byindex(unsigned short idx)
2682 {
2683 	if (idx > if_index)
2684 		return NULL;
2685 	return ifindex2ifnet[idx];
2686 }
2687 
2688 struct ifaddr *
2689 ifaddr_byindex(unsigned short idx)
2690 {
2691 	struct ifnet *ifp;
2692 
2693 	ifp = ifnet_byindex(idx);
2694 	if (!ifp)
2695 		return NULL;
2696 	return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
2697 }
2698 
2699 void
2700 if_register_com_alloc(u_char type,
2701     if_com_alloc_t *a, if_com_free_t *f)
2702 {
2703 
2704         KASSERT(if_com_alloc[type] == NULL,
2705             ("if_register_com_alloc: %d already registered", type));
2706         KASSERT(if_com_free[type] == NULL,
2707             ("if_register_com_alloc: %d free already registered", type));
2708 
2709         if_com_alloc[type] = a;
2710         if_com_free[type] = f;
2711 }
2712 
2713 void
2714 if_deregister_com_alloc(u_char type)
2715 {
2716 
2717         KASSERT(if_com_alloc[type] != NULL,
2718             ("if_deregister_com_alloc: %d not registered", type));
2719         KASSERT(if_com_free[type] != NULL,
2720             ("if_deregister_com_alloc: %d free not registered", type));
2721         if_com_alloc[type] = NULL;
2722         if_com_free[type] = NULL;
2723 }
2724 
2725 int
2726 if_ring_count2(int cnt, int cnt_max)
2727 {
2728 	int shift = 0;
2729 
2730 	KASSERT(cnt_max >= 1 && powerof2(cnt_max),
2731 	    ("invalid ring count max %d", cnt_max));
2732 
2733 	if (cnt <= 0)
2734 		cnt = cnt_max;
2735 	if (cnt > ncpus2)
2736 		cnt = ncpus2;
2737 	if (cnt > cnt_max)
2738 		cnt = cnt_max;
2739 
2740 	while ((1 << (shift + 1)) <= cnt)
2741 		++shift;
2742 	cnt = 1 << shift;
2743 
2744 	KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
2745 	    ("calculate cnt %d, ncpus2 %d, cnt max %d",
2746 	     cnt, ncpus2, cnt_max));
2747 	return cnt;
2748 }
2749