xref: /freebsd-src/sys/net/if.c (revision 7cc9e6ddbb1a6cd38b2f44d4dd9dfc890d2fc9d0)
1 /*
2  * Copyright (c) 1980, 1986, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)if.c	8.5 (Berkeley) 1/9/95
30  * $FreeBSD$
31  */
32 
33 #include "opt_compat.h"
34 #include "opt_inet6.h"
35 #include "opt_inet.h"
36 #include "opt_mac.h"
37 
38 #include <sys/param.h>
39 #include <sys/conf.h>
40 #include <sys/mac.h>
41 #include <sys/malloc.h>
42 #include <sys/bus.h>
43 #include <sys/mbuf.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/protosw.h>
49 #include <sys/kernel.h>
50 #include <sys/sockio.h>
51 #include <sys/syslog.h>
52 #include <sys/sysctl.h>
53 #include <sys/domain.h>
54 #include <sys/jail.h>
55 #include <machine/stdarg.h>
56 
57 #include <net/if.h>
58 #include <net/if_arp.h>
59 #include <net/if_clone.h>
60 #include <net/if_dl.h>
61 #include <net/if_types.h>
62 #include <net/if_var.h>
63 #include <net/radix.h>
64 #include <net/route.h>
65 
66 #if defined(INET) || defined(INET6)
67 /*XXX*/
68 #include <netinet/in.h>
69 #include <netinet/in_var.h>
70 #ifdef INET6
71 #include <netinet6/in6_var.h>
72 #include <netinet6/in6_ifattach.h>
73 #endif
74 #endif
75 #ifdef INET
76 #include <netinet/if_ether.h>
77 #endif
78 
79 struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
80 
81 static void	if_attachdomain(void *);
82 static void	if_attachdomain1(struct ifnet *);
83 static int	ifconf(u_long, caddr_t);
84 static void	if_grow(void);
85 static void	if_init(void *);
86 static void	if_check(void *);
87 static int	if_findindex(struct ifnet *);
88 static void	if_qflush(struct ifaltq *);
89 static void	if_route(struct ifnet *, int flag, int fam);
90 static void	if_slowtimo(void *);
91 static void	if_unroute(struct ifnet *, int flag, int fam);
92 static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
93 static int	if_rtdel(struct radix_node *, void *);
94 static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
95 #ifdef INET6
96 /*
97  * XXX: declare here to avoid to include many inet6 related files..
98  * should be more generalized?
99  */
100 extern void	nd6_setmtu(struct ifnet *);
101 #endif
102 
103 int	if_index = 0;
104 struct	ifindex_entry *ifindex_table = NULL;
105 int	ifqmaxlen = IFQ_MAXLEN;
106 struct	ifnethead ifnet;	/* depend on static init XXX */
107 struct	mtx ifnet_lock;
108 
109 static int	if_indexlim = 8;
110 static struct	klist ifklist;
111 
112 static void	filt_netdetach(struct knote *kn);
113 static int	filt_netdev(struct knote *kn, long hint);
114 
115 static struct filterops netdev_filtops =
116     { 1, NULL, filt_netdetach, filt_netdev };
117 
118 /*
119  * System initialization
120  */
121 SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL)
122 SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_check, NULL)
123 
124 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
125 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
126 
127 static d_open_t		netopen;
128 static d_close_t	netclose;
129 static d_ioctl_t	netioctl;
130 static d_kqfilter_t	netkqfilter;
131 
132 static struct cdevsw net_cdevsw = {
133 	.d_version =	D_VERSION,
134 	.d_flags =	D_NEEDGIANT,
135 	.d_open =	netopen,
136 	.d_close =	netclose,
137 	.d_ioctl =	netioctl,
138 	.d_name =	"net",
139 	.d_kqfilter =	netkqfilter,
140 };
141 
/*
 * Open handler for the "net" devices: nothing to set up, always succeeds.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{

	/* No per-open state is kept. */
	return (0);
}
147 
/*
 * Close handler for the "net" devices: nothing to tear down, always
 * succeeds.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	/* No per-open state to release. */
	return (0);
}
153 
154 static int
155 netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
156 {
157 	struct ifnet *ifp;
158 	int error, idx;
159 
160 	/* only support interface specific ioctls */
161 	if (IOCGROUP(cmd) != 'i')
162 		return (EOPNOTSUPP);
163 	idx = minor(dev);
164 	if (idx == 0) {
165 		/*
166 		 * special network device, not interface.
167 		 */
168 		if (cmd == SIOCGIFCONF)
169 			return (ifconf(cmd, data));	/* XXX remove cmd */
170 		return (EOPNOTSUPP);
171 	}
172 
173 	ifp = ifnet_byindex(idx);
174 	if (ifp == NULL)
175 		return (ENXIO);
176 
177 	error = ifhwioctl(cmd, ifp, data, td);
178 	if (error == ENOIOCTL)
179 		error = EOPNOTSUPP;
180 	return (error);
181 }
182 
183 static int
184 netkqfilter(struct cdev *dev, struct knote *kn)
185 {
186 	struct klist *klist;
187 	struct ifnet *ifp;
188 	int idx;
189 
190 	idx = minor(dev);
191 	if (idx == 0) {
192 		klist = &ifklist;
193 	} else {
194 		ifp = ifnet_byindex(idx);
195 		if (ifp == NULL)
196 			return (1);
197 		klist = &ifp->if_klist;
198 	}
199 
200 	switch (kn->kn_filter) {
201 	case EVFILT_NETDEV:
202 		kn->kn_fop = &netdev_filtops;
203 		break;
204 	default:
205 		return (1);
206 	}
207 
208 	kn->kn_hook = (caddr_t)klist;
209 
210 	/* XXX locking? */
211 	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
212 
213 	return (0);
214 }
215 
216 static void
217 filt_netdetach(struct knote *kn)
218 {
219 	struct klist *klist = (struct klist *)kn->kn_hook;
220 
221 	if (kn->kn_status & KN_DETACHED)
222 		return;
223 	SLIST_REMOVE(klist, kn, knote, kn_selnext);
224 }
225 
226 static int
227 filt_netdev(struct knote *kn, long hint)
228 {
229 
230 	/*
231 	 * Currently NOTE_EXIT is abused to indicate device detach.
232 	 */
233 	if (hint == NOTE_EXIT) {
234 		kn->kn_data = NOTE_LINKINV;
235 		kn->kn_status |= KN_DETACHED;
236 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
237 		return (1);
238 	}
239 	kn->kn_data = hint;			/* current status */
240 	if (kn->kn_sfflags & hint)
241 		kn->kn_fflags |= hint;
242 	return (kn->kn_fflags != 0);
243 }
244 
245 /*
246  * Network interface utility routines.
247  *
248  * Routines with ifa_ifwith* names take sockaddr *'s as
249  * parameters.
250  */
251 /* ARGSUSED*/
252 static void
253 if_init(void *dummy __unused)
254 {
255 
256 	IFNET_LOCK_INIT();
257 	TAILQ_INIT(&ifnet);
258 	SLIST_INIT(&ifklist);
259 	if_grow();				/* create initial table */
260 	ifdev_byindex(0) = make_dev(&net_cdevsw, 0,
261 	    UID_ROOT, GID_WHEEL, 0600, "network");
262 	if_clone_init();
263 }
264 
265 static void
266 if_grow(void)
267 {
268 	u_int n;
269 	struct ifindex_entry *e;
270 
271 	if_indexlim <<= 1;
272 	n = if_indexlim * sizeof(*e);
273 	e = malloc(n, M_IFADDR, M_WAITOK | M_ZERO);
274 	if (ifindex_table != NULL) {
275 		memcpy((caddr_t)e, (caddr_t)ifindex_table, n/2);
276 		free((caddr_t)ifindex_table, M_IFADDR);
277 	}
278 	ifindex_table = e;
279 }
280 
281 /* ARGSUSED*/
282 static void
283 if_check(void *dummy __unused)
284 {
285 	struct ifnet *ifp;
286 	int s;
287 
288 	s = splimp();
289 	IFNET_RLOCK();	/* could sleep on rare error; mostly okay XXX */
290 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
291 		if (ifp->if_snd.ifq_maxlen == 0) {
292 			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
293 			ifp->if_snd.ifq_maxlen = ifqmaxlen;
294 		}
295 		if (!mtx_initialized(&ifp->if_snd.ifq_mtx)) {
296 			if_printf(ifp,
297 			    "XXX: driver didn't initialize queue mtx\n");
298 			mtx_init(&ifp->if_snd.ifq_mtx, "unknown",
299 			    MTX_NETWORK_LOCK, MTX_DEF);
300 		}
301 	}
302 	IFNET_RUNLOCK();
303 	splx(s);
304 	if_slowtimo(0);
305 }
306 
307 static int
308 if_findindex(struct ifnet *ifp)
309 {
310 	int i, unit;
311 	char eaddr[18], devname[32];
312 	const char *name, *p;
313 
314 	switch (ifp->if_type) {
315 	case IFT_ETHER:			/* these types use struct arpcom */
316 	case IFT_FDDI:
317 	case IFT_XETHER:
318 	case IFT_ISO88025:
319 	case IFT_L2VLAN:
320 		snprintf(eaddr, 18, "%6D", IFP2AC(ifp)->ac_enaddr, ":");
321 		break;
322 	default:
323 		eaddr[0] = '\0';
324 		break;
325 	}
326 	strlcpy(devname, ifp->if_xname, sizeof(devname));
327 	name = net_cdevsw.d_name;
328 	i = 0;
329 	while ((resource_find_dev(&i, name, &unit, NULL, NULL)) == 0) {
330 		if (resource_string_value(name, unit, "ether", &p) == 0)
331 			if (strcmp(p, eaddr) == 0)
332 				goto found;
333 		if (resource_string_value(name, unit, "dev", &p) == 0)
334 			if (strcmp(p, devname) == 0)
335 				goto found;
336 	}
337 	unit = 0;
338 found:
339 	if (unit != 0) {
340 		if (ifaddr_byindex(unit) == NULL)
341 			return (unit);
342 		printf("%s%d in use, cannot hardwire it to %s.\n",
343 		    name, unit, devname);
344 	}
345 	for (unit = 1; ; unit++) {
346 		if (unit <= if_index && ifaddr_byindex(unit) != NULL)
347 			continue;
348 		if (resource_string_value(name, unit, "ether", &p) == 0 ||
349 		    resource_string_value(name, unit, "dev", &p) == 0)
350 			continue;
351 		break;
352 	}
353 	return (unit);
354 }
355 
356 /*
357  * Attach an interface to the
358  * list of "active" interfaces.
359  */
360 void
361 if_attach(struct ifnet *ifp)
362 {
363 	unsigned socksize, ifasize;
364 	int namelen, masklen;
365 	struct sockaddr_dl *sdl;
366 	struct ifaddr *ifa;
367 
368 	IF_AFDATA_LOCK_INIT(ifp);
369 	ifp->if_afdata_initialized = 0;
370 	IFNET_WLOCK();
371 	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
372 	IFNET_WUNLOCK();
373 	/*
374 	 * XXX -
375 	 * The old code would work if the interface passed a pre-existing
376 	 * chain of ifaddrs to this code.  We don't trust our callers to
377 	 * properly initialize the tailq, however, so we no longer allow
378 	 * this unlikely case.
379 	 */
380 	TAILQ_INIT(&ifp->if_addrhead);
381 	TAILQ_INIT(&ifp->if_prefixhead);
382 	TAILQ_INIT(&ifp->if_multiaddrs);
383 	SLIST_INIT(&ifp->if_klist);
384 	getmicrotime(&ifp->if_lastchange);
385 
386 #ifdef MAC
387 	mac_init_ifnet(ifp);
388 	mac_create_ifnet(ifp);
389 #endif
390 
391 	ifp->if_index = if_findindex(ifp);
392 	if (ifp->if_index > if_index)
393 		if_index = ifp->if_index;
394 	if (if_index >= if_indexlim)
395 		if_grow();
396 
397 	ifnet_byindex(ifp->if_index) = ifp;
398 	ifdev_byindex(ifp->if_index) = make_dev(&net_cdevsw,
399 	    unit2minor(ifp->if_index),
400 	    UID_ROOT, GID_WHEEL, 0600, "%s/%s",
401 	    net_cdevsw.d_name, ifp->if_xname);
402 	make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
403 	    net_cdevsw.d_name, ifp->if_index);
404 
405 	mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
406 
407 	/*
408 	 * create a Link Level name for this device
409 	 */
410 	namelen = strlen(ifp->if_xname);
411 	/*
412 	 * Always save enough space for any possiable name so we can do
413 	 * a rename in place later.
414 	 */
415 	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
416 	socksize = masklen + ifp->if_addrlen;
417 	if (socksize < sizeof(*sdl))
418 		socksize = sizeof(*sdl);
419 	socksize = roundup2(socksize, sizeof(long));
420 	ifasize = sizeof(*ifa) + 2 * socksize;
421 	ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
422 	IFA_LOCK_INIT(ifa);
423 	sdl = (struct sockaddr_dl *)(ifa + 1);
424 	sdl->sdl_len = socksize;
425 	sdl->sdl_family = AF_LINK;
426 	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
427 	sdl->sdl_nlen = namelen;
428 	sdl->sdl_index = ifp->if_index;
429 	sdl->sdl_type = ifp->if_type;
430 	ifaddr_byindex(ifp->if_index) = ifa;
431 	ifa->ifa_ifp = ifp;
432 	ifa->ifa_rtrequest = link_rtrequest;
433 	ifa->ifa_addr = (struct sockaddr *)sdl;
434 	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
435 	ifa->ifa_netmask = (struct sockaddr *)sdl;
436 	sdl->sdl_len = masklen;
437 	while (namelen != 0)
438 		sdl->sdl_data[--namelen] = 0xff;
439 	ifa->ifa_refcnt = 1;
440 	TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
441 	ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */
442 	ifp->if_snd.altq_type = 0;
443 	ifp->if_snd.altq_disc = NULL;
444 	ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
445 	ifp->if_snd.altq_tbr  = NULL;
446 	ifp->if_snd.altq_ifp  = ifp;
447 
448 	if (domains)
449 		if_attachdomain1(ifp);
450 
451 	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
452 
453 	/* Announce the interface. */
454 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
455 }
456 
457 static void
458 if_attachdomain(void *dummy)
459 {
460 	struct ifnet *ifp;
461 	int s;
462 
463 	s = splnet();
464 	TAILQ_FOREACH(ifp, &ifnet, if_link)
465 		if_attachdomain1(ifp);
466 	splx(s);
467 }
468 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
469     if_attachdomain, NULL);
470 
471 static void
472 if_attachdomain1(struct ifnet *ifp)
473 {
474 	struct domain *dp;
475 	int s;
476 
477 	s = splnet();
478 
479 	/*
480 	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
481 	 * cannot lock ifp->if_afdata initialization, entirely.
482 	 */
483 	if (IF_AFDATA_TRYLOCK(ifp) == 0) {
484 		splx(s);
485 		return;
486 	}
487 	if (ifp->if_afdata_initialized) {
488 		IF_AFDATA_UNLOCK(ifp);
489 		splx(s);
490 		return;
491 	}
492 	ifp->if_afdata_initialized = 1;
493 	IF_AFDATA_UNLOCK(ifp);
494 
495 	/* address family dependent data region */
496 	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
497 	for (dp = domains; dp; dp = dp->dom_next) {
498 		if (dp->dom_ifattach)
499 			ifp->if_afdata[dp->dom_family] =
500 			    (*dp->dom_ifattach)(ifp);
501 	}
502 
503 	splx(s);
504 }
505 
506 /*
507  * Detach an interface, removing it from the
508  * list of "active" interfaces.
509  */
510 void
511 if_detach(struct ifnet *ifp)
512 {
513 	struct ifaddr *ifa, *next;
514 	struct radix_node_head	*rnh;
515 	int s;
516 	int i;
517 	struct domain *dp;
518 
519 	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
520 	/*
521 	 * Remove routes and flush queues.
522 	 */
523 	s = splnet();
524 	if_down(ifp);
525 #ifdef ALTQ
526 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
527 		altq_disable(&ifp->if_snd);
528 	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
529 		altq_detach(&ifp->if_snd);
530 #endif
531 
532 	for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa; ifa = next) {
533 		next = TAILQ_NEXT(ifa, ifa_link);
534 
535 		if (ifa->ifa_addr->sa_family == AF_LINK)
536 			continue;
537 #ifdef INET
538 		/* XXX: Ugly!! ad hoc just for INET */
539 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
540 			struct ifaliasreq ifr;
541 
542 			bzero(&ifr, sizeof(ifr));
543 			ifr.ifra_addr = *ifa->ifa_addr;
544 			if (ifa->ifa_dstaddr)
545 				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
546 			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
547 			    NULL) == 0)
548 				continue;
549 		}
550 #endif /* INET */
551 #ifdef INET6
552 		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
553 			in6_purgeaddr(ifa);
554 			/* ifp_addrhead is already updated */
555 			continue;
556 		}
557 #endif /* INET6 */
558 		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
559 		IFAFREE(ifa);
560 	}
561 
562 #ifdef INET6
563 	/*
564 	 * Remove all IPv6 kernel structs related to ifp.  This should be done
565 	 * before removing routing entries below, since IPv6 interface direct
566 	 * routes are expected to be removed by the IPv6-specific kernel API.
567 	 * Otherwise, the kernel will detect some inconsistency and bark it.
568 	 */
569 	in6_ifdetach(ifp);
570 #endif
571 	/*
572 	 * Remove address from ifindex_table[] and maybe decrement if_index.
573 	 * Clean up all addresses.
574 	 */
575 	ifaddr_byindex(ifp->if_index) = NULL;
576 	destroy_dev(ifdev_byindex(ifp->if_index));
577 	ifdev_byindex(ifp->if_index) = NULL;
578 
579 	while (if_index > 0 && ifaddr_byindex(if_index) == NULL)
580 		if_index--;
581 
582 
583 	/* We can now free link ifaddr. */
584 	ifa = TAILQ_FIRST(&ifp->if_addrhead);
585 	TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
586 	IFAFREE(ifa);
587 
588 	/*
589 	 * Delete all remaining routes using this interface
590 	 * Unfortuneatly the only way to do this is to slog through
591 	 * the entire routing table looking for routes which point
592 	 * to this interface...oh well...
593 	 */
594 	for (i = 1; i <= AF_MAX; i++) {
595 		if ((rnh = rt_tables[i]) == NULL)
596 			continue;
597 		RADIX_NODE_HEAD_LOCK(rnh);
598 		(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
599 		RADIX_NODE_HEAD_UNLOCK(rnh);
600 	}
601 
602 	/* Announce that the interface is gone. */
603 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
604 
605 	IF_AFDATA_LOCK(ifp);
606 	for (dp = domains; dp; dp = dp->dom_next) {
607 		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
608 			(*dp->dom_ifdetach)(ifp,
609 			    ifp->if_afdata[dp->dom_family]);
610 	}
611 	IF_AFDATA_UNLOCK(ifp);
612 
613 #ifdef MAC
614 	mac_destroy_ifnet(ifp);
615 #endif /* MAC */
616 	KNOTE(&ifp->if_klist, NOTE_EXIT);
617 	IFNET_WLOCK();
618 	TAILQ_REMOVE(&ifnet, ifp, if_link);
619 	IFNET_WUNLOCK();
620 	mtx_destroy(&ifp->if_snd.ifq_mtx);
621 	IF_AFDATA_DESTROY(ifp);
622 	splx(s);
623 }
624 
625 /*
626  * Delete Routes for a Network Interface
627  *
628  * Called for each routing entry via the rnh->rnh_walktree() call above
629  * to delete all route entries referencing a detaching network interface.
630  *
631  * Arguments:
632  *	rn	pointer to node in the routing table
633  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
634  *
635  * Returns:
636  *	0	successful
637  *	errno	failed - reason indicated
638  *
639  */
640 static int
641 if_rtdel(struct radix_node *rn, void *arg)
642 {
643 	struct rtentry	*rt = (struct rtentry *)rn;
644 	struct ifnet	*ifp = arg;
645 	int		err;
646 
647 	if (rt->rt_ifp == ifp) {
648 
649 		/*
650 		 * Protect (sorta) against walktree recursion problems
651 		 * with cloned routes
652 		 */
653 		if ((rt->rt_flags & RTF_UP) == 0)
654 			return (0);
655 
656 		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
657 				rt_mask(rt), rt->rt_flags,
658 				(struct rtentry **) NULL);
659 		if (err) {
660 			log(LOG_WARNING, "if_rtdel: error %d\n", err);
661 		}
662 	}
663 
664 	return (0);
665 }
666 
/*
 * Byte-wise comparison of two sockaddrs over the length stored in the
 * first one (a1->sa_len); true when they match.
 */
#define	equal(a1, a2)	\
	(bcmp((a1), (a2), ((a1))->sa_len) == 0)
668 
669 /*
670  * Locate an interface based on a complete address.
671  */
672 /*ARGSUSED*/
673 struct ifaddr *
674 ifa_ifwithaddr(struct sockaddr *addr)
675 {
676 	struct ifnet *ifp;
677 	struct ifaddr *ifa;
678 
679 	IFNET_RLOCK();
680 	TAILQ_FOREACH(ifp, &ifnet, if_link)
681 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
682 			if (ifa->ifa_addr->sa_family != addr->sa_family)
683 				continue;
684 			if (equal(addr, ifa->ifa_addr))
685 				goto done;
686 			/* IP6 doesn't have broadcast */
687 			if ((ifp->if_flags & IFF_BROADCAST) &&
688 			    ifa->ifa_broadaddr &&
689 			    ifa->ifa_broadaddr->sa_len != 0 &&
690 			    equal(ifa->ifa_broadaddr, addr))
691 				goto done;
692 		}
693 	ifa = NULL;
694 done:
695 	IFNET_RUNLOCK();
696 	return (ifa);
697 }
698 
699 /*
700  * Locate the point to point interface with a given destination address.
701  */
702 /*ARGSUSED*/
703 struct ifaddr *
704 ifa_ifwithdstaddr(struct sockaddr *addr)
705 {
706 	struct ifnet *ifp;
707 	struct ifaddr *ifa;
708 
709 	IFNET_RLOCK();
710 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
711 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
712 			continue;
713 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
714 			if (ifa->ifa_addr->sa_family != addr->sa_family)
715 				continue;
716 			if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))
717 				goto done;
718 		}
719 	}
720 	ifa = NULL;
721 done:
722 	IFNET_RUNLOCK();
723 	return (ifa);
724 }
725 
726 /*
727  * Find an interface on a specific network.  If many, choice
728  * is most specific found.
729  */
730 struct ifaddr *
731 ifa_ifwithnet(struct sockaddr *addr)
732 {
733 	struct ifnet *ifp;
734 	struct ifaddr *ifa;
735 	struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
736 	u_int af = addr->sa_family;
737 	char *addr_data = addr->sa_data, *cplim;
738 
739 	/*
740 	 * AF_LINK addresses can be looked up directly by their index number,
741 	 * so do that if we can.
742 	 */
743 	if (af == AF_LINK) {
744 	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
745 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
746 		return (ifaddr_byindex(sdl->sdl_index));
747 	}
748 
749 	/*
750 	 * Scan though each interface, looking for ones that have
751 	 * addresses in this address family.
752 	 */
753 	IFNET_RLOCK();
754 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
755 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
756 			char *cp, *cp2, *cp3;
757 
758 			if (ifa->ifa_addr->sa_family != af)
759 next:				continue;
760 			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
761 				/*
762 				 * This is a bit broken as it doesn't
763 				 * take into account that the remote end may
764 				 * be a single node in the network we are
765 				 * looking for.
766 				 * The trouble is that we don't know the
767 				 * netmask for the remote end.
768 				 */
769 				if (ifa->ifa_dstaddr != 0
770 				    && equal(addr, ifa->ifa_dstaddr))
771 					goto done;
772 			} else {
773 				/*
774 				 * if we have a special address handler,
775 				 * then use it instead of the generic one.
776 				 */
777 				if (ifa->ifa_claim_addr) {
778 					if ((*ifa->ifa_claim_addr)(ifa, addr))
779 						goto done;
780 					continue;
781 				}
782 
783 				/*
784 				 * Scan all the bits in the ifa's address.
785 				 * If a bit dissagrees with what we are
786 				 * looking for, mask it with the netmask
787 				 * to see if it really matters.
788 				 * (A byte at a time)
789 				 */
790 				if (ifa->ifa_netmask == 0)
791 					continue;
792 				cp = addr_data;
793 				cp2 = ifa->ifa_addr->sa_data;
794 				cp3 = ifa->ifa_netmask->sa_data;
795 				cplim = ifa->ifa_netmask->sa_len
796 					+ (char *)ifa->ifa_netmask;
797 				while (cp3 < cplim)
798 					if ((*cp++ ^ *cp2++) & *cp3++)
799 						goto next; /* next address! */
800 				/*
801 				 * If the netmask of what we just found
802 				 * is more specific than what we had before
803 				 * (if we had one) then remember the new one
804 				 * before continuing to search
805 				 * for an even better one.
806 				 */
807 				if (ifa_maybe == 0 ||
808 				    rn_refines((caddr_t)ifa->ifa_netmask,
809 				    (caddr_t)ifa_maybe->ifa_netmask))
810 					ifa_maybe = ifa;
811 			}
812 		}
813 	}
814 	ifa = ifa_maybe;
815 done:
816 	IFNET_RUNLOCK();
817 	return (ifa);
818 }
819 
820 /*
821  * Find an interface address specific to an interface best matching
822  * a given address.
823  */
824 struct ifaddr *
825 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
826 {
827 	struct ifaddr *ifa;
828 	char *cp, *cp2, *cp3;
829 	char *cplim;
830 	struct ifaddr *ifa_maybe = 0;
831 	u_int af = addr->sa_family;
832 
833 	if (af >= AF_MAX)
834 		return (0);
835 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
836 		if (ifa->ifa_addr->sa_family != af)
837 			continue;
838 		if (ifa_maybe == 0)
839 			ifa_maybe = ifa;
840 		if (ifa->ifa_netmask == 0) {
841 			if (equal(addr, ifa->ifa_addr) ||
842 			    (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)))
843 				goto done;
844 			continue;
845 		}
846 		if (ifp->if_flags & IFF_POINTOPOINT) {
847 			if (equal(addr, ifa->ifa_dstaddr))
848 				goto done;
849 		} else {
850 			cp = addr->sa_data;
851 			cp2 = ifa->ifa_addr->sa_data;
852 			cp3 = ifa->ifa_netmask->sa_data;
853 			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
854 			for (; cp3 < cplim; cp3++)
855 				if ((*cp++ ^ *cp2++) & *cp3)
856 					break;
857 			if (cp3 == cplim)
858 				goto done;
859 		}
860 	}
861 	ifa = ifa_maybe;
862 done:
863 	return (ifa);
864 }
865 
866 #include <net/route.h>
867 
868 /*
869  * Default action when installing a route with a Link Level gateway.
870  * Lookup an appropriate real ifa to point to.
871  * This should be moved to /sys/net/link.c eventually.
872  */
873 static void
874 link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
875 {
876 	struct ifaddr *ifa, *oifa;
877 	struct sockaddr *dst;
878 	struct ifnet *ifp;
879 
880 	RT_LOCK_ASSERT(rt);
881 
882 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
883 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
884 		return;
885 	ifa = ifaof_ifpforaddr(dst, ifp);
886 	if (ifa) {
887 		IFAREF(ifa);		/* XXX */
888 		oifa = rt->rt_ifa;
889 		rt->rt_ifa = ifa;
890 		IFAFREE(oifa);
891 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
892 			ifa->ifa_rtrequest(cmd, rt, info);
893 	}
894 }
895 
896 /*
897  * Mark an interface down and notify protocols of
898  * the transition.
899  * NOTE: must be called at splnet or eqivalent.
900  */
901 static void
902 if_unroute(struct ifnet *ifp, int flag, int fam)
903 {
904 	struct ifaddr *ifa;
905 
906 	ifp->if_flags &= ~flag;
907 	getmicrotime(&ifp->if_lastchange);
908 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
909 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
910 			pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
911 	if_qflush(&ifp->if_snd);
912 	rt_ifmsg(ifp);
913 }
914 
915 /*
916  * Mark an interface up and notify protocols of
917  * the transition.
918  * NOTE: must be called at splnet or eqivalent.
919  */
920 static void
921 if_route(struct ifnet *ifp, int flag, int fam)
922 {
923 	struct ifaddr *ifa;
924 
925 	ifp->if_flags |= flag;
926 	getmicrotime(&ifp->if_lastchange);
927 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
928 		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
929 			pfctlinput(PRC_IFUP, ifa->ifa_addr);
930 	rt_ifmsg(ifp);
931 #ifdef INET6
932 	in6_if_up(ifp);
933 #endif
934 }
935 
936 /*
937  * Mark an interface down and notify protocols of
938  * the transition.
939  * NOTE: must be called at splnet or eqivalent.
940  */
941 void
942 if_down(struct ifnet *ifp)
943 {
944 
945 	if_unroute(ifp, IFF_UP, AF_UNSPEC);
946 }
947 
948 /*
949  * Mark an interface up and notify protocols of
950  * the transition.
951  * NOTE: must be called at splnet or eqivalent.
952  */
953 void
954 if_up(struct ifnet *ifp)
955 {
956 
957 	if_route(ifp, IFF_UP, AF_UNSPEC);
958 }
959 
960 /*
961  * Flush an interface queue.
962  */
963 static void
964 if_qflush(struct ifaltq *ifq)
965 {
966 	struct mbuf *m, *n;
967 
968 #ifdef ALTQ
969 	if (ALTQ_IS_ENABLED(ifq))
970 		ALTQ_PURGE(ifq);
971 #endif
972 	n = ifq->ifq_head;
973 	while ((m = n) != 0) {
974 		n = m->m_act;
975 		m_freem(m);
976 	}
977 	ifq->ifq_head = 0;
978 	ifq->ifq_tail = 0;
979 	ifq->ifq_len = 0;
980 }
981 
982 /*
983  * Handle interface watchdog timer routines.  Called
984  * from softclock, we decrement timers (if set) and
985  * call the appropriate interface routine on expiration.
986  */
987 static void
988 if_slowtimo(void *arg)
989 {
990 	struct ifnet *ifp;
991 	int s = splimp();
992 
993 	IFNET_RLOCK();
994 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
995 		if (ifp->if_timer == 0 || --ifp->if_timer)
996 			continue;
997 		if (ifp->if_watchdog)
998 			(*ifp->if_watchdog)(ifp);
999 	}
1000 	IFNET_RUNLOCK();
1001 	splx(s);
1002 	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
1003 }
1004 
1005 /*
1006  * Map interface name to
1007  * interface structure pointer.
1008  */
1009 struct ifnet *
1010 ifunit(const char *name)
1011 {
1012 	struct ifnet *ifp;
1013 
1014 	IFNET_RLOCK();
1015 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1016 		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
1017 			break;
1018 	}
1019 	IFNET_RUNLOCK();
1020 	return (ifp);
1021 }
1022 
1023 /*
1024  * Hardware specific interface ioctls.
1025  */
1026 static int
1027 ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
1028 {
1029 	struct ifreq *ifr;
1030 	struct ifstat *ifs;
1031 	int error = 0;
1032 	int new_flags;
1033 	size_t namelen, onamelen;
1034 	char new_name[IFNAMSIZ];
1035 	struct ifaddr *ifa;
1036 	struct sockaddr_dl *sdl;
1037 
1038 	ifr = (struct ifreq *)data;
1039 	switch (cmd) {
1040 	case SIOCGIFINDEX:
1041 		ifr->ifr_index = ifp->if_index;
1042 		break;
1043 
1044 	case SIOCGIFFLAGS:
1045 		ifr->ifr_flags = ifp->if_flags & 0xffff;
1046 		ifr->ifr_flagshigh = ifp->if_flags >> 16;
1047 		break;
1048 
1049 	case SIOCGIFCAP:
1050 		ifr->ifr_reqcap = ifp->if_capabilities;
1051 		ifr->ifr_curcap = ifp->if_capenable;
1052 		break;
1053 
1054 #ifdef MAC
1055 	case SIOCGIFMAC:
1056 		error = mac_ioctl_ifnet_get(td->td_ucred, ifr, ifp);
1057 		break;
1058 #endif
1059 
1060 	case SIOCGIFMETRIC:
1061 		ifr->ifr_metric = ifp->if_metric;
1062 		break;
1063 
1064 	case SIOCGIFMTU:
1065 		ifr->ifr_mtu = ifp->if_mtu;
1066 		break;
1067 
1068 	case SIOCGIFPHYS:
1069 		ifr->ifr_phys = ifp->if_physical;
1070 		break;
1071 
1072 	case SIOCSIFFLAGS:
1073 		error = suser(td);
1074 		if (error)
1075 			return (error);
1076 		new_flags = (ifr->ifr_flags & 0xffff) |
1077 		    (ifr->ifr_flagshigh << 16);
1078 		if (ifp->if_flags & IFF_SMART) {
1079 			/* Smart drivers twiddle their own routes */
1080 		} else if (ifp->if_flags & IFF_UP &&
1081 		    (new_flags & IFF_UP) == 0) {
1082 			int s = splimp();
1083 			if_down(ifp);
1084 			splx(s);
1085 		} else if (new_flags & IFF_UP &&
1086 		    (ifp->if_flags & IFF_UP) == 0) {
1087 			int s = splimp();
1088 			if_up(ifp);
1089 			splx(s);
1090 		}
1091 		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
1092 			(new_flags &~ IFF_CANTCHANGE);
1093 		if (new_flags & IFF_PPROMISC) {
1094 			/* Permanently promiscuous mode requested */
1095 			ifp->if_flags |= IFF_PROMISC;
1096 		} else if (ifp->if_pcount == 0) {
1097 			ifp->if_flags &= ~IFF_PROMISC;
1098 		}
1099 		if (ifp->if_ioctl)
1100 			(void) (*ifp->if_ioctl)(ifp, cmd, data);
1101 		getmicrotime(&ifp->if_lastchange);
1102 		break;
1103 
1104 	case SIOCSIFCAP:
1105 		error = suser(td);
1106 		if (error)
1107 			return (error);
1108 		if (ifp->if_ioctl == NULL)
1109 			return (EOPNOTSUPP);
1110 		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
1111 			return (EINVAL);
1112 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1113 		if (error == 0)
1114 			getmicrotime(&ifp->if_lastchange);
1115 		break;
1116 
1117 #ifdef MAC
1118 	case SIOCSIFMAC:
1119 		error = mac_ioctl_ifnet_set(td->td_ucred, ifr, ifp);
1120 		break;
1121 #endif
1122 
1123 	case SIOCSIFNAME:
1124 		error = suser(td);
1125 		if (error != 0)
1126 			return (error);
1127 		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
1128 		if (error != 0)
1129 			return (error);
1130 		if (new_name[0] == '\0')
1131 			return (EINVAL);
1132 		if (ifunit(new_name) != NULL)
1133 			return (EEXIST);
1134 
1135 		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
1136 		/* Announce the departure of the interface. */
1137 		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
1138 
1139 		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
1140 		ifa = ifaddr_byindex(ifp->if_index);
1141 		IFA_LOCK(ifa);
1142 		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1143 		namelen = strlen(new_name);
1144 		onamelen = sdl->sdl_nlen;
1145 		/*
1146 		 * Move the address if needed.  This is safe because we
1147 		 * allocate space for a name of length IFNAMSIZ when we
1148 		 * create this in if_attach().
1149 		 */
1150 		if (namelen != onamelen) {
1151 			bcopy(sdl->sdl_data + onamelen,
1152 			    sdl->sdl_data + namelen, sdl->sdl_alen);
1153 		}
1154 		bcopy(new_name, sdl->sdl_data, namelen);
1155 		sdl->sdl_nlen = namelen;
1156 		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
1157 		bzero(sdl->sdl_data, onamelen);
1158 		while (namelen != 0)
1159 			sdl->sdl_data[--namelen] = 0xff;
1160 		IFA_UNLOCK(ifa);
1161 
1162 		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
1163 		/* Announce the return of the interface. */
1164 		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
1165 		break;
1166 
1167 	case SIOCSIFMETRIC:
1168 		error = suser(td);
1169 		if (error)
1170 			return (error);
1171 		ifp->if_metric = ifr->ifr_metric;
1172 		getmicrotime(&ifp->if_lastchange);
1173 		break;
1174 
1175 	case SIOCSIFPHYS:
1176 		error = suser(td);
1177 		if (error)
1178 			return (error);
1179 		if (ifp->if_ioctl == NULL)
1180 			return (EOPNOTSUPP);
1181 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1182 		if (error == 0)
1183 			getmicrotime(&ifp->if_lastchange);
1184 		break;
1185 
1186 	case SIOCSIFMTU:
1187 	{
1188 		u_long oldmtu = ifp->if_mtu;
1189 
1190 		error = suser(td);
1191 		if (error)
1192 			return (error);
1193 		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
1194 			return (EINVAL);
1195 		if (ifp->if_ioctl == NULL)
1196 			return (EOPNOTSUPP);
1197 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1198 		if (error == 0) {
1199 			getmicrotime(&ifp->if_lastchange);
1200 			rt_ifmsg(ifp);
1201 		}
1202 		/*
1203 		 * If the link MTU changed, do network layer specific procedure.
1204 		 */
1205 		if (ifp->if_mtu != oldmtu) {
1206 #ifdef INET6
1207 			nd6_setmtu(ifp);
1208 #endif
1209 		}
1210 		break;
1211 	}
1212 
1213 	case SIOCADDMULTI:
1214 	case SIOCDELMULTI:
1215 		error = suser(td);
1216 		if (error)
1217 			return (error);
1218 
1219 		/* Don't allow group membership on non-multicast interfaces. */
1220 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1221 			return (EOPNOTSUPP);
1222 
1223 		/* Don't let users screw up protocols' entries. */
1224 		if (ifr->ifr_addr.sa_family != AF_LINK)
1225 			return (EINVAL);
1226 
1227 		if (cmd == SIOCADDMULTI) {
1228 			struct ifmultiaddr *ifma;
1229 			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
1230 		} else {
1231 			error = if_delmulti(ifp, &ifr->ifr_addr);
1232 		}
1233 		if (error == 0)
1234 			getmicrotime(&ifp->if_lastchange);
1235 		break;
1236 
1237 	case SIOCSIFPHYADDR:
1238 	case SIOCDIFPHYADDR:
1239 #ifdef INET6
1240 	case SIOCSIFPHYADDR_IN6:
1241 #endif
1242 	case SIOCSLIFPHYADDR:
1243 	case SIOCSIFMEDIA:
1244 	case SIOCSIFGENERIC:
1245 		error = suser(td);
1246 		if (error)
1247 			return (error);
1248 		if (ifp->if_ioctl == NULL)
1249 			return (EOPNOTSUPP);
1250 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1251 		if (error == 0)
1252 			getmicrotime(&ifp->if_lastchange);
1253 		break;
1254 
1255 	case SIOCGIFSTATUS:
1256 		ifs = (struct ifstat *)data;
1257 		ifs->ascii[0] = '\0';
1258 
1259 	case SIOCGIFPSRCADDR:
1260 	case SIOCGIFPDSTADDR:
1261 	case SIOCGLIFPHYADDR:
1262 	case SIOCGIFMEDIA:
1263 	case SIOCGIFGENERIC:
1264 		if (ifp->if_ioctl == NULL)
1265 			return (EOPNOTSUPP);
1266 		error = (*ifp->if_ioctl)(ifp, cmd, data);
1267 		break;
1268 
1269 	case SIOCSIFLLADDR:
1270 		error = suser(td);
1271 		if (error)
1272 			return (error);
1273 		error = if_setlladdr(ifp,
1274 		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
1275 		break;
1276 
1277 	default:
1278 		error = ENOIOCTL;
1279 		break;
1280 	}
1281 	return (error);
1282 }
1283 
1284 /*
1285  * Interface ioctls.
1286  */
1287 int
1288 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
1289 {
1290 	struct ifnet *ifp;
1291 	struct ifreq *ifr;
1292 	int error;
1293 	int oif_flags;
1294 
1295 	switch (cmd) {
1296 	case SIOCGIFCONF:
1297 	case OSIOCGIFCONF:
1298 		return (ifconf(cmd, data));
1299 	}
1300 	ifr = (struct ifreq *)data;
1301 
1302 	switch (cmd) {
1303 	case SIOCIFCREATE:
1304 	case SIOCIFDESTROY:
1305 		if ((error = suser(td)) != 0)
1306 			return (error);
1307 		return ((cmd == SIOCIFCREATE) ?
1308 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
1309 			if_clone_destroy(ifr->ifr_name));
1310 
1311 	case SIOCIFGCLONERS:
1312 		return (if_clone_list((struct if_clonereq *)data));
1313 	}
1314 
1315 	ifp = ifunit(ifr->ifr_name);
1316 	if (ifp == 0)
1317 		return (ENXIO);
1318 
1319 	error = ifhwioctl(cmd, ifp, data, td);
1320 	if (error != ENOIOCTL)
1321 		return (error);
1322 
1323 	oif_flags = ifp->if_flags;
1324 	if (so->so_proto == 0)
1325 		return (EOPNOTSUPP);
1326 #ifndef COMPAT_43
1327 	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
1328 								 data,
1329 								 ifp, td));
1330 #else
1331 	{
1332 		int ocmd = cmd;
1333 
1334 		switch (cmd) {
1335 
1336 		case SIOCSIFDSTADDR:
1337 		case SIOCSIFADDR:
1338 		case SIOCSIFBRDADDR:
1339 		case SIOCSIFNETMASK:
1340 #if BYTE_ORDER != BIG_ENDIAN
1341 			if (ifr->ifr_addr.sa_family == 0 &&
1342 			    ifr->ifr_addr.sa_len < 16) {
1343 				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
1344 				ifr->ifr_addr.sa_len = 16;
1345 			}
1346 #else
1347 			if (ifr->ifr_addr.sa_len == 0)
1348 				ifr->ifr_addr.sa_len = 16;
1349 #endif
1350 			break;
1351 
1352 		case OSIOCGIFADDR:
1353 			cmd = SIOCGIFADDR;
1354 			break;
1355 
1356 		case OSIOCGIFDSTADDR:
1357 			cmd = SIOCGIFDSTADDR;
1358 			break;
1359 
1360 		case OSIOCGIFBRDADDR:
1361 			cmd = SIOCGIFBRDADDR;
1362 			break;
1363 
1364 		case OSIOCGIFNETMASK:
1365 			cmd = SIOCGIFNETMASK;
1366 		}
1367 		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
1368 								   cmd,
1369 								   data,
1370 								   ifp, td));
1371 		switch (ocmd) {
1372 
1373 		case OSIOCGIFADDR:
1374 		case OSIOCGIFDSTADDR:
1375 		case OSIOCGIFBRDADDR:
1376 		case OSIOCGIFNETMASK:
1377 			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
1378 
1379 		}
1380 	}
1381 #endif /* COMPAT_43 */
1382 
1383 	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
1384 #ifdef INET6
1385 		DELAY(100);/* XXX: temporary workaround for fxp issue*/
1386 		if (ifp->if_flags & IFF_UP) {
1387 			int s = splimp();
1388 			in6_if_up(ifp);
1389 			splx(s);
1390 		}
1391 #endif
1392 	}
1393 	return (error);
1394 }
1395 
1396 /*
1397  * Set/clear promiscuous mode on interface ifp based on the truth value
1398  * of pswitch.  The calls are reference counted so that only the first
1399  * "on" request actually has an effect, as does the final "off" request.
1400  * Results are undefined if the "off" and "on" requests are not matched.
1401  */
1402 int
1403 ifpromisc(struct ifnet *ifp, int pswitch)
1404 {
1405 	struct ifreq ifr;
1406 	int error;
1407 	int oldflags, oldpcount;
1408 
1409 	oldpcount = ifp->if_pcount;
1410 	oldflags = ifp->if_flags;
1411 	if (ifp->if_flags & IFF_PPROMISC) {
1412 		/* Do nothing if device is in permanently promiscuous mode */
1413 		ifp->if_pcount += pswitch ? 1 : -1;
1414 		return (0);
1415 	}
1416 	if (pswitch) {
1417 		/*
1418 		 * If the device is not configured up, we cannot put it in
1419 		 * promiscuous mode.
1420 		 */
1421 		if ((ifp->if_flags & IFF_UP) == 0)
1422 			return (ENETDOWN);
1423 		if (ifp->if_pcount++ != 0)
1424 			return (0);
1425 		ifp->if_flags |= IFF_PROMISC;
1426 	} else {
1427 		if (--ifp->if_pcount > 0)
1428 			return (0);
1429 		ifp->if_flags &= ~IFF_PROMISC;
1430 	}
1431 	ifr.ifr_flags = ifp->if_flags & 0xffff;
1432 	ifr.ifr_flagshigh = ifp->if_flags >> 16;
1433 	error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1434 	if (error == 0) {
1435 		log(LOG_INFO, "%s: promiscuous mode %s\n",
1436 		    ifp->if_xname,
1437 		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
1438 		rt_ifmsg(ifp);
1439 	} else {
1440 		ifp->if_pcount = oldpcount;
1441 		ifp->if_flags = oldflags;
1442 	}
1443 	return error;
1444 }
1445 
1446 /*
1447  * Return interface configuration
1448  * of system.  List may be used
1449  * in later ioctl's (above) to get
1450  * other information.
1451  */
1452 /*ARGSUSED*/
1453 static int
1454 ifconf(u_long cmd, caddr_t data)
1455 {
1456 	struct ifconf *ifc = (struct ifconf *)data;
1457 	struct ifnet *ifp;
1458 	struct ifaddr *ifa;
1459 	struct ifreq ifr, *ifrp;
1460 	int space = ifc->ifc_len, error = 0;
1461 
1462 	ifrp = ifc->ifc_req;
1463 	IFNET_RLOCK();		/* could sleep XXX */
1464 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1465 		int addrs;
1466 
1467 		if (space < sizeof(ifr))
1468 			break;
1469 		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
1470 		    >= sizeof(ifr.ifr_name)) {
1471 			error = ENAMETOOLONG;
1472 			break;
1473 		}
1474 
1475 		addrs = 0;
1476 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1477 			struct sockaddr *sa = ifa->ifa_addr;
1478 
1479 			if (space < sizeof(ifr))
1480 				break;
1481 			if (jailed(curthread->td_ucred) &&
1482 			    prison_if(curthread->td_ucred, sa))
1483 				continue;
1484 			addrs++;
1485 #ifdef COMPAT_43
1486 			if (cmd == OSIOCGIFCONF) {
1487 				struct osockaddr *osa =
1488 					 (struct osockaddr *)&ifr.ifr_addr;
1489 				ifr.ifr_addr = *sa;
1490 				osa->sa_family = sa->sa_family;
1491 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1492 						sizeof (ifr));
1493 				ifrp++;
1494 			} else
1495 #endif
1496 			if (sa->sa_len <= sizeof(*sa)) {
1497 				ifr.ifr_addr = *sa;
1498 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1499 						sizeof (ifr));
1500 				ifrp++;
1501 			} else {
1502 				if (space < sizeof (ifr) + sa->sa_len -
1503 					    sizeof(*sa))
1504 					break;
1505 				space -= sa->sa_len - sizeof(*sa);
1506 				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1507 						sizeof (ifr.ifr_name));
1508 				if (error == 0)
1509 				    error = copyout((caddr_t)sa,
1510 				      (caddr_t)&ifrp->ifr_addr, sa->sa_len);
1511 				ifrp = (struct ifreq *)
1512 					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
1513 			}
1514 			if (error)
1515 				break;
1516 			space -= sizeof (ifr);
1517 		}
1518 		if (error)
1519 			break;
1520 		if (!addrs) {
1521 			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
1522 			error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
1523 			    sizeof (ifr));
1524 			if (error)
1525 				break;
1526 			space -= sizeof (ifr);
1527 			ifrp++;
1528 		}
1529 	}
1530 	IFNET_RUNLOCK();
1531 	ifc->ifc_len -= space;
1532 	return (error);
1533 }
1534 
1535 /*
1536  * Just like if_promisc(), but for all-multicast-reception mode.
1537  */
1538 int
1539 if_allmulti(struct ifnet *ifp, int onswitch)
1540 {
1541 	int error = 0;
1542 	int s = splimp();
1543 	struct ifreq ifr;
1544 
1545 	if (onswitch) {
1546 		if (ifp->if_amcount++ == 0) {
1547 			ifp->if_flags |= IFF_ALLMULTI;
1548 			ifr.ifr_flags = ifp->if_flags & 0xffff;
1549 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1550 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1551 		}
1552 	} else {
1553 		if (ifp->if_amcount > 1) {
1554 			ifp->if_amcount--;
1555 		} else {
1556 			ifp->if_amcount = 0;
1557 			ifp->if_flags &= ~IFF_ALLMULTI;
1558 			ifr.ifr_flags = ifp->if_flags & 0xffff;;
1559 			ifr.ifr_flagshigh = ifp->if_flags >> 16;
1560 			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1561 		}
1562 	}
1563 	splx(s);
1564 
1565 	if (error == 0)
1566 		rt_ifmsg(ifp);
1567 	return error;
1568 }
1569 
1570 /*
1571  * Add a multicast listenership to the interface in question.
1572  * The link layer provides a routine which converts
1573  */
1574 int
1575 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma)
1576 {
1577 	struct sockaddr *llsa, *dupsa;
1578 	int error, s;
1579 	struct ifmultiaddr *ifma;
1580 
1581 	/*
1582 	 * If the matching multicast address already exists
1583 	 * then don't add a new one, just add a reference
1584 	 */
1585 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1586 		if (equal(sa, ifma->ifma_addr)) {
1587 			ifma->ifma_refcount++;
1588 			if (retifma)
1589 				*retifma = ifma;
1590 			return 0;
1591 		}
1592 	}
1593 
1594 	/*
1595 	 * Give the link layer a chance to accept/reject it, and also
1596 	 * find out which AF_LINK address this maps to, if it isn't one
1597 	 * already.
1598 	 */
1599 	if (ifp->if_resolvemulti) {
1600 		error = ifp->if_resolvemulti(ifp, &llsa, sa);
1601 		if (error) return error;
1602 	} else {
1603 		llsa = 0;
1604 	}
1605 
1606 	MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK);
1607 	MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK);
1608 	bcopy(sa, dupsa, sa->sa_len);
1609 
1610 	ifma->ifma_addr = dupsa;
1611 	ifma->ifma_lladdr = llsa;
1612 	ifma->ifma_ifp = ifp;
1613 	ifma->ifma_refcount = 1;
1614 	ifma->ifma_protospec = 0;
1615 	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
1616 
1617 	/*
1618 	 * Some network interfaces can scan the address list at
1619 	 * interrupt time; lock them out.
1620 	 */
1621 	s = splimp();
1622 	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1623 	splx(s);
1624 	if (retifma != NULL)
1625 		*retifma = ifma;
1626 
1627 	if (llsa != 0) {
1628 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1629 			if (equal(ifma->ifma_addr, llsa))
1630 				break;
1631 		}
1632 		if (ifma) {
1633 			ifma->ifma_refcount++;
1634 		} else {
1635 			MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma,
1636 			       M_IFMADDR, M_WAITOK);
1637 			MALLOC(dupsa, struct sockaddr *, llsa->sa_len,
1638 			       M_IFMADDR, M_WAITOK);
1639 			bcopy(llsa, dupsa, llsa->sa_len);
1640 			ifma->ifma_addr = dupsa;
1641 			ifma->ifma_lladdr = NULL;
1642 			ifma->ifma_ifp = ifp;
1643 			ifma->ifma_refcount = 1;
1644 			ifma->ifma_protospec = 0;
1645 			s = splimp();
1646 			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
1647 			splx(s);
1648 		}
1649 	}
1650 	/*
1651 	 * We are certain we have added something, so call down to the
1652 	 * interface to let them know about it.
1653 	 */
1654 	s = splimp();
1655 	ifp->if_ioctl(ifp, SIOCADDMULTI, 0);
1656 	splx(s);
1657 
1658 	return 0;
1659 }
1660 
1661 /*
1662  * Remove a reference to a multicast address on this interface.  Yell
1663  * if the request does not match an existing membership.
1664  */
1665 int
1666 if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
1667 {
1668 	struct ifmultiaddr *ifma;
1669 	int s;
1670 
1671 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1672 		if (equal(sa, ifma->ifma_addr))
1673 			break;
1674 	if (ifma == 0)
1675 		return ENOENT;
1676 
1677 	if (ifma->ifma_refcount > 1) {
1678 		ifma->ifma_refcount--;
1679 		return 0;
1680 	}
1681 
1682 	rt_newmaddrmsg(RTM_DELMADDR, ifma);
1683 	sa = ifma->ifma_lladdr;
1684 	s = splimp();
1685 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1686 	/*
1687 	 * Make sure the interface driver is notified
1688 	 * in the case of a link layer mcast group being left.
1689 	 */
1690 	if (ifma->ifma_addr->sa_family == AF_LINK && sa == 0)
1691 		ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1692 	splx(s);
1693 	free(ifma->ifma_addr, M_IFMADDR);
1694 	free(ifma, M_IFMADDR);
1695 	if (sa == 0)
1696 		return 0;
1697 
1698 	/*
1699 	 * Now look for the link-layer address which corresponds to
1700 	 * this network address.  It had been squirreled away in
1701 	 * ifma->ifma_lladdr for this purpose (so we don't have
1702 	 * to call ifp->if_resolvemulti() again), and we saved that
1703 	 * value in sa above.  If some nasty deleted the
1704 	 * link-layer address out from underneath us, we can deal because
1705 	 * the address we stored was is not the same as the one which was
1706 	 * in the record for the link-layer address.  (So we don't complain
1707 	 * in that case.)
1708 	 */
1709 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1710 		if (equal(sa, ifma->ifma_addr))
1711 			break;
1712 	if (ifma == 0)
1713 		return 0;
1714 
1715 	if (ifma->ifma_refcount > 1) {
1716 		ifma->ifma_refcount--;
1717 		return 0;
1718 	}
1719 
1720 	s = splimp();
1721 	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1722 	ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1723 	splx(s);
1724 	free(ifma->ifma_addr, M_IFMADDR);
1725 	free(sa, M_IFMADDR);
1726 	free(ifma, M_IFMADDR);
1727 
1728 	return 0;
1729 }
1730 
1731 /*
1732  * Set the link layer address on an interface.
1733  *
1734  * At this time we only support certain types of interfaces,
1735  * and we don't allow the length of the address to change.
1736  */
1737 int
1738 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
1739 {
1740 	struct sockaddr_dl *sdl;
1741 	struct ifaddr *ifa;
1742 	struct ifreq ifr;
1743 
1744 	ifa = ifaddr_byindex(ifp->if_index);
1745 	if (ifa == NULL)
1746 		return (EINVAL);
1747 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
1748 	if (sdl == NULL)
1749 		return (EINVAL);
1750 	if (len != sdl->sdl_alen)	/* don't allow length to change */
1751 		return (EINVAL);
1752 	switch (ifp->if_type) {
1753 	case IFT_ETHER:			/* these types use struct arpcom */
1754 	case IFT_FDDI:
1755 	case IFT_XETHER:
1756 	case IFT_ISO88025:
1757 	case IFT_L2VLAN:
1758 		bcopy(lladdr, IFP2AC(ifp)->ac_enaddr, len);
1759 		/*
1760 		 * XXX We also need to store the lladdr in LLADDR(sdl),
1761 		 * which is done below. This is a pain because we must
1762 		 * remember to keep the info in sync.
1763 		 */
1764 		/* FALLTHROUGH */
1765 	case IFT_ARCNET:
1766 		bcopy(lladdr, LLADDR(sdl), len);
1767 		break;
1768 	default:
1769 		return (ENODEV);
1770 	}
1771 	/*
1772 	 * If the interface is already up, we need
1773 	 * to re-init it in order to reprogram its
1774 	 * address filter.
1775 	 */
1776 	if ((ifp->if_flags & IFF_UP) != 0) {
1777 		ifp->if_flags &= ~IFF_UP;
1778 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1779 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1780 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1781 		ifp->if_flags |= IFF_UP;
1782 		ifr.ifr_flags = ifp->if_flags & 0xffff;
1783 		ifr.ifr_flagshigh = ifp->if_flags >> 16;
1784 		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
1785 #ifdef INET
1786 		/*
1787 		 * Also send gratuitous ARPs to notify other nodes about
1788 		 * the address change.
1789 		 */
1790 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1791 			if (ifa->ifa_addr != NULL &&
1792 			    ifa->ifa_addr->sa_family == AF_INET)
1793 				arp_ifinit(ifp, ifa);
1794 		}
1795 #endif
1796 	}
1797 	return (0);
1798 }
1799 
1800 struct ifmultiaddr *
1801 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
1802 {
1803 	struct ifmultiaddr *ifma;
1804 
1805 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
1806 		if (equal(ifma->ifma_addr, sa))
1807 			break;
1808 
1809 	return ifma;
1810 }
1811 
1812 /*
1813  * The name argument must be a pointer to storage which will last as
1814  * long as the interface does.  For physical devices, the result of
1815  * device_get_name(dev) is a good choice and for pseudo-devices a
1816  * static string works well.
1817  */
1818 void
1819 if_initname(struct ifnet *ifp, const char *name, int unit)
1820 {
1821 	ifp->if_dname = name;
1822 	ifp->if_dunit = unit;
1823 	if (unit != IF_DUNIT_NONE)
1824 		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
1825 	else
1826 		strlcpy(ifp->if_xname, name, IFNAMSIZ);
1827 }
1828 
1829 int
1830 if_printf(struct ifnet *ifp, const char * fmt, ...)
1831 {
1832 	va_list ap;
1833 	int retval;
1834 
1835 	retval = printf("%s: ", ifp->if_xname);
1836 	va_start(ap, fmt);
1837 	retval += vprintf(fmt, ap);
1838 	va_end(ap);
1839 	return (retval);
1840 }
1841 
/*
 * Sysctl tree nodes for the link layer: "link" under _net (numbered
 * PF_LINK), and "generic" beneath it for generic link management.
 */
1842 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
1843 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
1844