1 /* $NetBSD: if.c,v 1.397 2017/11/17 07:37:12 ozaki-r Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by William Studenmund and Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the project nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61 /* 62 * Copyright (c) 1980, 1986, 1993 63 * The Regents of the University of California. All rights reserved. 64 * 65 * Redistribution and use in source and binary forms, with or without 66 * modification, are permitted provided that the following conditions 67 * are met: 68 * 1. Redistributions of source code must retain the above copyright 69 * notice, this list of conditions and the following disclaimer. 70 * 2. Redistributions in binary form must reproduce the above copyright 71 * notice, this list of conditions and the following disclaimer in the 72 * documentation and/or other materials provided with the distribution. 73 * 3. Neither the name of the University nor the names of its contributors 74 * may be used to endorse or promote products derived from this software 75 * without specific prior written permission. 76 * 77 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 78 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 79 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 80 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 81 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 82 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 83 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 84 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 85 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 86 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 87 * SUCH DAMAGE. 88 * 89 * @(#)if.c 8.5 (Berkeley) 1/9/95 90 */ 91 92 #include <sys/cdefs.h> 93 __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.397 2017/11/17 07:37:12 ozaki-r Exp $"); 94 95 #if defined(_KERNEL_OPT) 96 #include "opt_inet.h" 97 #include "opt_ipsec.h" 98 99 #include "opt_atalk.h" 100 #include "opt_natm.h" 101 #include "opt_wlan.h" 102 #include "opt_net_mpsafe.h" 103 #endif 104 105 #include <sys/param.h> 106 #include <sys/mbuf.h> 107 #include <sys/systm.h> 108 #include <sys/callout.h> 109 #include <sys/proc.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/protosw.h> 114 #include <sys/kernel.h> 115 #include <sys/ioctl.h> 116 #include <sys/sysctl.h> 117 #include <sys/syslog.h> 118 #include <sys/kauth.h> 119 #include <sys/kmem.h> 120 #include <sys/xcall.h> 121 #include <sys/cpu.h> 122 #include <sys/intr.h> 123 124 #include <net/if.h> 125 #include <net/if_dl.h> 126 #include <net/if_ether.h> 127 #include <net/if_media.h> 128 #include <net80211/ieee80211.h> 129 #include <net80211/ieee80211_ioctl.h> 130 #include <net/if_types.h> 131 #include <net/route.h> 132 #include <net/netisr.h> 133 #include <sys/module.h> 134 #ifdef NETATALK 135 #include <netatalk/at_extern.h> 136 #include <netatalk/at.h> 137 #endif 138 #include <net/pfil.h> 139 #include <netinet/in.h> 140 #include <netinet/in_var.h> 141 #include <netinet/ip_encap.h> 142 #include <net/bpf.h> 143 144 #ifdef INET6 145 #include <netinet6/in6_var.h> 146 #include <netinet6/nd6.h> 147 #endif 148 149 #include "ether.h" 150 #include "fddi.h" 151 #include "token.h" 152 153 #include "carp.h" 154 #if NCARP > 0 155 #include <netinet/ip_carp.h> 156 #endif 157 158 #include <compat/sys/sockio.h> 159 #include <compat/sys/socket.h> 160 161 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); 162 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); 163 164 /* 165 * Global list of interfaces. 166 */ 167 /* DEPRECATED. Remove it once kvm(3) users disappeared */ 168 struct ifnet_head ifnet_list; 169 170 struct pslist_head ifnet_pslist; 171 static ifnet_t ** ifindex2ifnet = NULL; 172 static u_int if_index = 1; 173 static size_t if_indexlim = 0; 174 static uint64_t index_gen; 175 /* Mutex to protect the above objects. */ 176 kmutex_t ifnet_mtx __cacheline_aligned; 177 static struct psref_class *ifnet_psref_class __read_mostly; 178 static pserialize_t ifnet_psz; 179 180 static kmutex_t if_clone_mtx; 181 182 struct ifnet *lo0ifp; 183 int ifqmaxlen = IFQ_MAXLEN; 184 185 struct psref_class *ifa_psref_class __read_mostly; 186 187 static int if_delroute_matcher(struct rtentry *, void *); 188 189 static bool if_is_unit(const char *); 190 static struct if_clone *if_clone_lookup(const char *, int *); 191 192 static LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners); 193 static int if_cloners_count; 194 195 /* Packet filtering hook for interfaces. */ 196 pfil_head_t * if_pfil __read_mostly; 197 198 static kauth_listener_t if_listener; 199 200 static int doifioctl(struct socket *, u_long, void *, struct lwp *); 201 static void if_detach_queues(struct ifnet *, struct ifqueue *); 202 static void sysctl_sndq_setup(struct sysctllog **, const char *, 203 struct ifaltq *); 204 static void if_slowtimo(void *); 205 static void if_free_sadl(struct ifnet *); 206 static void if_attachdomain1(struct ifnet *); 207 static int ifconf(u_long, void *); 208 static int if_transmit(struct ifnet *, struct mbuf *); 209 static int if_clone_create(const char *); 210 static int if_clone_destroy(const char *); 211 static void if_link_state_change_si(void *); 212 213 struct if_percpuq { 214 struct ifnet *ipq_ifp; 215 void *ipq_si; 216 struct percpu *ipq_ifqs; /* struct ifqueue */ 217 }; 218 219 static struct mbuf *if_percpuq_dequeue(struct if_percpuq *); 220 221 static void if_percpuq_drops(void *, void *, struct cpu_info *); 222 static int sysctl_percpuq_drops_handler(SYSCTLFN_PROTO); 223 static void sysctl_percpuq_setup(struct sysctllog **, const char *, 224 struct if_percpuq *); 225 226 struct if_deferred_start { 227 struct ifnet *ids_ifp; 228 void (*ids_if_start)(struct ifnet *); 229 void *ids_si; 230 }; 231 232 static void if_deferred_start_softint(void *); 233 static void if_deferred_start_common(struct ifnet *); 234 static void if_deferred_start_destroy(struct ifnet *); 235 236 #if defined(INET) || defined(INET6) 237 static void sysctl_net_pktq_setup(struct sysctllog **, int); 238 #endif 239 240 static void if_sysctl_setup(struct sysctllog **); 241 242 /* 243 * Pointer to stub or real compat_cvtcmd() depending on presence of 244 * the compat module 245 */ 246 u_long stub_compat_cvtcmd(u_long); 247 u_long (*vec_compat_cvtcmd)(u_long) = stub_compat_cvtcmd; 248 249 /* Similarly, pointer to compat_ifioctl() if it is present */ 250 251 int (*vec_compat_ifioctl)(struct socket *, u_long, u_long, void *, 252 struct lwp *) = NULL; 253 254 /* The stub version of compat_cvtcmd() */ 255 u_long stub_compat_cvtcmd(u_long cmd) 256 { 257 258 return cmd; 259 } 260 261 static int 262 if_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 263 void *arg0, void *arg1, void *arg2, void *arg3) 264 { 265 int result; 266 enum kauth_network_req req; 267 268 result = KAUTH_RESULT_DEFER; 269 req = (enum kauth_network_req)arg1; 270 271 if (action != KAUTH_NETWORK_INTERFACE) 272 return result; 273 274 if ((req == KAUTH_REQ_NETWORK_INTERFACE_GET) || 275 (req == KAUTH_REQ_NETWORK_INTERFACE_SET)) 276 result = KAUTH_RESULT_ALLOW; 277 278 return result; 279 } 280 281 /* 282 * Network interface utility routines. 283 * 284 * Routines with ifa_ifwith* names take sockaddr *'s as 285 * parameters. 286 */ 287 void 288 ifinit(void) 289 { 290 291 if_sysctl_setup(NULL); 292 293 #if (defined(INET) || defined(INET6)) 294 encapinit(); 295 #endif 296 297 if_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 298 if_listener_cb, NULL); 299 300 /* interfaces are available, inform socket code */ 301 ifioctl = doifioctl; 302 } 303 304 /* 305 * XXX Initialization before configure(). 306 * XXX hack to get pfil_add_hook working in autoconf. 307 */ 308 void 309 ifinit1(void) 310 { 311 mutex_init(&if_clone_mtx, MUTEX_DEFAULT, IPL_NONE); 312 313 TAILQ_INIT(&ifnet_list); 314 mutex_init(&ifnet_mtx, MUTEX_DEFAULT, IPL_NONE); 315 ifnet_psz = pserialize_create(); 316 ifnet_psref_class = psref_class_create("ifnet", IPL_SOFTNET); 317 ifa_psref_class = psref_class_create("ifa", IPL_SOFTNET); 318 PSLIST_INIT(&ifnet_pslist); 319 320 if_indexlim = 8; 321 322 if_pfil = pfil_head_create(PFIL_TYPE_IFNET, NULL); 323 KASSERT(if_pfil != NULL); 324 325 #if NETHER > 0 || NFDDI > 0 || defined(NETATALK) || NTOKEN > 0 || defined(WLAN) 326 etherinit(); 327 #endif 328 } 329 330 ifnet_t * 331 if_alloc(u_char type) 332 { 333 return kmem_zalloc(sizeof(ifnet_t), KM_SLEEP); 334 } 335 336 void 337 if_free(ifnet_t *ifp) 338 { 339 kmem_free(ifp, sizeof(ifnet_t)); 340 } 341 342 void 343 if_initname(struct ifnet *ifp, const char *name, int unit) 344 { 345 (void)snprintf(ifp->if_xname, sizeof(ifp->if_xname), 346 "%s%d", name, unit); 347 } 348 349 /* 350 * Null routines used while an interface is going away. These routines 351 * just return an error. 352 */ 353 354 int 355 if_nulloutput(struct ifnet *ifp, struct mbuf *m, 356 const struct sockaddr *so, const struct rtentry *rt) 357 { 358 359 return ENXIO; 360 } 361 362 void 363 if_nullinput(struct ifnet *ifp, struct mbuf *m) 364 { 365 366 /* Nothing. */ 367 } 368 369 void 370 if_nullstart(struct ifnet *ifp) 371 { 372 373 /* Nothing. */ 374 } 375 376 int 377 if_nulltransmit(struct ifnet *ifp, struct mbuf *m) 378 { 379 380 m_freem(m); 381 return ENXIO; 382 } 383 384 int 385 if_nullioctl(struct ifnet *ifp, u_long cmd, void *data) 386 { 387 388 return ENXIO; 389 } 390 391 int 392 if_nullinit(struct ifnet *ifp) 393 { 394 395 return ENXIO; 396 } 397 398 void 399 if_nullstop(struct ifnet *ifp, int disable) 400 { 401 402 /* Nothing. */ 403 } 404 405 void 406 if_nullslowtimo(struct ifnet *ifp) 407 { 408 409 /* Nothing. */ 410 } 411 412 void 413 if_nulldrain(struct ifnet *ifp) 414 { 415 416 /* Nothing. */ 417 } 418 419 void 420 if_set_sadl(struct ifnet *ifp, const void *lla, u_char addrlen, bool factory) 421 { 422 struct ifaddr *ifa; 423 struct sockaddr_dl *sdl; 424 425 ifp->if_addrlen = addrlen; 426 if_alloc_sadl(ifp); 427 ifa = ifp->if_dl; 428 sdl = satosdl(ifa->ifa_addr); 429 430 (void)sockaddr_dl_setaddr(sdl, sdl->sdl_len, lla, ifp->if_addrlen); 431 if (factory) { 432 ifp->if_hwdl = ifp->if_dl; 433 ifaref(ifp->if_hwdl); 434 } 435 /* TBD routing socket */ 436 } 437 438 struct ifaddr * 439 if_dl_create(const struct ifnet *ifp, const struct sockaddr_dl **sdlp) 440 { 441 unsigned socksize, ifasize; 442 int addrlen, namelen; 443 struct sockaddr_dl *mask, *sdl; 444 struct ifaddr *ifa; 445 446 namelen = strlen(ifp->if_xname); 447 addrlen = ifp->if_addrlen; 448 socksize = roundup(sockaddr_dl_measure(namelen, addrlen), sizeof(long)); 449 ifasize = sizeof(*ifa) + 2 * socksize; 450 ifa = (struct ifaddr *)malloc(ifasize, M_IFADDR, M_WAITOK|M_ZERO); 451 452 sdl = (struct sockaddr_dl *)(ifa + 1); 453 mask = (struct sockaddr_dl *)(socksize + (char *)sdl); 454 455 sockaddr_dl_init(sdl, socksize, ifp->if_index, ifp->if_type, 456 ifp->if_xname, namelen, NULL, addrlen); 457 mask->sdl_len = sockaddr_dl_measure(namelen, 0); 458 memset(&mask->sdl_data[0], 0xff, namelen); 459 ifa->ifa_rtrequest = link_rtrequest; 460 ifa->ifa_addr = (struct sockaddr *)sdl; 461 ifa->ifa_netmask = (struct sockaddr *)mask; 462 ifa_psref_init(ifa); 463 464 *sdlp = sdl; 465 466 return ifa; 467 } 468 469 static void 470 if_sadl_setrefs(struct ifnet *ifp, struct ifaddr *ifa) 471 { 472 const struct sockaddr_dl *sdl; 473 474 ifp->if_dl = ifa; 475 ifaref(ifa); 476 sdl = satosdl(ifa->ifa_addr); 477 ifp->if_sadl = sdl; 478 } 479 480 /* 481 * Allocate the link level name for the specified interface. This 482 * is an attachment helper. It must be called after ifp->if_addrlen 483 * is initialized, which may not be the case when if_attach() is 484 * called. 485 */ 486 void 487 if_alloc_sadl(struct ifnet *ifp) 488 { 489 struct ifaddr *ifa; 490 const struct sockaddr_dl *sdl; 491 492 /* 493 * If the interface already has a link name, release it 494 * now. This is useful for interfaces that can change 495 * link types, and thus switch link names often. 496 */ 497 if (ifp->if_sadl != NULL) 498 if_free_sadl(ifp); 499 500 ifa = if_dl_create(ifp, &sdl); 501 502 ifa_insert(ifp, ifa); 503 if_sadl_setrefs(ifp, ifa); 504 } 505 506 static void 507 if_deactivate_sadl(struct ifnet *ifp) 508 { 509 struct ifaddr *ifa; 510 511 KASSERT(ifp->if_dl != NULL); 512 513 ifa = ifp->if_dl; 514 515 ifp->if_sadl = NULL; 516 517 ifp->if_dl = NULL; 518 ifafree(ifa); 519 } 520 521 static void 522 if_replace_sadl(struct ifnet *ifp, struct ifaddr *ifa) 523 { 524 struct ifaddr *old; 525 526 KASSERT(ifp->if_dl != NULL); 527 528 old = ifp->if_dl; 529 530 ifaref(ifa); 531 /* XXX Update if_dl and if_sadl atomically */ 532 ifp->if_dl = ifa; 533 ifp->if_sadl = satosdl(ifa->ifa_addr); 534 535 ifafree(old); 536 } 537 538 void 539 if_activate_sadl(struct ifnet *ifp, struct ifaddr *ifa0, 540 const struct sockaddr_dl *sdl) 541 { 542 int s, ss; 543 struct ifaddr *ifa; 544 int bound = curlwp_bind(); 545 546 KASSERT(ifa_held(ifa0)); 547 548 s = splsoftnet(); 549 550 if_replace_sadl(ifp, ifa0); 551 552 ss = pserialize_read_enter(); 553 IFADDR_READER_FOREACH(ifa, ifp) { 554 struct psref psref; 555 ifa_acquire(ifa, &psref); 556 pserialize_read_exit(ss); 557 558 rtinit(ifa, RTM_LLINFO_UPD, 0); 559 560 ss = pserialize_read_enter(); 561 ifa_release(ifa, &psref); 562 } 563 pserialize_read_exit(ss); 564 565 splx(s); 566 curlwp_bindx(bound); 567 } 568 569 /* 570 * Free the link level name for the specified interface. This is 571 * a detach helper. This is called from if_detach(). 572 */ 573 static void 574 if_free_sadl(struct ifnet *ifp) 575 { 576 struct ifaddr *ifa; 577 int s; 578 579 ifa = ifp->if_dl; 580 if (ifa == NULL) { 581 KASSERT(ifp->if_sadl == NULL); 582 return; 583 } 584 585 KASSERT(ifp->if_sadl != NULL); 586 587 s = splsoftnet(); 588 rtinit(ifa, RTM_DELETE, 0); 589 ifa_remove(ifp, ifa); 590 if_deactivate_sadl(ifp); 591 if (ifp->if_hwdl == ifa) { 592 ifafree(ifa); 593 ifp->if_hwdl = NULL; 594 } 595 splx(s); 596 } 597 598 static void 599 if_getindex(ifnet_t *ifp) 600 { 601 bool hitlimit = false; 602 603 ifp->if_index_gen = index_gen++; 604 605 ifp->if_index = if_index; 606 if (ifindex2ifnet == NULL) { 607 if_index++; 608 goto skip; 609 } 610 while (if_byindex(ifp->if_index)) { 611 /* 612 * If we hit USHRT_MAX, we skip back to 0 since 613 * there are a number of places where the value 614 * of if_index or if_index itself is compared 615 * to or stored in an unsigned short. By 616 * jumping back, we won't botch those assignments 617 * or comparisons. 618 */ 619 if (++if_index == 0) { 620 if_index = 1; 621 } else if (if_index == USHRT_MAX) { 622 /* 623 * However, if we have to jump back to 624 * zero *twice* without finding an empty 625 * slot in ifindex2ifnet[], then there 626 * there are too many (>65535) interfaces. 627 */ 628 if (hitlimit) { 629 panic("too many interfaces"); 630 } 631 hitlimit = true; 632 if_index = 1; 633 } 634 ifp->if_index = if_index; 635 } 636 skip: 637 /* 638 * ifindex2ifnet is indexed by if_index. Since if_index will 639 * grow dynamically, it should grow too. 640 */ 641 if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) { 642 size_t m, n, oldlim; 643 void *q; 644 645 oldlim = if_indexlim; 646 while (ifp->if_index >= if_indexlim) 647 if_indexlim <<= 1; 648 649 /* grow ifindex2ifnet */ 650 m = oldlim * sizeof(struct ifnet *); 651 n = if_indexlim * sizeof(struct ifnet *); 652 q = malloc(n, M_IFADDR, M_WAITOK|M_ZERO); 653 if (ifindex2ifnet != NULL) { 654 memcpy(q, ifindex2ifnet, m); 655 free(ifindex2ifnet, M_IFADDR); 656 } 657 ifindex2ifnet = (struct ifnet **)q; 658 } 659 ifindex2ifnet[ifp->if_index] = ifp; 660 } 661 662 /* 663 * Initialize an interface and assign an index for it. 664 * 665 * It must be called prior to a device specific attach routine 666 * (e.g., ether_ifattach and ieee80211_ifattach) or if_alloc_sadl, 667 * and be followed by if_register: 668 * 669 * if_initialize(ifp); 670 * ether_ifattach(ifp, enaddr); 671 * if_register(ifp); 672 */ 673 int 674 if_initialize(ifnet_t *ifp) 675 { 676 int rv = 0; 677 678 KASSERT(if_indexlim > 0); 679 TAILQ_INIT(&ifp->if_addrlist); 680 681 /* 682 * Link level name is allocated later by a separate call to 683 * if_alloc_sadl(). 684 */ 685 686 if (ifp->if_snd.ifq_maxlen == 0) 687 ifp->if_snd.ifq_maxlen = ifqmaxlen; 688 689 ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */ 690 691 ifp->if_link_state = LINK_STATE_UNKNOWN; 692 ifp->if_link_queue = -1; /* all bits set, see link_state_change() */ 693 694 ifp->if_capenable = 0; 695 ifp->if_csum_flags_tx = 0; 696 ifp->if_csum_flags_rx = 0; 697 698 #ifdef ALTQ 699 ifp->if_snd.altq_type = 0; 700 ifp->if_snd.altq_disc = NULL; 701 ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE; 702 ifp->if_snd.altq_tbr = NULL; 703 ifp->if_snd.altq_ifp = ifp; 704 #endif 705 706 IFQ_LOCK_INIT(&ifp->if_snd); 707 708 ifp->if_pfil = pfil_head_create(PFIL_TYPE_IFNET, ifp); 709 pfil_run_ifhooks(if_pfil, PFIL_IFNET_ATTACH, ifp); 710 711 IF_AFDATA_LOCK_INIT(ifp); 712 713 if (if_is_link_state_changeable(ifp)) { 714 ifp->if_link_si = softint_establish(SOFTINT_NET, 715 if_link_state_change_si, ifp); 716 if (ifp->if_link_si == NULL) { 717 rv = ENOMEM; 718 goto fail; 719 } 720 } 721 722 PSLIST_ENTRY_INIT(ifp, if_pslist_entry); 723 PSLIST_INIT(&ifp->if_addr_pslist); 724 psref_target_init(&ifp->if_psref, ifnet_psref_class); 725 ifp->if_ioctl_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 726 LIST_INIT(&ifp->if_multiaddrs); 727 728 IFNET_LOCK(); 729 if_getindex(ifp); 730 IFNET_UNLOCK(); 731 732 return 0; 733 734 fail: 735 IF_AFDATA_LOCK_DESTROY(ifp); 736 737 pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp); 738 (void)pfil_head_destroy(ifp->if_pfil); 739 740 IFQ_LOCK_DESTROY(&ifp->if_snd); 741 742 return rv; 743 } 744 745 /* 746 * Register an interface to the list of "active" interfaces. 747 */ 748 void 749 if_register(ifnet_t *ifp) 750 { 751 /* 752 * If the driver has not supplied its own if_ioctl, then 753 * supply the default. 754 */ 755 if (ifp->if_ioctl == NULL) 756 ifp->if_ioctl = ifioctl_common; 757 758 sysctl_sndq_setup(&ifp->if_sysctl_log, ifp->if_xname, &ifp->if_snd); 759 760 if (!STAILQ_EMPTY(&domains)) 761 if_attachdomain1(ifp); 762 763 /* Announce the interface. */ 764 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 765 766 if (ifp->if_slowtimo != NULL) { 767 ifp->if_slowtimo_ch = 768 kmem_zalloc(sizeof(*ifp->if_slowtimo_ch), KM_SLEEP); 769 callout_init(ifp->if_slowtimo_ch, 0); 770 callout_setfunc(ifp->if_slowtimo_ch, if_slowtimo, ifp); 771 if_slowtimo(ifp); 772 } 773 774 if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit) 775 ifp->if_transmit = if_transmit; 776 777 IFNET_LOCK(); 778 TAILQ_INSERT_TAIL(&ifnet_list, ifp, if_list); 779 IFNET_WRITER_INSERT_TAIL(ifp); 780 IFNET_UNLOCK(); 781 } 782 783 /* 784 * The if_percpuq framework 785 * 786 * It allows network device drivers to execute the network stack 787 * in softint (so called softint-based if_input). It utilizes 788 * softint and percpu ifqueue. It doesn't distribute any packets 789 * between CPUs, unlike pktqueue(9). 790 * 791 * Currently we support two options for device drivers to apply the framework: 792 * - Use it implicitly with less changes 793 * - If you use if_attach in driver's _attach function and if_input in 794 * driver's Rx interrupt handler, a packet is queued and a softint handles 795 * the packet implicitly 796 * - Use it explicitly in each driver (recommended) 797 * - You can use if_percpuq_* directly in your driver 798 * - In this case, you need to allocate struct if_percpuq in driver's softc 799 * - See wm(4) as a reference implementation 800 */ 801 802 static void 803 if_percpuq_softint(void *arg) 804 { 805 struct if_percpuq *ipq = arg; 806 struct ifnet *ifp = ipq->ipq_ifp; 807 struct mbuf *m; 808 809 while ((m = if_percpuq_dequeue(ipq)) != NULL) { 810 ifp->if_ipackets++; 811 bpf_mtap(ifp, m); 812 813 ifp->_if_input(ifp, m); 814 } 815 } 816 817 static void 818 if_percpuq_init_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused) 819 { 820 struct ifqueue *const ifq = p; 821 822 memset(ifq, 0, sizeof(*ifq)); 823 ifq->ifq_maxlen = IFQ_MAXLEN; 824 } 825 826 struct if_percpuq * 827 if_percpuq_create(struct ifnet *ifp) 828 { 829 struct if_percpuq *ipq; 830 831 ipq = kmem_zalloc(sizeof(*ipq), KM_SLEEP); 832 ipq->ipq_ifp = ifp; 833 ipq->ipq_si = softint_establish(SOFTINT_NET|SOFTINT_MPSAFE, 834 if_percpuq_softint, ipq); 835 ipq->ipq_ifqs = percpu_alloc(sizeof(struct ifqueue)); 836 percpu_foreach(ipq->ipq_ifqs, &if_percpuq_init_ifq, NULL); 837 838 sysctl_percpuq_setup(&ifp->if_sysctl_log, ifp->if_xname, ipq); 839 840 return ipq; 841 } 842 843 static struct mbuf * 844 if_percpuq_dequeue(struct if_percpuq *ipq) 845 { 846 struct mbuf *m; 847 struct ifqueue *ifq; 848 int s; 849 850 s = splnet(); 851 ifq = percpu_getref(ipq->ipq_ifqs); 852 IF_DEQUEUE(ifq, m); 853 percpu_putref(ipq->ipq_ifqs); 854 splx(s); 855 856 return m; 857 } 858 859 static void 860 if_percpuq_purge_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused) 861 { 862 struct ifqueue *const ifq = p; 863 864 IF_PURGE(ifq); 865 } 866 867 void 868 if_percpuq_destroy(struct if_percpuq *ipq) 869 { 870 871 /* if_detach may already destroy it */ 872 if (ipq == NULL) 873 return; 874 875 softint_disestablish(ipq->ipq_si); 876 percpu_foreach(ipq->ipq_ifqs, &if_percpuq_purge_ifq, NULL); 877 percpu_free(ipq->ipq_ifqs, sizeof(struct ifqueue)); 878 kmem_free(ipq, sizeof(*ipq)); 879 } 880 881 void 882 if_percpuq_enqueue(struct if_percpuq *ipq, struct mbuf *m) 883 { 884 struct ifqueue *ifq; 885 int s; 886 887 KASSERT(ipq != NULL); 888 889 s = splnet(); 890 ifq = percpu_getref(ipq->ipq_ifqs); 891 if (IF_QFULL(ifq)) { 892 IF_DROP(ifq); 893 percpu_putref(ipq->ipq_ifqs); 894 m_freem(m); 895 goto out; 896 } 897 IF_ENQUEUE(ifq, m); 898 percpu_putref(ipq->ipq_ifqs); 899 900 softint_schedule(ipq->ipq_si); 901 out: 902 splx(s); 903 } 904 905 static void 906 if_percpuq_drops(void *p, void *arg, struct cpu_info *ci __unused) 907 { 908 struct ifqueue *const ifq = p; 909 int *sum = arg; 910 911 *sum += ifq->ifq_drops; 912 } 913 914 static int 915 sysctl_percpuq_drops_handler(SYSCTLFN_ARGS) 916 { 917 struct sysctlnode node; 918 struct if_percpuq *ipq; 919 int sum = 0; 920 int error; 921 922 node = *rnode; 923 ipq = node.sysctl_data; 924 925 percpu_foreach(ipq->ipq_ifqs, if_percpuq_drops, &sum); 926 927 node.sysctl_data = ∑ 928 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 929 if (error != 0 || newp == NULL) 930 return error; 931 932 return 0; 933 } 934 935 static void 936 sysctl_percpuq_setup(struct sysctllog **clog, const char* ifname, 937 struct if_percpuq *ipq) 938 { 939 const struct sysctlnode *cnode, *rnode; 940 941 if (sysctl_createv(clog, 0, NULL, &rnode, 942 CTLFLAG_PERMANENT, 943 CTLTYPE_NODE, "interfaces", 944 SYSCTL_DESCR("Per-interface controls"), 945 NULL, 0, NULL, 0, 946 CTL_NET, CTL_CREATE, CTL_EOL) != 0) 947 goto bad; 948 949 if (sysctl_createv(clog, 0, &rnode, &rnode, 950 CTLFLAG_PERMANENT, 951 CTLTYPE_NODE, ifname, 952 SYSCTL_DESCR("Interface controls"), 953 NULL, 0, NULL, 0, 954 CTL_CREATE, CTL_EOL) != 0) 955 goto bad; 956 957 if (sysctl_createv(clog, 0, &rnode, &rnode, 958 CTLFLAG_PERMANENT, 959 CTLTYPE_NODE, "rcvq", 960 SYSCTL_DESCR("Interface input queue controls"), 961 NULL, 0, NULL, 0, 962 CTL_CREATE, CTL_EOL) != 0) 963 goto bad; 964 965 #ifdef NOTYET 966 /* XXX Should show each per-CPU queue length? */ 967 if (sysctl_createv(clog, 0, &rnode, &rnode, 968 CTLFLAG_PERMANENT, 969 CTLTYPE_INT, "len", 970 SYSCTL_DESCR("Current input queue length"), 971 sysctl_percpuq_len, 0, NULL, 0, 972 CTL_CREATE, CTL_EOL) != 0) 973 goto bad; 974 975 if (sysctl_createv(clog, 0, &rnode, &cnode, 976 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 977 CTLTYPE_INT, "maxlen", 978 SYSCTL_DESCR("Maximum allowed input queue length"), 979 sysctl_percpuq_maxlen_handler, 0, (void *)ipq, 0, 980 CTL_CREATE, CTL_EOL) != 0) 981 goto bad; 982 #endif 983 984 if (sysctl_createv(clog, 0, &rnode, &cnode, 985 CTLFLAG_PERMANENT, 986 CTLTYPE_INT, "drops", 987 SYSCTL_DESCR("Total packets dropped due to full input queue"), 988 sysctl_percpuq_drops_handler, 0, (void *)ipq, 0, 989 CTL_CREATE, CTL_EOL) != 0) 990 goto bad; 991 992 return; 993 bad: 994 printf("%s: could not attach sysctl nodes\n", ifname); 995 return; 996 } 997 998 /* 999 * The deferred if_start framework 1000 * 1001 * The common APIs to defer if_start to softint when if_start is requested 1002 * from a device driver running in hardware interrupt context. 1003 */ 1004 /* 1005 * Call ifp->if_start (or equivalent) in a dedicated softint for 1006 * deferred if_start. 1007 */ 1008 static void 1009 if_deferred_start_softint(void *arg) 1010 { 1011 struct if_deferred_start *ids = arg; 1012 struct ifnet *ifp = ids->ids_ifp; 1013 1014 ids->ids_if_start(ifp); 1015 } 1016 1017 /* 1018 * The default callback function for deferred if_start. 1019 */ 1020 static void 1021 if_deferred_start_common(struct ifnet *ifp) 1022 { 1023 int s; 1024 1025 s = splnet(); 1026 if_start_lock(ifp); 1027 splx(s); 1028 } 1029 1030 static inline bool 1031 if_snd_is_used(struct ifnet *ifp) 1032 { 1033 1034 return ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit || 1035 ALTQ_IS_ENABLED(&ifp->if_snd); 1036 } 1037 1038 /* 1039 * Schedule deferred if_start. 1040 */ 1041 void 1042 if_schedule_deferred_start(struct ifnet *ifp) 1043 { 1044 1045 KASSERT(ifp->if_deferred_start != NULL); 1046 1047 if (if_snd_is_used(ifp) && IFQ_IS_EMPTY(&ifp->if_snd)) 1048 return; 1049 1050 softint_schedule(ifp->if_deferred_start->ids_si); 1051 } 1052 1053 /* 1054 * Create an instance of deferred if_start. A driver should call the function 1055 * only if the driver needs deferred if_start. Drivers can setup their own 1056 * deferred if_start function via 2nd argument. 1057 */ 1058 void 1059 if_deferred_start_init(struct ifnet *ifp, void (*func)(struct ifnet *)) 1060 { 1061 struct if_deferred_start *ids; 1062 1063 ids = kmem_zalloc(sizeof(*ids), KM_SLEEP); 1064 ids->ids_ifp = ifp; 1065 ids->ids_si = softint_establish(SOFTINT_NET|SOFTINT_MPSAFE, 1066 if_deferred_start_softint, ids); 1067 if (func != NULL) 1068 ids->ids_if_start = func; 1069 else 1070 ids->ids_if_start = if_deferred_start_common; 1071 1072 ifp->if_deferred_start = ids; 1073 } 1074 1075 static void 1076 if_deferred_start_destroy(struct ifnet *ifp) 1077 { 1078 1079 if (ifp->if_deferred_start == NULL) 1080 return; 1081 1082 softint_disestablish(ifp->if_deferred_start->ids_si); 1083 kmem_free(ifp->if_deferred_start, sizeof(*ifp->if_deferred_start)); 1084 ifp->if_deferred_start = NULL; 1085 } 1086 1087 /* 1088 * The common interface input routine that is called by device drivers, 1089 * which should be used only when the driver's rx handler already runs 1090 * in softint. 1091 */ 1092 void 1093 if_input(struct ifnet *ifp, struct mbuf *m) 1094 { 1095 1096 KASSERT(ifp->if_percpuq == NULL); 1097 KASSERT(!cpu_intr_p()); 1098 1099 ifp->if_ipackets++; 1100 bpf_mtap(ifp, m); 1101 1102 ifp->_if_input(ifp, m); 1103 } 1104 1105 /* 1106 * DEPRECATED. Use if_initialize and if_register instead. 1107 * See the above comment of if_initialize. 1108 * 1109 * Note that it implicitly enables if_percpuq to make drivers easy to 1110 * migrate softint-based if_input without much changes. If you don't 1111 * want to enable it, use if_initialize instead. 1112 */ 1113 int 1114 if_attach(ifnet_t *ifp) 1115 { 1116 int rv; 1117 1118 rv = if_initialize(ifp); 1119 if (rv != 0) 1120 return rv; 1121 1122 ifp->if_percpuq = if_percpuq_create(ifp); 1123 if_register(ifp); 1124 1125 return 0; 1126 } 1127 1128 void 1129 if_attachdomain(void) 1130 { 1131 struct ifnet *ifp; 1132 int s; 1133 int bound = curlwp_bind(); 1134 1135 s = pserialize_read_enter(); 1136 IFNET_READER_FOREACH(ifp) { 1137 struct psref psref; 1138 psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class); 1139 pserialize_read_exit(s); 1140 if_attachdomain1(ifp); 1141 s = pserialize_read_enter(); 1142 psref_release(&psref, &ifp->if_psref, ifnet_psref_class); 1143 } 1144 pserialize_read_exit(s); 1145 curlwp_bindx(bound); 1146 } 1147 1148 static void 1149 if_attachdomain1(struct ifnet *ifp) 1150 { 1151 struct domain *dp; 1152 int s; 1153 1154 s = splsoftnet(); 1155 1156 /* address family dependent data region */ 1157 memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata)); 1158 DOMAIN_FOREACH(dp) { 1159 if (dp->dom_ifattach != NULL) 1160 ifp->if_afdata[dp->dom_family] = 1161 (*dp->dom_ifattach)(ifp); 1162 } 1163 1164 splx(s); 1165 } 1166 1167 /* 1168 * Deactivate an interface. This points all of the procedure 1169 * handles at error stubs. May be called from interrupt context. 1170 */ 1171 void 1172 if_deactivate(struct ifnet *ifp) 1173 { 1174 int s; 1175 1176 s = splsoftnet(); 1177 1178 ifp->if_output = if_nulloutput; 1179 ifp->_if_input = if_nullinput; 1180 ifp->if_start = if_nullstart; 1181 ifp->if_transmit = if_nulltransmit; 1182 ifp->if_ioctl = if_nullioctl; 1183 ifp->if_init = if_nullinit; 1184 ifp->if_stop = if_nullstop; 1185 ifp->if_slowtimo = if_nullslowtimo; 1186 ifp->if_drain = if_nulldrain; 1187 1188 /* No more packets may be enqueued. */ 1189 ifp->if_snd.ifq_maxlen = 0; 1190 1191 splx(s); 1192 } 1193 1194 bool 1195 if_is_deactivated(const struct ifnet *ifp) 1196 { 1197 1198 return ifp->if_output == if_nulloutput; 1199 } 1200 1201 void 1202 if_purgeaddrs(struct ifnet *ifp, int family, void (*purgeaddr)(struct ifaddr *)) 1203 { 1204 struct ifaddr *ifa, *nifa; 1205 int s; 1206 1207 s = pserialize_read_enter(); 1208 for (ifa = IFADDR_READER_FIRST(ifp); ifa; ifa = nifa) { 1209 nifa = IFADDR_READER_NEXT(ifa); 1210 if (ifa->ifa_addr->sa_family != family) 1211 continue; 1212 pserialize_read_exit(s); 1213 1214 (*purgeaddr)(ifa); 1215 1216 s = pserialize_read_enter(); 1217 } 1218 pserialize_read_exit(s); 1219 } 1220 1221 #ifdef IFAREF_DEBUG 1222 static struct ifaddr **ifa_list; 1223 static int ifa_list_size; 1224 1225 /* Depends on only one if_attach runs at once */ 1226 static void 1227 if_build_ifa_list(struct ifnet *ifp) 1228 { 1229 struct ifaddr *ifa; 1230 int i; 1231 1232 KASSERT(ifa_list == NULL); 1233 KASSERT(ifa_list_size == 0); 1234 1235 IFADDR_READER_FOREACH(ifa, ifp) 1236 ifa_list_size++; 1237 1238 ifa_list = kmem_alloc(sizeof(*ifa) * ifa_list_size, KM_SLEEP); 1239 i = 0; 1240 IFADDR_READER_FOREACH(ifa, ifp) { 1241 ifa_list[i++] = ifa; 1242 ifaref(ifa); 1243 } 1244 } 1245 1246 static void 1247 if_check_and_free_ifa_list(struct ifnet *ifp) 1248 { 1249 int i; 1250 struct ifaddr *ifa; 1251 1252 if (ifa_list == NULL) 1253 return; 1254 1255 for (i = 0; i < ifa_list_size; i++) { 1256 char buf[64]; 1257 1258 ifa = ifa_list[i]; 1259 sockaddr_format(ifa->ifa_addr, buf, sizeof(buf)); 1260 if (ifa->ifa_refcnt > 1) { 1261 log(LOG_WARNING, 1262 "ifa(%s) still referenced (refcnt=%d)\n", 1263 buf, ifa->ifa_refcnt - 1); 1264 } else 1265 log(LOG_DEBUG, 1266 "ifa(%s) not referenced (refcnt=%d)\n", 1267 buf, ifa->ifa_refcnt - 1); 1268 ifafree(ifa); 1269 } 1270 1271 kmem_free(ifa_list, sizeof(*ifa) * ifa_list_size); 1272 ifa_list = NULL; 1273 ifa_list_size = 0; 1274 } 1275 #endif 1276 1277 /* 1278 * Detach an interface from the list of "active" interfaces, 1279 * freeing any resources as we go along. 1280 * 1281 * NOTE: This routine must be called with a valid thread context, 1282 * as it may block. 1283 */ 1284 void 1285 if_detach(struct ifnet *ifp) 1286 { 1287 struct socket so; 1288 struct ifaddr *ifa; 1289 #ifdef IFAREF_DEBUG 1290 struct ifaddr *last_ifa = NULL; 1291 #endif 1292 struct domain *dp; 1293 const struct protosw *pr; 1294 int s, i, family, purged; 1295 uint64_t xc; 1296 1297 #ifdef IFAREF_DEBUG 1298 if_build_ifa_list(ifp); 1299 #endif 1300 /* 1301 * XXX It's kind of lame that we have to have the 1302 * XXX socket structure... 1303 */ 1304 memset(&so, 0, sizeof(so)); 1305 1306 s = splnet(); 1307 1308 sysctl_teardown(&ifp->if_sysctl_log); 1309 mutex_enter(ifp->if_ioctl_lock); 1310 if_deactivate(ifp); 1311 mutex_exit(ifp->if_ioctl_lock); 1312 1313 IFNET_LOCK(); 1314 ifindex2ifnet[ifp->if_index] = NULL; 1315 TAILQ_REMOVE(&ifnet_list, ifp, if_list); 1316 IFNET_WRITER_REMOVE(ifp); 1317 pserialize_perform(ifnet_psz); 1318 IFNET_UNLOCK(); 1319 1320 /* Wait for all readers to drain before freeing. */ 1321 psref_target_destroy(&ifp->if_psref, ifnet_psref_class); 1322 PSLIST_ENTRY_DESTROY(ifp, if_pslist_entry); 1323 1324 mutex_obj_free(ifp->if_ioctl_lock); 1325 ifp->if_ioctl_lock = NULL; 1326 1327 if (ifp->if_slowtimo != NULL && ifp->if_slowtimo_ch != NULL) { 1328 ifp->if_slowtimo = NULL; 1329 callout_halt(ifp->if_slowtimo_ch, NULL); 1330 callout_destroy(ifp->if_slowtimo_ch); 1331 kmem_free(ifp->if_slowtimo_ch, sizeof(*ifp->if_slowtimo_ch)); 1332 } 1333 if_deferred_start_destroy(ifp); 1334 1335 /* 1336 * Do an if_down() to give protocols a chance to do something. 1337 */ 1338 if_down(ifp); 1339 1340 #ifdef ALTQ 1341 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 1342 altq_disable(&ifp->if_snd); 1343 if (ALTQ_IS_ATTACHED(&ifp->if_snd)) 1344 altq_detach(&ifp->if_snd); 1345 #endif 1346 1347 mutex_obj_free(ifp->if_snd.ifq_lock); 1348 1349 #if NCARP > 0 1350 /* Remove the interface from any carp group it is a part of. */ 1351 if (ifp->if_carp != NULL && ifp->if_type != IFT_CARP) 1352 carp_ifdetach(ifp); 1353 #endif 1354 1355 /* 1356 * Rip all the addresses off the interface. This should make 1357 * all of the routes go away. 1358 * 1359 * pr_usrreq calls can remove an arbitrary number of ifaddrs 1360 * from the list, including our "cursor", ifa. For safety, 1361 * and to honor the TAILQ abstraction, I just restart the 1362 * loop after each removal. Note that the loop will exit 1363 * when all of the remaining ifaddrs belong to the AF_LINK 1364 * family. I am counting on the historical fact that at 1365 * least one pr_usrreq in each address domain removes at 1366 * least one ifaddr. 1367 */ 1368 again: 1369 /* 1370 * At this point, no other one tries to remove ifa in the list, 1371 * so we don't need to take a lock or psref. 1372 */ 1373 IFADDR_READER_FOREACH(ifa, ifp) { 1374 family = ifa->ifa_addr->sa_family; 1375 #ifdef IFAREF_DEBUG 1376 printf("if_detach: ifaddr %p, family %d, refcnt %d\n", 1377 ifa, family, ifa->ifa_refcnt); 1378 if (last_ifa != NULL && ifa == last_ifa) 1379 panic("if_detach: loop detected"); 1380 last_ifa = ifa; 1381 #endif 1382 if (family == AF_LINK) 1383 continue; 1384 dp = pffinddomain(family); 1385 KASSERTMSG(dp != NULL, "no domain for AF %d", family); 1386 /* 1387 * XXX These PURGEIF calls are redundant with the 1388 * purge-all-families calls below, but are left in for 1389 * now both to make a smaller change, and to avoid 1390 * unplanned interactions with clearing of 1391 * ifp->if_addrlist. 1392 */ 1393 purged = 0; 1394 for (pr = dp->dom_protosw; 1395 pr < dp->dom_protoswNPROTOSW; pr++) { 1396 so.so_proto = pr; 1397 if (pr->pr_usrreqs) { 1398 (void) (*pr->pr_usrreqs->pr_purgeif)(&so, ifp); 1399 purged = 1; 1400 } 1401 } 1402 if (purged == 0) { 1403 /* 1404 * XXX What's really the best thing to do 1405 * XXX here? --thorpej@NetBSD.org 1406 */ 1407 printf("if_detach: WARNING: AF %d not purged\n", 1408 family); 1409 ifa_remove(ifp, ifa); 1410 } 1411 goto again; 1412 } 1413 1414 if_free_sadl(ifp); 1415 1416 /* Delete stray routes from the routing table. */ 1417 for (i = 0; i <= AF_MAX; i++) 1418 rt_delete_matched_entries(i, if_delroute_matcher, ifp); 1419 1420 DOMAIN_FOREACH(dp) { 1421 if (dp->dom_ifdetach != NULL && ifp->if_afdata[dp->dom_family]) 1422 { 1423 void *p = ifp->if_afdata[dp->dom_family]; 1424 if (p) { 1425 ifp->if_afdata[dp->dom_family] = NULL; 1426 (*dp->dom_ifdetach)(ifp, p); 1427 } 1428 } 1429 1430 /* 1431 * One would expect multicast memberships (INET and 1432 * INET6) on UDP sockets to be purged by the PURGEIF 1433 * calls above, but if all addresses were removed from 1434 * the interface prior to destruction, the calls will 1435 * not be made (e.g. ppp, for which pppd(8) generally 1436 * removes addresses before destroying the interface). 1437 * Because there is no invariant that multicast 1438 * memberships only exist for interfaces with IPv4 1439 * addresses, we must call PURGEIF regardless of 1440 * addresses. (Protocols which might store ifnet 1441 * pointers are marked with PR_PURGEIF.) 1442 */ 1443 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { 1444 so.so_proto = pr; 1445 if (pr->pr_usrreqs && pr->pr_flags & PR_PURGEIF) 1446 (void)(*pr->pr_usrreqs->pr_purgeif)(&so, ifp); 1447 } 1448 } 1449 1450 pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp); 1451 (void)pfil_head_destroy(ifp->if_pfil); 1452 1453 /* Announce that the interface is gone. */ 1454 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 1455 1456 IF_AFDATA_LOCK_DESTROY(ifp); 1457 1458 if (if_is_link_state_changeable(ifp)) { 1459 softint_disestablish(ifp->if_link_si); 1460 ifp->if_link_si = NULL; 1461 } 1462 1463 /* 1464 * remove packets that came from ifp, from software interrupt queues. 1465 */ 1466 DOMAIN_FOREACH(dp) { 1467 for (i = 0; i < __arraycount(dp->dom_ifqueues); i++) { 1468 struct ifqueue *iq = dp->dom_ifqueues[i]; 1469 if (iq == NULL) 1470 break; 1471 dp->dom_ifqueues[i] = NULL; 1472 if_detach_queues(ifp, iq); 1473 } 1474 } 1475 1476 /* 1477 * IP queues have to be processed separately: net-queue barrier 1478 * ensures that the packets are dequeued while a cross-call will 1479 * ensure that the interrupts have completed. FIXME: not quite.. 1480 */ 1481 #ifdef INET 1482 pktq_barrier(ip_pktq); 1483 #endif 1484 #ifdef INET6 1485 if (in6_present) 1486 pktq_barrier(ip6_pktq); 1487 #endif 1488 xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); 1489 xc_wait(xc); 1490 1491 if (ifp->if_percpuq != NULL) { 1492 if_percpuq_destroy(ifp->if_percpuq); 1493 ifp->if_percpuq = NULL; 1494 } 1495 1496 splx(s); 1497 1498 #ifdef IFAREF_DEBUG 1499 if_check_and_free_ifa_list(ifp); 1500 #endif 1501 } 1502 1503 static void 1504 if_detach_queues(struct ifnet *ifp, struct ifqueue *q) 1505 { 1506 struct mbuf *m, *prev, *next; 1507 1508 prev = NULL; 1509 for (m = q->ifq_head; m != NULL; m = next) { 1510 KASSERT((m->m_flags & M_PKTHDR) != 0); 1511 1512 next = m->m_nextpkt; 1513 if (m->m_pkthdr.rcvif_index != ifp->if_index) { 1514 prev = m; 1515 continue; 1516 } 1517 1518 if (prev != NULL) 1519 prev->m_nextpkt = m->m_nextpkt; 1520 else 1521 q->ifq_head = m->m_nextpkt; 1522 if (q->ifq_tail == m) 1523 q->ifq_tail = prev; 1524 q->ifq_len--; 1525 1526 m->m_nextpkt = NULL; 1527 m_freem(m); 1528 IF_DROP(q); 1529 } 1530 } 1531 1532 /* 1533 * Callback for a radix tree walk to delete all references to an 1534 * ifnet. 1535 */ 1536 static int 1537 if_delroute_matcher(struct rtentry *rt, void *v) 1538 { 1539 struct ifnet *ifp = (struct ifnet *)v; 1540 1541 if (rt->rt_ifp == ifp) 1542 return 1; 1543 else 1544 return 0; 1545 } 1546 1547 /* 1548 * Create a clone network interface. 1549 */ 1550 static int 1551 if_clone_create(const char *name) 1552 { 1553 struct if_clone *ifc; 1554 int unit; 1555 struct ifnet *ifp; 1556 struct psref psref; 1557 1558 KASSERT(mutex_owned(&if_clone_mtx)); 1559 1560 ifc = if_clone_lookup(name, &unit); 1561 if (ifc == NULL) 1562 return EINVAL; 1563 1564 ifp = if_get(name, &psref); 1565 if (ifp != NULL) { 1566 if_put(ifp, &psref); 1567 return EEXIST; 1568 } 1569 1570 return (*ifc->ifc_create)(ifc, unit); 1571 } 1572 1573 /* 1574 * Destroy a clone network interface. 1575 */ 1576 static int 1577 if_clone_destroy(const char *name) 1578 { 1579 struct if_clone *ifc; 1580 struct ifnet *ifp; 1581 struct psref psref; 1582 1583 KASSERT(mutex_owned(&if_clone_mtx)); 1584 1585 ifc = if_clone_lookup(name, NULL); 1586 if (ifc == NULL) 1587 return EINVAL; 1588 1589 if (ifc->ifc_destroy == NULL) 1590 return EOPNOTSUPP; 1591 1592 ifp = if_get(name, &psref); 1593 if (ifp == NULL) 1594 return ENXIO; 1595 1596 /* We have to disable ioctls here */ 1597 mutex_enter(ifp->if_ioctl_lock); 1598 ifp->if_ioctl = if_nullioctl; 1599 mutex_exit(ifp->if_ioctl_lock); 1600 1601 /* 1602 * We cannot call ifc_destroy with holding ifp. 1603 * Releasing ifp here is safe thanks to if_clone_mtx. 1604 */ 1605 if_put(ifp, &psref); 1606 1607 return (*ifc->ifc_destroy)(ifp); 1608 } 1609 1610 static bool 1611 if_is_unit(const char *name) 1612 { 1613 1614 while(*name != '\0') { 1615 if (*name < '0' || *name > '9') 1616 return false; 1617 name++; 1618 } 1619 1620 return true; 1621 } 1622 1623 /* 1624 * Look up a network interface cloner. 1625 */ 1626 static struct if_clone * 1627 if_clone_lookup(const char *name, int *unitp) 1628 { 1629 struct if_clone *ifc; 1630 const char *cp; 1631 char *dp, ifname[IFNAMSIZ + 3]; 1632 int unit; 1633 1634 KASSERT(mutex_owned(&if_clone_mtx)); 1635 1636 strcpy(ifname, "if_"); 1637 /* separate interface name from unit */ 1638 /* TODO: search unit number from backward */ 1639 for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ && 1640 *cp && !if_is_unit(cp);) 1641 *dp++ = *cp++; 1642 1643 if (cp == name || cp - name == IFNAMSIZ || !*cp) 1644 return NULL; /* No name or unit number */ 1645 *dp++ = '\0'; 1646 1647 again: 1648 LIST_FOREACH(ifc, &if_cloners, ifc_list) { 1649 if (strcmp(ifname + 3, ifc->ifc_name) == 0) 1650 break; 1651 } 1652 1653 if (ifc == NULL) { 1654 int error; 1655 if (*ifname == '\0') 1656 return NULL; 1657 mutex_exit(&if_clone_mtx); 1658 error = module_autoload(ifname, MODULE_CLASS_DRIVER); 1659 mutex_enter(&if_clone_mtx); 1660 if (error) 1661 return NULL; 1662 *ifname = '\0'; 1663 goto again; 1664 } 1665 1666 unit = 0; 1667 while (cp - name < IFNAMSIZ && *cp) { 1668 if (*cp < '0' || *cp > '9' || unit >= INT_MAX / 10) { 1669 /* Bogus unit number. */ 1670 return NULL; 1671 } 1672 unit = (unit * 10) + (*cp++ - '0'); 1673 } 1674 1675 if (unitp != NULL) 1676 *unitp = unit; 1677 return ifc; 1678 } 1679 1680 /* 1681 * Register a network interface cloner. 1682 */ 1683 void 1684 if_clone_attach(struct if_clone *ifc) 1685 { 1686 1687 mutex_enter(&if_clone_mtx); 1688 LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list); 1689 if_cloners_count++; 1690 mutex_exit(&if_clone_mtx); 1691 } 1692 1693 /* 1694 * Unregister a network interface cloner. 1695 */ 1696 void 1697 if_clone_detach(struct if_clone *ifc) 1698 { 1699 1700 mutex_enter(&if_clone_mtx); 1701 LIST_REMOVE(ifc, ifc_list); 1702 if_cloners_count--; 1703 mutex_exit(&if_clone_mtx); 1704 } 1705 1706 /* 1707 * Provide list of interface cloners to userspace. 1708 */ 1709 int 1710 if_clone_list(int buf_count, char *buffer, int *total) 1711 { 1712 char outbuf[IFNAMSIZ], *dst; 1713 struct if_clone *ifc; 1714 int count, error = 0; 1715 1716 mutex_enter(&if_clone_mtx); 1717 *total = if_cloners_count; 1718 if ((dst = buffer) == NULL) { 1719 /* Just asking how many there are. */ 1720 goto out; 1721 } 1722 1723 if (buf_count < 0) { 1724 error = EINVAL; 1725 goto out; 1726 } 1727 1728 count = (if_cloners_count < buf_count) ? 1729 if_cloners_count : buf_count; 1730 1731 for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0; 1732 ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) { 1733 (void)strncpy(outbuf, ifc->ifc_name, sizeof(outbuf)); 1734 if (outbuf[sizeof(outbuf) - 1] != '\0') { 1735 error = ENAMETOOLONG; 1736 goto out; 1737 } 1738 error = copyout(outbuf, dst, sizeof(outbuf)); 1739 if (error != 0) 1740 break; 1741 } 1742 1743 out: 1744 mutex_exit(&if_clone_mtx); 1745 return error; 1746 } 1747 1748 void 1749 ifa_psref_init(struct ifaddr *ifa) 1750 { 1751 1752 psref_target_init(&ifa->ifa_psref, ifa_psref_class); 1753 } 1754 1755 void 1756 ifaref(struct ifaddr *ifa) 1757 { 1758 KASSERT(!ISSET(ifa->ifa_flags, IFA_DESTROYING)); 1759 ifa->ifa_refcnt++; 1760 } 1761 1762 void 1763 ifafree(struct ifaddr *ifa) 1764 { 1765 KASSERT(ifa != NULL); 1766 KASSERT(ifa->ifa_refcnt > 0); 1767 1768 if (--ifa->ifa_refcnt == 0) { 1769 free(ifa, M_IFADDR); 1770 } 1771 } 1772 1773 bool 1774 ifa_is_destroying(struct ifaddr *ifa) 1775 { 1776 1777 return ISSET(ifa->ifa_flags, IFA_DESTROYING); 1778 } 1779 1780 void 1781 ifa_insert(struct ifnet *ifp, struct ifaddr *ifa) 1782 { 1783 1784 ifa->ifa_ifp = ifp; 1785 1786 IFNET_LOCK(); 1787 TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list); 1788 IFADDR_ENTRY_INIT(ifa); 1789 IFADDR_WRITER_INSERT_TAIL(ifp, ifa); 1790 IFNET_UNLOCK(); 1791 1792 ifaref(ifa); 1793 } 1794 1795 void 1796 ifa_remove(struct ifnet *ifp, struct ifaddr *ifa) 1797 { 1798 1799 KASSERT(ifa->ifa_ifp == ifp); 1800 1801 IFNET_LOCK(); 1802 TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list); 1803 IFADDR_WRITER_REMOVE(ifa); 1804 #ifdef NET_MPSAFE 1805 pserialize_perform(ifnet_psz); 1806 #endif 1807 IFNET_UNLOCK(); 1808 1809 #ifdef NET_MPSAFE 1810 psref_target_destroy(&ifa->ifa_psref, ifa_psref_class); 1811 #endif 1812 IFADDR_ENTRY_DESTROY(ifa); 1813 ifafree(ifa); 1814 } 1815 1816 void 1817 ifa_acquire(struct ifaddr *ifa, struct psref *psref) 1818 { 1819 1820 psref_acquire(psref, &ifa->ifa_psref, ifa_psref_class); 1821 } 1822 1823 void 1824 ifa_release(struct ifaddr *ifa, struct psref *psref) 1825 { 1826 1827 if (ifa == NULL) 1828 return; 1829 1830 psref_release(psref, &ifa->ifa_psref, ifa_psref_class); 1831 } 1832 1833 bool 1834 ifa_held(struct ifaddr *ifa) 1835 { 1836 1837 return psref_held(&ifa->ifa_psref, ifa_psref_class); 1838 } 1839 1840 static inline int 1841 equal(const struct sockaddr *sa1, const struct sockaddr *sa2) 1842 { 1843 return sockaddr_cmp(sa1, sa2) == 0; 1844 } 1845 1846 /* 1847 * Locate an interface based on a complete address. 1848 */ 1849 /*ARGSUSED*/ 1850 struct ifaddr * 1851 ifa_ifwithaddr(const struct sockaddr *addr) 1852 { 1853 struct ifnet *ifp; 1854 struct ifaddr *ifa; 1855 1856 IFNET_READER_FOREACH(ifp) { 1857 if (if_is_deactivated(ifp)) 1858 continue; 1859 IFADDR_READER_FOREACH(ifa, ifp) { 1860 if (ifa->ifa_addr->sa_family != addr->sa_family) 1861 continue; 1862 if (equal(addr, ifa->ifa_addr)) 1863 return ifa; 1864 if ((ifp->if_flags & IFF_BROADCAST) && 1865 ifa->ifa_broadaddr && 1866 /* IP6 doesn't have broadcast */ 1867 ifa->ifa_broadaddr->sa_len != 0 && 1868 equal(ifa->ifa_broadaddr, addr)) 1869 return ifa; 1870 } 1871 } 1872 return NULL; 1873 } 1874 1875 struct ifaddr * 1876 ifa_ifwithaddr_psref(const struct sockaddr *addr, struct psref *psref) 1877 { 1878 struct ifaddr *ifa; 1879 int s = pserialize_read_enter(); 1880 1881 ifa = ifa_ifwithaddr(addr); 1882 if (ifa != NULL) 1883 ifa_acquire(ifa, psref); 1884 pserialize_read_exit(s); 1885 1886 return ifa; 1887 } 1888 1889 /* 1890 * Locate the point to point interface with a given destination address. 1891 */ 1892 /*ARGSUSED*/ 1893 struct ifaddr * 1894 ifa_ifwithdstaddr(const struct sockaddr *addr) 1895 { 1896 struct ifnet *ifp; 1897 struct ifaddr *ifa; 1898 1899 IFNET_READER_FOREACH(ifp) { 1900 if (if_is_deactivated(ifp)) 1901 continue; 1902 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) 1903 continue; 1904 IFADDR_READER_FOREACH(ifa, ifp) { 1905 if (ifa->ifa_addr->sa_family != addr->sa_family || 1906 ifa->ifa_dstaddr == NULL) 1907 continue; 1908 if (equal(addr, ifa->ifa_dstaddr)) 1909 return ifa; 1910 } 1911 } 1912 1913 return NULL; 1914 } 1915 1916 struct ifaddr * 1917 ifa_ifwithdstaddr_psref(const struct sockaddr *addr, struct psref *psref) 1918 { 1919 struct ifaddr *ifa; 1920 int s; 1921 1922 s = pserialize_read_enter(); 1923 ifa = ifa_ifwithdstaddr(addr); 1924 if (ifa != NULL) 1925 ifa_acquire(ifa, psref); 1926 pserialize_read_exit(s); 1927 1928 return ifa; 1929 } 1930 1931 /* 1932 * Find an interface on a specific network. If many, choice 1933 * is most specific found. 1934 */ 1935 struct ifaddr * 1936 ifa_ifwithnet(const struct sockaddr *addr) 1937 { 1938 struct ifnet *ifp; 1939 struct ifaddr *ifa, *ifa_maybe = NULL; 1940 const struct sockaddr_dl *sdl; 1941 u_int af = addr->sa_family; 1942 const char *addr_data = addr->sa_data, *cplim; 1943 1944 if (af == AF_LINK) { 1945 sdl = satocsdl(addr); 1946 if (sdl->sdl_index && sdl->sdl_index < if_indexlim && 1947 ifindex2ifnet[sdl->sdl_index] && 1948 !if_is_deactivated(ifindex2ifnet[sdl->sdl_index])) { 1949 return ifindex2ifnet[sdl->sdl_index]->if_dl; 1950 } 1951 } 1952 #ifdef NETATALK 1953 if (af == AF_APPLETALK) { 1954 const struct sockaddr_at *sat, *sat2; 1955 sat = (const struct sockaddr_at *)addr; 1956 IFNET_READER_FOREACH(ifp) { 1957 if (if_is_deactivated(ifp)) 1958 continue; 1959 ifa = at_ifawithnet((const struct sockaddr_at *)addr, ifp); 1960 if (ifa == NULL) 1961 continue; 1962 sat2 = (struct sockaddr_at *)ifa->ifa_addr; 1963 if (sat2->sat_addr.s_net == sat->sat_addr.s_net) 1964 return ifa; /* exact match */ 1965 if (ifa_maybe == NULL) { 1966 /* else keep the if with the right range */ 1967 ifa_maybe = ifa; 1968 } 1969 } 1970 return ifa_maybe; 1971 } 1972 #endif 1973 IFNET_READER_FOREACH(ifp) { 1974 if (if_is_deactivated(ifp)) 1975 continue; 1976 IFADDR_READER_FOREACH(ifa, ifp) { 1977 const char *cp, *cp2, *cp3; 1978 1979 if (ifa->ifa_addr->sa_family != af || 1980 ifa->ifa_netmask == NULL) 1981 next: continue; 1982 cp = addr_data; 1983 cp2 = ifa->ifa_addr->sa_data; 1984 cp3 = ifa->ifa_netmask->sa_data; 1985 cplim = (const char *)ifa->ifa_netmask + 1986 ifa->ifa_netmask->sa_len; 1987 while (cp3 < cplim) { 1988 if ((*cp++ ^ *cp2++) & *cp3++) { 1989 /* want to continue for() loop */ 1990 goto next; 1991 } 1992 } 1993 if (ifa_maybe == NULL || 1994 rt_refines(ifa->ifa_netmask, 1995 ifa_maybe->ifa_netmask)) 1996 ifa_maybe = ifa; 1997 } 1998 } 1999 return ifa_maybe; 2000 } 2001 2002 struct ifaddr * 2003 ifa_ifwithnet_psref(const struct sockaddr *addr, struct psref *psref) 2004 { 2005 struct ifaddr *ifa; 2006 int s; 2007 2008 s = pserialize_read_enter(); 2009 ifa = ifa_ifwithnet(addr); 2010 if (ifa != NULL) 2011 ifa_acquire(ifa, psref); 2012 pserialize_read_exit(s); 2013 2014 return ifa; 2015 } 2016 2017 /* 2018 * Find the interface of the addresss. 2019 */ 2020 struct ifaddr * 2021 ifa_ifwithladdr(const struct sockaddr *addr) 2022 { 2023 struct ifaddr *ia; 2024 2025 if ((ia = ifa_ifwithaddr(addr)) || (ia = ifa_ifwithdstaddr(addr)) || 2026 (ia = ifa_ifwithnet(addr))) 2027 return ia; 2028 return NULL; 2029 } 2030 2031 struct ifaddr * 2032 ifa_ifwithladdr_psref(const struct sockaddr *addr, struct psref *psref) 2033 { 2034 struct ifaddr *ifa; 2035 int s; 2036 2037 s = pserialize_read_enter(); 2038 ifa = ifa_ifwithladdr(addr); 2039 if (ifa != NULL) 2040 ifa_acquire(ifa, psref); 2041 pserialize_read_exit(s); 2042 2043 return ifa; 2044 } 2045 2046 /* 2047 * Find an interface using a specific address family 2048 */ 2049 struct ifaddr * 2050 ifa_ifwithaf(int af) 2051 { 2052 struct ifnet *ifp; 2053 struct ifaddr *ifa = NULL; 2054 int s; 2055 2056 s = pserialize_read_enter(); 2057 IFNET_READER_FOREACH(ifp) { 2058 if (if_is_deactivated(ifp)) 2059 continue; 2060 IFADDR_READER_FOREACH(ifa, ifp) { 2061 if (ifa->ifa_addr->sa_family == af) 2062 goto out; 2063 } 2064 } 2065 out: 2066 pserialize_read_exit(s); 2067 return ifa; 2068 } 2069 2070 /* 2071 * Find an interface address specific to an interface best matching 2072 * a given address. 2073 */ 2074 struct ifaddr * 2075 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) 2076 { 2077 struct ifaddr *ifa; 2078 const char *cp, *cp2, *cp3; 2079 const char *cplim; 2080 struct ifaddr *ifa_maybe = 0; 2081 u_int af = addr->sa_family; 2082 2083 if (if_is_deactivated(ifp)) 2084 return NULL; 2085 2086 if (af >= AF_MAX) 2087 return NULL; 2088 2089 IFADDR_READER_FOREACH(ifa, ifp) { 2090 if (ifa->ifa_addr->sa_family != af) 2091 continue; 2092 ifa_maybe = ifa; 2093 if (ifa->ifa_netmask == NULL) { 2094 if (equal(addr, ifa->ifa_addr) || 2095 (ifa->ifa_dstaddr && 2096 equal(addr, ifa->ifa_dstaddr))) 2097 return ifa; 2098 continue; 2099 } 2100 cp = addr->sa_data; 2101 cp2 = ifa->ifa_addr->sa_data; 2102 cp3 = ifa->ifa_netmask->sa_data; 2103 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; 2104 for (; cp3 < cplim; cp3++) { 2105 if ((*cp++ ^ *cp2++) & *cp3) 2106 break; 2107 } 2108 if (cp3 == cplim) 2109 return ifa; 2110 } 2111 return ifa_maybe; 2112 } 2113 2114 struct ifaddr * 2115 ifaof_ifpforaddr_psref(const struct sockaddr *addr, struct ifnet *ifp, 2116 struct psref *psref) 2117 { 2118 struct ifaddr *ifa; 2119 int s; 2120 2121 s = pserialize_read_enter(); 2122 ifa = ifaof_ifpforaddr(addr, ifp); 2123 if (ifa != NULL) 2124 ifa_acquire(ifa, psref); 2125 pserialize_read_exit(s); 2126 2127 return ifa; 2128 } 2129 2130 /* 2131 * Default action when installing a route with a Link Level gateway. 2132 * Lookup an appropriate real ifa to point to. 2133 * This should be moved to /sys/net/link.c eventually. 2134 */ 2135 void 2136 link_rtrequest(int cmd, struct rtentry *rt, const struct rt_addrinfo *info) 2137 { 2138 struct ifaddr *ifa; 2139 const struct sockaddr *dst; 2140 struct ifnet *ifp; 2141 struct psref psref; 2142 2143 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL || 2144 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_getkey(rt)) == NULL) 2145 return; 2146 if ((ifa = ifaof_ifpforaddr_psref(dst, ifp, &psref)) != NULL) { 2147 rt_replace_ifa(rt, ifa); 2148 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) 2149 ifa->ifa_rtrequest(cmd, rt, info); 2150 ifa_release(ifa, &psref); 2151 } 2152 } 2153 2154 /* 2155 * bitmask macros to manage a densely packed link_state change queue. 2156 * Because we need to store LINK_STATE_UNKNOWN(0), LINK_STATE_DOWN(1) and 2157 * LINK_STATE_UP(2) we need 2 bits for each state change. 2158 * As a state change to store is 0, treat all bits set as an unset item. 2159 */ 2160 #define LQ_ITEM_BITS 2 2161 #define LQ_ITEM_MASK ((1 << LQ_ITEM_BITS) - 1) 2162 #define LQ_MASK(i) (LQ_ITEM_MASK << (i) * LQ_ITEM_BITS) 2163 #define LINK_STATE_UNSET LQ_ITEM_MASK 2164 #define LQ_ITEM(q, i) (((q) & LQ_MASK((i))) >> (i) * LQ_ITEM_BITS) 2165 #define LQ_STORE(q, i, v) \ 2166 do { \ 2167 (q) &= ~LQ_MASK((i)); \ 2168 (q) |= (v) << (i) * LQ_ITEM_BITS; \ 2169 } while (0 /* CONSTCOND */) 2170 #define LQ_MAX(q) ((sizeof((q)) * NBBY) / LQ_ITEM_BITS) 2171 #define LQ_POP(q, v) \ 2172 do { \ 2173 (v) = LQ_ITEM((q), 0); \ 2174 (q) >>= LQ_ITEM_BITS; \ 2175 (q) |= LINK_STATE_UNSET << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS; \ 2176 } while (0 /* CONSTCOND */) 2177 #define LQ_PUSH(q, v) \ 2178 do { \ 2179 (q) >>= LQ_ITEM_BITS; \ 2180 (q) |= (v) << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS; \ 2181 } while (0 /* CONSTCOND */) 2182 #define LQ_FIND_UNSET(q, i) \ 2183 for ((i) = 0; i < LQ_MAX((q)); (i)++) { \ 2184 if (LQ_ITEM((q), (i)) == LINK_STATE_UNSET) \ 2185 break; \ 2186 } 2187 /* 2188 * Handle a change in the interface link state and 2189 * queue notifications. 2190 */ 2191 void 2192 if_link_state_change(struct ifnet *ifp, int link_state) 2193 { 2194 int s, idx; 2195 2196 KASSERTMSG(if_is_link_state_changeable(ifp), 2197 "%s: IFEF_NO_LINK_STATE_CHANGE must not be set, but if_extflags=0x%x", 2198 ifp->if_xname, ifp->if_extflags); 2199 2200 /* Ensure change is to a valid state */ 2201 switch (link_state) { 2202 case LINK_STATE_UNKNOWN: /* FALLTHROUGH */ 2203 case LINK_STATE_DOWN: /* FALLTHROUGH */ 2204 case LINK_STATE_UP: 2205 break; 2206 default: 2207 #ifdef DEBUG 2208 printf("%s: invalid link state %d\n", 2209 ifp->if_xname, link_state); 2210 #endif 2211 return; 2212 } 2213 2214 s = splnet(); 2215 2216 /* Find the last unset event in the queue. */ 2217 LQ_FIND_UNSET(ifp->if_link_queue, idx); 2218 2219 /* 2220 * Ensure link_state doesn't match the last event in the queue. 2221 * ifp->if_link_state is not checked and set here because 2222 * that would present an inconsistent picture to the system. 2223 */ 2224 if (idx != 0 && 2225 LQ_ITEM(ifp->if_link_queue, idx - 1) == (uint8_t)link_state) 2226 goto out; 2227 2228 /* Handle queue overflow. */ 2229 if (idx == LQ_MAX(ifp->if_link_queue)) { 2230 uint8_t lost; 2231 2232 /* 2233 * The DOWN state must be protected from being pushed off 2234 * the queue to ensure that userland will always be 2235 * in a sane state. 2236 * Because DOWN is protected, there is no need to protect 2237 * UNKNOWN. 2238 * It should be invalid to change from any other state to 2239 * UNKNOWN anyway ... 2240 */ 2241 lost = LQ_ITEM(ifp->if_link_queue, 0); 2242 LQ_PUSH(ifp->if_link_queue, (uint8_t)link_state); 2243 if (lost == LINK_STATE_DOWN) { 2244 lost = LQ_ITEM(ifp->if_link_queue, 0); 2245 LQ_STORE(ifp->if_link_queue, 0, LINK_STATE_DOWN); 2246 } 2247 printf("%s: lost link state change %s\n", 2248 ifp->if_xname, 2249 lost == LINK_STATE_UP ? "UP" : 2250 lost == LINK_STATE_DOWN ? "DOWN" : 2251 "UNKNOWN"); 2252 } else 2253 LQ_STORE(ifp->if_link_queue, idx, (uint8_t)link_state); 2254 2255 softint_schedule(ifp->if_link_si); 2256 2257 out: 2258 splx(s); 2259 } 2260 2261 /* 2262 * Handle interface link state change notifications. 2263 */ 2264 void 2265 if_link_state_change_softint(struct ifnet *ifp, int link_state) 2266 { 2267 struct domain *dp; 2268 int s = splnet(); 2269 2270 KASSERT(!cpu_intr_p()); 2271 2272 /* Ensure the change is still valid. */ 2273 if (ifp->if_link_state == link_state) { 2274 splx(s); 2275 return; 2276 } 2277 2278 #ifdef DEBUG 2279 log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname, 2280 link_state == LINK_STATE_UP ? "UP" : 2281 link_state == LINK_STATE_DOWN ? "DOWN" : 2282 "UNKNOWN", 2283 ifp->if_link_state == LINK_STATE_UP ? "UP" : 2284 ifp->if_link_state == LINK_STATE_DOWN ? "DOWN" : 2285 "UNKNOWN"); 2286 #endif 2287 2288 /* 2289 * When going from UNKNOWN to UP, we need to mark existing 2290 * addresses as tentative and restart DAD as we may have 2291 * erroneously not found a duplicate. 2292 * 2293 * This needs to happen before rt_ifmsg to avoid a race where 2294 * listeners would have an address and expect it to work right 2295 * away. 2296 */ 2297 if (link_state == LINK_STATE_UP && 2298 ifp->if_link_state == LINK_STATE_UNKNOWN) 2299 { 2300 DOMAIN_FOREACH(dp) { 2301 if (dp->dom_if_link_state_change != NULL) 2302 dp->dom_if_link_state_change(ifp, 2303 LINK_STATE_DOWN); 2304 } 2305 } 2306 2307 ifp->if_link_state = link_state; 2308 2309 /* Notify that the link state has changed. */ 2310 rt_ifmsg(ifp); 2311 2312 #if NCARP > 0 2313 if (ifp->if_carp) 2314 carp_carpdev_state(ifp); 2315 #endif 2316 2317 DOMAIN_FOREACH(dp) { 2318 if (dp->dom_if_link_state_change != NULL) 2319 dp->dom_if_link_state_change(ifp, link_state); 2320 } 2321 splx(s); 2322 } 2323 2324 /* 2325 * Process the interface link state change queue. 2326 */ 2327 static void 2328 if_link_state_change_si(void *arg) 2329 { 2330 struct ifnet *ifp = arg; 2331 int s; 2332 uint8_t state; 2333 2334 SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE(); 2335 s = splnet(); 2336 2337 /* Pop a link state change from the queue and process it. */ 2338 LQ_POP(ifp->if_link_queue, state); 2339 if_link_state_change_softint(ifp, state); 2340 2341 /* If there is a link state change to come, schedule it. */ 2342 if (LQ_ITEM(ifp->if_link_queue, 0) != LINK_STATE_UNSET) 2343 softint_schedule(ifp->if_link_si); 2344 2345 splx(s); 2346 SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 2347 } 2348 2349 /* 2350 * Default action when installing a local route on a point-to-point 2351 * interface. 2352 */ 2353 void 2354 p2p_rtrequest(int req, struct rtentry *rt, 2355 __unused const struct rt_addrinfo *info) 2356 { 2357 struct ifnet *ifp = rt->rt_ifp; 2358 struct ifaddr *ifa, *lo0ifa; 2359 int s = pserialize_read_enter(); 2360 2361 switch (req) { 2362 case RTM_ADD: 2363 if ((rt->rt_flags & RTF_LOCAL) == 0) 2364 break; 2365 2366 rt->rt_ifp = lo0ifp; 2367 2368 IFADDR_READER_FOREACH(ifa, ifp) { 2369 if (equal(rt_getkey(rt), ifa->ifa_addr)) 2370 break; 2371 } 2372 if (ifa == NULL) 2373 break; 2374 2375 /* 2376 * Ensure lo0 has an address of the same family. 2377 */ 2378 IFADDR_READER_FOREACH(lo0ifa, lo0ifp) { 2379 if (lo0ifa->ifa_addr->sa_family == 2380 ifa->ifa_addr->sa_family) 2381 break; 2382 } 2383 if (lo0ifa == NULL) 2384 break; 2385 2386 /* 2387 * Make sure to set rt->rt_ifa to the interface 2388 * address we are using, otherwise we will have trouble 2389 * with source address selection. 2390 */ 2391 if (ifa != rt->rt_ifa) 2392 rt_replace_ifa(rt, ifa); 2393 break; 2394 case RTM_DELETE: 2395 default: 2396 break; 2397 } 2398 pserialize_read_exit(s); 2399 } 2400 2401 /* 2402 * Mark an interface down and notify protocols of 2403 * the transition. 2404 * NOTE: must be called at splsoftnet or equivalent. 2405 */ 2406 void 2407 if_down(struct ifnet *ifp) 2408 { 2409 struct ifaddr *ifa; 2410 struct domain *dp; 2411 int s, bound; 2412 struct psref psref; 2413 2414 ifp->if_flags &= ~IFF_UP; 2415 nanotime(&ifp->if_lastchange); 2416 2417 bound = curlwp_bind(); 2418 s = pserialize_read_enter(); 2419 IFADDR_READER_FOREACH(ifa, ifp) { 2420 ifa_acquire(ifa, &psref); 2421 pserialize_read_exit(s); 2422 2423 pfctlinput(PRC_IFDOWN, ifa->ifa_addr); 2424 2425 s = pserialize_read_enter(); 2426 ifa_release(ifa, &psref); 2427 } 2428 pserialize_read_exit(s); 2429 curlwp_bindx(bound); 2430 2431 IFQ_PURGE(&ifp->if_snd); 2432 #if NCARP > 0 2433 if (ifp->if_carp) 2434 carp_carpdev_state(ifp); 2435 #endif 2436 rt_ifmsg(ifp); 2437 DOMAIN_FOREACH(dp) { 2438 if (dp->dom_if_down) 2439 dp->dom_if_down(ifp); 2440 } 2441 } 2442 2443 /* 2444 * Mark an interface up and notify protocols of 2445 * the transition. 2446 * NOTE: must be called at splsoftnet or equivalent. 2447 */ 2448 void 2449 if_up(struct ifnet *ifp) 2450 { 2451 #ifdef notyet 2452 struct ifaddr *ifa; 2453 #endif 2454 struct domain *dp; 2455 2456 ifp->if_flags |= IFF_UP; 2457 nanotime(&ifp->if_lastchange); 2458 #ifdef notyet 2459 /* this has no effect on IP, and will kill all ISO connections XXX */ 2460 IFADDR_READER_FOREACH(ifa, ifp) 2461 pfctlinput(PRC_IFUP, ifa->ifa_addr); 2462 #endif 2463 #if NCARP > 0 2464 if (ifp->if_carp) 2465 carp_carpdev_state(ifp); 2466 #endif 2467 rt_ifmsg(ifp); 2468 DOMAIN_FOREACH(dp) { 2469 if (dp->dom_if_up) 2470 dp->dom_if_up(ifp); 2471 } 2472 } 2473 2474 /* 2475 * Handle interface slowtimo timer routine. Called 2476 * from softclock, we decrement timer (if set) and 2477 * call the appropriate interface routine on expiration. 2478 */ 2479 static void 2480 if_slowtimo(void *arg) 2481 { 2482 void (*slowtimo)(struct ifnet *); 2483 struct ifnet *ifp = arg; 2484 int s; 2485 2486 slowtimo = ifp->if_slowtimo; 2487 if (__predict_false(slowtimo == NULL)) 2488 return; 2489 2490 s = splnet(); 2491 if (ifp->if_timer != 0 && --ifp->if_timer == 0) 2492 (*slowtimo)(ifp); 2493 2494 splx(s); 2495 2496 if (__predict_true(ifp->if_slowtimo != NULL)) 2497 callout_schedule(ifp->if_slowtimo_ch, hz / IFNET_SLOWHZ); 2498 } 2499 2500 /* 2501 * Set/clear promiscuous mode on interface ifp based on the truth value 2502 * of pswitch. The calls are reference counted so that only the first 2503 * "on" request actually has an effect, as does the final "off" request. 2504 * Results are undefined if the "off" and "on" requests are not matched. 2505 */ 2506 int 2507 ifpromisc(struct ifnet *ifp, int pswitch) 2508 { 2509 int pcount, ret; 2510 short nflags; 2511 2512 pcount = ifp->if_pcount; 2513 if (pswitch) { 2514 /* 2515 * Allow the device to be "placed" into promiscuous 2516 * mode even if it is not configured up. It will 2517 * consult IFF_PROMISC when it is brought up. 2518 */ 2519 if (ifp->if_pcount++ != 0) 2520 return 0; 2521 nflags = ifp->if_flags | IFF_PROMISC; 2522 } else { 2523 if (--ifp->if_pcount > 0) 2524 return 0; 2525 nflags = ifp->if_flags & ~IFF_PROMISC; 2526 } 2527 ret = if_flags_set(ifp, nflags); 2528 /* Restore interface state if not successful. */ 2529 if (ret != 0) { 2530 ifp->if_pcount = pcount; 2531 } 2532 return ret; 2533 } 2534 2535 /* 2536 * Map interface name to 2537 * interface structure pointer. 2538 */ 2539 struct ifnet * 2540 ifunit(const char *name) 2541 { 2542 struct ifnet *ifp; 2543 const char *cp = name; 2544 u_int unit = 0; 2545 u_int i; 2546 int s; 2547 2548 /* 2549 * If the entire name is a number, treat it as an ifindex. 2550 */ 2551 for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++) { 2552 unit = unit * 10 + (*cp - '0'); 2553 } 2554 2555 /* 2556 * If the number took all of the name, then it's a valid ifindex. 2557 */ 2558 if (i == IFNAMSIZ || (cp != name && *cp == '\0')) 2559 return if_byindex(unit); 2560 2561 ifp = NULL; 2562 s = pserialize_read_enter(); 2563 IFNET_READER_FOREACH(ifp) { 2564 if (if_is_deactivated(ifp)) 2565 continue; 2566 if (strcmp(ifp->if_xname, name) == 0) 2567 goto out; 2568 } 2569 out: 2570 pserialize_read_exit(s); 2571 return ifp; 2572 } 2573 2574 /* 2575 * Get a reference of an ifnet object by an interface name. 2576 * The returned reference is protected by psref(9). The caller 2577 * must release a returned reference by if_put after use. 2578 */ 2579 struct ifnet * 2580 if_get(const char *name, struct psref *psref) 2581 { 2582 struct ifnet *ifp; 2583 const char *cp = name; 2584 u_int unit = 0; 2585 u_int i; 2586 int s; 2587 2588 /* 2589 * If the entire name is a number, treat it as an ifindex. 2590 */ 2591 for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++) { 2592 unit = unit * 10 + (*cp - '0'); 2593 } 2594 2595 /* 2596 * If the number took all of the name, then it's a valid ifindex. 2597 */ 2598 if (i == IFNAMSIZ || (cp != name && *cp == '\0')) 2599 return if_get_byindex(unit, psref); 2600 2601 ifp = NULL; 2602 s = pserialize_read_enter(); 2603 IFNET_READER_FOREACH(ifp) { 2604 if (if_is_deactivated(ifp)) 2605 continue; 2606 if (strcmp(ifp->if_xname, name) == 0) { 2607 psref_acquire(psref, &ifp->if_psref, 2608 ifnet_psref_class); 2609 goto out; 2610 } 2611 } 2612 out: 2613 pserialize_read_exit(s); 2614 return ifp; 2615 } 2616 2617 /* 2618 * Release a reference of an ifnet object given by if_get, if_get_byindex 2619 * or if_get_bylla. 2620 */ 2621 void 2622 if_put(const struct ifnet *ifp, struct psref *psref) 2623 { 2624 2625 if (ifp == NULL) 2626 return; 2627 2628 psref_release(psref, &ifp->if_psref, ifnet_psref_class); 2629 } 2630 2631 ifnet_t * 2632 if_byindex(u_int idx) 2633 { 2634 ifnet_t *ifp; 2635 2636 ifp = (__predict_true(idx < if_indexlim)) ? ifindex2ifnet[idx] : NULL; 2637 if (ifp != NULL && if_is_deactivated(ifp)) 2638 ifp = NULL; 2639 return ifp; 2640 } 2641 2642 /* 2643 * Get a reference of an ifnet object by an interface index. 2644 * The returned reference is protected by psref(9). The caller 2645 * must release a returned reference by if_put after use. 2646 */ 2647 ifnet_t * 2648 if_get_byindex(u_int idx, struct psref *psref) 2649 { 2650 ifnet_t *ifp; 2651 int s; 2652 2653 s = pserialize_read_enter(); 2654 ifp = if_byindex(idx); 2655 if (__predict_true(ifp != NULL)) 2656 psref_acquire(psref, &ifp->if_psref, ifnet_psref_class); 2657 pserialize_read_exit(s); 2658 2659 return ifp; 2660 } 2661 2662 ifnet_t * 2663 if_get_bylla(const void *lla, unsigned char lla_len, struct psref *psref) 2664 { 2665 ifnet_t *ifp; 2666 int s; 2667 2668 s = pserialize_read_enter(); 2669 IFNET_READER_FOREACH(ifp) { 2670 if (if_is_deactivated(ifp)) 2671 continue; 2672 if (ifp->if_addrlen != lla_len) 2673 continue; 2674 if (memcmp(lla, CLLADDR(ifp->if_sadl), lla_len) == 0) { 2675 psref_acquire(psref, &ifp->if_psref, 2676 ifnet_psref_class); 2677 break; 2678 } 2679 } 2680 pserialize_read_exit(s); 2681 2682 return ifp; 2683 } 2684 2685 /* 2686 * Note that it's safe only if the passed ifp is guaranteed to not be freed, 2687 * for example using pserialize or the ifp is already held or some other 2688 * object is held which guarantes the ifp to not be freed indirectly. 2689 */ 2690 void 2691 if_acquire(struct ifnet *ifp, struct psref *psref) 2692 { 2693 2694 KASSERT(ifp->if_index != 0); 2695 psref_acquire(psref, &ifp->if_psref, ifnet_psref_class); 2696 } 2697 2698 bool 2699 if_held(struct ifnet *ifp) 2700 { 2701 2702 return psref_held(&ifp->if_psref, ifnet_psref_class); 2703 } 2704 2705 2706 /* common */ 2707 int 2708 ifioctl_common(struct ifnet *ifp, u_long cmd, void *data) 2709 { 2710 int s; 2711 struct ifreq *ifr; 2712 struct ifcapreq *ifcr; 2713 struct ifdatareq *ifdr; 2714 2715 switch (cmd) { 2716 case SIOCSIFCAP: 2717 ifcr = data; 2718 if ((ifcr->ifcr_capenable & ~ifp->if_capabilities) != 0) 2719 return EINVAL; 2720 2721 if (ifcr->ifcr_capenable == ifp->if_capenable) 2722 return 0; 2723 2724 ifp->if_capenable = ifcr->ifcr_capenable; 2725 2726 /* Pre-compute the checksum flags mask. */ 2727 ifp->if_csum_flags_tx = 0; 2728 ifp->if_csum_flags_rx = 0; 2729 if (ifp->if_capenable & IFCAP_CSUM_IPv4_Tx) { 2730 ifp->if_csum_flags_tx |= M_CSUM_IPv4; 2731 } 2732 if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx) { 2733 ifp->if_csum_flags_rx |= M_CSUM_IPv4; 2734 } 2735 2736 if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Tx) { 2737 ifp->if_csum_flags_tx |= M_CSUM_TCPv4; 2738 } 2739 if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Rx) { 2740 ifp->if_csum_flags_rx |= M_CSUM_TCPv4; 2741 } 2742 2743 if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Tx) { 2744 ifp->if_csum_flags_tx |= M_CSUM_UDPv4; 2745 } 2746 if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Rx) { 2747 ifp->if_csum_flags_rx |= M_CSUM_UDPv4; 2748 } 2749 2750 if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Tx) { 2751 ifp->if_csum_flags_tx |= M_CSUM_TCPv6; 2752 } 2753 if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Rx) { 2754 ifp->if_csum_flags_rx |= M_CSUM_TCPv6; 2755 } 2756 2757 if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Tx) { 2758 ifp->if_csum_flags_tx |= M_CSUM_UDPv6; 2759 } 2760 if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Rx) { 2761 ifp->if_csum_flags_rx |= M_CSUM_UDPv6; 2762 } 2763 if (ifp->if_flags & IFF_UP) 2764 return ENETRESET; 2765 return 0; 2766 case SIOCSIFFLAGS: 2767 ifr = data; 2768 if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) { 2769 s = splsoftnet(); 2770 if_down(ifp); 2771 splx(s); 2772 } 2773 if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { 2774 s = splsoftnet(); 2775 if_up(ifp); 2776 splx(s); 2777 } 2778 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | 2779 (ifr->ifr_flags &~ IFF_CANTCHANGE); 2780 break; 2781 case SIOCGIFFLAGS: 2782 ifr = data; 2783 ifr->ifr_flags = ifp->if_flags; 2784 break; 2785 2786 case SIOCGIFMETRIC: 2787 ifr = data; 2788 ifr->ifr_metric = ifp->if_metric; 2789 break; 2790 2791 case SIOCGIFMTU: 2792 ifr = data; 2793 ifr->ifr_mtu = ifp->if_mtu; 2794 break; 2795 2796 case SIOCGIFDLT: 2797 ifr = data; 2798 ifr->ifr_dlt = ifp->if_dlt; 2799 break; 2800 2801 case SIOCGIFCAP: 2802 ifcr = data; 2803 ifcr->ifcr_capabilities = ifp->if_capabilities; 2804 ifcr->ifcr_capenable = ifp->if_capenable; 2805 break; 2806 2807 case SIOCSIFMETRIC: 2808 ifr = data; 2809 ifp->if_metric = ifr->ifr_metric; 2810 break; 2811 2812 case SIOCGIFDATA: 2813 ifdr = data; 2814 ifdr->ifdr_data = ifp->if_data; 2815 break; 2816 2817 case SIOCGIFINDEX: 2818 ifr = data; 2819 ifr->ifr_index = ifp->if_index; 2820 break; 2821 2822 case SIOCZIFDATA: 2823 ifdr = data; 2824 ifdr->ifdr_data = ifp->if_data; 2825 /* 2826 * Assumes that the volatile counters that can be 2827 * zero'ed are at the end of if_data. 2828 */ 2829 memset(&ifp->if_data.ifi_ipackets, 0, sizeof(ifp->if_data) - 2830 offsetof(struct if_data, ifi_ipackets)); 2831 /* 2832 * The memset() clears to the bottm of if_data. In the area, 2833 * if_lastchange is included. Please be careful if new entry 2834 * will be added into if_data or rewite this. 2835 * 2836 * And also, update if_lastchnage. 2837 */ 2838 getnanotime(&ifp->if_lastchange); 2839 break; 2840 case SIOCSIFMTU: 2841 ifr = data; 2842 if (ifp->if_mtu == ifr->ifr_mtu) 2843 break; 2844 ifp->if_mtu = ifr->ifr_mtu; 2845 /* 2846 * If the link MTU changed, do network layer specific procedure. 2847 */ 2848 #ifdef INET6 2849 if (in6_present) 2850 nd6_setmtu(ifp); 2851 #endif 2852 return ENETRESET; 2853 default: 2854 return ENOTTY; 2855 } 2856 return 0; 2857 } 2858 2859 int 2860 ifaddrpref_ioctl(struct socket *so, u_long cmd, void *data, struct ifnet *ifp) 2861 { 2862 struct if_addrprefreq *ifap = (struct if_addrprefreq *)data; 2863 struct ifaddr *ifa; 2864 const struct sockaddr *any, *sa; 2865 union { 2866 struct sockaddr sa; 2867 struct sockaddr_storage ss; 2868 } u, v; 2869 int s, error = 0; 2870 2871 switch (cmd) { 2872 case SIOCSIFADDRPREF: 2873 if (kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_INTERFACE, 2874 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2875 NULL) != 0) 2876 return EPERM; 2877 case SIOCGIFADDRPREF: 2878 break; 2879 default: 2880 return EOPNOTSUPP; 2881 } 2882 2883 /* sanity checks */ 2884 if (data == NULL || ifp == NULL) { 2885 panic("invalid argument to %s", __func__); 2886 /*NOTREACHED*/ 2887 } 2888 2889 /* address must be specified on ADD and DELETE */ 2890 sa = sstocsa(&ifap->ifap_addr); 2891 if (sa->sa_family != sofamily(so)) 2892 return EINVAL; 2893 if ((any = sockaddr_any(sa)) == NULL || sa->sa_len != any->sa_len) 2894 return EINVAL; 2895 2896 sockaddr_externalize(&v.sa, sizeof(v.ss), sa); 2897 2898 s = pserialize_read_enter(); 2899 IFADDR_READER_FOREACH(ifa, ifp) { 2900 if (ifa->ifa_addr->sa_family != sa->sa_family) 2901 continue; 2902 sockaddr_externalize(&u.sa, sizeof(u.ss), ifa->ifa_addr); 2903 if (sockaddr_cmp(&u.sa, &v.sa) == 0) 2904 break; 2905 } 2906 if (ifa == NULL) { 2907 error = EADDRNOTAVAIL; 2908 goto out; 2909 } 2910 2911 switch (cmd) { 2912 case SIOCSIFADDRPREF: 2913 ifa->ifa_preference = ifap->ifap_preference; 2914 goto out; 2915 case SIOCGIFADDRPREF: 2916 /* fill in the if_laddrreq structure */ 2917 (void)sockaddr_copy(sstosa(&ifap->ifap_addr), 2918 sizeof(ifap->ifap_addr), ifa->ifa_addr); 2919 ifap->ifap_preference = ifa->ifa_preference; 2920 goto out; 2921 default: 2922 error = EOPNOTSUPP; 2923 } 2924 out: 2925 pserialize_read_exit(s); 2926 return error; 2927 } 2928 2929 /* 2930 * Interface ioctls. 2931 */ 2932 static int 2933 doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l) 2934 { 2935 struct ifnet *ifp; 2936 struct ifreq *ifr; 2937 int error = 0; 2938 #if defined(COMPAT_OSOCK) || defined(COMPAT_OIFREQ) 2939 u_long ocmd = cmd; 2940 #endif 2941 short oif_flags; 2942 #ifdef COMPAT_OIFREQ 2943 struct ifreq ifrb; 2944 struct oifreq *oifr = NULL; 2945 #endif 2946 int r; 2947 struct psref psref; 2948 int bound; 2949 2950 switch (cmd) { 2951 #ifdef COMPAT_OIFREQ 2952 case OSIOCGIFCONF: 2953 case OOSIOCGIFCONF: 2954 return compat_ifconf(cmd, data); 2955 #endif 2956 #ifdef COMPAT_OIFDATA 2957 case OSIOCGIFDATA: 2958 case OSIOCZIFDATA: 2959 return compat_ifdatareq(l, cmd, data); 2960 #endif 2961 case SIOCGIFCONF: 2962 return ifconf(cmd, data); 2963 case SIOCINITIFADDR: 2964 return EPERM; 2965 } 2966 2967 #ifdef COMPAT_OIFREQ 2968 cmd = (*vec_compat_cvtcmd)(cmd); 2969 if (cmd != ocmd) { 2970 oifr = data; 2971 data = ifr = &ifrb; 2972 ifreqo2n(oifr, ifr); 2973 } else 2974 #endif 2975 ifr = data; 2976 2977 switch (cmd) { 2978 case SIOCIFCREATE: 2979 case SIOCIFDESTROY: 2980 bound = curlwp_bind(); 2981 if (l != NULL) { 2982 ifp = if_get(ifr->ifr_name, &psref); 2983 error = kauth_authorize_network(l->l_cred, 2984 KAUTH_NETWORK_INTERFACE, 2985 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, 2986 (void *)cmd, NULL); 2987 if (ifp != NULL) 2988 if_put(ifp, &psref); 2989 if (error != 0) { 2990 curlwp_bindx(bound); 2991 return error; 2992 } 2993 } 2994 mutex_enter(&if_clone_mtx); 2995 r = (cmd == SIOCIFCREATE) ? 2996 if_clone_create(ifr->ifr_name) : 2997 if_clone_destroy(ifr->ifr_name); 2998 mutex_exit(&if_clone_mtx); 2999 curlwp_bindx(bound); 3000 return r; 3001 3002 case SIOCIFGCLONERS: 3003 { 3004 struct if_clonereq *req = (struct if_clonereq *)data; 3005 return if_clone_list(req->ifcr_count, req->ifcr_buffer, 3006 &req->ifcr_total); 3007 } 3008 } 3009 3010 bound = curlwp_bind(); 3011 ifp = if_get(ifr->ifr_name, &psref); 3012 if (ifp == NULL) { 3013 curlwp_bindx(bound); 3014 return ENXIO; 3015 } 3016 3017 switch (cmd) { 3018 case SIOCALIFADDR: 3019 case SIOCDLIFADDR: 3020 case SIOCSIFADDRPREF: 3021 case SIOCSIFFLAGS: 3022 case SIOCSIFCAP: 3023 case SIOCSIFMETRIC: 3024 case SIOCZIFDATA: 3025 case SIOCSIFMTU: 3026 case SIOCSIFPHYADDR: 3027 case SIOCDIFPHYADDR: 3028 #ifdef INET6 3029 case SIOCSIFPHYADDR_IN6: 3030 #endif 3031 case SIOCSLIFPHYADDR: 3032 case SIOCADDMULTI: 3033 case SIOCDELMULTI: 3034 case SIOCSIFMEDIA: 3035 case SIOCSDRVSPEC: 3036 case SIOCG80211: 3037 case SIOCS80211: 3038 case SIOCS80211NWID: 3039 case SIOCS80211NWKEY: 3040 case SIOCS80211POWER: 3041 case SIOCS80211BSSID: 3042 case SIOCS80211CHANNEL: 3043 case SIOCSLINKSTR: 3044 if (l != NULL) { 3045 error = kauth_authorize_network(l->l_cred, 3046 KAUTH_NETWORK_INTERFACE, 3047 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, 3048 (void *)cmd, NULL); 3049 if (error != 0) 3050 goto out; 3051 } 3052 } 3053 3054 oif_flags = ifp->if_flags; 3055 3056 mutex_enter(ifp->if_ioctl_lock); 3057 3058 error = (*ifp->if_ioctl)(ifp, cmd, data); 3059 if (error != ENOTTY) 3060 ; 3061 else if (so->so_proto == NULL) 3062 error = EOPNOTSUPP; 3063 else { 3064 #ifdef COMPAT_OSOCK 3065 if (vec_compat_ifioctl != NULL) 3066 error = (*vec_compat_ifioctl)(so, ocmd, cmd, data, l); 3067 else 3068 #endif 3069 error = (*so->so_proto->pr_usrreqs->pr_ioctl)(so, 3070 cmd, data, ifp); 3071 } 3072 3073 if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) { 3074 if ((ifp->if_flags & IFF_UP) != 0) { 3075 int s = splsoftnet(); 3076 if_up(ifp); 3077 splx(s); 3078 } 3079 } 3080 #ifdef COMPAT_OIFREQ 3081 if (cmd != ocmd) 3082 ifreqn2o(oifr, ifr); 3083 #endif 3084 3085 mutex_exit(ifp->if_ioctl_lock); 3086 out: 3087 if_put(ifp, &psref); 3088 curlwp_bindx(bound); 3089 return error; 3090 } 3091 3092 /* 3093 * Return interface configuration 3094 * of system. List may be used 3095 * in later ioctl's (above) to get 3096 * other information. 3097 * 3098 * Each record is a struct ifreq. Before the addition of 3099 * sockaddr_storage, the API rule was that sockaddr flavors that did 3100 * not fit would extend beyond the struct ifreq, with the next struct 3101 * ifreq starting sa_len beyond the struct sockaddr. Because the 3102 * union in struct ifreq includes struct sockaddr_storage, every kind 3103 * of sockaddr must fit. Thus, there are no longer any overlength 3104 * records. 3105 * 3106 * Records are added to the user buffer if they fit, and ifc_len is 3107 * adjusted to the length that was written. Thus, the user is only 3108 * assured of getting the complete list if ifc_len on return is at 3109 * least sizeof(struct ifreq) less than it was on entry. 3110 * 3111 * If the user buffer pointer is NULL, this routine copies no data and 3112 * returns the amount of space that would be needed. 3113 * 3114 * Invariants: 3115 * ifrp points to the next part of the user's buffer to be used. If 3116 * ifrp != NULL, space holds the number of bytes remaining that we may 3117 * write at ifrp. Otherwise, space holds the number of bytes that 3118 * would have been written had there been adequate space. 3119 */ 3120 /*ARGSUSED*/ 3121 static int 3122 ifconf(u_long cmd, void *data) 3123 { 3124 struct ifconf *ifc = (struct ifconf *)data; 3125 struct ifnet *ifp; 3126 struct ifaddr *ifa; 3127 struct ifreq ifr, *ifrp = NULL; 3128 int space = 0, error = 0; 3129 const int sz = (int)sizeof(struct ifreq); 3130 const bool docopy = ifc->ifc_req != NULL; 3131 int s; 3132 int bound; 3133 struct psref psref; 3134 3135 if (docopy) { 3136 space = ifc->ifc_len; 3137 ifrp = ifc->ifc_req; 3138 } 3139 3140 bound = curlwp_bind(); 3141 s = pserialize_read_enter(); 3142 IFNET_READER_FOREACH(ifp) { 3143 psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class); 3144 pserialize_read_exit(s); 3145 3146 (void)strncpy(ifr.ifr_name, ifp->if_xname, 3147 sizeof(ifr.ifr_name)); 3148 if (ifr.ifr_name[sizeof(ifr.ifr_name) - 1] != '\0') { 3149 error = ENAMETOOLONG; 3150 goto release_exit; 3151 } 3152 if (IFADDR_READER_EMPTY(ifp)) { 3153 /* Interface with no addresses - send zero sockaddr. */ 3154 memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr)); 3155 if (!docopy) { 3156 space += sz; 3157 continue; 3158 } 3159 if (space >= sz) { 3160 error = copyout(&ifr, ifrp, sz); 3161 if (error != 0) 3162 goto release_exit; 3163 ifrp++; 3164 space -= sz; 3165 } 3166 } 3167 3168 IFADDR_READER_FOREACH(ifa, ifp) { 3169 struct sockaddr *sa = ifa->ifa_addr; 3170 /* all sockaddrs must fit in sockaddr_storage */ 3171 KASSERT(sa->sa_len <= sizeof(ifr.ifr_ifru)); 3172 3173 if (!docopy) { 3174 space += sz; 3175 continue; 3176 } 3177 memcpy(&ifr.ifr_space, sa, sa->sa_len); 3178 if (space >= sz) { 3179 error = copyout(&ifr, ifrp, sz); 3180 if (error != 0) 3181 goto release_exit; 3182 ifrp++; space -= sz; 3183 } 3184 } 3185 3186 s = pserialize_read_enter(); 3187 psref_release(&psref, &ifp->if_psref, ifnet_psref_class); 3188 } 3189 pserialize_read_exit(s); 3190 curlwp_bindx(bound); 3191 3192 if (docopy) { 3193 KASSERT(0 <= space && space <= ifc->ifc_len); 3194 ifc->ifc_len -= space; 3195 } else { 3196 KASSERT(space >= 0); 3197 ifc->ifc_len = space; 3198 } 3199 return (0); 3200 3201 release_exit: 3202 psref_release(&psref, &ifp->if_psref, ifnet_psref_class); 3203 curlwp_bindx(bound); 3204 return error; 3205 } 3206 3207 int 3208 ifreq_setaddr(u_long cmd, struct ifreq *ifr, const struct sockaddr *sa) 3209 { 3210 uint8_t len; 3211 #ifdef COMPAT_OIFREQ 3212 struct ifreq ifrb; 3213 struct oifreq *oifr = NULL; 3214 u_long ocmd = cmd; 3215 cmd = (*vec_compat_cvtcmd)(cmd); 3216 if (cmd != ocmd) { 3217 oifr = (struct oifreq *)(void *)ifr; 3218 ifr = &ifrb; 3219 ifreqo2n(oifr, ifr); 3220 len = sizeof(oifr->ifr_addr); 3221 } else 3222 #endif 3223 len = sizeof(ifr->ifr_ifru.ifru_space); 3224 3225 if (len < sa->sa_len) 3226 return EFBIG; 3227 3228 memset(&ifr->ifr_addr, 0, len); 3229 sockaddr_copy(&ifr->ifr_addr, len, sa); 3230 3231 #ifdef COMPAT_OIFREQ 3232 if (cmd != ocmd) 3233 ifreqn2o(oifr, ifr); 3234 #endif 3235 return 0; 3236 } 3237 3238 /* 3239 * wrapper function for the drivers which doesn't have if_transmit(). 3240 */ 3241 static int 3242 if_transmit(struct ifnet *ifp, struct mbuf *m) 3243 { 3244 int s, error; 3245 size_t pktlen = m->m_pkthdr.len; 3246 bool mcast = (m->m_flags & M_MCAST) != 0; 3247 3248 s = splnet(); 3249 3250 IFQ_ENQUEUE(&ifp->if_snd, m, error); 3251 if (error != 0) { 3252 /* mbuf is already freed */ 3253 goto out; 3254 } 3255 3256 ifp->if_obytes += pktlen; 3257 if (mcast) 3258 ifp->if_omcasts++; 3259 3260 if ((ifp->if_flags & IFF_OACTIVE) == 0) 3261 if_start_lock(ifp); 3262 out: 3263 splx(s); 3264 3265 return error; 3266 } 3267 3268 int 3269 if_transmit_lock(struct ifnet *ifp, struct mbuf *m) 3270 { 3271 int error; 3272 3273 #ifdef ALTQ 3274 KERNEL_LOCK(1, NULL); 3275 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 3276 error = if_transmit(ifp, m); 3277 KERNEL_UNLOCK_ONE(NULL); 3278 } else { 3279 KERNEL_UNLOCK_ONE(NULL); 3280 error = (*ifp->if_transmit)(ifp, m); 3281 /* mbuf is alredy freed */ 3282 } 3283 #else /* !ALTQ */ 3284 error = (*ifp->if_transmit)(ifp, m); 3285 /* mbuf is alredy freed */ 3286 #endif /* !ALTQ */ 3287 3288 return error; 3289 } 3290 3291 /* 3292 * Queue message on interface, and start output if interface 3293 * not yet active. 3294 */ 3295 int 3296 ifq_enqueue(struct ifnet *ifp, struct mbuf *m) 3297 { 3298 3299 return if_transmit_lock(ifp, m); 3300 } 3301 3302 /* 3303 * Queue message on interface, possibly using a second fast queue 3304 */ 3305 int 3306 ifq_enqueue2(struct ifnet *ifp, struct ifqueue *ifq, struct mbuf *m) 3307 { 3308 int error = 0; 3309 3310 if (ifq != NULL 3311 #ifdef ALTQ 3312 && ALTQ_IS_ENABLED(&ifp->if_snd) == 0 3313 #endif 3314 ) { 3315 if (IF_QFULL(ifq)) { 3316 IF_DROP(&ifp->if_snd); 3317 m_freem(m); 3318 if (error == 0) 3319 error = ENOBUFS; 3320 } else 3321 IF_ENQUEUE(ifq, m); 3322 } else 3323 IFQ_ENQUEUE(&ifp->if_snd, m, error); 3324 if (error != 0) { 3325 ++ifp->if_oerrors; 3326 return error; 3327 } 3328 return 0; 3329 } 3330 3331 int 3332 if_addr_init(ifnet_t *ifp, struct ifaddr *ifa, const bool src) 3333 { 3334 int rc; 3335 3336 if (ifp->if_initaddr != NULL) 3337 rc = (*ifp->if_initaddr)(ifp, ifa, src); 3338 else if (src || 3339 /* FIXME: may not hold if_ioctl_lock */ 3340 (rc = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR, ifa)) == ENOTTY) 3341 rc = (*ifp->if_ioctl)(ifp, SIOCINITIFADDR, ifa); 3342 3343 return rc; 3344 } 3345 3346 int 3347 if_do_dad(struct ifnet *ifp) 3348 { 3349 if ((ifp->if_flags & IFF_LOOPBACK) != 0) 3350 return 0; 3351 3352 switch (ifp->if_type) { 3353 case IFT_FAITH: 3354 /* 3355 * These interfaces do not have the IFF_LOOPBACK flag, 3356 * but loop packets back. We do not have to do DAD on such 3357 * interfaces. We should even omit it, because loop-backed 3358 * responses would confuse the DAD procedure. 3359 */ 3360 return 0; 3361 default: 3362 /* 3363 * Our DAD routine requires the interface up and running. 3364 * However, some interfaces can be up before the RUNNING 3365 * status. Additionaly, users may try to assign addresses 3366 * before the interface becomes up (or running). 3367 * We simply skip DAD in such a case as a work around. 3368 * XXX: we should rather mark "tentative" on such addresses, 3369 * and do DAD after the interface becomes ready. 3370 */ 3371 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != 3372 (IFF_UP|IFF_RUNNING)) 3373 return 0; 3374 3375 return 1; 3376 } 3377 } 3378 3379 int 3380 if_flags_set(ifnet_t *ifp, const short flags) 3381 { 3382 int rc; 3383 3384 if (ifp->if_setflags != NULL) 3385 rc = (*ifp->if_setflags)(ifp, flags); 3386 else { 3387 short cantflags, chgdflags; 3388 struct ifreq ifr; 3389 3390 chgdflags = ifp->if_flags ^ flags; 3391 cantflags = chgdflags & IFF_CANTCHANGE; 3392 3393 if (cantflags != 0) 3394 ifp->if_flags ^= cantflags; 3395 3396 /* Traditionally, we do not call if_ioctl after 3397 * setting/clearing only IFF_PROMISC if the interface 3398 * isn't IFF_UP. Uphold that tradition. 3399 */ 3400 if (chgdflags == IFF_PROMISC && (ifp->if_flags & IFF_UP) == 0) 3401 return 0; 3402 3403 memset(&ifr, 0, sizeof(ifr)); 3404 3405 ifr.ifr_flags = flags & ~IFF_CANTCHANGE; 3406 /* FIXME: may not hold if_ioctl_lock */ 3407 rc = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, &ifr); 3408 3409 if (rc != 0 && cantflags != 0) 3410 ifp->if_flags ^= cantflags; 3411 } 3412 3413 return rc; 3414 } 3415 3416 int 3417 if_mcast_op(ifnet_t *ifp, const unsigned long cmd, const struct sockaddr *sa) 3418 { 3419 int rc; 3420 struct ifreq ifr; 3421 3422 if (ifp->if_mcastop != NULL) 3423 rc = (*ifp->if_mcastop)(ifp, cmd, sa); 3424 else { 3425 ifreq_setaddr(cmd, &ifr, sa); 3426 rc = (*ifp->if_ioctl)(ifp, cmd, &ifr); 3427 } 3428 3429 return rc; 3430 } 3431 3432 static void 3433 sysctl_sndq_setup(struct sysctllog **clog, const char *ifname, 3434 struct ifaltq *ifq) 3435 { 3436 const struct sysctlnode *cnode, *rnode; 3437 3438 if (sysctl_createv(clog, 0, NULL, &rnode, 3439 CTLFLAG_PERMANENT, 3440 CTLTYPE_NODE, "interfaces", 3441 SYSCTL_DESCR("Per-interface controls"), 3442 NULL, 0, NULL, 0, 3443 CTL_NET, CTL_CREATE, CTL_EOL) != 0) 3444 goto bad; 3445 3446 if (sysctl_createv(clog, 0, &rnode, &rnode, 3447 CTLFLAG_PERMANENT, 3448 CTLTYPE_NODE, ifname, 3449 SYSCTL_DESCR("Interface controls"), 3450 NULL, 0, NULL, 0, 3451 CTL_CREATE, CTL_EOL) != 0) 3452 goto bad; 3453 3454 if (sysctl_createv(clog, 0, &rnode, &rnode, 3455 CTLFLAG_PERMANENT, 3456 CTLTYPE_NODE, "sndq", 3457 SYSCTL_DESCR("Interface output queue controls"), 3458 NULL, 0, NULL, 0, 3459 CTL_CREATE, CTL_EOL) != 0) 3460 goto bad; 3461 3462 if (sysctl_createv(clog, 0, &rnode, &cnode, 3463 CTLFLAG_PERMANENT, 3464 CTLTYPE_INT, "len", 3465 SYSCTL_DESCR("Current output queue length"), 3466 NULL, 0, &ifq->ifq_len, 0, 3467 CTL_CREATE, CTL_EOL) != 0) 3468 goto bad; 3469 3470 if (sysctl_createv(clog, 0, &rnode, &cnode, 3471 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 3472 CTLTYPE_INT, "maxlen", 3473 SYSCTL_DESCR("Maximum allowed output queue length"), 3474 NULL, 0, &ifq->ifq_maxlen, 0, 3475 CTL_CREATE, CTL_EOL) != 0) 3476 goto bad; 3477 3478 if (sysctl_createv(clog, 0, &rnode, &cnode, 3479 CTLFLAG_PERMANENT, 3480 CTLTYPE_INT, "drops", 3481 SYSCTL_DESCR("Packets dropped due to full output queue"), 3482 NULL, 0, &ifq->ifq_drops, 0, 3483 CTL_CREATE, CTL_EOL) != 0) 3484 goto bad; 3485 3486 return; 3487 bad: 3488 printf("%s: could not attach sysctl nodes\n", ifname); 3489 return; 3490 } 3491 3492 #if defined(INET) || defined(INET6) 3493 3494 #define SYSCTL_NET_PKTQ(q, cn, c) \ 3495 static int \ 3496 sysctl_net_##q##_##cn(SYSCTLFN_ARGS) \ 3497 { \ 3498 return sysctl_pktq_count(SYSCTLFN_CALL(rnode), q, c); \ 3499 } 3500 3501 #if defined(INET) 3502 static int 3503 sysctl_net_ip_pktq_maxlen(SYSCTLFN_ARGS) 3504 { 3505 return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), ip_pktq); 3506 } 3507 SYSCTL_NET_PKTQ(ip_pktq, items, PKTQ_NITEMS) 3508 SYSCTL_NET_PKTQ(ip_pktq, drops, PKTQ_DROPS) 3509 #endif 3510 3511 #if defined(INET6) 3512 static int 3513 sysctl_net_ip6_pktq_maxlen(SYSCTLFN_ARGS) 3514 { 3515 return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), ip6_pktq); 3516 } 3517 SYSCTL_NET_PKTQ(ip6_pktq, items, PKTQ_NITEMS) 3518 SYSCTL_NET_PKTQ(ip6_pktq, drops, PKTQ_DROPS) 3519 #endif 3520 3521 static void 3522 sysctl_net_pktq_setup(struct sysctllog **clog, int pf) 3523 { 3524 sysctlfn len_func = NULL, maxlen_func = NULL, drops_func = NULL; 3525 const char *pfname = NULL, *ipname = NULL; 3526 int ipn = 0, qid = 0; 3527 3528 switch (pf) { 3529 #if defined(INET) 3530 case PF_INET: 3531 len_func = sysctl_net_ip_pktq_items; 3532 maxlen_func = sysctl_net_ip_pktq_maxlen; 3533 drops_func = sysctl_net_ip_pktq_drops; 3534 pfname = "inet", ipn = IPPROTO_IP; 3535 ipname = "ip", qid = IPCTL_IFQ; 3536 break; 3537 #endif 3538 #if defined(INET6) 3539 case PF_INET6: 3540 len_func = sysctl_net_ip6_pktq_items; 3541 maxlen_func = sysctl_net_ip6_pktq_maxlen; 3542 drops_func = sysctl_net_ip6_pktq_drops; 3543 pfname = "inet6", ipn = IPPROTO_IPV6; 3544 ipname = "ip6", qid = IPV6CTL_IFQ; 3545 break; 3546 #endif 3547 default: 3548 KASSERT(false); 3549 } 3550 3551 sysctl_createv(clog, 0, NULL, NULL, 3552 CTLFLAG_PERMANENT, 3553 CTLTYPE_NODE, pfname, NULL, 3554 NULL, 0, NULL, 0, 3555 CTL_NET, pf, CTL_EOL); 3556 sysctl_createv(clog, 0, NULL, NULL, 3557 CTLFLAG_PERMANENT, 3558 CTLTYPE_NODE, ipname, NULL, 3559 NULL, 0, NULL, 0, 3560 CTL_NET, pf, ipn, CTL_EOL); 3561 sysctl_createv(clog, 0, NULL, NULL, 3562 CTLFLAG_PERMANENT, 3563 CTLTYPE_NODE, "ifq", 3564 SYSCTL_DESCR("Protocol input queue controls"), 3565 NULL, 0, NULL, 0, 3566 CTL_NET, pf, ipn, qid, CTL_EOL); 3567 3568 sysctl_createv(clog, 0, NULL, NULL, 3569 CTLFLAG_PERMANENT, 3570 CTLTYPE_INT, "len", 3571 SYSCTL_DESCR("Current input queue length"), 3572 len_func, 0, NULL, 0, 3573 CTL_NET, pf, ipn, qid, IFQCTL_LEN, CTL_EOL); 3574 sysctl_createv(clog, 0, NULL, NULL, 3575 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 3576 CTLTYPE_INT, "maxlen", 3577 SYSCTL_DESCR("Maximum allowed input queue length"), 3578 maxlen_func, 0, NULL, 0, 3579 CTL_NET, pf, ipn, qid, IFQCTL_MAXLEN, CTL_EOL); 3580 sysctl_createv(clog, 0, NULL, NULL, 3581 CTLFLAG_PERMANENT, 3582 CTLTYPE_INT, "drops", 3583 SYSCTL_DESCR("Packets dropped due to full input queue"), 3584 drops_func, 0, NULL, 0, 3585 CTL_NET, pf, ipn, qid, IFQCTL_DROPS, CTL_EOL); 3586 } 3587 #endif /* INET || INET6 */ 3588 3589 static int 3590 if_sdl_sysctl(SYSCTLFN_ARGS) 3591 { 3592 struct ifnet *ifp; 3593 const struct sockaddr_dl *sdl; 3594 struct psref psref; 3595 int error = 0; 3596 int bound; 3597 3598 if (namelen != 1) 3599 return EINVAL; 3600 3601 bound = curlwp_bind(); 3602 ifp = if_get_byindex(name[0], &psref); 3603 if (ifp == NULL) { 3604 error = ENODEV; 3605 goto out0; 3606 } 3607 3608 sdl = ifp->if_sadl; 3609 if (sdl == NULL) { 3610 *oldlenp = 0; 3611 goto out1; 3612 } 3613 3614 if (oldp == NULL) { 3615 *oldlenp = sdl->sdl_alen; 3616 goto out1; 3617 } 3618 3619 if (*oldlenp >= sdl->sdl_alen) 3620 *oldlenp = sdl->sdl_alen; 3621 error = sysctl_copyout(l, &sdl->sdl_data[sdl->sdl_nlen], oldp, *oldlenp); 3622 out1: 3623 if_put(ifp, &psref); 3624 out0: 3625 curlwp_bindx(bound); 3626 return error; 3627 } 3628 3629 static void 3630 if_sysctl_setup(struct sysctllog **clog) 3631 { 3632 const struct sysctlnode *rnode = NULL; 3633 3634 sysctl_createv(clog, 0, NULL, &rnode, 3635 CTLFLAG_PERMANENT, 3636 CTLTYPE_NODE, "sdl", 3637 SYSCTL_DESCR("Get active link-layer address"), 3638 if_sdl_sysctl, 0, NULL, 0, 3639 CTL_NET, CTL_CREATE, CTL_EOL); 3640 3641 #if defined(INET) 3642 sysctl_net_pktq_setup(NULL, PF_INET); 3643 #endif 3644 #ifdef INET6 3645 if (in6_present) 3646 sysctl_net_pktq_setup(NULL, PF_INET6); 3647 #endif 3648 } 3649