1 /* $NetBSD: if.c,v 1.400 2017/11/22 10:19:14 ozaki-r Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by William Studenmund and Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. 
Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the project nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61 /* 62 * Copyright (c) 1980, 1986, 1993 63 * The Regents of the University of California. All rights reserved. 64 * 65 * Redistribution and use in source and binary forms, with or without 66 * modification, are permitted provided that the following conditions 67 * are met: 68 * 1. Redistributions of source code must retain the above copyright 69 * notice, this list of conditions and the following disclaimer. 70 * 2. Redistributions in binary form must reproduce the above copyright 71 * notice, this list of conditions and the following disclaimer in the 72 * documentation and/or other materials provided with the distribution. 73 * 3. 
Neither the name of the University nor the names of its contributors 74 * may be used to endorse or promote products derived from this software 75 * without specific prior written permission. 76 * 77 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 78 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 79 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 80 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 81 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 82 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 83 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 84 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 85 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 86 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 87 * SUCH DAMAGE. 88 * 89 * @(#)if.c 8.5 (Berkeley) 1/9/95 90 */ 91 92 #include <sys/cdefs.h> 93 __KERNEL_RCSID(0, "$NetBSD: if.c,v 1.400 2017/11/22 10:19:14 ozaki-r Exp $"); 94 95 #if defined(_KERNEL_OPT) 96 #include "opt_inet.h" 97 #include "opt_ipsec.h" 98 99 #include "opt_atalk.h" 100 #include "opt_natm.h" 101 #include "opt_wlan.h" 102 #include "opt_net_mpsafe.h" 103 #endif 104 105 #include <sys/param.h> 106 #include <sys/mbuf.h> 107 #include <sys/systm.h> 108 #include <sys/callout.h> 109 #include <sys/proc.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/domain.h> 113 #include <sys/protosw.h> 114 #include <sys/kernel.h> 115 #include <sys/ioctl.h> 116 #include <sys/sysctl.h> 117 #include <sys/syslog.h> 118 #include <sys/kauth.h> 119 #include <sys/kmem.h> 120 #include <sys/xcall.h> 121 #include <sys/cpu.h> 122 #include <sys/intr.h> 123 124 #include <net/if.h> 125 #include <net/if_dl.h> 126 #include <net/if_ether.h> 127 #include <net/if_media.h> 128 #include 
<net80211/ieee80211.h> 129 #include <net80211/ieee80211_ioctl.h> 130 #include <net/if_types.h> 131 #include <net/route.h> 132 #include <net/netisr.h> 133 #include <sys/module.h> 134 #ifdef NETATALK 135 #include <netatalk/at_extern.h> 136 #include <netatalk/at.h> 137 #endif 138 #include <net/pfil.h> 139 #include <netinet/in.h> 140 #include <netinet/in_var.h> 141 #include <netinet/ip_encap.h> 142 #include <net/bpf.h> 143 144 #ifdef INET6 145 #include <netinet6/in6_var.h> 146 #include <netinet6/nd6.h> 147 #endif 148 149 #include "ether.h" 150 #include "fddi.h" 151 #include "token.h" 152 153 #include "carp.h" 154 #if NCARP > 0 155 #include <netinet/ip_carp.h> 156 #endif 157 158 #include <compat/sys/sockio.h> 159 #include <compat/sys/socket.h> 160 161 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); 162 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); 163 164 /* 165 * Global list of interfaces. 166 */ 167 /* DEPRECATED. Remove it once kvm(3) users disappeared */ 168 struct ifnet_head ifnet_list; 169 170 struct pslist_head ifnet_pslist; 171 static ifnet_t ** ifindex2ifnet = NULL; 172 static u_int if_index = 1; 173 static size_t if_indexlim = 0; 174 static uint64_t index_gen; 175 /* Mutex to protect the above objects. */ 176 kmutex_t ifnet_mtx __cacheline_aligned; 177 static struct psref_class *ifnet_psref_class __read_mostly; 178 static pserialize_t ifnet_psz; 179 180 static kmutex_t if_clone_mtx; 181 182 struct ifnet *lo0ifp; 183 int ifqmaxlen = IFQ_MAXLEN; 184 185 struct psref_class *ifa_psref_class __read_mostly; 186 187 static int if_delroute_matcher(struct rtentry *, void *); 188 189 static bool if_is_unit(const char *); 190 static struct if_clone *if_clone_lookup(const char *, int *); 191 192 static LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners); 193 static int if_cloners_count; 194 195 /* Packet filtering hook for interfaces. 
*/
pfil_head_t *			if_pfil __read_mostly;

/* Handle returned by kauth_listen_scope() in ifinit(). */
static kauth_listener_t if_listener;

/* Forward declarations of file-local helpers. */
static int doifioctl(struct socket *, u_long, void *, struct lwp *);
static void if_detach_queues(struct ifnet *, struct ifqueue *);
static void sysctl_sndq_setup(struct sysctllog **, const char *,
    struct ifaltq *);
static void if_slowtimo(void *);
static void if_free_sadl(struct ifnet *);
static void if_attachdomain1(struct ifnet *);
static int ifconf(u_long, void *);
static int if_transmit(struct ifnet *, struct mbuf *);
static int if_clone_create(const char *);
static int if_clone_destroy(const char *);
static void if_link_state_change_si(void *);

/*
 * State of one per-CPU input queue set, as created by
 * if_percpuq_create().
 */
struct if_percpuq {
	struct ifnet	*ipq_ifp;	/* interface the queues feed */
	void		*ipq_si;	/* softint running if_percpuq_softint */
	struct percpu	*ipq_ifqs;	/* struct ifqueue */
};

static struct mbuf *if_percpuq_dequeue(struct if_percpuq *);

static void if_percpuq_drops(void *, void *, struct cpu_info *);
static int sysctl_percpuq_drops_handler(SYSCTLFN_PROTO);
static void sysctl_percpuq_setup(struct sysctllog **, const char *,
    struct if_percpuq *);

/*
 * State of a deferred if_start instance, as created by
 * if_deferred_start_init().
 */
struct if_deferred_start {
	struct ifnet	*ids_ifp;	/* interface to start */
	void		(*ids_if_start)(struct ifnet *); /* start callback */
	void		*ids_si;	/* softint that runs the callback */
};

static void if_deferred_start_softint(void *);
static void if_deferred_start_common(struct ifnet *);
static void if_deferred_start_destroy(struct ifnet *);

#if defined(INET) || defined(INET6)
static void sysctl_net_pktq_setup(struct sysctllog **, int);
#endif

static void if_sysctl_setup(struct sysctllog **);

/*
 * Pointer to stub or real compat_cvtcmd() depending on presence of
 * the compat module
 */
u_long stub_compat_cvtcmd(u_long);
u_long (*vec_compat_cvtcmd)(u_long) = stub_compat_cvtcmd;

/* Similarly, pointer to compat_ifioctl() if it is present */

int (*vec_compat_ifioctl)(struct socket *, u_long, u_long, void *,
    struct lwp *) = NULL;
254 /* The stub version of compat_cvtcmd() */ 255 u_long stub_compat_cvtcmd(u_long cmd) 256 { 257 258 return cmd; 259 } 260 261 static int 262 if_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, 263 void *arg0, void *arg1, void *arg2, void *arg3) 264 { 265 int result; 266 enum kauth_network_req req; 267 268 result = KAUTH_RESULT_DEFER; 269 req = (enum kauth_network_req)arg1; 270 271 if (action != KAUTH_NETWORK_INTERFACE) 272 return result; 273 274 if ((req == KAUTH_REQ_NETWORK_INTERFACE_GET) || 275 (req == KAUTH_REQ_NETWORK_INTERFACE_SET)) 276 result = KAUTH_RESULT_ALLOW; 277 278 return result; 279 } 280 281 /* 282 * Network interface utility routines. 283 * 284 * Routines with ifa_ifwith* names take sockaddr *'s as 285 * parameters. 286 */ 287 void 288 ifinit(void) 289 { 290 291 if_sysctl_setup(NULL); 292 293 #if (defined(INET) || defined(INET6)) 294 encapinit(); 295 #endif 296 297 if_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, 298 if_listener_cb, NULL); 299 300 /* interfaces are available, inform socket code */ 301 ifioctl = doifioctl; 302 } 303 304 /* 305 * XXX Initialization before configure(). 306 * XXX hack to get pfil_add_hook working in autoconf. 
307 */ 308 void 309 ifinit1(void) 310 { 311 mutex_init(&if_clone_mtx, MUTEX_DEFAULT, IPL_NONE); 312 313 TAILQ_INIT(&ifnet_list); 314 mutex_init(&ifnet_mtx, MUTEX_DEFAULT, IPL_NONE); 315 ifnet_psz = pserialize_create(); 316 ifnet_psref_class = psref_class_create("ifnet", IPL_SOFTNET); 317 ifa_psref_class = psref_class_create("ifa", IPL_SOFTNET); 318 PSLIST_INIT(&ifnet_pslist); 319 320 if_indexlim = 8; 321 322 if_pfil = pfil_head_create(PFIL_TYPE_IFNET, NULL); 323 KASSERT(if_pfil != NULL); 324 325 #if NETHER > 0 || NFDDI > 0 || defined(NETATALK) || NTOKEN > 0 || defined(WLAN) 326 etherinit(); 327 #endif 328 } 329 330 ifnet_t * 331 if_alloc(u_char type) 332 { 333 return kmem_zalloc(sizeof(ifnet_t), KM_SLEEP); 334 } 335 336 void 337 if_free(ifnet_t *ifp) 338 { 339 kmem_free(ifp, sizeof(ifnet_t)); 340 } 341 342 void 343 if_initname(struct ifnet *ifp, const char *name, int unit) 344 { 345 (void)snprintf(ifp->if_xname, sizeof(ifp->if_xname), 346 "%s%d", name, unit); 347 } 348 349 /* 350 * Null routines used while an interface is going away. These routines 351 * just return an error. 352 */ 353 354 int 355 if_nulloutput(struct ifnet *ifp, struct mbuf *m, 356 const struct sockaddr *so, const struct rtentry *rt) 357 { 358 359 return ENXIO; 360 } 361 362 void 363 if_nullinput(struct ifnet *ifp, struct mbuf *m) 364 { 365 366 /* Nothing. */ 367 } 368 369 void 370 if_nullstart(struct ifnet *ifp) 371 { 372 373 /* Nothing. */ 374 } 375 376 int 377 if_nulltransmit(struct ifnet *ifp, struct mbuf *m) 378 { 379 380 m_freem(m); 381 return ENXIO; 382 } 383 384 int 385 if_nullioctl(struct ifnet *ifp, u_long cmd, void *data) 386 { 387 388 return ENXIO; 389 } 390 391 int 392 if_nullinit(struct ifnet *ifp) 393 { 394 395 return ENXIO; 396 } 397 398 void 399 if_nullstop(struct ifnet *ifp, int disable) 400 { 401 402 /* Nothing. */ 403 } 404 405 void 406 if_nullslowtimo(struct ifnet *ifp) 407 { 408 409 /* Nothing. 
*/ 410 } 411 412 void 413 if_nulldrain(struct ifnet *ifp) 414 { 415 416 /* Nothing. */ 417 } 418 419 void 420 if_set_sadl(struct ifnet *ifp, const void *lla, u_char addrlen, bool factory) 421 { 422 struct ifaddr *ifa; 423 struct sockaddr_dl *sdl; 424 425 ifp->if_addrlen = addrlen; 426 if_alloc_sadl(ifp); 427 ifa = ifp->if_dl; 428 sdl = satosdl(ifa->ifa_addr); 429 430 (void)sockaddr_dl_setaddr(sdl, sdl->sdl_len, lla, ifp->if_addrlen); 431 if (factory) { 432 ifp->if_hwdl = ifp->if_dl; 433 ifaref(ifp->if_hwdl); 434 } 435 /* TBD routing socket */ 436 } 437 438 struct ifaddr * 439 if_dl_create(const struct ifnet *ifp, const struct sockaddr_dl **sdlp) 440 { 441 unsigned socksize, ifasize; 442 int addrlen, namelen; 443 struct sockaddr_dl *mask, *sdl; 444 struct ifaddr *ifa; 445 446 namelen = strlen(ifp->if_xname); 447 addrlen = ifp->if_addrlen; 448 socksize = roundup(sockaddr_dl_measure(namelen, addrlen), sizeof(long)); 449 ifasize = sizeof(*ifa) + 2 * socksize; 450 ifa = malloc(ifasize, M_IFADDR, M_WAITOK|M_ZERO); 451 452 sdl = (struct sockaddr_dl *)(ifa + 1); 453 mask = (struct sockaddr_dl *)(socksize + (char *)sdl); 454 455 sockaddr_dl_init(sdl, socksize, ifp->if_index, ifp->if_type, 456 ifp->if_xname, namelen, NULL, addrlen); 457 mask->sdl_family = AF_LINK; 458 mask->sdl_len = sockaddr_dl_measure(namelen, 0); 459 memset(&mask->sdl_data[0], 0xff, namelen); 460 ifa->ifa_rtrequest = link_rtrequest; 461 ifa->ifa_addr = (struct sockaddr *)sdl; 462 ifa->ifa_netmask = (struct sockaddr *)mask; 463 ifa_psref_init(ifa); 464 465 *sdlp = sdl; 466 467 return ifa; 468 } 469 470 static void 471 if_sadl_setrefs(struct ifnet *ifp, struct ifaddr *ifa) 472 { 473 const struct sockaddr_dl *sdl; 474 475 ifp->if_dl = ifa; 476 ifaref(ifa); 477 sdl = satosdl(ifa->ifa_addr); 478 ifp->if_sadl = sdl; 479 } 480 481 /* 482 * Allocate the link level name for the specified interface. This 483 * is an attachment helper. 
It must be called after ifp->if_addrlen 484 * is initialized, which may not be the case when if_attach() is 485 * called. 486 */ 487 void 488 if_alloc_sadl(struct ifnet *ifp) 489 { 490 struct ifaddr *ifa; 491 const struct sockaddr_dl *sdl; 492 493 /* 494 * If the interface already has a link name, release it 495 * now. This is useful for interfaces that can change 496 * link types, and thus switch link names often. 497 */ 498 if (ifp->if_sadl != NULL) 499 if_free_sadl(ifp); 500 501 ifa = if_dl_create(ifp, &sdl); 502 503 ifa_insert(ifp, ifa); 504 if_sadl_setrefs(ifp, ifa); 505 } 506 507 static void 508 if_deactivate_sadl(struct ifnet *ifp) 509 { 510 struct ifaddr *ifa; 511 512 KASSERT(ifp->if_dl != NULL); 513 514 ifa = ifp->if_dl; 515 516 ifp->if_sadl = NULL; 517 518 ifp->if_dl = NULL; 519 ifafree(ifa); 520 } 521 522 static void 523 if_replace_sadl(struct ifnet *ifp, struct ifaddr *ifa) 524 { 525 struct ifaddr *old; 526 527 KASSERT(ifp->if_dl != NULL); 528 529 old = ifp->if_dl; 530 531 ifaref(ifa); 532 /* XXX Update if_dl and if_sadl atomically */ 533 ifp->if_dl = ifa; 534 ifp->if_sadl = satosdl(ifa->ifa_addr); 535 536 ifafree(old); 537 } 538 539 void 540 if_activate_sadl(struct ifnet *ifp, struct ifaddr *ifa0, 541 const struct sockaddr_dl *sdl) 542 { 543 int s, ss; 544 struct ifaddr *ifa; 545 int bound = curlwp_bind(); 546 547 KASSERT(ifa_held(ifa0)); 548 549 s = splsoftnet(); 550 551 if_replace_sadl(ifp, ifa0); 552 553 ss = pserialize_read_enter(); 554 IFADDR_READER_FOREACH(ifa, ifp) { 555 struct psref psref; 556 ifa_acquire(ifa, &psref); 557 pserialize_read_exit(ss); 558 559 rtinit(ifa, RTM_LLINFO_UPD, 0); 560 561 ss = pserialize_read_enter(); 562 ifa_release(ifa, &psref); 563 } 564 pserialize_read_exit(ss); 565 566 splx(s); 567 curlwp_bindx(bound); 568 } 569 570 /* 571 * Free the link level name for the specified interface. This is 572 * a detach helper. This is called from if_detach(). 
573 */ 574 static void 575 if_free_sadl(struct ifnet *ifp) 576 { 577 struct ifaddr *ifa; 578 int s; 579 580 ifa = ifp->if_dl; 581 if (ifa == NULL) { 582 KASSERT(ifp->if_sadl == NULL); 583 return; 584 } 585 586 KASSERT(ifp->if_sadl != NULL); 587 588 s = splsoftnet(); 589 rtinit(ifa, RTM_DELETE, 0); 590 ifa_remove(ifp, ifa); 591 if_deactivate_sadl(ifp); 592 if (ifp->if_hwdl == ifa) { 593 ifafree(ifa); 594 ifp->if_hwdl = NULL; 595 } 596 splx(s); 597 } 598 599 static void 600 if_getindex(ifnet_t *ifp) 601 { 602 bool hitlimit = false; 603 604 ifp->if_index_gen = index_gen++; 605 606 ifp->if_index = if_index; 607 if (ifindex2ifnet == NULL) { 608 if_index++; 609 goto skip; 610 } 611 while (if_byindex(ifp->if_index)) { 612 /* 613 * If we hit USHRT_MAX, we skip back to 0 since 614 * there are a number of places where the value 615 * of if_index or if_index itself is compared 616 * to or stored in an unsigned short. By 617 * jumping back, we won't botch those assignments 618 * or comparisons. 619 */ 620 if (++if_index == 0) { 621 if_index = 1; 622 } else if (if_index == USHRT_MAX) { 623 /* 624 * However, if we have to jump back to 625 * zero *twice* without finding an empty 626 * slot in ifindex2ifnet[], then there 627 * there are too many (>65535) interfaces. 628 */ 629 if (hitlimit) { 630 panic("too many interfaces"); 631 } 632 hitlimit = true; 633 if_index = 1; 634 } 635 ifp->if_index = if_index; 636 } 637 skip: 638 /* 639 * ifindex2ifnet is indexed by if_index. Since if_index will 640 * grow dynamically, it should grow too. 
641 */ 642 if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) { 643 size_t m, n, oldlim; 644 void *q; 645 646 oldlim = if_indexlim; 647 while (ifp->if_index >= if_indexlim) 648 if_indexlim <<= 1; 649 650 /* grow ifindex2ifnet */ 651 m = oldlim * sizeof(struct ifnet *); 652 n = if_indexlim * sizeof(struct ifnet *); 653 q = malloc(n, M_IFADDR, M_WAITOK|M_ZERO); 654 if (ifindex2ifnet != NULL) { 655 memcpy(q, ifindex2ifnet, m); 656 free(ifindex2ifnet, M_IFADDR); 657 } 658 ifindex2ifnet = (struct ifnet **)q; 659 } 660 ifindex2ifnet[ifp->if_index] = ifp; 661 } 662 663 /* 664 * Initialize an interface and assign an index for it. 665 * 666 * It must be called prior to a device specific attach routine 667 * (e.g., ether_ifattach and ieee80211_ifattach) or if_alloc_sadl, 668 * and be followed by if_register: 669 * 670 * if_initialize(ifp); 671 * ether_ifattach(ifp, enaddr); 672 * if_register(ifp); 673 */ 674 int 675 if_initialize(ifnet_t *ifp) 676 { 677 int rv = 0; 678 679 KASSERT(if_indexlim > 0); 680 TAILQ_INIT(&ifp->if_addrlist); 681 682 /* 683 * Link level name is allocated later by a separate call to 684 * if_alloc_sadl(). 
685 */ 686 687 if (ifp->if_snd.ifq_maxlen == 0) 688 ifp->if_snd.ifq_maxlen = ifqmaxlen; 689 690 ifp->if_broadcastaddr = 0; /* reliably crash if used uninitialized */ 691 692 ifp->if_link_state = LINK_STATE_UNKNOWN; 693 ifp->if_link_queue = -1; /* all bits set, see link_state_change() */ 694 695 ifp->if_capenable = 0; 696 ifp->if_csum_flags_tx = 0; 697 ifp->if_csum_flags_rx = 0; 698 699 #ifdef ALTQ 700 ifp->if_snd.altq_type = 0; 701 ifp->if_snd.altq_disc = NULL; 702 ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE; 703 ifp->if_snd.altq_tbr = NULL; 704 ifp->if_snd.altq_ifp = ifp; 705 #endif 706 707 IFQ_LOCK_INIT(&ifp->if_snd); 708 709 ifp->if_pfil = pfil_head_create(PFIL_TYPE_IFNET, ifp); 710 pfil_run_ifhooks(if_pfil, PFIL_IFNET_ATTACH, ifp); 711 712 IF_AFDATA_LOCK_INIT(ifp); 713 714 if (if_is_link_state_changeable(ifp)) { 715 ifp->if_link_si = softint_establish(SOFTINT_NET, 716 if_link_state_change_si, ifp); 717 if (ifp->if_link_si == NULL) { 718 rv = ENOMEM; 719 goto fail; 720 } 721 } 722 723 PSLIST_ENTRY_INIT(ifp, if_pslist_entry); 724 PSLIST_INIT(&ifp->if_addr_pslist); 725 psref_target_init(&ifp->if_psref, ifnet_psref_class); 726 ifp->if_ioctl_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 727 LIST_INIT(&ifp->if_multiaddrs); 728 729 IFNET_LOCK(); 730 if_getindex(ifp); 731 IFNET_UNLOCK(); 732 733 return 0; 734 735 fail: 736 IF_AFDATA_LOCK_DESTROY(ifp); 737 738 pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp); 739 (void)pfil_head_destroy(ifp->if_pfil); 740 741 IFQ_LOCK_DESTROY(&ifp->if_snd); 742 743 return rv; 744 } 745 746 /* 747 * Register an interface to the list of "active" interfaces. 748 */ 749 void 750 if_register(ifnet_t *ifp) 751 { 752 /* 753 * If the driver has not supplied its own if_ioctl, then 754 * supply the default. 
755 */ 756 if (ifp->if_ioctl == NULL) 757 ifp->if_ioctl = ifioctl_common; 758 759 sysctl_sndq_setup(&ifp->if_sysctl_log, ifp->if_xname, &ifp->if_snd); 760 761 if (!STAILQ_EMPTY(&domains)) 762 if_attachdomain1(ifp); 763 764 /* Announce the interface. */ 765 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 766 767 if (ifp->if_slowtimo != NULL) { 768 ifp->if_slowtimo_ch = 769 kmem_zalloc(sizeof(*ifp->if_slowtimo_ch), KM_SLEEP); 770 callout_init(ifp->if_slowtimo_ch, 0); 771 callout_setfunc(ifp->if_slowtimo_ch, if_slowtimo, ifp); 772 if_slowtimo(ifp); 773 } 774 775 if (ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit) 776 ifp->if_transmit = if_transmit; 777 778 IFNET_LOCK(); 779 TAILQ_INSERT_TAIL(&ifnet_list, ifp, if_list); 780 IFNET_WRITER_INSERT_TAIL(ifp); 781 IFNET_UNLOCK(); 782 } 783 784 /* 785 * The if_percpuq framework 786 * 787 * It allows network device drivers to execute the network stack 788 * in softint (so called softint-based if_input). It utilizes 789 * softint and percpu ifqueue. It doesn't distribute any packets 790 * between CPUs, unlike pktqueue(9). 
791 * 792 * Currently we support two options for device drivers to apply the framework: 793 * - Use it implicitly with less changes 794 * - If you use if_attach in driver's _attach function and if_input in 795 * driver's Rx interrupt handler, a packet is queued and a softint handles 796 * the packet implicitly 797 * - Use it explicitly in each driver (recommended) 798 * - You can use if_percpuq_* directly in your driver 799 * - In this case, you need to allocate struct if_percpuq in driver's softc 800 * - See wm(4) as a reference implementation 801 */ 802 803 static void 804 if_percpuq_softint(void *arg) 805 { 806 struct if_percpuq *ipq = arg; 807 struct ifnet *ifp = ipq->ipq_ifp; 808 struct mbuf *m; 809 810 while ((m = if_percpuq_dequeue(ipq)) != NULL) { 811 ifp->if_ipackets++; 812 bpf_mtap(ifp, m); 813 814 ifp->_if_input(ifp, m); 815 } 816 } 817 818 static void 819 if_percpuq_init_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused) 820 { 821 struct ifqueue *const ifq = p; 822 823 memset(ifq, 0, sizeof(*ifq)); 824 ifq->ifq_maxlen = IFQ_MAXLEN; 825 } 826 827 struct if_percpuq * 828 if_percpuq_create(struct ifnet *ifp) 829 { 830 struct if_percpuq *ipq; 831 832 ipq = kmem_zalloc(sizeof(*ipq), KM_SLEEP); 833 ipq->ipq_ifp = ifp; 834 ipq->ipq_si = softint_establish(SOFTINT_NET|SOFTINT_MPSAFE, 835 if_percpuq_softint, ipq); 836 ipq->ipq_ifqs = percpu_alloc(sizeof(struct ifqueue)); 837 percpu_foreach(ipq->ipq_ifqs, &if_percpuq_init_ifq, NULL); 838 839 sysctl_percpuq_setup(&ifp->if_sysctl_log, ifp->if_xname, ipq); 840 841 return ipq; 842 } 843 844 static struct mbuf * 845 if_percpuq_dequeue(struct if_percpuq *ipq) 846 { 847 struct mbuf *m; 848 struct ifqueue *ifq; 849 int s; 850 851 s = splnet(); 852 ifq = percpu_getref(ipq->ipq_ifqs); 853 IF_DEQUEUE(ifq, m); 854 percpu_putref(ipq->ipq_ifqs); 855 splx(s); 856 857 return m; 858 } 859 860 static void 861 if_percpuq_purge_ifq(void *p, void *arg __unused, struct cpu_info *ci __unused) 862 { 863 struct ifqueue *const 
ifq = p; 864 865 IF_PURGE(ifq); 866 } 867 868 void 869 if_percpuq_destroy(struct if_percpuq *ipq) 870 { 871 872 /* if_detach may already destroy it */ 873 if (ipq == NULL) 874 return; 875 876 softint_disestablish(ipq->ipq_si); 877 percpu_foreach(ipq->ipq_ifqs, &if_percpuq_purge_ifq, NULL); 878 percpu_free(ipq->ipq_ifqs, sizeof(struct ifqueue)); 879 kmem_free(ipq, sizeof(*ipq)); 880 } 881 882 void 883 if_percpuq_enqueue(struct if_percpuq *ipq, struct mbuf *m) 884 { 885 struct ifqueue *ifq; 886 int s; 887 888 KASSERT(ipq != NULL); 889 890 s = splnet(); 891 ifq = percpu_getref(ipq->ipq_ifqs); 892 if (IF_QFULL(ifq)) { 893 IF_DROP(ifq); 894 percpu_putref(ipq->ipq_ifqs); 895 m_freem(m); 896 goto out; 897 } 898 IF_ENQUEUE(ifq, m); 899 percpu_putref(ipq->ipq_ifqs); 900 901 softint_schedule(ipq->ipq_si); 902 out: 903 splx(s); 904 } 905 906 static void 907 if_percpuq_drops(void *p, void *arg, struct cpu_info *ci __unused) 908 { 909 struct ifqueue *const ifq = p; 910 int *sum = arg; 911 912 *sum += ifq->ifq_drops; 913 } 914 915 static int 916 sysctl_percpuq_drops_handler(SYSCTLFN_ARGS) 917 { 918 struct sysctlnode node; 919 struct if_percpuq *ipq; 920 int sum = 0; 921 int error; 922 923 node = *rnode; 924 ipq = node.sysctl_data; 925 926 percpu_foreach(ipq->ipq_ifqs, if_percpuq_drops, &sum); 927 928 node.sysctl_data = ∑ 929 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 930 if (error != 0 || newp == NULL) 931 return error; 932 933 return 0; 934 } 935 936 static void 937 sysctl_percpuq_setup(struct sysctllog **clog, const char* ifname, 938 struct if_percpuq *ipq) 939 { 940 const struct sysctlnode *cnode, *rnode; 941 942 if (sysctl_createv(clog, 0, NULL, &rnode, 943 CTLFLAG_PERMANENT, 944 CTLTYPE_NODE, "interfaces", 945 SYSCTL_DESCR("Per-interface controls"), 946 NULL, 0, NULL, 0, 947 CTL_NET, CTL_CREATE, CTL_EOL) != 0) 948 goto bad; 949 950 if (sysctl_createv(clog, 0, &rnode, &rnode, 951 CTLFLAG_PERMANENT, 952 CTLTYPE_NODE, ifname, 953 SYSCTL_DESCR("Interface controls"), 954 NULL, 
0, NULL, 0, 955 CTL_CREATE, CTL_EOL) != 0) 956 goto bad; 957 958 if (sysctl_createv(clog, 0, &rnode, &rnode, 959 CTLFLAG_PERMANENT, 960 CTLTYPE_NODE, "rcvq", 961 SYSCTL_DESCR("Interface input queue controls"), 962 NULL, 0, NULL, 0, 963 CTL_CREATE, CTL_EOL) != 0) 964 goto bad; 965 966 #ifdef NOTYET 967 /* XXX Should show each per-CPU queue length? */ 968 if (sysctl_createv(clog, 0, &rnode, &rnode, 969 CTLFLAG_PERMANENT, 970 CTLTYPE_INT, "len", 971 SYSCTL_DESCR("Current input queue length"), 972 sysctl_percpuq_len, 0, NULL, 0, 973 CTL_CREATE, CTL_EOL) != 0) 974 goto bad; 975 976 if (sysctl_createv(clog, 0, &rnode, &cnode, 977 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 978 CTLTYPE_INT, "maxlen", 979 SYSCTL_DESCR("Maximum allowed input queue length"), 980 sysctl_percpuq_maxlen_handler, 0, (void *)ipq, 0, 981 CTL_CREATE, CTL_EOL) != 0) 982 goto bad; 983 #endif 984 985 if (sysctl_createv(clog, 0, &rnode, &cnode, 986 CTLFLAG_PERMANENT, 987 CTLTYPE_INT, "drops", 988 SYSCTL_DESCR("Total packets dropped due to full input queue"), 989 sysctl_percpuq_drops_handler, 0, (void *)ipq, 0, 990 CTL_CREATE, CTL_EOL) != 0) 991 goto bad; 992 993 return; 994 bad: 995 printf("%s: could not attach sysctl nodes\n", ifname); 996 return; 997 } 998 999 /* 1000 * The deferred if_start framework 1001 * 1002 * The common APIs to defer if_start to softint when if_start is requested 1003 * from a device driver running in hardware interrupt context. 1004 */ 1005 /* 1006 * Call ifp->if_start (or equivalent) in a dedicated softint for 1007 * deferred if_start. 1008 */ 1009 static void 1010 if_deferred_start_softint(void *arg) 1011 { 1012 struct if_deferred_start *ids = arg; 1013 struct ifnet *ifp = ids->ids_ifp; 1014 1015 ids->ids_if_start(ifp); 1016 } 1017 1018 /* 1019 * The default callback function for deferred if_start. 
1020 */ 1021 static void 1022 if_deferred_start_common(struct ifnet *ifp) 1023 { 1024 int s; 1025 1026 s = splnet(); 1027 if_start_lock(ifp); 1028 splx(s); 1029 } 1030 1031 static inline bool 1032 if_snd_is_used(struct ifnet *ifp) 1033 { 1034 1035 return ifp->if_transmit == NULL || ifp->if_transmit == if_nulltransmit || 1036 ALTQ_IS_ENABLED(&ifp->if_snd); 1037 } 1038 1039 /* 1040 * Schedule deferred if_start. 1041 */ 1042 void 1043 if_schedule_deferred_start(struct ifnet *ifp) 1044 { 1045 1046 KASSERT(ifp->if_deferred_start != NULL); 1047 1048 if (if_snd_is_used(ifp) && IFQ_IS_EMPTY(&ifp->if_snd)) 1049 return; 1050 1051 softint_schedule(ifp->if_deferred_start->ids_si); 1052 } 1053 1054 /* 1055 * Create an instance of deferred if_start. A driver should call the function 1056 * only if the driver needs deferred if_start. Drivers can setup their own 1057 * deferred if_start function via 2nd argument. 1058 */ 1059 void 1060 if_deferred_start_init(struct ifnet *ifp, void (*func)(struct ifnet *)) 1061 { 1062 struct if_deferred_start *ids; 1063 1064 ids = kmem_zalloc(sizeof(*ids), KM_SLEEP); 1065 ids->ids_ifp = ifp; 1066 ids->ids_si = softint_establish(SOFTINT_NET|SOFTINT_MPSAFE, 1067 if_deferred_start_softint, ids); 1068 if (func != NULL) 1069 ids->ids_if_start = func; 1070 else 1071 ids->ids_if_start = if_deferred_start_common; 1072 1073 ifp->if_deferred_start = ids; 1074 } 1075 1076 static void 1077 if_deferred_start_destroy(struct ifnet *ifp) 1078 { 1079 1080 if (ifp->if_deferred_start == NULL) 1081 return; 1082 1083 softint_disestablish(ifp->if_deferred_start->ids_si); 1084 kmem_free(ifp->if_deferred_start, sizeof(*ifp->if_deferred_start)); 1085 ifp->if_deferred_start = NULL; 1086 } 1087 1088 /* 1089 * The common interface input routine that is called by device drivers, 1090 * which should be used only when the driver's rx handler already runs 1091 * in softint. 
1092 */ 1093 void 1094 if_input(struct ifnet *ifp, struct mbuf *m) 1095 { 1096 1097 KASSERT(ifp->if_percpuq == NULL); 1098 KASSERT(!cpu_intr_p()); 1099 1100 ifp->if_ipackets++; 1101 bpf_mtap(ifp, m); 1102 1103 ifp->_if_input(ifp, m); 1104 } 1105 1106 /* 1107 * DEPRECATED. Use if_initialize and if_register instead. 1108 * See the above comment of if_initialize. 1109 * 1110 * Note that it implicitly enables if_percpuq to make drivers easy to 1111 * migrate softint-based if_input without much changes. If you don't 1112 * want to enable it, use if_initialize instead. 1113 */ 1114 int 1115 if_attach(ifnet_t *ifp) 1116 { 1117 int rv; 1118 1119 rv = if_initialize(ifp); 1120 if (rv != 0) 1121 return rv; 1122 1123 ifp->if_percpuq = if_percpuq_create(ifp); 1124 if_register(ifp); 1125 1126 return 0; 1127 } 1128 1129 void 1130 if_attachdomain(void) 1131 { 1132 struct ifnet *ifp; 1133 int s; 1134 int bound = curlwp_bind(); 1135 1136 s = pserialize_read_enter(); 1137 IFNET_READER_FOREACH(ifp) { 1138 struct psref psref; 1139 psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class); 1140 pserialize_read_exit(s); 1141 if_attachdomain1(ifp); 1142 s = pserialize_read_enter(); 1143 psref_release(&psref, &ifp->if_psref, ifnet_psref_class); 1144 } 1145 pserialize_read_exit(s); 1146 curlwp_bindx(bound); 1147 } 1148 1149 static void 1150 if_attachdomain1(struct ifnet *ifp) 1151 { 1152 struct domain *dp; 1153 int s; 1154 1155 s = splsoftnet(); 1156 1157 /* address family dependent data region */ 1158 memset(ifp->if_afdata, 0, sizeof(ifp->if_afdata)); 1159 DOMAIN_FOREACH(dp) { 1160 if (dp->dom_ifattach != NULL) 1161 ifp->if_afdata[dp->dom_family] = 1162 (*dp->dom_ifattach)(ifp); 1163 } 1164 1165 splx(s); 1166 } 1167 1168 /* 1169 * Deactivate an interface. This points all of the procedure 1170 * handles at error stubs. May be called from interrupt context. 
1171 */ 1172 void 1173 if_deactivate(struct ifnet *ifp) 1174 { 1175 int s; 1176 1177 s = splsoftnet(); 1178 1179 ifp->if_output = if_nulloutput; 1180 ifp->_if_input = if_nullinput; 1181 ifp->if_start = if_nullstart; 1182 ifp->if_transmit = if_nulltransmit; 1183 ifp->if_ioctl = if_nullioctl; 1184 ifp->if_init = if_nullinit; 1185 ifp->if_stop = if_nullstop; 1186 ifp->if_slowtimo = if_nullslowtimo; 1187 ifp->if_drain = if_nulldrain; 1188 1189 /* No more packets may be enqueued. */ 1190 ifp->if_snd.ifq_maxlen = 0; 1191 1192 splx(s); 1193 } 1194 1195 bool 1196 if_is_deactivated(const struct ifnet *ifp) 1197 { 1198 1199 return ifp->if_output == if_nulloutput; 1200 } 1201 1202 void 1203 if_purgeaddrs(struct ifnet *ifp, int family, void (*purgeaddr)(struct ifaddr *)) 1204 { 1205 struct ifaddr *ifa, *nifa; 1206 int s; 1207 1208 s = pserialize_read_enter(); 1209 for (ifa = IFADDR_READER_FIRST(ifp); ifa; ifa = nifa) { 1210 nifa = IFADDR_READER_NEXT(ifa); 1211 if (ifa->ifa_addr->sa_family != family) 1212 continue; 1213 pserialize_read_exit(s); 1214 1215 (*purgeaddr)(ifa); 1216 1217 s = pserialize_read_enter(); 1218 } 1219 pserialize_read_exit(s); 1220 } 1221 1222 #ifdef IFAREF_DEBUG 1223 static struct ifaddr **ifa_list; 1224 static int ifa_list_size; 1225 1226 /* Depends on only one if_attach runs at once */ 1227 static void 1228 if_build_ifa_list(struct ifnet *ifp) 1229 { 1230 struct ifaddr *ifa; 1231 int i; 1232 1233 KASSERT(ifa_list == NULL); 1234 KASSERT(ifa_list_size == 0); 1235 1236 IFADDR_READER_FOREACH(ifa, ifp) 1237 ifa_list_size++; 1238 1239 ifa_list = kmem_alloc(sizeof(*ifa) * ifa_list_size, KM_SLEEP); 1240 i = 0; 1241 IFADDR_READER_FOREACH(ifa, ifp) { 1242 ifa_list[i++] = ifa; 1243 ifaref(ifa); 1244 } 1245 } 1246 1247 static void 1248 if_check_and_free_ifa_list(struct ifnet *ifp) 1249 { 1250 int i; 1251 struct ifaddr *ifa; 1252 1253 if (ifa_list == NULL) 1254 return; 1255 1256 for (i = 0; i < ifa_list_size; i++) { 1257 char buf[64]; 1258 1259 ifa = ifa_list[i]; 
1260 sockaddr_format(ifa->ifa_addr, buf, sizeof(buf)); 1261 if (ifa->ifa_refcnt > 1) { 1262 log(LOG_WARNING, 1263 "ifa(%s) still referenced (refcnt=%d)\n", 1264 buf, ifa->ifa_refcnt - 1); 1265 } else 1266 log(LOG_DEBUG, 1267 "ifa(%s) not referenced (refcnt=%d)\n", 1268 buf, ifa->ifa_refcnt - 1); 1269 ifafree(ifa); 1270 } 1271 1272 kmem_free(ifa_list, sizeof(*ifa) * ifa_list_size); 1273 ifa_list = NULL; 1274 ifa_list_size = 0; 1275 } 1276 #endif 1277 1278 /* 1279 * Detach an interface from the list of "active" interfaces, 1280 * freeing any resources as we go along. 1281 * 1282 * NOTE: This routine must be called with a valid thread context, 1283 * as it may block. 1284 */ 1285 void 1286 if_detach(struct ifnet *ifp) 1287 { 1288 struct socket so; 1289 struct ifaddr *ifa; 1290 #ifdef IFAREF_DEBUG 1291 struct ifaddr *last_ifa = NULL; 1292 #endif 1293 struct domain *dp; 1294 const struct protosw *pr; 1295 int s, i, family, purged; 1296 uint64_t xc; 1297 1298 #ifdef IFAREF_DEBUG 1299 if_build_ifa_list(ifp); 1300 #endif 1301 /* 1302 * XXX It's kind of lame that we have to have the 1303 * XXX socket structure... 1304 */ 1305 memset(&so, 0, sizeof(so)); 1306 1307 s = splnet(); 1308 1309 sysctl_teardown(&ifp->if_sysctl_log); 1310 mutex_enter(ifp->if_ioctl_lock); 1311 if_deactivate(ifp); 1312 mutex_exit(ifp->if_ioctl_lock); 1313 1314 IFNET_LOCK(); 1315 ifindex2ifnet[ifp->if_index] = NULL; 1316 TAILQ_REMOVE(&ifnet_list, ifp, if_list); 1317 IFNET_WRITER_REMOVE(ifp); 1318 pserialize_perform(ifnet_psz); 1319 IFNET_UNLOCK(); 1320 1321 /* Wait for all readers to drain before freeing. 
*/ 1322 psref_target_destroy(&ifp->if_psref, ifnet_psref_class); 1323 PSLIST_ENTRY_DESTROY(ifp, if_pslist_entry); 1324 1325 mutex_obj_free(ifp->if_ioctl_lock); 1326 ifp->if_ioctl_lock = NULL; 1327 1328 if (ifp->if_slowtimo != NULL && ifp->if_slowtimo_ch != NULL) { 1329 ifp->if_slowtimo = NULL; 1330 callout_halt(ifp->if_slowtimo_ch, NULL); 1331 callout_destroy(ifp->if_slowtimo_ch); 1332 kmem_free(ifp->if_slowtimo_ch, sizeof(*ifp->if_slowtimo_ch)); 1333 } 1334 if_deferred_start_destroy(ifp); 1335 1336 /* 1337 * Do an if_down() to give protocols a chance to do something. 1338 */ 1339 if_down(ifp); 1340 1341 #ifdef ALTQ 1342 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 1343 altq_disable(&ifp->if_snd); 1344 if (ALTQ_IS_ATTACHED(&ifp->if_snd)) 1345 altq_detach(&ifp->if_snd); 1346 #endif 1347 1348 mutex_obj_free(ifp->if_snd.ifq_lock); 1349 1350 #if NCARP > 0 1351 /* Remove the interface from any carp group it is a part of. */ 1352 if (ifp->if_carp != NULL && ifp->if_type != IFT_CARP) 1353 carp_ifdetach(ifp); 1354 #endif 1355 1356 /* 1357 * Rip all the addresses off the interface. This should make 1358 * all of the routes go away. 1359 * 1360 * pr_usrreq calls can remove an arbitrary number of ifaddrs 1361 * from the list, including our "cursor", ifa. For safety, 1362 * and to honor the TAILQ abstraction, I just restart the 1363 * loop after each removal. Note that the loop will exit 1364 * when all of the remaining ifaddrs belong to the AF_LINK 1365 * family. I am counting on the historical fact that at 1366 * least one pr_usrreq in each address domain removes at 1367 * least one ifaddr. 1368 */ 1369 again: 1370 /* 1371 * At this point, no other one tries to remove ifa in the list, 1372 * so we don't need to take a lock or psref. 
1373 */ 1374 IFADDR_READER_FOREACH(ifa, ifp) { 1375 family = ifa->ifa_addr->sa_family; 1376 #ifdef IFAREF_DEBUG 1377 printf("if_detach: ifaddr %p, family %d, refcnt %d\n", 1378 ifa, family, ifa->ifa_refcnt); 1379 if (last_ifa != NULL && ifa == last_ifa) 1380 panic("if_detach: loop detected"); 1381 last_ifa = ifa; 1382 #endif 1383 if (family == AF_LINK) 1384 continue; 1385 dp = pffinddomain(family); 1386 KASSERTMSG(dp != NULL, "no domain for AF %d", family); 1387 /* 1388 * XXX These PURGEIF calls are redundant with the 1389 * purge-all-families calls below, but are left in for 1390 * now both to make a smaller change, and to avoid 1391 * unplanned interactions with clearing of 1392 * ifp->if_addrlist. 1393 */ 1394 purged = 0; 1395 for (pr = dp->dom_protosw; 1396 pr < dp->dom_protoswNPROTOSW; pr++) { 1397 so.so_proto = pr; 1398 if (pr->pr_usrreqs) { 1399 (void) (*pr->pr_usrreqs->pr_purgeif)(&so, ifp); 1400 purged = 1; 1401 } 1402 } 1403 if (purged == 0) { 1404 /* 1405 * XXX What's really the best thing to do 1406 * XXX here? --thorpej@NetBSD.org 1407 */ 1408 printf("if_detach: WARNING: AF %d not purged\n", 1409 family); 1410 ifa_remove(ifp, ifa); 1411 } 1412 goto again; 1413 } 1414 1415 if_free_sadl(ifp); 1416 1417 /* Delete stray routes from the routing table. */ 1418 for (i = 0; i <= AF_MAX; i++) 1419 rt_delete_matched_entries(i, if_delroute_matcher, ifp); 1420 1421 DOMAIN_FOREACH(dp) { 1422 if (dp->dom_ifdetach != NULL && ifp->if_afdata[dp->dom_family]) 1423 { 1424 void *p = ifp->if_afdata[dp->dom_family]; 1425 if (p) { 1426 ifp->if_afdata[dp->dom_family] = NULL; 1427 (*dp->dom_ifdetach)(ifp, p); 1428 } 1429 } 1430 1431 /* 1432 * One would expect multicast memberships (INET and 1433 * INET6) on UDP sockets to be purged by the PURGEIF 1434 * calls above, but if all addresses were removed from 1435 * the interface prior to destruction, the calls will 1436 * not be made (e.g. ppp, for which pppd(8) generally 1437 * removes addresses before destroying the interface). 
1438 * Because there is no invariant that multicast 1439 * memberships only exist for interfaces with IPv4 1440 * addresses, we must call PURGEIF regardless of 1441 * addresses. (Protocols which might store ifnet 1442 * pointers are marked with PR_PURGEIF.) 1443 */ 1444 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { 1445 so.so_proto = pr; 1446 if (pr->pr_usrreqs && pr->pr_flags & PR_PURGEIF) 1447 (void)(*pr->pr_usrreqs->pr_purgeif)(&so, ifp); 1448 } 1449 } 1450 1451 pfil_run_ifhooks(if_pfil, PFIL_IFNET_DETACH, ifp); 1452 (void)pfil_head_destroy(ifp->if_pfil); 1453 1454 /* Announce that the interface is gone. */ 1455 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 1456 1457 IF_AFDATA_LOCK_DESTROY(ifp); 1458 1459 if (if_is_link_state_changeable(ifp)) { 1460 softint_disestablish(ifp->if_link_si); 1461 ifp->if_link_si = NULL; 1462 } 1463 1464 /* 1465 * remove packets that came from ifp, from software interrupt queues. 1466 */ 1467 DOMAIN_FOREACH(dp) { 1468 for (i = 0; i < __arraycount(dp->dom_ifqueues); i++) { 1469 struct ifqueue *iq = dp->dom_ifqueues[i]; 1470 if (iq == NULL) 1471 break; 1472 dp->dom_ifqueues[i] = NULL; 1473 if_detach_queues(ifp, iq); 1474 } 1475 } 1476 1477 /* 1478 * IP queues have to be processed separately: net-queue barrier 1479 * ensures that the packets are dequeued while a cross-call will 1480 * ensure that the interrupts have completed. FIXME: not quite.. 
1481 */ 1482 #ifdef INET 1483 pktq_barrier(ip_pktq); 1484 #endif 1485 #ifdef INET6 1486 if (in6_present) 1487 pktq_barrier(ip6_pktq); 1488 #endif 1489 xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); 1490 xc_wait(xc); 1491 1492 if (ifp->if_percpuq != NULL) { 1493 if_percpuq_destroy(ifp->if_percpuq); 1494 ifp->if_percpuq = NULL; 1495 } 1496 1497 splx(s); 1498 1499 #ifdef IFAREF_DEBUG 1500 if_check_and_free_ifa_list(ifp); 1501 #endif 1502 } 1503 1504 static void 1505 if_detach_queues(struct ifnet *ifp, struct ifqueue *q) 1506 { 1507 struct mbuf *m, *prev, *next; 1508 1509 prev = NULL; 1510 for (m = q->ifq_head; m != NULL; m = next) { 1511 KASSERT((m->m_flags & M_PKTHDR) != 0); 1512 1513 next = m->m_nextpkt; 1514 if (m->m_pkthdr.rcvif_index != ifp->if_index) { 1515 prev = m; 1516 continue; 1517 } 1518 1519 if (prev != NULL) 1520 prev->m_nextpkt = m->m_nextpkt; 1521 else 1522 q->ifq_head = m->m_nextpkt; 1523 if (q->ifq_tail == m) 1524 q->ifq_tail = prev; 1525 q->ifq_len--; 1526 1527 m->m_nextpkt = NULL; 1528 m_freem(m); 1529 IF_DROP(q); 1530 } 1531 } 1532 1533 /* 1534 * Callback for a radix tree walk to delete all references to an 1535 * ifnet. 1536 */ 1537 static int 1538 if_delroute_matcher(struct rtentry *rt, void *v) 1539 { 1540 struct ifnet *ifp = (struct ifnet *)v; 1541 1542 if (rt->rt_ifp == ifp) 1543 return 1; 1544 else 1545 return 0; 1546 } 1547 1548 /* 1549 * Create a clone network interface. 1550 */ 1551 static int 1552 if_clone_create(const char *name) 1553 { 1554 struct if_clone *ifc; 1555 int unit; 1556 struct ifnet *ifp; 1557 struct psref psref; 1558 1559 KASSERT(mutex_owned(&if_clone_mtx)); 1560 1561 ifc = if_clone_lookup(name, &unit); 1562 if (ifc == NULL) 1563 return EINVAL; 1564 1565 ifp = if_get(name, &psref); 1566 if (ifp != NULL) { 1567 if_put(ifp, &psref); 1568 return EEXIST; 1569 } 1570 1571 return (*ifc->ifc_create)(ifc, unit); 1572 } 1573 1574 /* 1575 * Destroy a clone network interface. 
1576 */ 1577 static int 1578 if_clone_destroy(const char *name) 1579 { 1580 struct if_clone *ifc; 1581 struct ifnet *ifp; 1582 struct psref psref; 1583 1584 KASSERT(mutex_owned(&if_clone_mtx)); 1585 1586 ifc = if_clone_lookup(name, NULL); 1587 if (ifc == NULL) 1588 return EINVAL; 1589 1590 if (ifc->ifc_destroy == NULL) 1591 return EOPNOTSUPP; 1592 1593 ifp = if_get(name, &psref); 1594 if (ifp == NULL) 1595 return ENXIO; 1596 1597 /* We have to disable ioctls here */ 1598 mutex_enter(ifp->if_ioctl_lock); 1599 ifp->if_ioctl = if_nullioctl; 1600 mutex_exit(ifp->if_ioctl_lock); 1601 1602 /* 1603 * We cannot call ifc_destroy with holding ifp. 1604 * Releasing ifp here is safe thanks to if_clone_mtx. 1605 */ 1606 if_put(ifp, &psref); 1607 1608 return (*ifc->ifc_destroy)(ifp); 1609 } 1610 1611 static bool 1612 if_is_unit(const char *name) 1613 { 1614 1615 while(*name != '\0') { 1616 if (*name < '0' || *name > '9') 1617 return false; 1618 name++; 1619 } 1620 1621 return true; 1622 } 1623 1624 /* 1625 * Look up a network interface cloner. 
1626 */ 1627 static struct if_clone * 1628 if_clone_lookup(const char *name, int *unitp) 1629 { 1630 struct if_clone *ifc; 1631 const char *cp; 1632 char *dp, ifname[IFNAMSIZ + 3]; 1633 int unit; 1634 1635 KASSERT(mutex_owned(&if_clone_mtx)); 1636 1637 strcpy(ifname, "if_"); 1638 /* separate interface name from unit */ 1639 /* TODO: search unit number from backward */ 1640 for (dp = ifname + 3, cp = name; cp - name < IFNAMSIZ && 1641 *cp && !if_is_unit(cp);) 1642 *dp++ = *cp++; 1643 1644 if (cp == name || cp - name == IFNAMSIZ || !*cp) 1645 return NULL; /* No name or unit number */ 1646 *dp++ = '\0'; 1647 1648 again: 1649 LIST_FOREACH(ifc, &if_cloners, ifc_list) { 1650 if (strcmp(ifname + 3, ifc->ifc_name) == 0) 1651 break; 1652 } 1653 1654 if (ifc == NULL) { 1655 int error; 1656 if (*ifname == '\0') 1657 return NULL; 1658 mutex_exit(&if_clone_mtx); 1659 error = module_autoload(ifname, MODULE_CLASS_DRIVER); 1660 mutex_enter(&if_clone_mtx); 1661 if (error) 1662 return NULL; 1663 *ifname = '\0'; 1664 goto again; 1665 } 1666 1667 unit = 0; 1668 while (cp - name < IFNAMSIZ && *cp) { 1669 if (*cp < '0' || *cp > '9' || unit >= INT_MAX / 10) { 1670 /* Bogus unit number. */ 1671 return NULL; 1672 } 1673 unit = (unit * 10) + (*cp++ - '0'); 1674 } 1675 1676 if (unitp != NULL) 1677 *unitp = unit; 1678 return ifc; 1679 } 1680 1681 /* 1682 * Register a network interface cloner. 1683 */ 1684 void 1685 if_clone_attach(struct if_clone *ifc) 1686 { 1687 1688 mutex_enter(&if_clone_mtx); 1689 LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list); 1690 if_cloners_count++; 1691 mutex_exit(&if_clone_mtx); 1692 } 1693 1694 /* 1695 * Unregister a network interface cloner. 1696 */ 1697 void 1698 if_clone_detach(struct if_clone *ifc) 1699 { 1700 1701 mutex_enter(&if_clone_mtx); 1702 LIST_REMOVE(ifc, ifc_list); 1703 if_cloners_count--; 1704 mutex_exit(&if_clone_mtx); 1705 } 1706 1707 /* 1708 * Provide list of interface cloners to userspace. 
1709 */ 1710 int 1711 if_clone_list(int buf_count, char *buffer, int *total) 1712 { 1713 char outbuf[IFNAMSIZ], *dst; 1714 struct if_clone *ifc; 1715 int count, error = 0; 1716 1717 mutex_enter(&if_clone_mtx); 1718 *total = if_cloners_count; 1719 if ((dst = buffer) == NULL) { 1720 /* Just asking how many there are. */ 1721 goto out; 1722 } 1723 1724 if (buf_count < 0) { 1725 error = EINVAL; 1726 goto out; 1727 } 1728 1729 count = (if_cloners_count < buf_count) ? 1730 if_cloners_count : buf_count; 1731 1732 for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0; 1733 ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) { 1734 (void)strncpy(outbuf, ifc->ifc_name, sizeof(outbuf)); 1735 if (outbuf[sizeof(outbuf) - 1] != '\0') { 1736 error = ENAMETOOLONG; 1737 goto out; 1738 } 1739 error = copyout(outbuf, dst, sizeof(outbuf)); 1740 if (error != 0) 1741 break; 1742 } 1743 1744 out: 1745 mutex_exit(&if_clone_mtx); 1746 return error; 1747 } 1748 1749 void 1750 ifa_psref_init(struct ifaddr *ifa) 1751 { 1752 1753 psref_target_init(&ifa->ifa_psref, ifa_psref_class); 1754 } 1755 1756 void 1757 ifaref(struct ifaddr *ifa) 1758 { 1759 KASSERT(!ISSET(ifa->ifa_flags, IFA_DESTROYING)); 1760 ifa->ifa_refcnt++; 1761 } 1762 1763 void 1764 ifafree(struct ifaddr *ifa) 1765 { 1766 KASSERT(ifa != NULL); 1767 KASSERT(ifa->ifa_refcnt > 0); 1768 1769 if (--ifa->ifa_refcnt == 0) { 1770 free(ifa, M_IFADDR); 1771 } 1772 } 1773 1774 bool 1775 ifa_is_destroying(struct ifaddr *ifa) 1776 { 1777 1778 return ISSET(ifa->ifa_flags, IFA_DESTROYING); 1779 } 1780 1781 void 1782 ifa_insert(struct ifnet *ifp, struct ifaddr *ifa) 1783 { 1784 1785 ifa->ifa_ifp = ifp; 1786 1787 IFNET_LOCK(); 1788 TAILQ_INSERT_TAIL(&ifp->if_addrlist, ifa, ifa_list); 1789 IFADDR_ENTRY_INIT(ifa); 1790 IFADDR_WRITER_INSERT_TAIL(ifp, ifa); 1791 IFNET_UNLOCK(); 1792 1793 ifaref(ifa); 1794 } 1795 1796 void 1797 ifa_remove(struct ifnet *ifp, struct ifaddr *ifa) 1798 { 1799 1800 KASSERT(ifa->ifa_ifp == ifp); 1801 1802 
IFNET_LOCK(); 1803 TAILQ_REMOVE(&ifp->if_addrlist, ifa, ifa_list); 1804 IFADDR_WRITER_REMOVE(ifa); 1805 #ifdef NET_MPSAFE 1806 pserialize_perform(ifnet_psz); 1807 #endif 1808 IFNET_UNLOCK(); 1809 1810 #ifdef NET_MPSAFE 1811 psref_target_destroy(&ifa->ifa_psref, ifa_psref_class); 1812 #endif 1813 IFADDR_ENTRY_DESTROY(ifa); 1814 ifafree(ifa); 1815 } 1816 1817 void 1818 ifa_acquire(struct ifaddr *ifa, struct psref *psref) 1819 { 1820 1821 psref_acquire(psref, &ifa->ifa_psref, ifa_psref_class); 1822 } 1823 1824 void 1825 ifa_release(struct ifaddr *ifa, struct psref *psref) 1826 { 1827 1828 if (ifa == NULL) 1829 return; 1830 1831 psref_release(psref, &ifa->ifa_psref, ifa_psref_class); 1832 } 1833 1834 bool 1835 ifa_held(struct ifaddr *ifa) 1836 { 1837 1838 return psref_held(&ifa->ifa_psref, ifa_psref_class); 1839 } 1840 1841 static inline int 1842 equal(const struct sockaddr *sa1, const struct sockaddr *sa2) 1843 { 1844 return sockaddr_cmp(sa1, sa2) == 0; 1845 } 1846 1847 /* 1848 * Locate an interface based on a complete address. 
1849 */ 1850 /*ARGSUSED*/ 1851 struct ifaddr * 1852 ifa_ifwithaddr(const struct sockaddr *addr) 1853 { 1854 struct ifnet *ifp; 1855 struct ifaddr *ifa; 1856 1857 IFNET_READER_FOREACH(ifp) { 1858 if (if_is_deactivated(ifp)) 1859 continue; 1860 IFADDR_READER_FOREACH(ifa, ifp) { 1861 if (ifa->ifa_addr->sa_family != addr->sa_family) 1862 continue; 1863 if (equal(addr, ifa->ifa_addr)) 1864 return ifa; 1865 if ((ifp->if_flags & IFF_BROADCAST) && 1866 ifa->ifa_broadaddr && 1867 /* IP6 doesn't have broadcast */ 1868 ifa->ifa_broadaddr->sa_len != 0 && 1869 equal(ifa->ifa_broadaddr, addr)) 1870 return ifa; 1871 } 1872 } 1873 return NULL; 1874 } 1875 1876 struct ifaddr * 1877 ifa_ifwithaddr_psref(const struct sockaddr *addr, struct psref *psref) 1878 { 1879 struct ifaddr *ifa; 1880 int s = pserialize_read_enter(); 1881 1882 ifa = ifa_ifwithaddr(addr); 1883 if (ifa != NULL) 1884 ifa_acquire(ifa, psref); 1885 pserialize_read_exit(s); 1886 1887 return ifa; 1888 } 1889 1890 /* 1891 * Locate the point to point interface with a given destination address. 1892 */ 1893 /*ARGSUSED*/ 1894 struct ifaddr * 1895 ifa_ifwithdstaddr(const struct sockaddr *addr) 1896 { 1897 struct ifnet *ifp; 1898 struct ifaddr *ifa; 1899 1900 IFNET_READER_FOREACH(ifp) { 1901 if (if_is_deactivated(ifp)) 1902 continue; 1903 if ((ifp->if_flags & IFF_POINTOPOINT) == 0) 1904 continue; 1905 IFADDR_READER_FOREACH(ifa, ifp) { 1906 if (ifa->ifa_addr->sa_family != addr->sa_family || 1907 ifa->ifa_dstaddr == NULL) 1908 continue; 1909 if (equal(addr, ifa->ifa_dstaddr)) 1910 return ifa; 1911 } 1912 } 1913 1914 return NULL; 1915 } 1916 1917 struct ifaddr * 1918 ifa_ifwithdstaddr_psref(const struct sockaddr *addr, struct psref *psref) 1919 { 1920 struct ifaddr *ifa; 1921 int s; 1922 1923 s = pserialize_read_enter(); 1924 ifa = ifa_ifwithdstaddr(addr); 1925 if (ifa != NULL) 1926 ifa_acquire(ifa, psref); 1927 pserialize_read_exit(s); 1928 1929 return ifa; 1930 } 1931 1932 /* 1933 * Find an interface on a specific network. 
If many, choice 1934 * is most specific found. 1935 */ 1936 struct ifaddr * 1937 ifa_ifwithnet(const struct sockaddr *addr) 1938 { 1939 struct ifnet *ifp; 1940 struct ifaddr *ifa, *ifa_maybe = NULL; 1941 const struct sockaddr_dl *sdl; 1942 u_int af = addr->sa_family; 1943 const char *addr_data = addr->sa_data, *cplim; 1944 1945 if (af == AF_LINK) { 1946 sdl = satocsdl(addr); 1947 if (sdl->sdl_index && sdl->sdl_index < if_indexlim && 1948 ifindex2ifnet[sdl->sdl_index] && 1949 !if_is_deactivated(ifindex2ifnet[sdl->sdl_index])) { 1950 return ifindex2ifnet[sdl->sdl_index]->if_dl; 1951 } 1952 } 1953 #ifdef NETATALK 1954 if (af == AF_APPLETALK) { 1955 const struct sockaddr_at *sat, *sat2; 1956 sat = (const struct sockaddr_at *)addr; 1957 IFNET_READER_FOREACH(ifp) { 1958 if (if_is_deactivated(ifp)) 1959 continue; 1960 ifa = at_ifawithnet((const struct sockaddr_at *)addr, ifp); 1961 if (ifa == NULL) 1962 continue; 1963 sat2 = (struct sockaddr_at *)ifa->ifa_addr; 1964 if (sat2->sat_addr.s_net == sat->sat_addr.s_net) 1965 return ifa; /* exact match */ 1966 if (ifa_maybe == NULL) { 1967 /* else keep the if with the right range */ 1968 ifa_maybe = ifa; 1969 } 1970 } 1971 return ifa_maybe; 1972 } 1973 #endif 1974 IFNET_READER_FOREACH(ifp) { 1975 if (if_is_deactivated(ifp)) 1976 continue; 1977 IFADDR_READER_FOREACH(ifa, ifp) { 1978 const char *cp, *cp2, *cp3; 1979 1980 if (ifa->ifa_addr->sa_family != af || 1981 ifa->ifa_netmask == NULL) 1982 next: continue; 1983 cp = addr_data; 1984 cp2 = ifa->ifa_addr->sa_data; 1985 cp3 = ifa->ifa_netmask->sa_data; 1986 cplim = (const char *)ifa->ifa_netmask + 1987 ifa->ifa_netmask->sa_len; 1988 while (cp3 < cplim) { 1989 if ((*cp++ ^ *cp2++) & *cp3++) { 1990 /* want to continue for() loop */ 1991 goto next; 1992 } 1993 } 1994 if (ifa_maybe == NULL || 1995 rt_refines(ifa->ifa_netmask, 1996 ifa_maybe->ifa_netmask)) 1997 ifa_maybe = ifa; 1998 } 1999 } 2000 return ifa_maybe; 2001 } 2002 2003 struct ifaddr * 2004 ifa_ifwithnet_psref(const struct 
sockaddr *addr, struct psref *psref) 2005 { 2006 struct ifaddr *ifa; 2007 int s; 2008 2009 s = pserialize_read_enter(); 2010 ifa = ifa_ifwithnet(addr); 2011 if (ifa != NULL) 2012 ifa_acquire(ifa, psref); 2013 pserialize_read_exit(s); 2014 2015 return ifa; 2016 } 2017 2018 /* 2019 * Find the interface of the addresss. 2020 */ 2021 struct ifaddr * 2022 ifa_ifwithladdr(const struct sockaddr *addr) 2023 { 2024 struct ifaddr *ia; 2025 2026 if ((ia = ifa_ifwithaddr(addr)) || (ia = ifa_ifwithdstaddr(addr)) || 2027 (ia = ifa_ifwithnet(addr))) 2028 return ia; 2029 return NULL; 2030 } 2031 2032 struct ifaddr * 2033 ifa_ifwithladdr_psref(const struct sockaddr *addr, struct psref *psref) 2034 { 2035 struct ifaddr *ifa; 2036 int s; 2037 2038 s = pserialize_read_enter(); 2039 ifa = ifa_ifwithladdr(addr); 2040 if (ifa != NULL) 2041 ifa_acquire(ifa, psref); 2042 pserialize_read_exit(s); 2043 2044 return ifa; 2045 } 2046 2047 /* 2048 * Find an interface using a specific address family 2049 */ 2050 struct ifaddr * 2051 ifa_ifwithaf(int af) 2052 { 2053 struct ifnet *ifp; 2054 struct ifaddr *ifa = NULL; 2055 int s; 2056 2057 s = pserialize_read_enter(); 2058 IFNET_READER_FOREACH(ifp) { 2059 if (if_is_deactivated(ifp)) 2060 continue; 2061 IFADDR_READER_FOREACH(ifa, ifp) { 2062 if (ifa->ifa_addr->sa_family == af) 2063 goto out; 2064 } 2065 } 2066 out: 2067 pserialize_read_exit(s); 2068 return ifa; 2069 } 2070 2071 /* 2072 * Find an interface address specific to an interface best matching 2073 * a given address. 
2074 */ 2075 struct ifaddr * 2076 ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) 2077 { 2078 struct ifaddr *ifa; 2079 const char *cp, *cp2, *cp3; 2080 const char *cplim; 2081 struct ifaddr *ifa_maybe = 0; 2082 u_int af = addr->sa_family; 2083 2084 if (if_is_deactivated(ifp)) 2085 return NULL; 2086 2087 if (af >= AF_MAX) 2088 return NULL; 2089 2090 IFADDR_READER_FOREACH(ifa, ifp) { 2091 if (ifa->ifa_addr->sa_family != af) 2092 continue; 2093 ifa_maybe = ifa; 2094 if (ifa->ifa_netmask == NULL) { 2095 if (equal(addr, ifa->ifa_addr) || 2096 (ifa->ifa_dstaddr && 2097 equal(addr, ifa->ifa_dstaddr))) 2098 return ifa; 2099 continue; 2100 } 2101 cp = addr->sa_data; 2102 cp2 = ifa->ifa_addr->sa_data; 2103 cp3 = ifa->ifa_netmask->sa_data; 2104 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; 2105 for (; cp3 < cplim; cp3++) { 2106 if ((*cp++ ^ *cp2++) & *cp3) 2107 break; 2108 } 2109 if (cp3 == cplim) 2110 return ifa; 2111 } 2112 return ifa_maybe; 2113 } 2114 2115 struct ifaddr * 2116 ifaof_ifpforaddr_psref(const struct sockaddr *addr, struct ifnet *ifp, 2117 struct psref *psref) 2118 { 2119 struct ifaddr *ifa; 2120 int s; 2121 2122 s = pserialize_read_enter(); 2123 ifa = ifaof_ifpforaddr(addr, ifp); 2124 if (ifa != NULL) 2125 ifa_acquire(ifa, psref); 2126 pserialize_read_exit(s); 2127 2128 return ifa; 2129 } 2130 2131 /* 2132 * Default action when installing a route with a Link Level gateway. 2133 * Lookup an appropriate real ifa to point to. 2134 * This should be moved to /sys/net/link.c eventually. 
2135 */ 2136 void 2137 link_rtrequest(int cmd, struct rtentry *rt, const struct rt_addrinfo *info) 2138 { 2139 struct ifaddr *ifa; 2140 const struct sockaddr *dst; 2141 struct ifnet *ifp; 2142 struct psref psref; 2143 2144 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL || 2145 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_getkey(rt)) == NULL) 2146 return; 2147 if ((ifa = ifaof_ifpforaddr_psref(dst, ifp, &psref)) != NULL) { 2148 rt_replace_ifa(rt, ifa); 2149 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) 2150 ifa->ifa_rtrequest(cmd, rt, info); 2151 ifa_release(ifa, &psref); 2152 } 2153 } 2154 2155 /* 2156 * bitmask macros to manage a densely packed link_state change queue. 2157 * Because we need to store LINK_STATE_UNKNOWN(0), LINK_STATE_DOWN(1) and 2158 * LINK_STATE_UP(2) we need 2 bits for each state change. 2159 * As a state change to store is 0, treat all bits set as an unset item. 2160 */ 2161 #define LQ_ITEM_BITS 2 2162 #define LQ_ITEM_MASK ((1 << LQ_ITEM_BITS) - 1) 2163 #define LQ_MASK(i) (LQ_ITEM_MASK << (i) * LQ_ITEM_BITS) 2164 #define LINK_STATE_UNSET LQ_ITEM_MASK 2165 #define LQ_ITEM(q, i) (((q) & LQ_MASK((i))) >> (i) * LQ_ITEM_BITS) 2166 #define LQ_STORE(q, i, v) \ 2167 do { \ 2168 (q) &= ~LQ_MASK((i)); \ 2169 (q) |= (v) << (i) * LQ_ITEM_BITS; \ 2170 } while (0 /* CONSTCOND */) 2171 #define LQ_MAX(q) ((sizeof((q)) * NBBY) / LQ_ITEM_BITS) 2172 #define LQ_POP(q, v) \ 2173 do { \ 2174 (v) = LQ_ITEM((q), 0); \ 2175 (q) >>= LQ_ITEM_BITS; \ 2176 (q) |= LINK_STATE_UNSET << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS; \ 2177 } while (0 /* CONSTCOND */) 2178 #define LQ_PUSH(q, v) \ 2179 do { \ 2180 (q) >>= LQ_ITEM_BITS; \ 2181 (q) |= (v) << (LQ_MAX((q)) - 1) * LQ_ITEM_BITS; \ 2182 } while (0 /* CONSTCOND */) 2183 #define LQ_FIND_UNSET(q, i) \ 2184 for ((i) = 0; i < LQ_MAX((q)); (i)++) { \ 2185 if (LQ_ITEM((q), (i)) == LINK_STATE_UNSET) \ 2186 break; \ 2187 } 2188 /* 2189 * Handle a change in the interface link state and 2190 * queue notifications. 
2191 */ 2192 void 2193 if_link_state_change(struct ifnet *ifp, int link_state) 2194 { 2195 int s, idx; 2196 2197 KASSERTMSG(if_is_link_state_changeable(ifp), 2198 "%s: IFEF_NO_LINK_STATE_CHANGE must not be set, but if_extflags=0x%x", 2199 ifp->if_xname, ifp->if_extflags); 2200 2201 /* Ensure change is to a valid state */ 2202 switch (link_state) { 2203 case LINK_STATE_UNKNOWN: /* FALLTHROUGH */ 2204 case LINK_STATE_DOWN: /* FALLTHROUGH */ 2205 case LINK_STATE_UP: 2206 break; 2207 default: 2208 #ifdef DEBUG 2209 printf("%s: invalid link state %d\n", 2210 ifp->if_xname, link_state); 2211 #endif 2212 return; 2213 } 2214 2215 s = splnet(); 2216 2217 /* Find the last unset event in the queue. */ 2218 LQ_FIND_UNSET(ifp->if_link_queue, idx); 2219 2220 /* 2221 * Ensure link_state doesn't match the last event in the queue. 2222 * ifp->if_link_state is not checked and set here because 2223 * that would present an inconsistent picture to the system. 2224 */ 2225 if (idx != 0 && 2226 LQ_ITEM(ifp->if_link_queue, idx - 1) == (uint8_t)link_state) 2227 goto out; 2228 2229 /* Handle queue overflow. */ 2230 if (idx == LQ_MAX(ifp->if_link_queue)) { 2231 uint8_t lost; 2232 2233 /* 2234 * The DOWN state must be protected from being pushed off 2235 * the queue to ensure that userland will always be 2236 * in a sane state. 2237 * Because DOWN is protected, there is no need to protect 2238 * UNKNOWN. 2239 * It should be invalid to change from any other state to 2240 * UNKNOWN anyway ... 2241 */ 2242 lost = LQ_ITEM(ifp->if_link_queue, 0); 2243 LQ_PUSH(ifp->if_link_queue, (uint8_t)link_state); 2244 if (lost == LINK_STATE_DOWN) { 2245 lost = LQ_ITEM(ifp->if_link_queue, 0); 2246 LQ_STORE(ifp->if_link_queue, 0, LINK_STATE_DOWN); 2247 } 2248 printf("%s: lost link state change %s\n", 2249 ifp->if_xname, 2250 lost == LINK_STATE_UP ? "UP" : 2251 lost == LINK_STATE_DOWN ? 
"DOWN" : 2252 "UNKNOWN"); 2253 } else 2254 LQ_STORE(ifp->if_link_queue, idx, (uint8_t)link_state); 2255 2256 softint_schedule(ifp->if_link_si); 2257 2258 out: 2259 splx(s); 2260 } 2261 2262 /* 2263 * Handle interface link state change notifications. 2264 */ 2265 void 2266 if_link_state_change_softint(struct ifnet *ifp, int link_state) 2267 { 2268 struct domain *dp; 2269 int s = splnet(); 2270 2271 KASSERT(!cpu_intr_p()); 2272 2273 /* Ensure the change is still valid. */ 2274 if (ifp->if_link_state == link_state) { 2275 splx(s); 2276 return; 2277 } 2278 2279 #ifdef DEBUG 2280 log(LOG_DEBUG, "%s: link state %s (was %s)\n", ifp->if_xname, 2281 link_state == LINK_STATE_UP ? "UP" : 2282 link_state == LINK_STATE_DOWN ? "DOWN" : 2283 "UNKNOWN", 2284 ifp->if_link_state == LINK_STATE_UP ? "UP" : 2285 ifp->if_link_state == LINK_STATE_DOWN ? "DOWN" : 2286 "UNKNOWN"); 2287 #endif 2288 2289 /* 2290 * When going from UNKNOWN to UP, we need to mark existing 2291 * addresses as tentative and restart DAD as we may have 2292 * erroneously not found a duplicate. 2293 * 2294 * This needs to happen before rt_ifmsg to avoid a race where 2295 * listeners would have an address and expect it to work right 2296 * away. 2297 */ 2298 if (link_state == LINK_STATE_UP && 2299 ifp->if_link_state == LINK_STATE_UNKNOWN) 2300 { 2301 DOMAIN_FOREACH(dp) { 2302 if (dp->dom_if_link_state_change != NULL) 2303 dp->dom_if_link_state_change(ifp, 2304 LINK_STATE_DOWN); 2305 } 2306 } 2307 2308 ifp->if_link_state = link_state; 2309 2310 /* Notify that the link state has changed. */ 2311 rt_ifmsg(ifp); 2312 2313 #if NCARP > 0 2314 if (ifp->if_carp) 2315 carp_carpdev_state(ifp); 2316 #endif 2317 2318 DOMAIN_FOREACH(dp) { 2319 if (dp->dom_if_link_state_change != NULL) 2320 dp->dom_if_link_state_change(ifp, link_state); 2321 } 2322 splx(s); 2323 } 2324 2325 /* 2326 * Process the interface link state change queue. 
2327 */ 2328 static void 2329 if_link_state_change_si(void *arg) 2330 { 2331 struct ifnet *ifp = arg; 2332 int s; 2333 uint8_t state; 2334 2335 SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE(); 2336 s = splnet(); 2337 2338 /* Pop a link state change from the queue and process it. */ 2339 LQ_POP(ifp->if_link_queue, state); 2340 if_link_state_change_softint(ifp, state); 2341 2342 /* If there is a link state change to come, schedule it. */ 2343 if (LQ_ITEM(ifp->if_link_queue, 0) != LINK_STATE_UNSET) 2344 softint_schedule(ifp->if_link_si); 2345 2346 splx(s); 2347 SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 2348 } 2349 2350 /* 2351 * Default action when installing a local route on a point-to-point 2352 * interface. 2353 */ 2354 void 2355 p2p_rtrequest(int req, struct rtentry *rt, 2356 __unused const struct rt_addrinfo *info) 2357 { 2358 struct ifnet *ifp = rt->rt_ifp; 2359 struct ifaddr *ifa, *lo0ifa; 2360 int s = pserialize_read_enter(); 2361 2362 switch (req) { 2363 case RTM_ADD: 2364 if ((rt->rt_flags & RTF_LOCAL) == 0) 2365 break; 2366 2367 rt->rt_ifp = lo0ifp; 2368 2369 IFADDR_READER_FOREACH(ifa, ifp) { 2370 if (equal(rt_getkey(rt), ifa->ifa_addr)) 2371 break; 2372 } 2373 if (ifa == NULL) 2374 break; 2375 2376 /* 2377 * Ensure lo0 has an address of the same family. 2378 */ 2379 IFADDR_READER_FOREACH(lo0ifa, lo0ifp) { 2380 if (lo0ifa->ifa_addr->sa_family == 2381 ifa->ifa_addr->sa_family) 2382 break; 2383 } 2384 if (lo0ifa == NULL) 2385 break; 2386 2387 /* 2388 * Make sure to set rt->rt_ifa to the interface 2389 * address we are using, otherwise we will have trouble 2390 * with source address selection. 2391 */ 2392 if (ifa != rt->rt_ifa) 2393 rt_replace_ifa(rt, ifa); 2394 break; 2395 case RTM_DELETE: 2396 default: 2397 break; 2398 } 2399 pserialize_read_exit(s); 2400 } 2401 2402 /* 2403 * Mark an interface down and notify protocols of 2404 * the transition. 2405 * NOTE: must be called at splsoftnet or equivalent. 
2406 */ 2407 void 2408 if_down(struct ifnet *ifp) 2409 { 2410 struct ifaddr *ifa; 2411 struct domain *dp; 2412 int s, bound; 2413 struct psref psref; 2414 2415 ifp->if_flags &= ~IFF_UP; 2416 nanotime(&ifp->if_lastchange); 2417 2418 bound = curlwp_bind(); 2419 s = pserialize_read_enter(); 2420 IFADDR_READER_FOREACH(ifa, ifp) { 2421 ifa_acquire(ifa, &psref); 2422 pserialize_read_exit(s); 2423 2424 pfctlinput(PRC_IFDOWN, ifa->ifa_addr); 2425 2426 s = pserialize_read_enter(); 2427 ifa_release(ifa, &psref); 2428 } 2429 pserialize_read_exit(s); 2430 curlwp_bindx(bound); 2431 2432 IFQ_PURGE(&ifp->if_snd); 2433 #if NCARP > 0 2434 if (ifp->if_carp) 2435 carp_carpdev_state(ifp); 2436 #endif 2437 rt_ifmsg(ifp); 2438 DOMAIN_FOREACH(dp) { 2439 if (dp->dom_if_down) 2440 dp->dom_if_down(ifp); 2441 } 2442 } 2443 2444 /* 2445 * Mark an interface up and notify protocols of 2446 * the transition. 2447 * NOTE: must be called at splsoftnet or equivalent. 2448 */ 2449 void 2450 if_up(struct ifnet *ifp) 2451 { 2452 #ifdef notyet 2453 struct ifaddr *ifa; 2454 #endif 2455 struct domain *dp; 2456 2457 ifp->if_flags |= IFF_UP; 2458 nanotime(&ifp->if_lastchange); 2459 #ifdef notyet 2460 /* this has no effect on IP, and will kill all ISO connections XXX */ 2461 IFADDR_READER_FOREACH(ifa, ifp) 2462 pfctlinput(PRC_IFUP, ifa->ifa_addr); 2463 #endif 2464 #if NCARP > 0 2465 if (ifp->if_carp) 2466 carp_carpdev_state(ifp); 2467 #endif 2468 rt_ifmsg(ifp); 2469 DOMAIN_FOREACH(dp) { 2470 if (dp->dom_if_up) 2471 dp->dom_if_up(ifp); 2472 } 2473 } 2474 2475 /* 2476 * Handle interface slowtimo timer routine. Called 2477 * from softclock, we decrement timer (if set) and 2478 * call the appropriate interface routine on expiration. 
2479 */ 2480 static void 2481 if_slowtimo(void *arg) 2482 { 2483 void (*slowtimo)(struct ifnet *); 2484 struct ifnet *ifp = arg; 2485 int s; 2486 2487 slowtimo = ifp->if_slowtimo; 2488 if (__predict_false(slowtimo == NULL)) 2489 return; 2490 2491 s = splnet(); 2492 if (ifp->if_timer != 0 && --ifp->if_timer == 0) 2493 (*slowtimo)(ifp); 2494 2495 splx(s); 2496 2497 if (__predict_true(ifp->if_slowtimo != NULL)) 2498 callout_schedule(ifp->if_slowtimo_ch, hz / IFNET_SLOWHZ); 2499 } 2500 2501 /* 2502 * Set/clear promiscuous mode on interface ifp based on the truth value 2503 * of pswitch. The calls are reference counted so that only the first 2504 * "on" request actually has an effect, as does the final "off" request. 2505 * Results are undefined if the "off" and "on" requests are not matched. 2506 */ 2507 int 2508 ifpromisc(struct ifnet *ifp, int pswitch) 2509 { 2510 int pcount, ret; 2511 short nflags; 2512 2513 pcount = ifp->if_pcount; 2514 if (pswitch) { 2515 /* 2516 * Allow the device to be "placed" into promiscuous 2517 * mode even if it is not configured up. It will 2518 * consult IFF_PROMISC when it is brought up. 2519 */ 2520 if (ifp->if_pcount++ != 0) 2521 return 0; 2522 nflags = ifp->if_flags | IFF_PROMISC; 2523 } else { 2524 if (--ifp->if_pcount > 0) 2525 return 0; 2526 nflags = ifp->if_flags & ~IFF_PROMISC; 2527 } 2528 ret = if_flags_set(ifp, nflags); 2529 /* Restore interface state if not successful. */ 2530 if (ret != 0) { 2531 ifp->if_pcount = pcount; 2532 } 2533 return ret; 2534 } 2535 2536 /* 2537 * Map interface name to 2538 * interface structure pointer. 2539 */ 2540 struct ifnet * 2541 ifunit(const char *name) 2542 { 2543 struct ifnet *ifp; 2544 const char *cp = name; 2545 u_int unit = 0; 2546 u_int i; 2547 int s; 2548 2549 /* 2550 * If the entire name is a number, treat it as an ifindex. 
2551 */ 2552 for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++) { 2553 unit = unit * 10 + (*cp - '0'); 2554 } 2555 2556 /* 2557 * If the number took all of the name, then it's a valid ifindex. 2558 */ 2559 if (i == IFNAMSIZ || (cp != name && *cp == '\0')) 2560 return if_byindex(unit); 2561 2562 ifp = NULL; 2563 s = pserialize_read_enter(); 2564 IFNET_READER_FOREACH(ifp) { 2565 if (if_is_deactivated(ifp)) 2566 continue; 2567 if (strcmp(ifp->if_xname, name) == 0) 2568 goto out; 2569 } 2570 out: 2571 pserialize_read_exit(s); 2572 return ifp; 2573 } 2574 2575 /* 2576 * Get a reference of an ifnet object by an interface name. 2577 * The returned reference is protected by psref(9). The caller 2578 * must release a returned reference by if_put after use. 2579 */ 2580 struct ifnet * 2581 if_get(const char *name, struct psref *psref) 2582 { 2583 struct ifnet *ifp; 2584 const char *cp = name; 2585 u_int unit = 0; 2586 u_int i; 2587 int s; 2588 2589 /* 2590 * If the entire name is a number, treat it as an ifindex. 2591 */ 2592 for (i = 0; i < IFNAMSIZ && *cp >= '0' && *cp <= '9'; i++, cp++) { 2593 unit = unit * 10 + (*cp - '0'); 2594 } 2595 2596 /* 2597 * If the number took all of the name, then it's a valid ifindex. 2598 */ 2599 if (i == IFNAMSIZ || (cp != name && *cp == '\0')) 2600 return if_get_byindex(unit, psref); 2601 2602 ifp = NULL; 2603 s = pserialize_read_enter(); 2604 IFNET_READER_FOREACH(ifp) { 2605 if (if_is_deactivated(ifp)) 2606 continue; 2607 if (strcmp(ifp->if_xname, name) == 0) { 2608 psref_acquire(psref, &ifp->if_psref, 2609 ifnet_psref_class); 2610 goto out; 2611 } 2612 } 2613 out: 2614 pserialize_read_exit(s); 2615 return ifp; 2616 } 2617 2618 /* 2619 * Release a reference of an ifnet object given by if_get, if_get_byindex 2620 * or if_get_bylla. 
2621 */ 2622 void 2623 if_put(const struct ifnet *ifp, struct psref *psref) 2624 { 2625 2626 if (ifp == NULL) 2627 return; 2628 2629 psref_release(psref, &ifp->if_psref, ifnet_psref_class); 2630 } 2631 2632 ifnet_t * 2633 if_byindex(u_int idx) 2634 { 2635 ifnet_t *ifp; 2636 2637 ifp = (__predict_true(idx < if_indexlim)) ? ifindex2ifnet[idx] : NULL; 2638 if (ifp != NULL && if_is_deactivated(ifp)) 2639 ifp = NULL; 2640 return ifp; 2641 } 2642 2643 /* 2644 * Get a reference of an ifnet object by an interface index. 2645 * The returned reference is protected by psref(9). The caller 2646 * must release a returned reference by if_put after use. 2647 */ 2648 ifnet_t * 2649 if_get_byindex(u_int idx, struct psref *psref) 2650 { 2651 ifnet_t *ifp; 2652 int s; 2653 2654 s = pserialize_read_enter(); 2655 ifp = if_byindex(idx); 2656 if (__predict_true(ifp != NULL)) 2657 psref_acquire(psref, &ifp->if_psref, ifnet_psref_class); 2658 pserialize_read_exit(s); 2659 2660 return ifp; 2661 } 2662 2663 ifnet_t * 2664 if_get_bylla(const void *lla, unsigned char lla_len, struct psref *psref) 2665 { 2666 ifnet_t *ifp; 2667 int s; 2668 2669 s = pserialize_read_enter(); 2670 IFNET_READER_FOREACH(ifp) { 2671 if (if_is_deactivated(ifp)) 2672 continue; 2673 if (ifp->if_addrlen != lla_len) 2674 continue; 2675 if (memcmp(lla, CLLADDR(ifp->if_sadl), lla_len) == 0) { 2676 psref_acquire(psref, &ifp->if_psref, 2677 ifnet_psref_class); 2678 break; 2679 } 2680 } 2681 pserialize_read_exit(s); 2682 2683 return ifp; 2684 } 2685 2686 /* 2687 * Note that it's safe only if the passed ifp is guaranteed to not be freed, 2688 * for example using pserialize or the ifp is already held or some other 2689 * object is held which guarantes the ifp to not be freed indirectly. 
2690 */ 2691 void 2692 if_acquire(struct ifnet *ifp, struct psref *psref) 2693 { 2694 2695 KASSERT(ifp->if_index != 0); 2696 psref_acquire(psref, &ifp->if_psref, ifnet_psref_class); 2697 } 2698 2699 bool 2700 if_held(struct ifnet *ifp) 2701 { 2702 2703 return psref_held(&ifp->if_psref, ifnet_psref_class); 2704 } 2705 2706 2707 /* common */ 2708 int 2709 ifioctl_common(struct ifnet *ifp, u_long cmd, void *data) 2710 { 2711 int s; 2712 struct ifreq *ifr; 2713 struct ifcapreq *ifcr; 2714 struct ifdatareq *ifdr; 2715 2716 switch (cmd) { 2717 case SIOCSIFCAP: 2718 ifcr = data; 2719 if ((ifcr->ifcr_capenable & ~ifp->if_capabilities) != 0) 2720 return EINVAL; 2721 2722 if (ifcr->ifcr_capenable == ifp->if_capenable) 2723 return 0; 2724 2725 ifp->if_capenable = ifcr->ifcr_capenable; 2726 2727 /* Pre-compute the checksum flags mask. */ 2728 ifp->if_csum_flags_tx = 0; 2729 ifp->if_csum_flags_rx = 0; 2730 if (ifp->if_capenable & IFCAP_CSUM_IPv4_Tx) { 2731 ifp->if_csum_flags_tx |= M_CSUM_IPv4; 2732 } 2733 if (ifp->if_capenable & IFCAP_CSUM_IPv4_Rx) { 2734 ifp->if_csum_flags_rx |= M_CSUM_IPv4; 2735 } 2736 2737 if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Tx) { 2738 ifp->if_csum_flags_tx |= M_CSUM_TCPv4; 2739 } 2740 if (ifp->if_capenable & IFCAP_CSUM_TCPv4_Rx) { 2741 ifp->if_csum_flags_rx |= M_CSUM_TCPv4; 2742 } 2743 2744 if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Tx) { 2745 ifp->if_csum_flags_tx |= M_CSUM_UDPv4; 2746 } 2747 if (ifp->if_capenable & IFCAP_CSUM_UDPv4_Rx) { 2748 ifp->if_csum_flags_rx |= M_CSUM_UDPv4; 2749 } 2750 2751 if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Tx) { 2752 ifp->if_csum_flags_tx |= M_CSUM_TCPv6; 2753 } 2754 if (ifp->if_capenable & IFCAP_CSUM_TCPv6_Rx) { 2755 ifp->if_csum_flags_rx |= M_CSUM_TCPv6; 2756 } 2757 2758 if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Tx) { 2759 ifp->if_csum_flags_tx |= M_CSUM_UDPv6; 2760 } 2761 if (ifp->if_capenable & IFCAP_CSUM_UDPv6_Rx) { 2762 ifp->if_csum_flags_rx |= M_CSUM_UDPv6; 2763 } 2764 if (ifp->if_flags & IFF_UP) 2765 return 
ENETRESET; 2766 return 0; 2767 case SIOCSIFFLAGS: 2768 ifr = data; 2769 /* 2770 * If if_is_mpsafe(ifp), KERNEL_LOCK isn't held here, but if_up 2771 * and if_down aren't MP-safe yet, so we must hold the lock. 2772 */ 2773 KERNEL_LOCK_IF_IFP_MPSAFE(ifp); 2774 if (ifp->if_flags & IFF_UP && (ifr->ifr_flags & IFF_UP) == 0) { 2775 s = splsoftnet(); 2776 if_down(ifp); 2777 splx(s); 2778 } 2779 if (ifr->ifr_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { 2780 s = splsoftnet(); 2781 if_up(ifp); 2782 splx(s); 2783 } 2784 KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp); 2785 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | 2786 (ifr->ifr_flags &~ IFF_CANTCHANGE); 2787 break; 2788 case SIOCGIFFLAGS: 2789 ifr = data; 2790 ifr->ifr_flags = ifp->if_flags; 2791 break; 2792 2793 case SIOCGIFMETRIC: 2794 ifr = data; 2795 ifr->ifr_metric = ifp->if_metric; 2796 break; 2797 2798 case SIOCGIFMTU: 2799 ifr = data; 2800 ifr->ifr_mtu = ifp->if_mtu; 2801 break; 2802 2803 case SIOCGIFDLT: 2804 ifr = data; 2805 ifr->ifr_dlt = ifp->if_dlt; 2806 break; 2807 2808 case SIOCGIFCAP: 2809 ifcr = data; 2810 ifcr->ifcr_capabilities = ifp->if_capabilities; 2811 ifcr->ifcr_capenable = ifp->if_capenable; 2812 break; 2813 2814 case SIOCSIFMETRIC: 2815 ifr = data; 2816 ifp->if_metric = ifr->ifr_metric; 2817 break; 2818 2819 case SIOCGIFDATA: 2820 ifdr = data; 2821 ifdr->ifdr_data = ifp->if_data; 2822 break; 2823 2824 case SIOCGIFINDEX: 2825 ifr = data; 2826 ifr->ifr_index = ifp->if_index; 2827 break; 2828 2829 case SIOCZIFDATA: 2830 ifdr = data; 2831 ifdr->ifdr_data = ifp->if_data; 2832 /* 2833 * Assumes that the volatile counters that can be 2834 * zero'ed are at the end of if_data. 2835 */ 2836 memset(&ifp->if_data.ifi_ipackets, 0, sizeof(ifp->if_data) - 2837 offsetof(struct if_data, ifi_ipackets)); 2838 /* 2839 * The memset() clears to the bottm of if_data. In the area, 2840 * if_lastchange is included. Please be careful if new entry 2841 * will be added into if_data or rewite this. 
2842 * 2843 * And also, update if_lastchnage. 2844 */ 2845 getnanotime(&ifp->if_lastchange); 2846 break; 2847 case SIOCSIFMTU: 2848 ifr = data; 2849 if (ifp->if_mtu == ifr->ifr_mtu) 2850 break; 2851 ifp->if_mtu = ifr->ifr_mtu; 2852 /* 2853 * If the link MTU changed, do network layer specific procedure. 2854 */ 2855 #ifdef INET6 2856 KERNEL_LOCK_UNLESS_NET_MPSAFE(); 2857 if (in6_present) 2858 nd6_setmtu(ifp); 2859 KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 2860 #endif 2861 return ENETRESET; 2862 default: 2863 return ENOTTY; 2864 } 2865 return 0; 2866 } 2867 2868 int 2869 ifaddrpref_ioctl(struct socket *so, u_long cmd, void *data, struct ifnet *ifp) 2870 { 2871 struct if_addrprefreq *ifap = (struct if_addrprefreq *)data; 2872 struct ifaddr *ifa; 2873 const struct sockaddr *any, *sa; 2874 union { 2875 struct sockaddr sa; 2876 struct sockaddr_storage ss; 2877 } u, v; 2878 int s, error = 0; 2879 2880 switch (cmd) { 2881 case SIOCSIFADDRPREF: 2882 if (kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_INTERFACE, 2883 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd, 2884 NULL) != 0) 2885 return EPERM; 2886 case SIOCGIFADDRPREF: 2887 break; 2888 default: 2889 return EOPNOTSUPP; 2890 } 2891 2892 /* sanity checks */ 2893 if (data == NULL || ifp == NULL) { 2894 panic("invalid argument to %s", __func__); 2895 /*NOTREACHED*/ 2896 } 2897 2898 /* address must be specified on ADD and DELETE */ 2899 sa = sstocsa(&ifap->ifap_addr); 2900 if (sa->sa_family != sofamily(so)) 2901 return EINVAL; 2902 if ((any = sockaddr_any(sa)) == NULL || sa->sa_len != any->sa_len) 2903 return EINVAL; 2904 2905 sockaddr_externalize(&v.sa, sizeof(v.ss), sa); 2906 2907 s = pserialize_read_enter(); 2908 IFADDR_READER_FOREACH(ifa, ifp) { 2909 if (ifa->ifa_addr->sa_family != sa->sa_family) 2910 continue; 2911 sockaddr_externalize(&u.sa, sizeof(u.ss), ifa->ifa_addr); 2912 if (sockaddr_cmp(&u.sa, &v.sa) == 0) 2913 break; 2914 } 2915 if (ifa == NULL) { 2916 error = EADDRNOTAVAIL; 2917 goto out; 2918 } 2919 
2920 switch (cmd) { 2921 case SIOCSIFADDRPREF: 2922 ifa->ifa_preference = ifap->ifap_preference; 2923 goto out; 2924 case SIOCGIFADDRPREF: 2925 /* fill in the if_laddrreq structure */ 2926 (void)sockaddr_copy(sstosa(&ifap->ifap_addr), 2927 sizeof(ifap->ifap_addr), ifa->ifa_addr); 2928 ifap->ifap_preference = ifa->ifa_preference; 2929 goto out; 2930 default: 2931 error = EOPNOTSUPP; 2932 } 2933 out: 2934 pserialize_read_exit(s); 2935 return error; 2936 } 2937 2938 /* 2939 * Interface ioctls. 2940 */ 2941 static int 2942 doifioctl(struct socket *so, u_long cmd, void *data, struct lwp *l) 2943 { 2944 struct ifnet *ifp; 2945 struct ifreq *ifr; 2946 int error = 0; 2947 #if defined(COMPAT_OSOCK) || defined(COMPAT_OIFREQ) 2948 u_long ocmd = cmd; 2949 #endif 2950 short oif_flags; 2951 #ifdef COMPAT_OIFREQ 2952 struct ifreq ifrb; 2953 struct oifreq *oifr = NULL; 2954 #endif 2955 int r; 2956 struct psref psref; 2957 int bound; 2958 2959 switch (cmd) { 2960 #ifdef COMPAT_OIFREQ 2961 case OSIOCGIFCONF: 2962 case OOSIOCGIFCONF: 2963 return compat_ifconf(cmd, data); 2964 #endif 2965 #ifdef COMPAT_OIFDATA 2966 case OSIOCGIFDATA: 2967 case OSIOCZIFDATA: 2968 return compat_ifdatareq(l, cmd, data); 2969 #endif 2970 case SIOCGIFCONF: 2971 return ifconf(cmd, data); 2972 case SIOCINITIFADDR: 2973 return EPERM; 2974 } 2975 2976 #ifdef COMPAT_OIFREQ 2977 cmd = (*vec_compat_cvtcmd)(cmd); 2978 if (cmd != ocmd) { 2979 oifr = data; 2980 data = ifr = &ifrb; 2981 ifreqo2n(oifr, ifr); 2982 } else 2983 #endif 2984 ifr = data; 2985 2986 switch (cmd) { 2987 case SIOCIFCREATE: 2988 case SIOCIFDESTROY: 2989 bound = curlwp_bind(); 2990 if (l != NULL) { 2991 ifp = if_get(ifr->ifr_name, &psref); 2992 error = kauth_authorize_network(l->l_cred, 2993 KAUTH_NETWORK_INTERFACE, 2994 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, 2995 (void *)cmd, NULL); 2996 if (ifp != NULL) 2997 if_put(ifp, &psref); 2998 if (error != 0) { 2999 curlwp_bindx(bound); 3000 return error; 3001 } 3002 } 3003 
KERNEL_LOCK_UNLESS_NET_MPSAFE(); 3004 mutex_enter(&if_clone_mtx); 3005 r = (cmd == SIOCIFCREATE) ? 3006 if_clone_create(ifr->ifr_name) : 3007 if_clone_destroy(ifr->ifr_name); 3008 mutex_exit(&if_clone_mtx); 3009 KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 3010 curlwp_bindx(bound); 3011 return r; 3012 3013 case SIOCIFGCLONERS: 3014 { 3015 struct if_clonereq *req = (struct if_clonereq *)data; 3016 return if_clone_list(req->ifcr_count, req->ifcr_buffer, 3017 &req->ifcr_total); 3018 } 3019 } 3020 3021 bound = curlwp_bind(); 3022 ifp = if_get(ifr->ifr_name, &psref); 3023 if (ifp == NULL) { 3024 curlwp_bindx(bound); 3025 return ENXIO; 3026 } 3027 3028 switch (cmd) { 3029 case SIOCALIFADDR: 3030 case SIOCDLIFADDR: 3031 case SIOCSIFADDRPREF: 3032 case SIOCSIFFLAGS: 3033 case SIOCSIFCAP: 3034 case SIOCSIFMETRIC: 3035 case SIOCZIFDATA: 3036 case SIOCSIFMTU: 3037 case SIOCSIFPHYADDR: 3038 case SIOCDIFPHYADDR: 3039 #ifdef INET6 3040 case SIOCSIFPHYADDR_IN6: 3041 #endif 3042 case SIOCSLIFPHYADDR: 3043 case SIOCADDMULTI: 3044 case SIOCDELMULTI: 3045 case SIOCSIFMEDIA: 3046 case SIOCSDRVSPEC: 3047 case SIOCG80211: 3048 case SIOCS80211: 3049 case SIOCS80211NWID: 3050 case SIOCS80211NWKEY: 3051 case SIOCS80211POWER: 3052 case SIOCS80211BSSID: 3053 case SIOCS80211CHANNEL: 3054 case SIOCSLINKSTR: 3055 if (l != NULL) { 3056 error = kauth_authorize_network(l->l_cred, 3057 KAUTH_NETWORK_INTERFACE, 3058 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, 3059 (void *)cmd, NULL); 3060 if (error != 0) 3061 goto out; 3062 } 3063 } 3064 3065 oif_flags = ifp->if_flags; 3066 3067 KERNEL_LOCK_UNLESS_IFP_MPSAFE(ifp); 3068 mutex_enter(ifp->if_ioctl_lock); 3069 3070 error = (*ifp->if_ioctl)(ifp, cmd, data); 3071 if (error != ENOTTY) 3072 ; 3073 else if (so->so_proto == NULL) 3074 error = EOPNOTSUPP; 3075 else { 3076 KERNEL_LOCK_IF_IFP_MPSAFE(ifp); 3077 #ifdef COMPAT_OSOCK 3078 if (vec_compat_ifioctl != NULL) 3079 error = (*vec_compat_ifioctl)(so, ocmd, cmd, data, l); 3080 else 3081 #endif 3082 error = 
(*so->so_proto->pr_usrreqs->pr_ioctl)(so, 3083 cmd, data, ifp); 3084 KERNEL_UNLOCK_IF_IFP_MPSAFE(ifp); 3085 } 3086 3087 if (((oif_flags ^ ifp->if_flags) & IFF_UP) != 0) { 3088 if ((ifp->if_flags & IFF_UP) != 0) { 3089 int s = splsoftnet(); 3090 if_up(ifp); 3091 splx(s); 3092 } 3093 } 3094 #ifdef COMPAT_OIFREQ 3095 if (cmd != ocmd) 3096 ifreqn2o(oifr, ifr); 3097 #endif 3098 3099 mutex_exit(ifp->if_ioctl_lock); 3100 KERNEL_UNLOCK_UNLESS_IFP_MPSAFE(ifp); 3101 out: 3102 if_put(ifp, &psref); 3103 curlwp_bindx(bound); 3104 return error; 3105 } 3106 3107 /* 3108 * Return interface configuration 3109 * of system. List may be used 3110 * in later ioctl's (above) to get 3111 * other information. 3112 * 3113 * Each record is a struct ifreq. Before the addition of 3114 * sockaddr_storage, the API rule was that sockaddr flavors that did 3115 * not fit would extend beyond the struct ifreq, with the next struct 3116 * ifreq starting sa_len beyond the struct sockaddr. Because the 3117 * union in struct ifreq includes struct sockaddr_storage, every kind 3118 * of sockaddr must fit. Thus, there are no longer any overlength 3119 * records. 3120 * 3121 * Records are added to the user buffer if they fit, and ifc_len is 3122 * adjusted to the length that was written. Thus, the user is only 3123 * assured of getting the complete list if ifc_len on return is at 3124 * least sizeof(struct ifreq) less than it was on entry. 3125 * 3126 * If the user buffer pointer is NULL, this routine copies no data and 3127 * returns the amount of space that would be needed. 3128 * 3129 * Invariants: 3130 * ifrp points to the next part of the user's buffer to be used. If 3131 * ifrp != NULL, space holds the number of bytes remaining that we may 3132 * write at ifrp. Otherwise, space holds the number of bytes that 3133 * would have been written had there been adequate space. 
3134 */ 3135 /*ARGSUSED*/ 3136 static int 3137 ifconf(u_long cmd, void *data) 3138 { 3139 struct ifconf *ifc = (struct ifconf *)data; 3140 struct ifnet *ifp; 3141 struct ifaddr *ifa; 3142 struct ifreq ifr, *ifrp = NULL; 3143 int space = 0, error = 0; 3144 const int sz = (int)sizeof(struct ifreq); 3145 const bool docopy = ifc->ifc_req != NULL; 3146 int s; 3147 int bound; 3148 struct psref psref; 3149 3150 if (docopy) { 3151 space = ifc->ifc_len; 3152 ifrp = ifc->ifc_req; 3153 } 3154 3155 bound = curlwp_bind(); 3156 s = pserialize_read_enter(); 3157 IFNET_READER_FOREACH(ifp) { 3158 psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class); 3159 pserialize_read_exit(s); 3160 3161 (void)strncpy(ifr.ifr_name, ifp->if_xname, 3162 sizeof(ifr.ifr_name)); 3163 if (ifr.ifr_name[sizeof(ifr.ifr_name) - 1] != '\0') { 3164 error = ENAMETOOLONG; 3165 goto release_exit; 3166 } 3167 if (IFADDR_READER_EMPTY(ifp)) { 3168 /* Interface with no addresses - send zero sockaddr. */ 3169 memset(&ifr.ifr_addr, 0, sizeof(ifr.ifr_addr)); 3170 if (!docopy) { 3171 space += sz; 3172 goto next; 3173 } 3174 if (space >= sz) { 3175 error = copyout(&ifr, ifrp, sz); 3176 if (error != 0) 3177 goto release_exit; 3178 ifrp++; 3179 space -= sz; 3180 } 3181 } 3182 3183 s = pserialize_read_enter(); 3184 IFADDR_READER_FOREACH(ifa, ifp) { 3185 struct sockaddr *sa = ifa->ifa_addr; 3186 /* all sockaddrs must fit in sockaddr_storage */ 3187 KASSERT(sa->sa_len <= sizeof(ifr.ifr_ifru)); 3188 3189 if (!docopy) { 3190 space += sz; 3191 continue; 3192 } 3193 memcpy(&ifr.ifr_space, sa, sa->sa_len); 3194 pserialize_read_exit(s); 3195 3196 if (space >= sz) { 3197 error = copyout(&ifr, ifrp, sz); 3198 if (error != 0) 3199 goto release_exit; 3200 ifrp++; space -= sz; 3201 } 3202 s = pserialize_read_enter(); 3203 } 3204 pserialize_read_exit(s); 3205 3206 next: 3207 s = pserialize_read_enter(); 3208 psref_release(&psref, &ifp->if_psref, ifnet_psref_class); 3209 } 3210 pserialize_read_exit(s); 3211 curlwp_bindx(bound); 3212 
3213 if (docopy) { 3214 KASSERT(0 <= space && space <= ifc->ifc_len); 3215 ifc->ifc_len -= space; 3216 } else { 3217 KASSERT(space >= 0); 3218 ifc->ifc_len = space; 3219 } 3220 return (0); 3221 3222 release_exit: 3223 psref_release(&psref, &ifp->if_psref, ifnet_psref_class); 3224 curlwp_bindx(bound); 3225 return error; 3226 } 3227 3228 int 3229 ifreq_setaddr(u_long cmd, struct ifreq *ifr, const struct sockaddr *sa) 3230 { 3231 uint8_t len; 3232 #ifdef COMPAT_OIFREQ 3233 struct ifreq ifrb; 3234 struct oifreq *oifr = NULL; 3235 u_long ocmd = cmd; 3236 cmd = (*vec_compat_cvtcmd)(cmd); 3237 if (cmd != ocmd) { 3238 oifr = (struct oifreq *)(void *)ifr; 3239 ifr = &ifrb; 3240 ifreqo2n(oifr, ifr); 3241 len = sizeof(oifr->ifr_addr); 3242 } else 3243 #endif 3244 len = sizeof(ifr->ifr_ifru.ifru_space); 3245 3246 if (len < sa->sa_len) 3247 return EFBIG; 3248 3249 memset(&ifr->ifr_addr, 0, len); 3250 sockaddr_copy(&ifr->ifr_addr, len, sa); 3251 3252 #ifdef COMPAT_OIFREQ 3253 if (cmd != ocmd) 3254 ifreqn2o(oifr, ifr); 3255 #endif 3256 return 0; 3257 } 3258 3259 /* 3260 * wrapper function for the drivers which doesn't have if_transmit(). 
3261 */ 3262 static int 3263 if_transmit(struct ifnet *ifp, struct mbuf *m) 3264 { 3265 int s, error; 3266 size_t pktlen = m->m_pkthdr.len; 3267 bool mcast = (m->m_flags & M_MCAST) != 0; 3268 3269 s = splnet(); 3270 3271 IFQ_ENQUEUE(&ifp->if_snd, m, error); 3272 if (error != 0) { 3273 /* mbuf is already freed */ 3274 goto out; 3275 } 3276 3277 ifp->if_obytes += pktlen; 3278 if (mcast) 3279 ifp->if_omcasts++; 3280 3281 if ((ifp->if_flags & IFF_OACTIVE) == 0) 3282 if_start_lock(ifp); 3283 out: 3284 splx(s); 3285 3286 return error; 3287 } 3288 3289 int 3290 if_transmit_lock(struct ifnet *ifp, struct mbuf *m) 3291 { 3292 int error; 3293 3294 #ifdef ALTQ 3295 KERNEL_LOCK(1, NULL); 3296 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 3297 error = if_transmit(ifp, m); 3298 KERNEL_UNLOCK_ONE(NULL); 3299 } else { 3300 KERNEL_UNLOCK_ONE(NULL); 3301 error = (*ifp->if_transmit)(ifp, m); 3302 /* mbuf is alredy freed */ 3303 } 3304 #else /* !ALTQ */ 3305 error = (*ifp->if_transmit)(ifp, m); 3306 /* mbuf is alredy freed */ 3307 #endif /* !ALTQ */ 3308 3309 return error; 3310 } 3311 3312 /* 3313 * Queue message on interface, and start output if interface 3314 * not yet active. 
3315 */ 3316 int 3317 ifq_enqueue(struct ifnet *ifp, struct mbuf *m) 3318 { 3319 3320 return if_transmit_lock(ifp, m); 3321 } 3322 3323 /* 3324 * Queue message on interface, possibly using a second fast queue 3325 */ 3326 int 3327 ifq_enqueue2(struct ifnet *ifp, struct ifqueue *ifq, struct mbuf *m) 3328 { 3329 int error = 0; 3330 3331 if (ifq != NULL 3332 #ifdef ALTQ 3333 && ALTQ_IS_ENABLED(&ifp->if_snd) == 0 3334 #endif 3335 ) { 3336 if (IF_QFULL(ifq)) { 3337 IF_DROP(&ifp->if_snd); 3338 m_freem(m); 3339 if (error == 0) 3340 error = ENOBUFS; 3341 } else 3342 IF_ENQUEUE(ifq, m); 3343 } else 3344 IFQ_ENQUEUE(&ifp->if_snd, m, error); 3345 if (error != 0) { 3346 ++ifp->if_oerrors; 3347 return error; 3348 } 3349 return 0; 3350 } 3351 3352 int 3353 if_addr_init(ifnet_t *ifp, struct ifaddr *ifa, const bool src) 3354 { 3355 int rc; 3356 3357 if (ifp->if_initaddr != NULL) 3358 rc = (*ifp->if_initaddr)(ifp, ifa, src); 3359 else if (src || 3360 /* FIXME: may not hold if_ioctl_lock */ 3361 (rc = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR, ifa)) == ENOTTY) 3362 rc = (*ifp->if_ioctl)(ifp, SIOCINITIFADDR, ifa); 3363 3364 return rc; 3365 } 3366 3367 int 3368 if_do_dad(struct ifnet *ifp) 3369 { 3370 if ((ifp->if_flags & IFF_LOOPBACK) != 0) 3371 return 0; 3372 3373 switch (ifp->if_type) { 3374 case IFT_FAITH: 3375 /* 3376 * These interfaces do not have the IFF_LOOPBACK flag, 3377 * but loop packets back. We do not have to do DAD on such 3378 * interfaces. We should even omit it, because loop-backed 3379 * responses would confuse the DAD procedure. 3380 */ 3381 return 0; 3382 default: 3383 /* 3384 * Our DAD routine requires the interface up and running. 3385 * However, some interfaces can be up before the RUNNING 3386 * status. Additionaly, users may try to assign addresses 3387 * before the interface becomes up (or running). 3388 * We simply skip DAD in such a case as a work around. 
3389 * XXX: we should rather mark "tentative" on such addresses, 3390 * and do DAD after the interface becomes ready. 3391 */ 3392 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != 3393 (IFF_UP|IFF_RUNNING)) 3394 return 0; 3395 3396 return 1; 3397 } 3398 } 3399 3400 int 3401 if_flags_set(ifnet_t *ifp, const short flags) 3402 { 3403 int rc; 3404 3405 if (ifp->if_setflags != NULL) 3406 rc = (*ifp->if_setflags)(ifp, flags); 3407 else { 3408 short cantflags, chgdflags; 3409 struct ifreq ifr; 3410 3411 chgdflags = ifp->if_flags ^ flags; 3412 cantflags = chgdflags & IFF_CANTCHANGE; 3413 3414 if (cantflags != 0) 3415 ifp->if_flags ^= cantflags; 3416 3417 /* Traditionally, we do not call if_ioctl after 3418 * setting/clearing only IFF_PROMISC if the interface 3419 * isn't IFF_UP. Uphold that tradition. 3420 */ 3421 if (chgdflags == IFF_PROMISC && (ifp->if_flags & IFF_UP) == 0) 3422 return 0; 3423 3424 memset(&ifr, 0, sizeof(ifr)); 3425 3426 ifr.ifr_flags = flags & ~IFF_CANTCHANGE; 3427 /* FIXME: may not hold if_ioctl_lock */ 3428 rc = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, &ifr); 3429 3430 if (rc != 0 && cantflags != 0) 3431 ifp->if_flags ^= cantflags; 3432 } 3433 3434 return rc; 3435 } 3436 3437 int 3438 if_mcast_op(ifnet_t *ifp, const unsigned long cmd, const struct sockaddr *sa) 3439 { 3440 int rc; 3441 struct ifreq ifr; 3442 3443 if (ifp->if_mcastop != NULL) 3444 rc = (*ifp->if_mcastop)(ifp, cmd, sa); 3445 else { 3446 ifreq_setaddr(cmd, &ifr, sa); 3447 rc = (*ifp->if_ioctl)(ifp, cmd, &ifr); 3448 } 3449 3450 return rc; 3451 } 3452 3453 static void 3454 sysctl_sndq_setup(struct sysctllog **clog, const char *ifname, 3455 struct ifaltq *ifq) 3456 { 3457 const struct sysctlnode *cnode, *rnode; 3458 3459 if (sysctl_createv(clog, 0, NULL, &rnode, 3460 CTLFLAG_PERMANENT, 3461 CTLTYPE_NODE, "interfaces", 3462 SYSCTL_DESCR("Per-interface controls"), 3463 NULL, 0, NULL, 0, 3464 CTL_NET, CTL_CREATE, CTL_EOL) != 0) 3465 goto bad; 3466 3467 if (sysctl_createv(clog, 0, &rnode, &rnode, 3468 
CTLFLAG_PERMANENT, 3469 CTLTYPE_NODE, ifname, 3470 SYSCTL_DESCR("Interface controls"), 3471 NULL, 0, NULL, 0, 3472 CTL_CREATE, CTL_EOL) != 0) 3473 goto bad; 3474 3475 if (sysctl_createv(clog, 0, &rnode, &rnode, 3476 CTLFLAG_PERMANENT, 3477 CTLTYPE_NODE, "sndq", 3478 SYSCTL_DESCR("Interface output queue controls"), 3479 NULL, 0, NULL, 0, 3480 CTL_CREATE, CTL_EOL) != 0) 3481 goto bad; 3482 3483 if (sysctl_createv(clog, 0, &rnode, &cnode, 3484 CTLFLAG_PERMANENT, 3485 CTLTYPE_INT, "len", 3486 SYSCTL_DESCR("Current output queue length"), 3487 NULL, 0, &ifq->ifq_len, 0, 3488 CTL_CREATE, CTL_EOL) != 0) 3489 goto bad; 3490 3491 if (sysctl_createv(clog, 0, &rnode, &cnode, 3492 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 3493 CTLTYPE_INT, "maxlen", 3494 SYSCTL_DESCR("Maximum allowed output queue length"), 3495 NULL, 0, &ifq->ifq_maxlen, 0, 3496 CTL_CREATE, CTL_EOL) != 0) 3497 goto bad; 3498 3499 if (sysctl_createv(clog, 0, &rnode, &cnode, 3500 CTLFLAG_PERMANENT, 3501 CTLTYPE_INT, "drops", 3502 SYSCTL_DESCR("Packets dropped due to full output queue"), 3503 NULL, 0, &ifq->ifq_drops, 0, 3504 CTL_CREATE, CTL_EOL) != 0) 3505 goto bad; 3506 3507 return; 3508 bad: 3509 printf("%s: could not attach sysctl nodes\n", ifname); 3510 return; 3511 } 3512 3513 #if defined(INET) || defined(INET6) 3514 3515 #define SYSCTL_NET_PKTQ(q, cn, c) \ 3516 static int \ 3517 sysctl_net_##q##_##cn(SYSCTLFN_ARGS) \ 3518 { \ 3519 return sysctl_pktq_count(SYSCTLFN_CALL(rnode), q, c); \ 3520 } 3521 3522 #if defined(INET) 3523 static int 3524 sysctl_net_ip_pktq_maxlen(SYSCTLFN_ARGS) 3525 { 3526 return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), ip_pktq); 3527 } 3528 SYSCTL_NET_PKTQ(ip_pktq, items, PKTQ_NITEMS) 3529 SYSCTL_NET_PKTQ(ip_pktq, drops, PKTQ_DROPS) 3530 #endif 3531 3532 #if defined(INET6) 3533 static int 3534 sysctl_net_ip6_pktq_maxlen(SYSCTLFN_ARGS) 3535 { 3536 return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), ip6_pktq); 3537 } 3538 SYSCTL_NET_PKTQ(ip6_pktq, items, PKTQ_NITEMS) 3539 SYSCTL_NET_PKTQ(ip6_pktq, 
drops, PKTQ_DROPS) 3540 #endif 3541 3542 static void 3543 sysctl_net_pktq_setup(struct sysctllog **clog, int pf) 3544 { 3545 sysctlfn len_func = NULL, maxlen_func = NULL, drops_func = NULL; 3546 const char *pfname = NULL, *ipname = NULL; 3547 int ipn = 0, qid = 0; 3548 3549 switch (pf) { 3550 #if defined(INET) 3551 case PF_INET: 3552 len_func = sysctl_net_ip_pktq_items; 3553 maxlen_func = sysctl_net_ip_pktq_maxlen; 3554 drops_func = sysctl_net_ip_pktq_drops; 3555 pfname = "inet", ipn = IPPROTO_IP; 3556 ipname = "ip", qid = IPCTL_IFQ; 3557 break; 3558 #endif 3559 #if defined(INET6) 3560 case PF_INET6: 3561 len_func = sysctl_net_ip6_pktq_items; 3562 maxlen_func = sysctl_net_ip6_pktq_maxlen; 3563 drops_func = sysctl_net_ip6_pktq_drops; 3564 pfname = "inet6", ipn = IPPROTO_IPV6; 3565 ipname = "ip6", qid = IPV6CTL_IFQ; 3566 break; 3567 #endif 3568 default: 3569 KASSERT(false); 3570 } 3571 3572 sysctl_createv(clog, 0, NULL, NULL, 3573 CTLFLAG_PERMANENT, 3574 CTLTYPE_NODE, pfname, NULL, 3575 NULL, 0, NULL, 0, 3576 CTL_NET, pf, CTL_EOL); 3577 sysctl_createv(clog, 0, NULL, NULL, 3578 CTLFLAG_PERMANENT, 3579 CTLTYPE_NODE, ipname, NULL, 3580 NULL, 0, NULL, 0, 3581 CTL_NET, pf, ipn, CTL_EOL); 3582 sysctl_createv(clog, 0, NULL, NULL, 3583 CTLFLAG_PERMANENT, 3584 CTLTYPE_NODE, "ifq", 3585 SYSCTL_DESCR("Protocol input queue controls"), 3586 NULL, 0, NULL, 0, 3587 CTL_NET, pf, ipn, qid, CTL_EOL); 3588 3589 sysctl_createv(clog, 0, NULL, NULL, 3590 CTLFLAG_PERMANENT, 3591 CTLTYPE_INT, "len", 3592 SYSCTL_DESCR("Current input queue length"), 3593 len_func, 0, NULL, 0, 3594 CTL_NET, pf, ipn, qid, IFQCTL_LEN, CTL_EOL); 3595 sysctl_createv(clog, 0, NULL, NULL, 3596 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 3597 CTLTYPE_INT, "maxlen", 3598 SYSCTL_DESCR("Maximum allowed input queue length"), 3599 maxlen_func, 0, NULL, 0, 3600 CTL_NET, pf, ipn, qid, IFQCTL_MAXLEN, CTL_EOL); 3601 sysctl_createv(clog, 0, NULL, NULL, 3602 CTLFLAG_PERMANENT, 3603 CTLTYPE_INT, "drops", 3604 SYSCTL_DESCR("Packets 
dropped due to full input queue"), 3605 drops_func, 0, NULL, 0, 3606 CTL_NET, pf, ipn, qid, IFQCTL_DROPS, CTL_EOL); 3607 } 3608 #endif /* INET || INET6 */ 3609 3610 static int 3611 if_sdl_sysctl(SYSCTLFN_ARGS) 3612 { 3613 struct ifnet *ifp; 3614 const struct sockaddr_dl *sdl; 3615 struct psref psref; 3616 int error = 0; 3617 int bound; 3618 3619 if (namelen != 1) 3620 return EINVAL; 3621 3622 bound = curlwp_bind(); 3623 ifp = if_get_byindex(name[0], &psref); 3624 if (ifp == NULL) { 3625 error = ENODEV; 3626 goto out0; 3627 } 3628 3629 sdl = ifp->if_sadl; 3630 if (sdl == NULL) { 3631 *oldlenp = 0; 3632 goto out1; 3633 } 3634 3635 if (oldp == NULL) { 3636 *oldlenp = sdl->sdl_alen; 3637 goto out1; 3638 } 3639 3640 if (*oldlenp >= sdl->sdl_alen) 3641 *oldlenp = sdl->sdl_alen; 3642 error = sysctl_copyout(l, &sdl->sdl_data[sdl->sdl_nlen], oldp, *oldlenp); 3643 out1: 3644 if_put(ifp, &psref); 3645 out0: 3646 curlwp_bindx(bound); 3647 return error; 3648 } 3649 3650 static void 3651 if_sysctl_setup(struct sysctllog **clog) 3652 { 3653 const struct sysctlnode *rnode = NULL; 3654 3655 sysctl_createv(clog, 0, NULL, &rnode, 3656 CTLFLAG_PERMANENT, 3657 CTLTYPE_NODE, "sdl", 3658 SYSCTL_DESCR("Get active link-layer address"), 3659 if_sdl_sysctl, 0, NULL, 0, 3660 CTL_NET, CTL_CREATE, CTL_EOL); 3661 3662 #if defined(INET) 3663 sysctl_net_pktq_setup(NULL, PF_INET); 3664 #endif 3665 #ifdef INET6 3666 if (in6_present) 3667 sysctl_net_pktq_setup(NULL, PF_INET6); 3668 #endif 3669 } 3670