/*
 * Copyright (c) 1980, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)if.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
 */

#include "opt_compat.h"
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/protosw.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/thread.h>
#include <sys/serialize.h>
#include <sys/bus.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <sys/mutex2.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/ifq_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/if_clone.h>
#include <net/netisr.h>
#include <net/netmsg2.h>

#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <machine/smp.h>

#if defined(INET) || defined(INET6)
/*XXX*/
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif
#endif

#if defined(COMPAT_43)
#include <emulation/43bsd/43bsd_socket.h>
#endif /* COMPAT_43 */

/*
 * Message used to run an ifaddr-related operation in the netisr threads.
 * 'tail' appears to select head vs tail insertion -- TODO confirm against
 * the (not visible here) per-CPU ifaddr replication code.
 */
struct netmsg_ifaddr {
	struct netmsg_base base;
	struct ifaddr	*ifa;
	struct ifnet	*ifp;
	int		tail;
};

/*
 * Per-CPU head of staged ifaltq transmit requests; cache-line aligned to
 * avoid false sharing between CPUs.
 */
struct ifaltq_stage_head {
	TAILQ_HEAD(, ifaltq_stage)	ifqs_head;
} __cachealign;

/*
 * System initialization
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);
static void	ifnetinit(void *);
static void	if_slowtimo(void *);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	if_rtdel(struct radix_node *, void *);

#ifdef INET6
/*
 * XXX: declare here to avoid to include many inet6 related files..
 * should be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif

SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

/* Max number of packets a netisr thread may stage before a real dispatch */
static int ifq_stage_cntmax = 4;
TUNABLE_INT("net.link.stage_cntmax", &ifq_stage_cntmax);
SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
    &ifq_stage_cntmax, 0, "ifq staging packet count max");

SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
/* Must be after netisr_init */
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)

/* Per-ifc_type constructor/destructor hooks registered by pseudo drivers */
static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];

MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");

int			ifqmaxlen = IFQ_MAXLEN;
struct ifnethead	ifnet = TAILQ_HEAD_INITIALIZER(ifnet);

struct callout		if_slowtimo_timer;

int			if_index = 0;
struct ifnet		**ifindex2ifnet = NULL;
static struct thread	ifnet_threads[MAXCPU];

static struct ifaltq_stage_head	ifq_stage_heads[MAXCPU];

#define IFQ_KTR_STRING		"ifq=%p"
#define IFQ_KTR_ARGS		struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ			KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)

#define IF_START_KTR_STRING	"ifp=%p"
#define IF_START_KTR_ARGS	struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START		KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
#define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)

TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/*
 * Network interface utility routines.
 *
 * Routines with ifa_ifwith* names take sockaddr *'s as
 * parameters.
 */
/* ARGSUSED*/
void
ifinit(void *dummy)
{
	struct ifnet *ifp;

	callout_init(&if_slowtimo_timer);

	/*
	 * Give every already-attached interface a sane send-queue length
	 * if its driver forgot to set one.
	 */
	crit_enter();
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (ifp->if_snd.ifq_maxlen == 0) {
			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
			ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
		}
	}
	crit_exit();

	/* Kick off the periodic interface watchdog (arg is unused). */
	if_slowtimo(0);
}

/* Default if_start_cpuid method: always the interface's assigned CPU. */
static int
if_start_cpuid(struct ifnet *ifp)
{
	return ifp->if_cpuid;
}

#ifdef IFPOLL_ENABLE
/*
 * if_start_cpuid method used while polling(4) may be active: prefer the
 * polling CPU, falling back to the interface's assigned CPU when the
 * interface is not currently polled (if_npoll_cpuid < 0).
 */
static int
if_start_cpuid_npoll(struct ifnet *ifp)
{
	int poll_cpuid = ifp->if_npoll_cpuid;

	if (poll_cpuid >= 0)
		return poll_cpuid;
	else
		return ifp->if_cpuid;
}
#endif

/*
 * IPI handler: queue this CPU's pre-initialized if_start netmsg to the
 * local netisr port, unless that message is already in flight
 * (!MSGF_DONE means it is still being processed).
 */
static void
if_start_ipifunc(void *arg)
{
	struct ifnet *ifp = arg;
	struct lwkt_msg *lmsg = &ifp->if_start_nmsg[mycpuid].lmsg;

	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
	crit_exit();
}

/*
 * Unlink 'stage' from the per-CPU staging list and reset its state.
 * Caller must be on the CPU owning 'head'.
 */
static __inline void
ifq_stage_remove(struct ifaltq_stage_head *head, struct ifaltq_stage *stage)
{
	KKASSERT(stage->ifqs_flags & IFQ_STAGE_FLAG_QUED);
	TAILQ_REMOVE(&head->ifqs_head, stage, ifqs_link);
	stage->ifqs_flags &= ~(IFQ_STAGE_FLAG_QUED | IFQ_STAGE_FLAG_SCHED);
	stage->ifqs_cnt = 0;
	stage->ifqs_len = 0;
}

/*
 * Append 'stage' to the per-CPU staging list.  Must not already be
 * queued or scheduled.
 */
static __inline void
ifq_stage_insert(struct ifaltq_stage_head *head, struct ifaltq_stage *stage)
{
	KKASSERT((stage->ifqs_flags &
	    (IFQ_STAGE_FLAG_QUED | IFQ_STAGE_FLAG_SCHED)) == 0);
	stage->ifqs_flags |= IFQ_STAGE_FLAG_QUED;
	TAILQ_INSERT_TAIL(&head->ifqs_head, stage, ifqs_link);
}

/*
 * Schedule ifnet.if_start on ifnet's CPU
 */
static void
if_start_schedule(struct ifnet *ifp, int force)
{
	int cpu;

	/*
	 * If called from a netisr thread and staging is enabled, defer the
	 * dispatch: mark this CPU's stage entry as scheduled and return.
	 * NOTE(review): ifqs_cnt/ifqs_len are zeroed here, presumably so
	 * the stage is flushed unconditionally later -- confirm against
	 * the staging flush code (not visible in this chunk).
	 */
	if (!force && curthread->td_type == TD_TYPE_NETISR &&
	    ifq_stage_cntmax > 0) {
		struct ifaltq_stage *stage = &ifp->if_snd.altq_stage[mycpuid];

		stage->ifqs_cnt = 0;
		stage->ifqs_len = 0;
		if ((stage->ifqs_flags & IFQ_STAGE_FLAG_QUED) == 0)
			ifq_stage_insert(&ifq_stage_heads[mycpuid], stage);
		stage->ifqs_flags |= IFQ_STAGE_FLAG_SCHED;
		return;
	}

	/* Run (or IPI) the start handler on the interface's CPU. */
	cpu = ifp->if_start_cpuid(ifp);
	if (cpu != mycpuid)
		lwkt_send_ipiq(globaldata_find(cpu), if_start_ipifunc, ifp);
	else
		if_start_ipifunc(ifp);
}

/*
 * NOTE:
 * This function will release ifnet.if_start interlock,
 * if ifnet.if_start does not need to be scheduled
 */
static __inline int
if_start_need_schedule(struct ifaltq *ifq, int running)
{
	if (!running || ifq_is_empty(ifq)
#ifdef ALTQ
	    || ifq->altq_tbr != NULL
#endif
	) {
		ALTQ_LOCK(ifq);
		/*
		 * ifnet.if_start interlock is released, if:
		 * 1) Hardware can not take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (ifq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    when hardware queue is ready
		 * 2) There is not packet in the ifnet.if_snd.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    TBR callout will call ifnet.if_start
		 */
		if (!running || !ifq_data_ready(ifq)) {
			ifq->altq_started = 0;
			ALTQ_UNLOCK(ifq);
			return 0;
		}
		ALTQ_UNLOCK(ifq);
	}
	return 1;
}

/*
 * Netisr handler behind the per-CPU if_start netmsgs: run the driver's
 * if_start on the interface's owning CPU, chasing a CPU change if the
 * interface migrated, and reschedule while more work remains.
 */
static void
if_start_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifnet *ifp = lmsg->u.ms_resultp;
	struct ifaltq *ifq = &ifp->if_snd;
	int running = 0, need_sched;

	/*
	 * Reply ASAP so the message can be reused immediately; the critical
	 * section pairs with the MSGF_DONE test in if_start_ipifunc().
	 */
	crit_enter();
	lwkt_replymsg(lmsg, 0);	/* reply ASAP */
	crit_exit();

	if (mycpuid != ifp->if_start_cpuid(ifp)) {
		/*
		 * We need to chase the ifnet CPU change.
		 */
		logifstart(chase_sched, ifp);
		if_start_schedule(ifp, 1);
		return;
	}

	ifnet_serialize_tx(ifp);
	if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq)) {
		logifstart(run, ifp);
		ifp->if_start(ifp);
		if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq))
			running = 1;
	}
	need_sched = if_start_need_schedule(ifq, running);
	ifnet_deserialize_tx(ifp);

	if (need_sched) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		logifstart(sched, ifp);
		if_start_schedule(ifp, 0);
	}
}

/* Device driver ifnet.if_start helper function */
void
if_devstart(struct ifnet *ifp)
{
	struct ifaltq *ifq = &ifp->if_snd;
	int running = 0;

	ASSERT_IFNET_SERIALIZED_TX(ifp);

	/*
	 * Take the if_start interlock (altq_started); bail if someone else
	 * already started transmission or there is nothing to send.
	 */
	ALTQ_LOCK(ifq);
	if (ifq->altq_started || !ifq_data_ready(ifq)) {
		logifstart(avoid, ifp);
		ALTQ_UNLOCK(ifq);
		return;
	}
	ifq->altq_started = 1;
	ALTQ_UNLOCK(ifq);

	logifstart(run, ifp);
	ifp->if_start(ifp);

	if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq))
		running = 1;

	if (if_start_need_schedule(ifq, running)) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		logifstart(sched, ifp);
		if_start_schedule(ifp, 0);
	}
}

/*
 * Default ifnet serialize methods, used when the driver supplies a single
 * serializer (or none) instead of its own serialize function vector.  All
 * four simply operate on ifp->if_serializer and ignore the slz selector.
 */
static void
if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_enter(ifp->if_serializer);
}

static void
if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_exit(ifp->if_serializer);
}

static int
if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	return lwkt_serialize_try(ifp->if_serializer);
}

#ifdef INVARIANTS
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (serialized)
		ASSERT_SERIALIZED(ifp->if_serializer);
	else
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
}
#endif

/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.  If non-NULL access to the interface
 * may be MPSAFE.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	int i;

	/* Current capacity of ifindex2ifnet[]; grows by doubling below. */
	static int if_indexlim = 8;

	/*
	 * Wire up serialization: either the driver supplied a complete set
	 * of serialize methods (then if_serializer must stay NULL), or we
	 * install the default methods over a single serializer, embedded
	 * in the ifnet if the caller passed none.
	 */
	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	ifp->if_start_cpuid = if_start_cpuid;
	ifp->if_cpuid = 0;

#ifdef IFPOLL_ENABLE
	/* Device is not in polling mode by default */
	ifp->if_npoll_cpuid = -1;
	if (ifp->if_npoll != NULL)
		ifp->if_start_cpuid = if_start_cpuid_npoll;
#endif

	/* One pre-initialized if_start netmsg per CPU. */
	ifp->if_start_nmsg = kmalloc(ncpus * sizeof(*ifp->if_start_nmsg),
				     M_LWKTMSG, M_WAITOK);
	for (i = 0; i < ncpus; ++i) {
		netmsg_init(&ifp->if_start_nmsg[i], NULL, &netisr_adone_rport,
			    0, if_start_dispatch);
		ifp->if_start_nmsg[i].lmsg.u.ms_resultp = ifp;
	}

	/* Hold the ioctl mutex across the rest of the attach sequence. */
	mtx_init(&ifp->if_ioctl_mtx);
	mtx_lock(&ifp->if_ioctl_mtx);

	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
	ifp->if_index = ++if_index;

	/*
	 * XXX -
	 * The old code would work if the interface passed a pre-existing
	 * chain of ifaddrs to this code.  We don't trust our callers to
	 * properly initialize the tailq, however, so we no longer allow
	 * this unlikely case.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_prefixhead);
	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		if_indexlim <<= 1;

		/* grow ifindex2ifnet */
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet) {
			/* n/2 is the old table size in bytes (we doubled) */
			bcopy(ifindex2ifnet, q, n/2);
			kfree(ifindex2ifnet, M_IFADDR);
		}
		ifindex2ifnet = q;
	}

	ifindex2ifnet[if_index] = ifp;

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP
	/* One ifaddr followed by two sockaddr_dl's: address and netmask. */
	ifasize = sizeof(struct ifaddr) + 2 * socksize;
	ifa = ifa_create(ifasize, M_WAITOK);
	sdl = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	/* Netmask: all-ones over the name portion of the link address. */
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Initialize the send queue (classic FIFO until ALTQ is enabled). */
	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;
	ifq->altq_started = 0;
	ifq->altq_prepended = NULL;
	ALTQ_LOCK_INIT(ifq);
	ifq_set_classic(ifq);

	ifq->altq_stage =
	    kmalloc_cachealign(ncpus * sizeof(struct ifaltq_stage),
			       M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		ifq->altq_stage[i].ifqs_altq = ifq;

	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

	mtx_unlock(&ifp->if_ioctl_mtx);
}

/*
 * SYSINIT hook: give every interface attached before the protocol domains
 * were registered its per-domain if_afdata.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	crit_enter();
	TAILQ_FOREACH(ifp, &ifnet, if_list)
		if_attachdomain1(ifp);
	crit_exit();
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
	if_attachdomain, NULL);

/* Initialize ifp's per-domain data via each domain's dom_ifattach hook. */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
				(*dp->dom_ifattach)(ifp);
	crit_exit();
}

/*
 * Purge all addresses whose type is _not_ AF_LINK
 */
void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct ifaddr_container *ifac, *next;

	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
			      ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			/* Delete through the INET ioctl path; on success the
			 * address is already unlinked, so skip the generic
			 * unlink below. */
			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
				       NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}
}

/*
 * Per-CPU netmsg handler: drop this CPU's stage entry for the dying ifq
 * from the local staging list, if it is queued.
 */
static void
ifq_stage_detach_handler(netmsg_t nmsg)
{
	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
	struct ifaltq_stage *stage = &ifq->altq_stage[mycpuid];

	if (stage->ifqs_flags & IFQ_STAGE_FLAG_QUED)
		ifq_stage_remove(&ifq_stage_heads[mycpuid], stage);
	lwkt_replymsg(&nmsg->lmsg, 0);
}

/*
 * Synchronously remove ifq's stage entries from every CPU's staging list
 * by running ifq_stage_detach_handler in each netisr thread.
 */
static void
ifq_stage_detach(struct ifaltq *ifq)
{
	struct netmsg_base base;
	int cpu;

	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
		    ifq_stage_detach_handler);
	base.lmsg.u.ms_resultp = ifq;

	for (cpu = 0; cpu < ncpus; ++cpu)
		lwkt_domsg(netisr_portfn(cpu), &base.lmsg, 0);
}

/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 */
void
if_detach(struct ifnet *ifp)
{
	struct radix_node_head *rnh;
	int i;
	int cpu, origcpu;
	struct domain *dp;

	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		/* Only the AF_LINK address may remain; destroy it last. */
		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
			("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
			("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface
	 * Unfortuneatly the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 */
	origcpu = mycpuid;
	for (cpu = 0; cpu < ncpus; cpu++) {
		/* Each CPU owns its own routing tables; walk them locally. */
		lwkt_migratecpu(cpu);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[cpu][i]) == NULL)
				continue;
			rnh->rnh_walktree(rnh, if_rtdel, ifp);
		}
	}
	lwkt_migratecpu(origcpu);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
				ifp->if_afdata[dp->dom_family]);

	/*
	 * Remove interface from ifindex2ifp[] and maybe decrement if_index.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	TAILQ_REMOVE(&ifnet, ifp, if_link);
	kfree(ifp->if_addrheads, M_IFADDR);

	/* Drain in-flight IPIs before tearing down per-CPU state. */
	lwkt_synchronize_ipiqs("if_detach");
	ifq_stage_detach(&ifp->if_snd);

	kfree(ifp->if_start_nmsg, M_LWKTMSG);
	kfree(ifp->if_snd.altq_stage, M_DEVBUF);
	crit_exit();
}

/*
 * Create interface group without members
 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group *ifg = NULL;

	if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
	    M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}

/*
 * Add a group to an interface
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_group	*ifg = NULL;
	struct ifg_member	*ifgm;

	/* Names ending in a digit are reserved (would clash with units). */
	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		kfree(ifgl, M_TEMP);
		return (ENOMEM);
	}

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
		kfree(ifgl, M_TEMP);
		kfree(ifgm, M_TEMP);
		return (ENOMEM);
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}

/*
 * Remove a group from an interface
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list		*ifgl;
	struct ifg_member	*ifgm;

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		kfree(ifgm, M_TEMP);
	}

	/* Last member gone: destroy the group itself. */
	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		kfree(ifgl->ifgl_group, M_TEMP);
	}

	kfree(ifgl, M_TEMP);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}

/*
 * Stores all groups from an interface in memory pointed
 * to by data
 */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int len, error;
	struct ifg_list *ifgl;
	struct ifg_req ifgrq, *ifgp;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;

	/*
	 * Sizing pass: when the caller supplies ifgr_len == 0 it is only
	 * asking how much buffer space a full listing would require.
	 */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	/* Copy one ifg_req per group out to the user buffer. */
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		/* Zero the stack copy first: no kernel stack disclosure. */
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Stores all members of a group in memory pointed to by data.
 * Mirrors if_getgroup(): a zero ifgr_len requests the required size,
 * otherwise one ifg_req (carrying the member's if_xname) is copied out
 * per member interface.
 */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req ifgrq, *ifgp;
	int len, error;

	/* Look the group up by name in the global group list. */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	/* Sizing pass, as in if_getgroup(). */
	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Delete Routes for a Network Interface
 *
 * Called for each routing entry via an rnh->rnh_walktree() call
 * to delete all route entries referencing a detaching network interface.
 *
 * Arguments:
 *	rn	pointer to node in the routing table
 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
 *
 * Returns:
 *	0	successful
 *	errno	failed - reason indicated
 *
 */
static int
if_rtdel(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct ifnet *ifp = arg;
	int err;

	if (rt->rt_ifp == ifp) {

		/*
		 * Protect (sorta) against walktree recursion problems
		 * with cloned routes
		 */
		if (!(rt->rt_flags & RTF_UP))
			return (0);

		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
				rt_mask(rt), rt->rt_flags,
				NULL);
		if (err) {
			/* Deletion failure is logged but not fatal to the walk. */
			log(LOG_WARNING, "if_rtdel: error %d\n", err);
		}
	}

	return (0);
}

/*
 * Locate an interface based on a complete address.
 * Returns the first ifaddr whose address (or, on broadcast-capable
 * interfaces, broadcast address) exactly matches, else NULL.
 */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr)
{
	struct ifnet *ifp;

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;

		/* Walk this CPU's per-cpu address list for the interface. */
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (sa_equal(addr, ifa->ifa_addr))
				return (ifa);
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    /* IPv6 doesn't have broadcast */
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    sa_equal(ifa->ifa_broadaddr, addr))
				return (ifa);
		}
	}
	return (NULL);
}
/*
 * Locate the point to point interface with a given destination address.
 */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr)
{
	struct ifnet *ifp;

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;

		/* Only point-to-point interfaces carry a dstaddr. */
		if (!(ifp->if_flags & IFF_POINTOPOINT))
			continue;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (ifa->ifa_dstaddr &&
			    sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		}
	}
	return (NULL);
}

/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 */
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa_maybe = NULL;	/* best (most specific) match so far */
	u_int af = addr->sa_family;
	char *addr_data = addr->sa_data, *cplim;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;

		if (sdl->sdl_index && sdl->sdl_index <= if_index)
			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
	}

	/*
	 * Scan though each interface, looking for ones that have
	 * addresses in this address family.
	 */
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;
			char *cp, *cp2, *cp3;

			/*
			 * NOTE: the "next:" label is the target of the
			 * goto in the netmask scan below; it re-enters
			 * the inner loop's continue.
			 */
			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr))
					return (ifa);
			} else {
				/*
				 * if we have a special address handler,
				 * then use it instead of the generic one.
				 */
				if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
						return (ifa);
					} else {
						continue;
					}
				}

				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit dissagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len +
					(char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search
				 * for an even better one.
				 */
				if (ifa_maybe == NULL ||
				    rn_refines((char *)ifa->ifa_netmask,
					       (char *)ifa_maybe->ifa_netmask))
					ifa_maybe = ifa;
			}
		}
	}
	return (ifa_maybe);
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr_container *ifac;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;	/* first same-family fallback */
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (0);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the first address of the family as a fallback. */
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == NULL) {
			/* No mask: only exact (or dstaddr) matches count. */
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr != NULL &&
			     sa_equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		} else {
			/* Byte-wise masked comparison against the netmask. */
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			if (cp3 == cplim)
				return (ifa);
		}
	}
	return (ifa_maybe);
}

/*
 * Default action when installing a route with a Link Level gateway.
 * Lookup an appropriate real ifa to point to.
 * This should be moved to /sys/net/link.c eventually.
 */
static void
link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
{
	struct ifaddr *ifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	/* Only rewrite rt_ifa on RTM_ADD with a fully-formed route. */
	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
		return;
	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa != NULL) {
		/* Swap refs: drop old rt_ifa, take new before storing. */
		IFAFREE(rt->rt_ifa);
		IFAREF(ifa);
		rt->rt_ifa = ifa;
		/* Recurse into the new ifa's handler, avoiding self-loop. */
		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
			ifa->ifa_rtrequest(cmd, rt, info);
	}
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or eqivalent.
 */
void
if_unroute(struct ifnet *ifp, int flag, int fam)
{
	struct ifaddr_container *ifac;

	ifp->if_flags &= ~flag;
	getmicrotime(&ifp->if_lastchange);
	/* PRC_IFDOWN to every address of the family (or all, PF_UNSPEC). */
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
	}
	ifq_purge_all(&ifp->if_snd);
	rt_ifmsg(ifp);
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or eqivalent.
 */
void
if_route(struct ifnet *ifp, int flag, int fam)
{
	struct ifaddr_container *ifac;

	/* Purge stale queued packets before raising the flag. */
	ifq_purge_all(&ifp->if_snd);
	ifp->if_flags |= flag;
	getmicrotime(&ifp->if_lastchange);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			kpfctlinput(PRC_IFUP, ifa->ifa_addr);
	}
	rt_ifmsg(ifp);
#ifdef INET6
	in6_if_up(ifp);
#endif
}

/*
 * Mark an interface down and notify protocols of the transition.  An
 * interface going down is also considered to be a synchronizing event.
 * We must ensure that all packet processing related to the interface
 * has completed before we return so e.g. the caller can free the ifnet
 * structure that the mbufs may be referencing.
 *
 * NOTE: must be called at splnet or eqivalent.
 */
void
if_down(struct ifnet *ifp)
{
	if_unroute(ifp, IFF_UP, AF_UNSPEC);
	/* Synchronize: wait for in-flight protocol processing to drain. */
	netmsg_service_sync();
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or eqivalent.
 */
void
if_up(struct ifnet *ifp)
{
	if_route(ifp, IFF_UP, AF_UNSPEC);
}

/*
 * Process a link state change.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	int link_state = ifp->if_link_state;

	/* Tell routing socket listeners and devd about the new state. */
	rt_ifmsg(ifp);
	devctl_notify("IFNET", ifp->if_xname,
	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
}

/*
 * Handle interface watchdog timer routines.  Called
 * from softclock, we decrement timers (if set) and
 * call the appropriate interface routine on expiration.
 */
static void
if_slowtimo(void *arg)
{
	struct ifnet *ifp;

	crit_enter();

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		/* if_timer == 0 means the watchdog is disarmed. */
		if (ifp->if_timer == 0 || --ifp->if_timer)
			continue;
		if (ifp->if_watchdog) {
			if (ifnet_tryserialize_all(ifp)) {
				(*ifp->if_watchdog)(ifp);
				ifnet_deserialize_all(ifp);
			} else {
				/* try again next timeout */
				++ifp->if_timer;
			}
		}
	}

	crit_exit();

	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
}

/*
 * Map interface name to
 * interface structure pointer.
 */
struct ifnet *
ifunit(const char *name)
{
	struct ifnet *ifp;

	/*
	 * Search all the interfaces for this name/number
	 */

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
			break;
	}
	return (ifp);
}


/*
 * Map interface name in a sockaddr_dl to
 * interface structure pointer.
 */
struct ifnet *
if_withname(struct sockaddr *sa)
{
	char ifname[IFNAMSIZ+1];
	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;

	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
	     (sdl->sdl_nlen > IFNAMSIZ) )
		return NULL;

	/*
	 * ifunit wants a null-terminated name.  It may not be null-terminated
	 * in the sockaddr.  We don't want to change the caller's sockaddr,
	 * and there might not be room to put the trailing null anyway, so we
	 * make a local copy that we know we can null terminate safely.
	 */

	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
	ifname[sdl->sdl_nlen] = '\0';
	return ifunit(ifname);
}


/*
 * Interface ioctls.
 *
 * Dispatches socket ioctls that target an interface.  Global requests
 * (SIOCGIFCONF, clone create/destroy/list) are handled before the
 * interface lookup; everything else is serialized under the target
 * interface's if_ioctl_mtx.  Requests not understood here fall through
 * to the protocol via so_pru_control_direct(), with 4.3BSD ioctl
 * translation when COMPAT_43 is configured.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifnet *ifp;
	struct ifreq *ifr;
	struct ifstat *ifs;
	int error;
	short oif_flags;
	int new_flags;
#ifdef COMPAT_43
	int ocmd;
#endif
	size_t namelen, onamelen;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	/* Interface-list requests need no specific interface. */
	switch (cmd) {
	case SIOCGIFCONF:
	case OSIOCGIFCONF:
		return (ifconf(cmd, data, cred));
	default:
		break;
	}

	ifr = (struct ifreq *)data;

	/* Clone management also runs before the per-interface lookup. */
	switch (cmd) {
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
	case SIOCIFDESTROY:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_destroy(ifr->ifr_name));
	case SIOCIFGCLONERS:
		return (if_clone_list((struct if_clonereq *)data));
	default:
		break;
	}

	/*
	 * Nominal ioctl through interface, lookup the ifp and obtain a
	 * lock to serialize the ifconfig ioctl operation.
	 */
	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	error = 0;
	mtx_lock(&ifp->if_ioctl_mtx);

	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		/* if_flags is wider than ifr_flags; high word goes separately. */
		ifr->ifr_flags = ifp->if_flags;
		ifr->ifr_flagshigh = ifp->if_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFDATA:
		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
				sizeof(ifp->if_data));
		break;

	case SIOCGIFPHYS:
		ifr->ifr_phys = ifp->if_physical;
		break;

	case SIOCGIFPOLLCPU:
		/* Polling cpu selection is not supported; report -1. */
		ifr->ifr_pollcpu = -1;
		break;

	case SIOCSIFPOLLCPU:
		/* Accepted but ignored (no-op). */
		break;

	case SIOCSIFFLAGS:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		/* Reassemble the 32-bit flags from the split ifreq fields. */
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		if (ifp->if_flags & IFF_SMART) {
			/* Smart drivers twiddle their own routes */
		} else if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			crit_enter();
			if_down(ifp);
			crit_exit();
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			crit_enter();
			if_up(ifp);
			crit_exit();
		}

#ifdef IFPOLL_ENABLE
		/* Register/deregister polling when IFF_NPOLLING toggles. */
		if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
			if (new_flags & IFF_NPOLLING)
				ifpoll_register(ifp);
			else
				ifpoll_deregister(ifp);
		}
#endif

		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(new_flags &~ IFF_CANTCHANGE);
		if (new_flags & IFF_PPROMISC) {
			/* Permanently promiscuous mode requested */
			ifp->if_flags |= IFF_PROMISC;
		} else if (ifp->if_pcount == 0) {
			ifp->if_flags &= ~IFF_PROMISC;
		}
		if (ifp->if_ioctl) {
			ifnet_serialize_all(ifp);
			ifp->if_ioctl(ifp, cmd, data, cred);
			ifnet_deserialize_all(ifp);
		}
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		/* Reject capability bits the hardware does not advertise. */
		if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
			error = EINVAL;
			break;
		}
		ifnet_serialize_all(ifp);
		ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		break;

	case SIOCSIFNAME:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
		if (error)
			break;
		if (new_name[0] == '\0') {
			error = EINVAL;
			break;
		}
		if (ifunit(new_name) != NULL) {
			error = EEXIST;
			break;
		}

		/* A rename is modeled as a detach followed by a re-attach. */
		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

		/* Announce the departure of the interface. */
		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);

		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		/* XXX IFA_LOCK(ifa); */
		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		namelen = strlen(new_name);
		onamelen = sdl->sdl_nlen;
		/*
		 * Move the address if needed.  This is safe because we
		 * allocate space for a name of length IFNAMSIZ when we
		 * create this in if_attach().
		 */
		if (namelen != onamelen) {
			bcopy(sdl->sdl_data + onamelen,
			    sdl->sdl_data + namelen, sdl->sdl_alen);
		}
		bcopy(new_name, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		/* The link-level netmask mirrors the name length in 0xff. */
		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
		bzero(sdl->sdl_data, onamelen);
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		/* XXX IFA_UNLOCK(ifa) */

		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);

		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
		break;

	case SIOCSIFMETRIC:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		u_long oldmtu = ifp->if_mtu;

		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
			error = EINVAL;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp);
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EOPNOTSUPP;
			break;
		}

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK) {
			error = EINVAL;
			break;
		}

		if (cmd == SIOCADDMULTI) {
			struct ifmultiaddr *ifma;
			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSLIFPHYADDR:
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		/* Privileged set-requests forwarded to the driver. */
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == 0) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFSTATUS:
		ifs = (struct ifstat *)data;
		ifs->ascii[0] = '\0';
		/* fall through */
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGLIFPHYADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFGENERIC:
		/* Unprivileged get-requests forwarded to the driver. */
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		break;

	case SIOCSIFLLADDR:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
				     ifr->ifr_addr.sa_len);
		EVENTHANDLER_INVOKE(iflladdr_event, ifp);
		break;

	default:
		/* Unknown here: hand off to the protocol's control hook. */
		oif_flags = ifp->if_flags;
		if (so->so_proto == 0) {
			error = EOPNOTSUPP;
			break;
		}
#ifndef COMPAT_43
		error = so_pru_control_direct(so, cmd, data, ifp);
#else
		ocmd = cmd;

		/* Translate 4.3BSD-era ioctls and sockaddr layouts. */
		switch (cmd) {
		case SIOCSIFDSTADDR:
		case SIOCSIFADDR:
		case SIOCSIFBRDADDR:
		case SIOCSIFNETMASK:
#if BYTE_ORDER != BIG_ENDIAN
			if (ifr->ifr_addr.sa_family == 0 &&
			    ifr->ifr_addr.sa_len < 16) {
				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
				ifr->ifr_addr.sa_len = 16;
			}
#else
			if (ifr->ifr_addr.sa_len == 0)
				ifr->ifr_addr.sa_len = 16;
#endif
			break;
		case OSIOCGIFADDR:
			cmd = SIOCGIFADDR;
			break;
		case OSIOCGIFDSTADDR:
			cmd = SIOCGIFDSTADDR;
			break;
		case OSIOCGIFBRDADDR:
			cmd = SIOCGIFBRDADDR;
			break;
		case OSIOCGIFNETMASK:
			cmd = SIOCGIFNETMASK;
			break;
		default:
			break;
		}

		error = so_pru_control_direct(so, cmd, data, ifp);

		/* Convert results back to the old osockaddr layout. */
		switch (ocmd) {
		case OSIOCGIFADDR:
		case OSIOCGIFDSTADDR:
		case OSIOCGIFBRDADDR:
		case OSIOCGIFNETMASK:
			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
			break;
		}
#endif /* COMPAT_43 */

		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
			DELAY(100);/* XXX: temporary workaround for fxp issue*/
			if (ifp->if_flags & IFF_UP) {
				crit_enter();
				in6_if_up(ifp);
				crit_exit();
			}
#endif
		}
		break;
	}

	mtx_unlock(&ifp->if_ioctl_mtx);
	return (error);
}

/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	int error;
	int oldflags;

	oldflags = ifp->if_flags;
	if (ifp->if_flags & IFF_PPROMISC) {
		/* Do nothing if device is in permanently promiscuous mode */
		ifp->if_pcount += pswitch ? 1 : -1;
		return (0);
	}
	if (pswitch) {
		/*
		 * If the device is not configured up, we cannot put it in
		 * promiscuous mode.
		 */
		if ((ifp->if_flags & IFF_UP) == 0)
			return (ENETDOWN);
		/* Only the 0 -> 1 transition touches the hardware. */
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
		log(LOG_INFO, "%s: promiscuous mode enabled\n",
		    ifp->if_xname);
	} else {
		/* Only the final matching "off" clears the flag. */
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
		log(LOG_INFO, "%s: promiscuous mode disabled\n",
		    ifp->if_xname);
	}
	/* Push the new flags down to the driver. */
	ifr.ifr_flags = ifp->if_flags;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	ifnet_serialize_all(ifp);
	error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
	ifnet_deserialize_all(ifp);
	if (error == 0)
		rt_ifmsg(ifp);
	else
		ifp->if_flags = oldflags;	/* roll back on driver failure */
	return error;
}

/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 */
static int
ifconf(u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct sockaddr *sa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;	/* space tracks bytes left */

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;
		int addrs;

		if (space <= sizeof ifr)
			break;

		/*
		 * Zero the stack declared structure first to prevent
		 * memory disclosure.
		 */
		bzero(&ifr, sizeof(ifr));
		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			error = ENAMETOOLONG;
			break;
		}

		addrs = 0;
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (space <= sizeof ifr)
				break;
			sa = ifa->ifa_addr;
			/* Jailed callers only see addresses of their prison. */
			if (cred->cr_prison &&
			    prison_if(cred, sa))
				continue;
			addrs++;
#ifdef COMPAT_43
			if (cmd == OSIOCGIFCONF) {
				/* Old binaries expect the osockaddr layout. */
				struct osockaddr *osa =
					 (struct osockaddr *)&ifr.ifr_addr;
				ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else
#endif
			if (sa->sa_len <= sizeof(*sa)) {
				ifr.ifr_addr = *sa;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else {
				/*
				 * Oversized sockaddr: the name and the
				 * variable-length address are copied out
				 * separately and ifrp advances past both.
				 */
				if (space < (sizeof ifr) + sa->sa_len -
					    sizeof(*sa))
					break;
				space -= sa->sa_len - sizeof(*sa);
				error = copyout(&ifr, ifrp,
						sizeof ifr.ifr_name);
				if (error == 0)
					error = copyout(sa, &ifrp->ifr_addr,
							sa->sa_len);
				ifrp = (struct ifreq *)
					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
			}
			if (error)
				break;
			space -= sizeof ifr;
		}
		if (error)
			break;
		/* Interfaces without visible addresses get one blank entry. */
		if (!addrs) {
			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
			error = copyout(&ifr, ifrp, sizeof ifr);
			if (error)
				break;
			space -= sizeof ifr;
			ifrp++;
		}
	}
	ifc->ifc_len -= space;	/* report bytes actually used */
	return (error);
}

/*
 * Just like if_promisc(), but for all-multicast-reception mode.
 */
int
if_allmulti(struct ifnet *ifp, int onswitch)
{
	int error = 0;
	struct ifreq ifr;

	crit_enter();

	if (onswitch) {
		/* Only the 0 -> 1 transition programs the hardware. */
		if (ifp->if_amcount++ == 0) {
			ifp->if_flags |= IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
					      NULL);
			ifnet_deserialize_all(ifp);
		}
	} else {
		if (ifp->if_amcount > 1) {
			ifp->if_amcount--;
		} else {
			/* Last reference dropped: clear the mode. */
			ifp->if_amcount = 0;
			ifp->if_flags &= ~IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
					      NULL);
			ifnet_deserialize_all(ifp);
		}
	}

	crit_exit();

	if (error == 0)
		rt_ifmsg(ifp);
	return error;
}

/*
 * Add a multicast listenership to the interface in question.
 * The link layer provides a routine (if_resolvemulti) which converts
 * the network-level multicast address into the matching link-level
 * address; a membership record is kept for each.
 */
int
if_addmulti(
	struct ifnet *ifp,	/* interface to manipulate */
	struct sockaddr *sa,	/* address to add */
	struct ifmultiaddr **retifma)
{
	struct sockaddr *llsa, *dupsa;
	int error;
	struct ifmultiaddr *ifma;

	/*
	 * If the matching multicast address already exists
	 * then don't add a new one, just add a reference
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa_equal(sa, ifma->ifma_addr)) {
			ifma->ifma_refcount++;
			if (retifma)
				*retifma = ifma;
			return 0;
		}
	}

	/*
	 * Give the link layer a chance to accept/reject it, and also
	 * find out which AF_LINK address this maps to, if it isn't one
	 * already.
	 */
	if (ifp->if_resolvemulti) {
		ifnet_serialize_all(ifp);
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		ifnet_deserialize_all(ifp);
		if (error)
			return error;
	} else {
		llsa = NULL;
	}

	/* M_WAITOK: these allocations cannot fail. */
	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
	bcopy(sa, dupsa, sa->sa_len);

	ifma->ifma_addr = dupsa;
	ifma->ifma_lladdr = llsa;
	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = 0;
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);

	/*
	 * Some network interfaces can scan the address list at
	 * interrupt time; lock them out.
	 */
	crit_enter();
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
	crit_exit();
	if (retifma)
		*retifma = ifma;

	/*
	 * The resolved link-level address gets its own (refcounted)
	 * membership record as well.
	 */
	if (llsa != NULL) {
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (sa_equal(ifma->ifma_addr, llsa))
				break;
		}
		if (ifma) {
			ifma->ifma_refcount++;
		} else {
			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
			bcopy(llsa, dupsa, llsa->sa_len);
			ifma->ifma_addr = dupsa;
			ifma->ifma_ifp = ifp;
			ifma->ifma_refcount = 1;
			crit_enter();
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
			crit_exit();
		}
	}
	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.
	 */
	crit_enter();
	ifnet_serialize_all(ifp);
	if (ifp->if_ioctl)
		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
	ifnet_deserialize_all(ifp);
	crit_exit();

	return 0;
}

/*
 * Remove a reference to a multicast address on this interface.  Yell
 * if the request does not match an existing membership.
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;

	/* Locate the membership record for this protocol-level address. */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return ENOENT;

	/* Other references remain; just drop ours and keep the record. */
	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	/* Last reference: announce the deletion on the routing socket. */
	rt_newmaddrmsg(RTM_DELMADDR, ifma);
	/*
	 * Stash the link-layer companion address before the record is
	 * freed; it drives the second removal pass below.
	 */
	sa = ifma->ifma_lladdr;
	crit_enter();
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	/*
	 * Make sure the interface driver is notified
	 * in the case of a link layer mcast group being left.
	 */
	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
		ifnet_serialize_all(ifp);
		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
		ifnet_deserialize_all(ifp);
	}
	crit_exit();
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(ifma, M_IFMADDR);
	/* No link-layer companion to clean up; we are done. */
	if (sa == NULL)
		return 0;

	/*
	 * Now look for the link-layer address which corresponds to
	 * this network address.  It had been squirreled away in
	 * ifma->ifma_lladdr for this purpose (so we don't have
	 * to call ifp->if_resolvemulti() again), and we saved that
	 * value in sa above.  If some nasty deleted the
	 * link-layer address out from underneath us, we can deal because
	 * the address we stored is not the same as the one which was
	 * in the record for the link-layer address.  (So we don't complain
	 * in that case.)
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return 0;

	/* Link-layer group still referenced by other memberships. */
	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	/*
	 * Last reference to the link-layer group too; remove it and let
	 * the driver reprogram its hardware filter.
	 */
	crit_enter();
	ifnet_serialize_all(ifp);
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	ifnet_deserialize_all(ifp);
	crit_exit();
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(sa, M_IFMADDR);
	kfree(ifma, M_IFMADDR);

	return 0;
}

/*
 * Delete all multicast group membership for an interface.
 * Should be used to quickly flush all multicast filters.
 */
void
if_delallmulti(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	/* MUTABLE iteration: if_delmulti() may free the current element. */
	TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
		if_delmulti(ifp, ifma->ifma_addr);
}


/*
 * Set the link layer address on an interface.
 *
 * At this time we only support certain types of interfaces,
 * and we don't allow the length of the address to change.
 *
 * Returns 0 on success, EINVAL if the interface has no link-level
 * sockaddr or the length differs, ENODEV for unsupported if_type.
 */
int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifreq ifr;

	sdl = IF_LLSOCKADDR(ifp);
	if (sdl == NULL)
		return (EINVAL);
	if (len != sdl->sdl_alen)	/* don't allow length to change */
		return (EINVAL);
	switch (ifp->if_type) {
	case IFT_ETHER:			/* these types use struct arpcom */
	case IFT_XETHER:
	case IFT_L2VLAN:
		/* Update both the arpcom copy and the link-level sockaddr. */
		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
		bcopy(lladdr, LLADDR(sdl), len);
		break;
	default:
		return (ENODEV);
	}
	/*
	 * If the interface is already up, we need
	 * to re-init it in order to reprogram its
	 * address filter.
	 */
	ifnet_serialize_all(ifp);
	if ((ifp->if_flags & IFF_UP) != 0) {
#ifdef INET
		struct ifaddr_container *ifac;
#endif

		/* Cycle the interface down and back up via SIOCSIFFLAGS. */
		ifp->if_flags &= ~IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			      NULL);
		ifp->if_flags |= IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			      NULL);
#ifdef INET
		/*
		 * Also send gratuitous ARPs to notify other nodes about
		 * the address change.
		 */
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET)
				arp_gratuitous(ifp, ifa);
		}
#endif
	}
	ifnet_deserialize_all(ifp);
	return (0);
}

/*
 * Return the multicast membership record on 'ifp' matching 'sa',
 * or NULL if there is no such membership.
 */
struct ifmultiaddr *
ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(ifma->ifma_addr, sa))
			break;

	return ifma;
}

/*
 * This function locates the first real ethernet MAC from a network
 * card and loads it into node, returning 0 on success or ENOENT if
 * no suitable interfaces were found.  It is used by the uuid code to
 * generate a unique 6-byte number.
 */
int
if_getanyethermac(uint16_t *node, int minlen)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (ifp->if_type != IFT_ETHER)
			continue;
		sdl = IF_LLSOCKADDR(ifp);
		if (sdl->sdl_alen < minlen)
			continue;
		/* Copy exactly 'minlen' bytes of the MAC into 'node'. */
		bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
		      minlen);
		return(0);
	}
	return (ENOENT);
}

/*
 * The name argument must be a pointer to storage which will last as
 * long as the interface does.  For physical devices, the result of
 * device_get_name(dev) is a good choice and for pseudo-devices a
 * static string works well.
 */
void
if_initname(struct ifnet *ifp, const char *name, int unit)
{
	ifp->if_dname = name;
	ifp->if_dunit = unit;
	if (unit != IF_DUNIT_NONE)
		ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
	else
		strlcpy(ifp->if_xname, name, IFNAMSIZ);
}

/*
 * printf-style kernel console output prefixed with the interface
 * name ("<xname>: ").  Returns the total number of characters
 * emitted, like kprintf().
 */
int
if_printf(struct ifnet *ifp, const char *fmt, ...)
{
	__va_list ap;
	int retval;

	retval = kprintf("%s: ", ifp->if_xname);
	__va_start(ap, fmt);
	retval += kvprintf(fmt, ap);
	__va_end(ap);
	return (retval);
}

/*
 * Allocate and zero an ifnet of the given interface type, invoking
 * the type's registered if_com_alloc hook (if any) to set up the
 * type-specific l2com area.  Returns NULL if the hook fails.
 */
struct ifnet *
if_alloc(uint8_t type)
{
	struct ifnet *ifp;
	size_t size;

	/*
	 * XXX temporary hack until arpcom is setup in if_l2com
	 */
	if (type == IFT_ETHER)
		size = sizeof(struct arpcom);
	else
		size = sizeof(struct ifnet);

	ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);

	ifp->if_type = type;

	if (if_com_alloc[type] != NULL) {
		ifp->if_l2com = if_com_alloc[type](type, ifp);
		if (ifp->if_l2com == NULL) {
			kfree(ifp, M_IFNET);
			return (NULL);
		}
	}
	return (ifp);
}

/*
 * Release an ifnet allocated with if_alloc().
 * NOTE(review): the type's if_com_free hook is not invoked here,
 * unlike the symmetric if_com_alloc call in if_alloc() -- confirm
 * whether l2com teardown is handled elsewhere.
 */
void
if_free(struct ifnet *ifp)
{
	kfree(ifp, M_IFNET);
}

/*
 * Install the classic (default FIFO) enqueue/dequeue/request
 * handlers on an ALTQ.
 */
void
ifq_set_classic(struct ifaltq *ifq)
{
	ifq->altq_enqueue = ifq_classic_enqueue;
	ifq->altq_dequeue = ifq_classic_dequeue;
	ifq->altq_request = ifq_classic_request;
}

/*
 * Classic enqueue: tail-drop when the queue is full (the mbuf is
 * freed and ENOBUFS returned), otherwise append and return 0.
 */
int
ifq_classic_enqueue(struct ifaltq *ifq, struct mbuf *m,
		    struct altq_pktattr *pa __unused)
{
	logifq(enqueue, ifq);
	if (IF_QFULL(ifq)) {
		m_freem(m);
		return(ENOBUFS);
	} else {
		IF_ENQUEUE(ifq, m);
		return(0);
	}
}

/*
 * Classic dequeue: ALTDQ_POLL peeks at the head without removing it,
 * ALTDQ_REMOVE dequeues it.  'mpolled', when non-NULL, must match the
 * mbuf returned (sanity cross-check between a poll and the following
 * remove).
 */
struct mbuf *
ifq_classic_dequeue(struct ifaltq *ifq, struct mbuf *mpolled, int op)
{
	struct mbuf *m;

	switch (op) {
	case ALTDQ_POLL:
		IF_POLL(ifq, m);
		break;
	case ALTDQ_REMOVE:
		logifq(dequeue, ifq);
		IF_DEQUEUE(ifq, m);
		break;
	default:
		panic("unsupported ALTQ dequeue op: %d", op);
	}
	KKASSERT(mpolled == NULL || mpolled == m);
	return(m);
}

/*
 * Classic request handler: only ALTRQ_PURGE (drain the whole queue)
 * is supported; anything else panics.
 */
int
ifq_classic_request(struct ifaltq *ifq, int req, void *arg)
{
	switch (req) {
	case ALTRQ_PURGE:
		IF_DRAIN(ifq);
		break;
	default:
		panic("unsupported ALTQ request: %d", req);
	}
	return(0);
}

/*
 * Attempt to run ifnet.if_start directly under the TX serializer.
 * On serializer contention, or when more data remains to transmit
 * afterwards, the start is (re)scheduled on the ifnet's owning CPU
 * instead.
 */
static void
ifq_try_ifstart(struct ifaltq *ifq, int force_sched)
{
	struct ifnet *ifp = ifq->altq_ifp;
	int running = 0, need_sched;

	/*
	 * Try to do direct ifnet.if_start first, if there is
	 * contention on ifnet's serializer, ifnet.if_start will
	 * be scheduled on ifnet's CPU.
	 */
	if (!ifnet_tryserialize_tx(ifp)) {
		/*
		 * ifnet serializer contention happened,
		 * ifnet.if_start is scheduled on ifnet's
		 * CPU, and we keep going.
		 */
		logifstart(contend_sched, ifp);
		if_start_schedule(ifp, 1);
		return;
	}

	if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq)) {
		logifstart(run, ifp);
		ifp->if_start(ifp);
		/* Re-check: if_start may have stopped the interface. */
		if ((ifp->if_flags & IFF_RUNNING) && !ifq_is_oactive(ifq))
			running = 1;
	}
	need_sched = if_start_need_schedule(ifq, running);

	ifnet_deserialize_tx(ifp);

	if (need_sched) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		logifstart(sched, ifp);
		if_start_schedule(ifp, force_sched);
	}
}

/*
 * IFQ packets staging mechanism:
 *
 * The packets enqueued into IFQ are staged to a certain amount before the
 * ifnet's if_start is called.  In this way, the driver could avoid writing
 * to hardware registers upon every packet, instead, hardware registers
 * could be written when certain amount of packets are put onto hardware
 * TX ring.  The measurement on several modern NICs (emx(4), igb(4), bnx(4),
 * bge(4), jme(4)) shows that the hardware registers writing aggregation
 * could save ~20% CPU time when 18bytes UDP datagrams are transmitted at
 * 1.48Mpps.  The performance improvement by hardware registers writing
 * aggregation is also mentioned by Luigi Rizzo's netmap paper
 * (http://info.iet.unipi.it/~luigi/netmap/).
 *
 * IFQ packets staging is performed for two entry points into drivers'
 * transmission function:
 * - Direct ifnet's if_start calling, i.e. ifq_try_ifstart()
 * - ifnet's if_start scheduling, i.e. if_start_schedule()
 *
 * IFQ packets staging will be stopped upon any of the following conditions:
 * - If the count of packets enqueued on the current CPU is greater than or
 *   equal to ifq_stage_cntmax.  (XXX this should be per-interface)
 * - If the total length of packets enqueued on the current CPU is greater
 *   than or equal to the hardware's MTU - max_protohdr.  max_protohdr is
 *   cut from the hardware's MTU mainly because a full TCP segment's size
 *   is usually less than hardware's MTU.
 * - if_start_schedule() is not pending on the current CPU and if_start
 *   interlock (if_snd.altq_started) is not released.
 * - The if_start_rollup(), which is registered as low priority netisr
 *   rollup function, is called; probably because no more work is pending
 *   for netisr.
 *
 * NOTE:
 * Currently IFQ packet staging is only performed in netisr threads.
 */
int
ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
{
	struct ifaltq *ifq = &ifp->if_snd;
	int error, start = 0, len, mcast = 0, avoid_start = 0;
	struct ifaltq_stage_head *head = NULL;
	struct ifaltq_stage *stage = NULL;

	ASSERT_IFNET_NOT_SERIALIZED_TX(ifp);

	/* Record length/multicast before the mbuf may be consumed. */
	len = m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		mcast = 1;

	/*
	 * Staging is only attempted from netisr threads; see the staging
	 * mechanism description above.
	 */
	if (curthread->td_type == TD_TYPE_NETISR) {
		head = &ifq_stage_heads[mycpuid];
		stage = &ifq->altq_stage[mycpuid];

		stage->ifqs_cnt++;
		stage->ifqs_len += len;
		if (stage->ifqs_cnt < ifq_stage_cntmax &&
		    stage->ifqs_len < (ifp->if_mtu - max_protohdr))
			avoid_start = 1;
	}

	ALTQ_LOCK(ifq);
	error = ifq_enqueue_locked(ifq, m, pa);
	if (error) {
		if (!ifq_data_ready(ifq)) {
			ALTQ_UNLOCK(ifq);
			return error;
		}
		/* Queue still has data; don't stage, kick if_start. */
		avoid_start = 0;
	}
	if (!ifq->altq_started) {
		if (avoid_start) {
			ALTQ_UNLOCK(ifq);

			KKASSERT(!error);
			if ((stage->ifqs_flags & IFQ_STAGE_FLAG_QUED) == 0)
				ifq_stage_insert(head, stage);

			ifp->if_obytes += len;
			if (mcast)
				ifp->if_omcasts++;
			return error;
		}

		/*
		 * Hold the interlock of ifnet.if_start
		 */
		ifq->altq_started = 1;
		start = 1;
	}
	ALTQ_UNLOCK(ifq);

	if (!error) {
		ifp->if_obytes += len;
		if (mcast)
			ifp->if_omcasts++;
	}

	if (stage != NULL) {
		if (!start && (stage->ifqs_flags & IFQ_STAGE_FLAG_SCHED)) {
			KKASSERT(stage->ifqs_flags & IFQ_STAGE_FLAG_QUED);
			if (!avoid_start) {
				ifq_stage_remove(head, stage);
				if_start_schedule(ifp, 1);
			}
			return error;
		}

		if (stage->ifqs_flags & IFQ_STAGE_FLAG_QUED) {
			ifq_stage_remove(head, stage);
		} else {
			stage->ifqs_cnt = 0;
			stage->ifqs_len = 0;
		}
	}

	if (!start) {
		/* Another thread holds the if_start interlock. */
		logifstart(avoid, ifp);
		return error;
	}

	ifq_try_ifstart(ifq, 0);
	return error;
}

/*
 * Allocate a zeroed ifaddr of at least sizeof(struct ifaddr) bytes,
 * plus one reference-counted per-CPU ifaddr_container for each CPU.
 * Each container starts at refcnt 1.  Returns NULL only if kmalloc
 * fails under the caller-supplied 'flags'.
 */
void *
ifa_create(int size, int flags)
{
	struct ifaddr *ifa;
	int i;

	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));

	ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
	if (ifa == NULL)
		return NULL;

	/* One container per CPU; the containers are always M_WAITOK. */
	ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
				      M_IFADDR, M_WAITOK | M_ZERO);
	ifa->ifa_ncnt = ncpus;
	for (i = 0; i < ncpus; ++i) {
		struct ifaddr_container *ifac = &ifa->ifa_containers[i];

		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
		ifac->ifa = ifa;
		ifac->ifa_refcnt = 1;
	}
#ifdef IFADDR_DEBUG
	kprintf("alloc ifa %p %d\n", ifa, size);
#endif
	return ifa;
}

/*
 * Release one CPU's container of an ifaddr.  The container must have
 * dropped to zero references and be off all lists.  When the last
 * CPU's container is freed (ifa_ncnt reaches zero), the container
 * array and the ifaddr itself are freed.
 */
void
ifac_free(struct ifaddr_container *ifac, int cpu_id)
{
	struct ifaddr *ifa = ifac->ifa;

	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
	KKASSERT(ifac->ifa_refcnt == 0);
	KASSERT(ifac->ifa_listmask == 0,
		("ifa is still on %#x lists", ifac->ifa_listmask));

	/* Poison the magic to catch use-after-free. */
	ifac->ifa_magic = IFA_CONTAINER_DEAD;

#ifdef IFADDR_DEBUG_VERBOSE
	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
#endif

	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
		("invalid # of ifac, %d", ifa->ifa_ncnt));
	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
#ifdef IFADDR_DEBUG
		kprintf("free ifa %p\n", ifa);
#endif
		kfree(ifa->ifa_containers, M_IFADDR);
		kfree(ifa, M_IFADDR);
	}
}

/*
 * Per-CPU handler: link this CPU's container of msg->ifa onto
 * msg->ifp's address list (head or tail per msg->tail), then forward
 * the message to the next CPU in the chain.
 */
static void
ifa_iflink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
		("ifaddr is on if_addrheads"));

	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
	if (msg->tail)
		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
	else
		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);

	crit_exit();

	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
}

/*
 * Link 'ifa' onto 'ifp' on every CPU by circulating a netmsg through
 * all CPUs (synchronous; returns after the full chain completes).
 */
void
ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
		    0, ifa_iflink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;
	msg.tail = tail;

	ifa_domsg(&msg.base.lmsg, 0);
}

/*
 * Per-CPU handler: unlink this CPU's container of msg->ifa from
 * msg->ifp's address list, then forward to the next CPU.
 */
static void
ifa_ifunlink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
		("ifaddr is not on if_addrhead"));

	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;

	crit_exit();

	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
}

/*
 * Unlink 'ifa' from 'ifp' on every CPU (synchronous counterpart of
 * ifa_iflink()).
 */
void
ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
		    0, ifa_ifunlink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;

	ifa_domsg(&msg.base.lmsg, 0);
}

/*
 * Per-CPU handler: drop this CPU's reference on msg->ifa, then
 * forward to the next CPU.
 */
static void
ifa_destroy_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;

	IFAFREE(msg->ifa);
	ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
}

/*
 * Release every CPU's reference on 'ifa'; the storage is freed once
 * the last container goes (see ifac_free()).
 */
void
ifa_destroy(struct ifaddr *ifa)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
		    0, ifa_destroy_dispatch);
	msg.ifa = ifa;

	ifa_domsg(&msg.base.lmsg, 0);
}

/*
 * Return the message port of the per-CPU ifnet thread for 'cpu'.
 */
struct lwkt_port *
ifnet_portfn(int cpu)
{
	return &ifnet_threads[cpu].td_msgport;
}

/*
 * Forward 'lmsg' to the ifnet thread on 'next_cpu', or reply to the
 * originator when the chain has visited all CPUs (next_cpu == ncpus).
 */
void
ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
{
	KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);

	if (next_cpu < ncpus)
		lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
	else
		lwkt_replymsg(lmsg, 0);
}

/*
 * Synchronously run 'lmsg' on the ifnet thread of 'cpu'; returns the
 * message's error code.
 */
int
ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
{
	KKASSERT(cpu < ncpus);
	return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
}

/*
 * Asynchronously send 'lmsg' to the ifnet thread of 'cpu'.
 */
void
ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
{
	KKASSERT(cpu < ncpus);
	lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
}

/*
 * Generic netmsg service loop.  Some protocols may roll their own but all
 * must do the basic command dispatch function call done here.
 */
static void
ifnet_service_loop(void *arg __unused)
{
	netmsg_t msg;

	while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
		KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
		msg->base.nm_dispatch(msg);
	}
}

/*
 * Low-priority netisr rollup: flush this CPU's staged IFQs, either
 * rescheduling if_start (when a schedule was pending) or starting
 * transmission directly under the if_start interlock.
 */
static void
if_start_rollup(void)
{
	struct ifaltq_stage_head *head = &ifq_stage_heads[mycpuid];
	struct ifaltq_stage *stage;

	while ((stage = TAILQ_FIRST(&head->ifqs_head)) != NULL) {
		struct ifaltq *ifq = stage->ifqs_altq;
		int is_sched = 0;

		if (stage->ifqs_flags & IFQ_STAGE_FLAG_SCHED)
			is_sched = 1;
		ifq_stage_remove(head, stage);

		if (is_sched) {
			if_start_schedule(ifq->altq_ifp, 1);
		} else {
			int start = 0;

			ALTQ_LOCK(ifq);
			if (!ifq->altq_started) {
				/*
				 * Hold the interlock of ifnet.if_start
				 */
				ifq->altq_started = 1;
				start = 1;
			}
			ALTQ_UNLOCK(ifq);

			if (start)
				ifq_try_ifstart(ifq, 1);
		}
		KKASSERT((stage->ifqs_flags &
			  (IFQ_STAGE_FLAG_QUED | IFQ_STAGE_FLAG_SCHED)) == 0);
	}
}

/*
 * Boot-time initialization: create one ifnet service thread per CPU,
 * initialize the per-CPU IFQ staging queues and register the
 * if_start rollup with netisr.
 */
static void
ifnetinit(void *dummy __unused)
{
	int i;

	for (i = 0; i < ncpus; ++i) {
		struct thread *thr = &ifnet_threads[i];

		lwkt_create(ifnet_service_loop, NULL, NULL,
			    thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
			    i, "ifnet %d", i);
		netmsg_service_port_init(&thr->td_msgport);
		lwkt_schedule(thr);
	}

	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifq_stage_heads[i].ifqs_head);
	netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
}

/*
 * Map an interface index to its ifnet; NULL for out-of-range indices.
 */
struct ifnet *
ifnet_byindex(unsigned short idx)
{
	if (idx > if_index)
		return NULL;
	return ifindex2ifnet[idx];
}

/*
 * Return the first address of the interface with index 'idx', or NULL
 * if there is no such interface.
 * NOTE(review): TAILQ_FIRST() is dereferenced unchecked -- looks like
 * this would fault for an interface with an empty address list;
 * confirm callers guarantee at least one address.
 */
struct ifaddr *
ifaddr_byindex(unsigned short idx)
{
	struct ifnet *ifp;

	ifp = ifnet_byindex(idx);
	if (!ifp)
		return NULL;
	return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
}

/*
 * Register per-interface-type l2com alloc/free hooks, used by
 * if_alloc().  A type may only be registered once.
 */
void
if_register_com_alloc(u_char type,
		      if_com_alloc_t *a, if_com_free_t *f)
{

	KASSERT(if_com_alloc[type] == NULL,
		("if_register_com_alloc: %d already registered", type));
	KASSERT(if_com_free[type] == NULL,
		("if_register_com_alloc: %d free already registered", type));

	if_com_alloc[type] = a;
	if_com_free[type] = f;
}

/*
 * Remove a type's l2com alloc/free hooks (must be registered).
 */
void
if_deregister_com_alloc(u_char type)
{

	KASSERT(if_com_alloc[type] != NULL,
		("if_deregister_com_alloc: %d not registered", type));
	KASSERT(if_com_free[type] != NULL,
		("if_deregister_com_alloc: %d free not registered", type));
	if_com_alloc[type] = NULL;
	if_com_free[type] = NULL;
}

/*
 * Clamp a requested ring count to [1, min(ncpus2, cnt_max)] and round
 * it down to a power of 2.  cnt <= 0 requests the maximum.  cnt_max
 * must itself be a power of 2.
 */
int
if_ring_count2(int cnt, int cnt_max)
{
	int shift = 0;

	KASSERT(cnt_max >= 1 && powerof2(cnt_max),
		("invalid ring count max %d", cnt_max));

	if (cnt <= 0)
		cnt = cnt_max;
	if (cnt > ncpus2)
		cnt = ncpus2;
	if (cnt > cnt_max)
		cnt = cnt_max;

	/* Round down to the largest power of 2 <= cnt. */
	while ((1 << (shift + 1)) <= cnt)
		++shift;
	cnt = 1 << shift;

	KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
		("calculate cnt %d, ncpus2 %d, cnt max %d",
		 cnt, ncpus2, cnt_max));
	return cnt;
}

/*
 * Set the send queue's maximum length, padded so that packets staged
 * on every CPU (up to ifq_stage_cntmax each) still fit.
 */
void
ifq_set_maxlen(struct ifaltq *ifq, int len)
{
	ifq->ifq_maxlen = len + (ncpus * ifq_stage_cntmax);
}