/*
 * Copyright (c) 1980, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)if.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
 */

#include "opt_compat.h"
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/thread.h>
#include <sys/serialize.h>
#include <sys/bus.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <sys/mutex2.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/ifq_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/if_clone.h>
#include <net/netisr.h>
#include <net/netmsg2.h>

#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <machine/smp.h>

#if defined(INET) || defined(INET6)
/*XXX*/
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif
#endif

#if defined(COMPAT_43)
#include <emulation/43bsd/43bsd_socket.h>
#endif /* COMPAT_43 */

struct netmsg_ifaddr {
	struct netmsg_base base;
	struct ifaddr	*ifa;
	struct ifnet	*ifp;
	int		tail;
};

struct ifsubq_stage_head {
	TAILQ_HEAD(, ifsubq_stage)	stg_head;
} __cachealign;

/*
 * System initialization
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);
static void	ifnetinit(void *);
static void	if_slowtimo(void *);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	if_rtdel(struct radix_node *, void *);

#ifdef INET6
/*
 * XXX: declare here to avoid including many inet6 related files..
 * should be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

static int ifsq_stage_cntmax = 4;
TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
    &ifsq_stage_cntmax, 0, "ifq staging packet count max");

SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL)
/* Must be after netisr_init */
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL)

static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];

MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");

int			ifqmaxlen = IFQ_MAXLEN;
struct ifnethead	ifnet = TAILQ_HEAD_INITIALIZER(ifnet);

struct callout		if_slowtimo_timer;

int			if_index = 0;
struct ifnet		**ifindex2ifnet = NULL;
static struct thread	ifnet_threads[MAXCPU];

static struct ifsubq_stage_head	ifsubq_stage_heads[MAXCPU];

#ifdef notyet
#define IFQ_KTR_STRING		"ifq=%p"
#define IFQ_KTR_ARGS		struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ			KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)

#define IF_START_KTR_STRING	"ifp=%p"
#define IF_START_KTR_ARGS	struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START		KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
#define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)
#endif

TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
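
/*
 * Illustrative note (not from the original source): the staging count
 * above is a boot-time tunable as well as a run-time sysctl.  A minimal
 * sketch of how an administrator might change it, using the standard
 * loader.conf(5)/sysctl(8) mechanisms:
 *
 *	# /boot/loader.conf
 *	net.link.stage_cntmax="8"
 *
 *	# at run time
 *	sysctl net.link.stage_cntmax=8
 *
 * Setting it to 0 effectively disables if_start staging (see
 * ifsq_ifstart_schedule() below).
 */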

/*
 * Network interface utility routines.
 *
 * Routines with ifa_ifwith* names take sockaddr *'s as
 * parameters.
 */
/* ARGSUSED*/
void
ifinit(void *dummy)
{
	struct ifnet *ifp;

	callout_init(&if_slowtimo_timer);

	crit_enter();
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (ifp->if_snd.altq_maxlen == 0) {
			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
			ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
		}
	}
	crit_exit();

	if_slowtimo(0);
}

static void
ifsq_ifstart_ipifunc(void *arg)
{
	struct ifaltq_subque *ifsq = arg;
	struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);

	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
	crit_exit();
}

static __inline void
ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
	TAILQ_REMOVE(&head->stg_head, stage, stg_link);
	stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
	stage->stg_cnt = 0;
	stage->stg_len = 0;
}

static __inline void
ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT((stage->stg_flags &
	    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
	TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
}

/*
 * Schedule ifnet.if_start on ifnet's CPU
 */
static void
ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
{
	int cpu;

	if (!force && curthread->td_type == TD_TYPE_NETISR &&
	    ifsq_stage_cntmax > 0) {
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt = 0;
		stage->stg_len = 0;
		if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
			ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
		stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
		return;
	}

	cpu = ifsq_get_cpuid(ifsq);
	if (cpu != mycpuid)
		lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
	else
		ifsq_ifstart_ipifunc(ifsq);
}

/*
 * NOTE:
 * This function will release the ifnet.if_start interlock,
 * if ifnet.if_start does not need to be scheduled.
 */
static __inline int
ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
{
	if (!running || ifsq_is_empty(ifsq)
#ifdef ALTQ
	    || ifsq->ifsq_altq->altq_tbr != NULL
#endif
	) {
		ALTQ_SQ_LOCK(ifsq);
		/*
		 * The ifnet.if_start interlock is released, if:
		 * 1) Hardware can not take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (ifq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    when the hardware queue is ready
		 * 2) There is no packet in the ifnet.if_snd.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    The TBR callout will call ifnet.if_start
		 */
		if (!running || !ifsq_data_ready(ifsq)) {
			ifsq_clr_started(ifsq);
			ALTQ_SQ_UNLOCK(ifsq);
			return 0;
		}
		ALTQ_SQ_UNLOCK(ifsq);
	}
	return 1;
}
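
/*
 * Illustrative summary (not from the original source): the "started"
 * flag acts as the if_start interlock.  Its lifecycle, as implemented
 * by the routines around here, is roughly:
 *
 *	ifsq_devstart()                 sets started, calls if_start
 *	ifsq_ifstart_dispatch()         runs if_start while started is held
 *	ifsq_ifstart_need_schedule()    clears started once the queue is
 *	                                empty, the hardware is busy, or TBR
 *	                                blocks further dequeueing
 *
 * Anyone who finds started already set may simply enqueue and return;
 * the current owner of the interlock is obliged to drain or reschedule.
 */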

static void
ifsq_ifstart_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0, need_sched;

	crit_enter();
	lwkt_replymsg(lmsg, 0);	/* reply ASAP */
	crit_exit();

	if (mycpuid != ifsq_get_cpuid(ifsq)) {
		/*
		 * We need to chase the ifnet CPU change.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		return;
	}

	ifnet_serialize_tx(ifp, ifsq);
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
	ifnet_deserialize_tx(ifp, ifsq);

	if (need_sched) {
		/*
		 * More data needs to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}
}

/* Device driver ifnet.if_start helper function */
void
ifsq_devstart(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0;

	ASSERT_IFNET_SERIALIZED_TX(ifp, ifsq);

	ALTQ_SQ_LOCK(ifsq);
	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
		ALTQ_SQ_UNLOCK(ifsq);
		return;
	}
	ifsq_set_started(ifsq);
	ALTQ_SQ_UNLOCK(ifsq);

	ifp->if_start(ifp, ifsq);

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
		running = 1;

	if (ifsq_ifstart_need_schedule(ifsq, running)) {
		/*
		 * More data needs to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}
}

void
if_devstart(struct ifnet *ifp)
{
	ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
}

/* Device driver ifnet.if_start schedule helper function */
void
ifsq_devstart_sched(struct ifaltq_subque *ifsq)
{
	ifsq_ifstart_schedule(ifsq, 1);
}

void
if_devstart_sched(struct ifnet *ifp)
{
	ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
}

static void
if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_enter(ifp->if_serializer);
}

static void
if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_exit(ifp->if_serializer);
}

static int
if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	return lwkt_serialize_try(ifp->if_serializer);
}

#ifdef INVARIANTS
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (serialized)
		ASSERT_SERIALIZED(ifp->if_serializer);
	else
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
}
#endif
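
/*
 * Illustrative sketch (not from the original source): how a typical
 * driver is expected to use the if_devstart() helpers above.  The
 * xx_* names and the softc layout are hypothetical.
 */
#if 0
static void
xx_txeof(struct xx_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;

	/* ... reclaim completed TX descriptors here ... */

	/* The hardware queue has room again; drop the oactive marker. */
	ifq_clr_oactive(&ifp->if_snd);

	/*
	 * Kick the transmit path: if packets are still queued on
	 * ifnet.if_snd, this re-enters xx_start() via ifsq_devstart().
	 */
	if_devstart(ifp);
}
#endif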

/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.  If non-NULL, access to the interface
 * may be MPSAFE.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	int i, q;

	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	mtx_init(&ifp->if_ioctl_mtx);
	mtx_lock(&ifp->if_ioctl_mtx);

	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
	ifp->if_index = ++if_index;

	/*
	 * XXX -
	 * The old code would work if the interface passed a pre-existing
	 * chain of ifaddrs to this code.  We don't trust our callers to
	 * properly initialize the tailq, however, so we no longer allow
	 * this unlikely case.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_prefixhead);
	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		if_indexlim <<= 1;

		/* grow ifindex2ifnet */
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet) {
			bcopy(ifindex2ifnet, q, n/2);
			kfree(ifindex2ifnet, M_IFADDR);
		}
		ifindex2ifnet = q;
	}

	ifindex2ifnet[if_index] = ifp;

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP
	ifasize = sizeof(struct ifaddr) + 2 * socksize;
	ifa = ifa_create(ifasize, M_WAITOK);
	sdl = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	if (ifp->if_mapsubq == NULL)
		ifp->if_mapsubq = ifq_mapsubq_default;

	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;

	if (ifq->altq_subq_cnt <= 0)
		ifq->altq_subq_cnt = 1;
	ifq->altq_subq = kmalloc_cachealign(
	    ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	if (ifq->altq_maxlen == 0) {
		if_printf(ifp, "driver didn't set ifq_maxlen\n");
		ifq_set_maxlen(ifq, ifqmaxlen);
	}

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ALTQ_SQ_LOCK_INIT(ifsq);
		ifsq->ifsq_index = q;

		ifsq->ifsq_altq = ifq;
		ifsq->ifsq_ifp = ifp;

		ifsq->ifq_maxlen = ifq->altq_maxlen;
		ifsq->ifsq_prepended = NULL;
		ifsq->ifsq_started = 0;
		ifsq->ifsq_hw_oactive = 0;
		ifsq_set_cpuid(ifsq, 0);

		ifsq->ifsq_stage =
		    kmalloc_cachealign(ncpus * sizeof(struct ifsubq_stage),
		    M_DEVBUF, M_WAITOK | M_ZERO);
		for (i = 0; i < ncpus; ++i)
			ifsq->ifsq_stage[i].stg_subq = ifsq;

		ifsq->ifsq_ifstart_nmsg =
		    kmalloc(ncpus * sizeof(struct netmsg_base),
		    M_LWKTMSG, M_WAITOK);
		for (i = 0; i < ncpus; ++i) {
			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
			    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
		}
	}
	ifq_set_classic(ifq);

	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

	mtx_unlock(&ifp->if_ioctl_mtx);
}
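
/*
 * Illustrative sketch (not from the original source): the minimal
 * setup a driver performs before attaching.  The xx_* names are
 * hypothetical; Ethernet drivers normally go through
 * ether_ifattach(), which calls if_attach() internally.
 */
#if 0
	struct ifnet *ifp = &sc->arpcom.ac_if;

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_softc = sc;
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = xx_ioctl;
	ifp->if_start = xx_start;
	ifq_set_maxlen(&ifp->if_snd, XX_TX_RING_CNT - 1);

	/*
	 * Passing a NULL serializer makes if_attach() fall back to the
	 * embedded default serializer, as described above.
	 */
	ether_ifattach(ifp, sc->xx_enaddr, NULL);
#endif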

static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	crit_enter();
	TAILQ_FOREACH(ifp, &ifnet, if_list)
		if_attachdomain1(ifp);
	crit_exit();
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
	if_attachdomain, NULL);

static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
				(*dp->dom_ifattach)(ifp);
	crit_exit();
}

/*
 * Purge all addresses whose type is _not_ AF_LINK
 */
void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct ifaddr_container *ifac, *next;

	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
			      ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!!  ad hoc just for INET */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
				       NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}
}

static void
ifq_stage_detach_handler(netmsg_t nmsg)
{
	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
	int q;

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
			ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
	}
	lwkt_replymsg(&nmsg->lmsg, 0);
}

static void
ifq_stage_detach(struct ifaltq *ifq)
{
	struct netmsg_base base;
	int cpu;

	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
	    ifq_stage_detach_handler);
	base.lmsg.u.ms_resultp = ifq;

	for (cpu = 0; cpu < ncpus; ++cpu)
		lwkt_domsg(netisr_portfn(cpu), &base.lmsg, 0);
}

/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 */
void
if_detach(struct ifnet *ifp)
{
	struct radix_node_head *rnh;
	int i, q;
	int cpu, origcpu;
	struct domain *dp;

	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
			("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
			("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and complain.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface.
	 * Unfortunately the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 */
	origcpu = mycpuid;
	for (cpu = 0; cpu < ncpus; cpu++) {
		lwkt_migratecpu(cpu);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[cpu][i]) == NULL)
				continue;
			rnh->rnh_walktree(rnh, if_rtdel, ifp);
		}
	}
	lwkt_migratecpu(origcpu);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
				ifp->if_afdata[dp->dom_family]);

	/*
	 * Remove interface from ifindex2ifp[] and maybe decrement if_index.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	TAILQ_REMOVE(&ifnet, ifp, if_link);
	kfree(ifp->if_addrheads, M_IFADDR);

	lwkt_synchronize_ipiqs("if_detach");
	ifq_stage_detach(&ifp->if_snd);

	for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];

		kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
		kfree(ifsq->ifsq_stage, M_DEVBUF);
	}
	kfree(ifp->if_snd.altq_subq, M_DEVBUF);

	crit_exit();
}

/*
 * Create interface group without members
 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group *ifg = NULL;

	if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
	    M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}

/*
 * Add a group to an interface
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_group *ifg = NULL;
	struct ifg_member *ifgm;

	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		kfree(ifgl, M_TEMP);
		return (ENOMEM);
	}

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
		kfree(ifgl, M_TEMP);
		kfree(ifgm, M_TEMP);
		return (ENOMEM);
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}

/*
 * Remove a group from an interface
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		kfree(ifgm, M_TEMP);
	}

	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		kfree(ifgl->ifgl_group, M_TEMP);
	}

	kfree(ifgl, M_TEMP);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}
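
/*
 * Illustrative sketch (not from the original source): how a kernel
 * consumer might tag an interface into a named group and drop it
 * again.  Note that names ending in a digit are rejected by
 * if_addgroup() above, apparently because they would look like
 * interface unit names.
 */
#if 0
	int error;

	error = if_addgroup(ifp, "egress");	/* join */
	if (error != 0 && error != EEXIST)
		return (error);
	/* ... */
	error = if_delgroup(ifp, "egress");	/* leave */
#endif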

/*
 * Stores all groups from an interface in memory pointed
 * to by data
 */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int len, error;
	struct ifg_list *ifgl;
	struct ifg_req ifgrq, *ifgp;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Stores all members of a group in memory pointed to by data
 */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req ifgrq, *ifgp;
	int len, error;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Delete Routes for a Network Interface
 *
 * Called for each routing entry via the rnh->rnh_walktree() call above
 * to delete all route entries referencing a detaching network interface.
 *
 * Arguments:
 *	rn	pointer to node in the routing table
 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
 *
 * Returns:
 *	0	successful
 *	errno	failed - reason indicated
 *
 */
static int
if_rtdel(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct ifnet *ifp = arg;
	int err;

	if (rt->rt_ifp == ifp) {

		/*
		 * Protect (sorta) against walktree recursion problems
		 * with cloned routes
		 */
		if (!(rt->rt_flags & RTF_UP))
			return (0);

		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
				rt_mask(rt), rt->rt_flags,
				NULL);
		if (err) {
			log(LOG_WARNING, "if_rtdel: error %d\n", err);
		}
	}

	return (0);
}

/*
 * Locate an interface based on a complete address.
 */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr)
{
	struct ifnet *ifp;

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (sa_equal(addr, ifa->ifa_addr))
				return (ifa);
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    /* IPv6 doesn't have broadcast */
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    sa_equal(ifa->ifa_broadaddr, addr))
				return (ifa);
		}
	}
	return (NULL);
}

/*
 * Locate the point to point interface with a given destination address.
 */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr)
{
	struct ifnet *ifp;

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;

		if (!(ifp->if_flags & IFF_POINTOPOINT))
			continue;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (ifa->ifa_dstaddr &&
			    sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		}
	}
	return (NULL);
}

/*
 * Find an interface on a specific network.  If many, the choice
 * is the most specific one found.
 */
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	char *addr_data = addr->sa_data, *cplim;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;

		if (sdl->sdl_index && sdl->sdl_index <= if_index)
			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
	}

	/*
	 * Scan through each interface, looking for ones that have
	 * addresses in this address family.
	 */
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;
			char *cp, *cp2, *cp3;

			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr))
					return (ifa);
			} else {
				/*
				 * if we have a special address handler,
				 * then use it instead of the generic one.
				 */
				if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
						return (ifa);
					} else {
						continue;
					}
				}

				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len +
					(char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search
				 * for an even better one.
				 */
				if (ifa_maybe == NULL ||
				    rn_refines((char *)ifa->ifa_netmask,
					       (char *)ifa_maybe->ifa_netmask))
					ifa_maybe = ifa;
			}
		}
	}
	return (ifa_maybe);
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr_container *ifac;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (0);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == NULL) {
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr != NULL &&
			     sa_equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		} else {
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len +
				(char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			if (cp3 == cplim)
				return (ifa);
		}
	}
	return (ifa_maybe);
}

/*
 * Default action when installing a route with a Link Level gateway.
 * Lookup an appropriate real ifa to point to.
 * This should be moved to /sys/net/link.c eventually.
 */
static void
link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
{
	struct ifaddr *ifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
		return;
	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa != NULL) {
		IFAFREE(rt->rt_ifa);
		IFAREF(ifa);
		rt->rt_ifa = ifa;
		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
			ifa->ifa_rtrequest(cmd, rt, info);
	}
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_unroute(struct ifnet *ifp, int flag, int fam)
{
	struct ifaddr_container *ifac;

	ifp->if_flags &= ~flag;
	getmicrotime(&ifp->if_lastchange);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
	}
	ifq_purge_all(&ifp->if_snd);
	rt_ifmsg(ifp);
}
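
/*
 * Illustrative worked example (not from the original source) of the
 * byte-wise netmask match used by ifa_ifwithnet() and
 * ifaof_ifpforaddr() above.  Matching 192.168.1.77 against an ifa
 * with address 192.168.1.1 and netmask 255.255.255.0:
 *
 *	addr  c0 a8 01 4d
 *	ifa   c0 a8 01 01
 *	XOR   00 00 00 4c	(bits that disagree)
 *	mask  ff ff ff 00
 *	AND   00 00 00 00	-> every masked byte is 0, so it matches
 *
 * A disagreeing bit only rejects the ifa when the netmask says that
 * bit matters; host bits (mask 00) are ignored.
 */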

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_route(struct ifnet *ifp, int flag, int fam)
{
	struct ifaddr_container *ifac;

	ifq_purge_all(&ifp->if_snd);
	ifp->if_flags |= flag;
	getmicrotime(&ifp->if_lastchange);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			kpfctlinput(PRC_IFUP, ifa->ifa_addr);
	}
	rt_ifmsg(ifp);
#ifdef INET6
	in6_if_up(ifp);
#endif
}

/*
 * Mark an interface down and notify protocols of the transition.  An
 * interface going down is also considered to be a synchronizing event.
 * We must ensure that all packet processing related to the interface
 * has completed before we return so e.g. the caller can free the ifnet
 * structure that the mbufs may be referencing.
 *
 * NOTE: must be called at splnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{
	if_unroute(ifp, IFF_UP, AF_UNSPEC);
	netmsg_service_sync();
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{
	if_route(ifp, IFF_UP, AF_UNSPEC);
}

/*
 * Process a link state change.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	int link_state = ifp->if_link_state;

	rt_ifmsg(ifp);
	devctl_notify("IFNET", ifp->if_xname,
	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
}

/*
 * Handle interface watchdog timer routines.  Called
 * from softclock, we decrement timers (if set) and
 * call the appropriate interface routine on expiration.
 */
static void
if_slowtimo(void *arg)
{
	struct ifnet *ifp;

	crit_enter();

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (ifp->if_timer == 0 || --ifp->if_timer)
			continue;
		if (ifp->if_watchdog) {
			if (ifnet_tryserialize_all(ifp)) {
				(*ifp->if_watchdog)(ifp);
				ifnet_deserialize_all(ifp);
			} else {
				/* try again next timeout */
				++ifp->if_timer;
			}
		}
	}

	crit_exit();

	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
}

/*
 * Map interface name to
 * interface structure pointer.
 */
struct ifnet *
ifunit(const char *name)
{
	struct ifnet *ifp;

	/*
	 * Search all the interfaces for this name/number
	 */
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
			break;
	}
	return (ifp);
}
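
/*
 * Illustrative sketch (not from the original source): ifunit() is the
 * standard name-to-ifnet lookup, e.g. from an ioctl handler.  The
 * interface name used here is hypothetical.
 */
#if 0
	struct ifnet *ifp;

	ifp = ifunit("em0");		/* NULL if no such interface */
	if (ifp == NULL)
		return (ENXIO);
#endif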

/*
 * Map interface name in a sockaddr_dl to
 * interface structure pointer.
 */
struct ifnet *
if_withname(struct sockaddr *sa)
{
	char ifname[IFNAMSIZ+1];
	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;

	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
	     (sdl->sdl_nlen > IFNAMSIZ) )
		return NULL;

	/*
	 * ifunit wants a null-terminated name.  It may not be null-terminated
	 * in the sockaddr.  We don't want to change the caller's sockaddr,
	 * and there might not be room to put the trailing null anyway, so we
	 * make a local copy that we know we can null terminate safely.
	 */
	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
	ifname[sdl->sdl_nlen] = '\0';
	return ifunit(ifname);
}

/*
 * Interface ioctls.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifnet *ifp;
	struct ifreq *ifr;
	struct ifstat *ifs;
	int error;
	short oif_flags;
	int new_flags;
#ifdef COMPAT_43
	int ocmd;
#endif
	size_t namelen, onamelen;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	switch (cmd) {
	case SIOCGIFCONF:
	case OSIOCGIFCONF:
		return (ifconf(cmd, data, cred));
	default:
		break;
	}

	ifr = (struct ifreq *)data;

	switch (cmd) {
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
	case SIOCIFDESTROY:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_destroy(ifr->ifr_name));
	case SIOCIFGCLONERS:
		return (if_clone_list((struct if_clonereq *)data));
	default:
		break;
	}

	/*
	 * Nominal ioctl through interface, lookup the ifp and obtain a
	 * lock to serialize the ifconfig ioctl operation.
	 */
	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	error = 0;
	mtx_lock(&ifp->if_ioctl_mtx);

	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		ifr->ifr_flagshigh = ifp->if_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFDATA:
		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
				sizeof(ifp->if_data));
		break;

	case SIOCGIFPHYS:
		ifr->ifr_phys = ifp->if_physical;
		break;

	case SIOCGIFPOLLCPU:
		ifr->ifr_pollcpu = -1;
		break;

	case SIOCSIFPOLLCPU:
		break;

	case SIOCSIFFLAGS:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		new_flags = (ifr->ifr_flags & 0xffff) |
			    (ifr->ifr_flagshigh << 16);
		if (ifp->if_flags & IFF_SMART) {
			/* Smart drivers twiddle their own routes */
		} else if (ifp->if_flags & IFF_UP &&
			   (new_flags & IFF_UP) == 0) {
			crit_enter();
			if_down(ifp);
			crit_exit();
		} else if (new_flags & IFF_UP &&
			   (ifp->if_flags & IFF_UP) == 0) {
			crit_enter();
			if_up(ifp);
			crit_exit();
		}

#ifdef IFPOLL_ENABLE
		if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
			if (new_flags & IFF_NPOLLING)
				ifpoll_register(ifp);
			else
				ifpoll_deregister(ifp);
		}
#endif

		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
				(new_flags &~ IFF_CANTCHANGE);
		if (new_flags & IFF_PPROMISC) {
			/* Permanently promiscuous mode requested */
			ifp->if_flags |= IFF_PROMISC;
		} else if (ifp->if_pcount == 0) {
			ifp->if_flags &= ~IFF_PROMISC;
		}
		if (ifp->if_ioctl) {
			ifnet_serialize_all(ifp);
			ifp->if_ioctl(ifp, cmd, data, cred);
			ifnet_deserialize_all(ifp);
		}
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
			error = EINVAL;
			break;
		}
		ifnet_serialize_all(ifp);
		ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		break;

	case SIOCSIFNAME:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
		if (error)
			break;
		if (new_name[0] == '\0') {
			error = EINVAL;
			break;
		}
		if (ifunit(new_name) != NULL) {
			error = EEXIST;
			break;
		}

		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

		/* Announce the departure of the interface. */
		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);

		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		/* XXX IFA_LOCK(ifa); */
		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		namelen = strlen(new_name);
		onamelen = sdl->sdl_nlen;
		/*
		 * Move the address if needed.  This is safe because we
		 * allocate space for a name of length IFNAMSIZ when we
		 * create this in if_attach().
		 */
		if (namelen != onamelen) {
			bcopy(sdl->sdl_data + onamelen,
			      sdl->sdl_data + namelen, sdl->sdl_alen);
		}
		bcopy(new_name, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
		bzero(sdl->sdl_data, onamelen);
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		/* XXX IFA_UNLOCK(ifa) */

		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);

		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
		break;

	case SIOCSIFMETRIC:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		u_long oldmtu = ifp->if_mtu;

		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
			error = EINVAL;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp);
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EOPNOTSUPP;
			break;
		}

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK) {
			error = EINVAL;
			break;
		}

		if (cmd == SIOCADDMULTI) {
			struct ifmultiaddr *ifma;
			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSLIFPHYADDR:
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == 0) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFSTATUS:
		ifs = (struct ifstat *)data;
		ifs->ascii[0] = '\0';
		/* fall through */
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGLIFPHYADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFGENERIC:
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		break;

	case SIOCSIFLLADDR:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
				     ifr->ifr_addr.sa_len);
		EVENTHANDLER_INVOKE(iflladdr_event, ifp);
		break;

	default:
		oif_flags = ifp->if_flags;
		if (so->so_proto == 0) {
			error = EOPNOTSUPP;
			break;
		}
#ifndef COMPAT_43
		error = so_pru_control_direct(so, cmd, data, ifp);
#else
		ocmd = cmd;

		switch (cmd) {
		case SIOCSIFDSTADDR:
		case SIOCSIFADDR:
		case SIOCSIFBRDADDR:
		case SIOCSIFNETMASK:
#if BYTE_ORDER != BIG_ENDIAN
			if (ifr->ifr_addr.sa_family == 0 &&
			    ifr->ifr_addr.sa_len < 16) {
				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
				ifr->ifr_addr.sa_len = 16;
			}
#else
			if (ifr->ifr_addr.sa_len == 0)
				ifr->ifr_addr.sa_len = 16;
#endif
			break;
		case OSIOCGIFADDR:
			cmd = SIOCGIFADDR;
			break;
		case OSIOCGIFDSTADDR:
			cmd = SIOCGIFDSTADDR;
			break;
		case OSIOCGIFBRDADDR:
			cmd = SIOCGIFBRDADDR;
			break;
		case OSIOCGIFNETMASK:
			cmd = SIOCGIFNETMASK;
			break;
		default:
			break;
		}

		error = so_pru_control_direct(so, cmd, data, ifp);

		switch (ocmd) {
		case OSIOCGIFADDR:
		case OSIOCGIFDSTADDR:
		case OSIOCGIFBRDADDR:
		case OSIOCGIFNETMASK:
			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
			break;
		}
#endif /* COMPAT_43 */

		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
			DELAY(100);/* XXX: temporary workaround for fxp issue*/
			if (ifp->if_flags & IFF_UP) {
				crit_enter();
				in6_if_up(ifp);
				crit_exit();
			}
#endif
		}
		break;
	}

	mtx_unlock(&ifp->if_ioctl_mtx);
	return (error);
}
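
/*
 * Illustrative sketch (not from the original source): the ioctls above
 * are normally reached from userland, e.g. the way ifconfig(8) raises
 * IFF_UP.  Error handling is omitted.
 */
#if 0
	struct ifreq ifr;
	int s = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
	ioctl(s, SIOCGIFFLAGS, &ifr);
	ifr.ifr_flags |= IFF_UP;
	ioctl(s, SIOCSIFFLAGS, &ifr);	/* ends up in ifioctl() above */
	close(s);
#endif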

/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	int error;
	int oldflags;

	oldflags = ifp->if_flags;
	if (ifp->if_flags & IFF_PPROMISC) {
		/* Do nothing if device is in permanently promiscuous mode */
		ifp->if_pcount += pswitch ? 1 : -1;
		return (0);
	}
	if (pswitch) {
		/*
		 * If the device is not configured up, we cannot put it in
		 * promiscuous mode.
		 */
		if ((ifp->if_flags & IFF_UP) == 0)
			return (ENETDOWN);
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
		log(LOG_INFO, "%s: promiscuous mode enabled\n",
		    ifp->if_xname);
	} else {
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
		log(LOG_INFO, "%s: promiscuous mode disabled\n",
		    ifp->if_xname);
	}
	ifr.ifr_flags = ifp->if_flags;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	ifnet_serialize_all(ifp);
	error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
	ifnet_deserialize_all(ifp);
	if (error == 0)
		rt_ifmsg(ifp);
	else
		ifp->if_flags = oldflags;
	return error;
}

/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 */
static int
ifconf(u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct sockaddr *sa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;
		int addrs;

		if (space <= sizeof ifr)
			break;

		/*
		 * Zero the stack declared structure first to prevent
		 * memory disclosure.
		 */
		bzero(&ifr, sizeof(ifr));
		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			error = ENAMETOOLONG;
			break;
		}

		addrs = 0;
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (space <= sizeof ifr)
				break;
			sa = ifa->ifa_addr;
			if (cred->cr_prison && prison_if(cred, sa))
				continue;
			addrs++;
#ifdef COMPAT_43
			if (cmd == OSIOCGIFCONF) {
				struct osockaddr *osa =
					 (struct osockaddr *)&ifr.ifr_addr;
				ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else
#endif
			if (sa->sa_len <= sizeof(*sa)) {
				ifr.ifr_addr = *sa;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else {
				if (space < (sizeof ifr) + sa->sa_len -
					    sizeof(*sa))
					break;
				space -= sa->sa_len - sizeof(*sa);
				error = copyout(&ifr, ifrp,
						sizeof ifr.ifr_name);
				if (error == 0)
					error = copyout(sa, &ifrp->ifr_addr,
							sa->sa_len);
				ifrp = (struct ifreq *)
					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
			}
			if (error)
				break;
			space -= sizeof ifr;
		}
		if (error)
			break;
		if (!addrs) {
			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
			error = copyout(&ifr, ifrp, sizeof ifr);
			if (error)
				break;
			space -= sizeof ifr;
			ifrp++;
		}
	}
	ifc->ifc_len -= space;
	return (error);
}
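
/*
 * Illustrative sketch (not from the original source): because ifconf()
 * above packs variable-length records, a userland consumer of
 * SIOCGIFCONF must step by sa_len rather than by sizeof(struct ifreq).
 * Buffer sizing is simplified here.
 */
#if 0
	char buf[8192], *p;
	struct ifconf ifc;
	struct ifreq *ifr;

	ifc.ifc_len = sizeof(buf);
	ifc.ifc_buf = buf;
	ioctl(s, SIOCGIFCONF, &ifc);

	for (p = buf; p < buf + ifc.ifc_len; ) {
		ifr = (struct ifreq *)p;
		/* ... inspect ifr->ifr_name and ifr->ifr_addr ... */
		p += sizeof(ifr->ifr_name) +
		    (ifr->ifr_addr.sa_len > sizeof(struct sockaddr) ?
		     ifr->ifr_addr.sa_len : sizeof(struct sockaddr));
	}
#endif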

/*
 * Just like if_promisc(), but for all-multicast-reception mode.
 */
int
if_allmulti(struct ifnet *ifp, int onswitch)
{
	int error = 0;
	struct ifreq ifr;

	crit_enter();

	if (onswitch) {
		if (ifp->if_amcount++ == 0) {
			ifp->if_flags |= IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
					      NULL);
			ifnet_deserialize_all(ifp);
		}
	} else {
		if (ifp->if_amcount > 1) {
			ifp->if_amcount--;
		} else {
			ifp->if_amcount = 0;
			ifp->if_flags &= ~IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
					      NULL);
			ifnet_deserialize_all(ifp);
		}
	}

	crit_exit();

	if (error == 0)
		rt_ifmsg(ifp);
	return error;
}

/*
 * Add a multicast listenership to the interface in question.
 * The link layer provides a routine which converts a protocol
 * address into the corresponding link-layer multicast address
 * (if_resolvemulti below).
 */
int
if_addmulti(
	struct ifnet *ifp,	/* interface to manipulate */
	struct sockaddr *sa,	/* address to add */
	struct ifmultiaddr **retifma)
{
	struct sockaddr *llsa, *dupsa;
	int error;
	struct ifmultiaddr *ifma;

	/*
	 * If the matching multicast address already exists
	 * then don't add a new one, just add a reference
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa_equal(sa, ifma->ifma_addr)) {
			ifma->ifma_refcount++;
			if (retifma)
				*retifma = ifma;
			return 0;
		}
	}

	/*
	 * Give the link layer a chance to accept/reject it, and also
	 * find out which AF_LINK address this maps to, if it isn't one
	 * already.
	 */
	if (ifp->if_resolvemulti) {
		ifnet_serialize_all(ifp);
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		ifnet_deserialize_all(ifp);
		if (error)
			return error;
	} else {
		llsa = NULL;
	}

	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
	bcopy(sa, dupsa, sa->sa_len);

	ifma->ifma_addr = dupsa;
	ifma->ifma_lladdr = llsa;
	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = 0;
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);

	/*
	 * Some network interfaces can scan the address list at
	 * interrupt time; lock them out.
	 */
	crit_enter();
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
	crit_exit();
	if (retifma)
		*retifma = ifma;

	if (llsa != NULL) {
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (sa_equal(ifma->ifma_addr, llsa))
				break;
		}
		if (ifma) {
			ifma->ifma_refcount++;
		} else {
			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
			bcopy(llsa, dupsa, llsa->sa_len);
			ifma->ifma_addr = dupsa;
			ifma->ifma_ifp = ifp;
			ifma->ifma_refcount = 1;
			crit_enter();
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
			crit_exit();
		}
	}

/*
 * Add a multicast listener to the interface in question.
 * The link layer provides a routine (if_resolvemulti) which converts
 * the network-level address into the corresponding link-level
 * multicast address, if necessary.
 */
int
if_addmulti(
	struct ifnet *ifp,	/* interface to manipulate */
	struct sockaddr *sa,	/* address to add */
	struct ifmultiaddr **retifma)
{
	struct sockaddr *llsa, *dupsa;
	int error;
	struct ifmultiaddr *ifma;

	/*
	 * If the matching multicast address already exists
	 * then don't add a new one, just add a reference.
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa_equal(sa, ifma->ifma_addr)) {
			ifma->ifma_refcount++;
			if (retifma)
				*retifma = ifma;
			return 0;
		}
	}

	/*
	 * Give the link layer a chance to accept/reject it, and also
	 * find out which AF_LINK address this maps to, if it isn't one
	 * already.
	 */
	if (ifp->if_resolvemulti) {
		ifnet_serialize_all(ifp);
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		ifnet_deserialize_all(ifp);
		if (error)
			return error;
	} else {
		llsa = NULL;
	}

	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
	bcopy(sa, dupsa, sa->sa_len);

	ifma->ifma_addr = dupsa;
	ifma->ifma_lladdr = llsa;
	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = 0;
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);

	/*
	 * Some network interfaces can scan the address list at
	 * interrupt time; lock them out.
	 */
	crit_enter();
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
	crit_exit();
	if (retifma)
		*retifma = ifma;

	if (llsa != NULL) {
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (sa_equal(ifma->ifma_addr, llsa))
				break;
		}
		if (ifma) {
			ifma->ifma_refcount++;
		} else {
			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
			bcopy(llsa, dupsa, llsa->sa_len);
			ifma->ifma_addr = dupsa;
			ifma->ifma_ifp = ifp;
			ifma->ifma_refcount = 1;
			crit_enter();
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma,
					  ifma_link);
			crit_exit();
		}
	}
	/*
	 * We are certain we have added something, so call down to the
	 * interface to let it know about it.
	 */
	crit_enter();
	ifnet_serialize_all(ifp);
	if (ifp->if_ioctl)
		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
	ifnet_deserialize_all(ifp);
	crit_exit();

	return 0;
}

/*
 * Remove a reference to a multicast address on this interface.  Yell
 * if the request does not match an existing membership.
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return ENOENT;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	rt_newmaddrmsg(RTM_DELMADDR, ifma);
	sa = ifma->ifma_lladdr;
	crit_enter();
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	/*
	 * Make sure the interface driver is notified
	 * in the case of a link layer mcast group being left.
	 */
	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
		ifnet_serialize_all(ifp);
		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
		ifnet_deserialize_all(ifp);
	}
	crit_exit();
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(ifma, M_IFMADDR);
	if (sa == NULL)
		return 0;

	/*
	 * Now look for the link-layer address which corresponds to
	 * this network address.  It had been squirreled away in
	 * ifma->ifma_lladdr for this purpose (so we don't have
	 * to call ifp->if_resolvemulti() again), and we saved that
	 * value in sa above.  If someone nasty deleted the
	 * link-layer address out from underneath us, we can deal because
	 * the address we stored is not the same as the one which was
	 * in the record for the link-layer address.  (So we don't complain
	 * in that case.)
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return 0;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	crit_enter();
	ifnet_serialize_all(ifp);
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	ifnet_deserialize_all(ifp);
	crit_exit();
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(sa, M_IFMADDR);
	kfree(ifma, M_IFMADDR);

	return 0;
}

/*
 * Delete all multicast group memberships for an interface.
 * Should be used to quickly flush all multicast filters.
 */
void
if_delallmulti(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
		if_delmulti(ifp, ifma->ifma_addr);
}
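
/*
 * Example (illustrative sketch): the usual life cycle of a membership
 * as a protocol would drive it; the group address below is arbitrary.
 *
 *	struct ifmultiaddr *ifma;
 *	struct sockaddr_in sin;
 *
 *	bzero(&sin, sizeof(sin));
 *	sin.sin_len = sizeof(sin);
 *	sin.sin_family = AF_INET;
 *	sin.sin_addr.s_addr = htonl(0xe0000001);	(224.0.0.1)
 *
 *	if (if_addmulti(ifp, (struct sockaddr *)&sin, &ifma) == 0) {
 *		...receive group traffic...
 *		if_delmulti(ifp, (struct sockaddr *)&sin);
 *	}
 *
 * Both routines are reference counted per address, so repeated joins of
 * the same group must be balanced by matching leaves.
 */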

/*
 * Set the link layer address on an interface.
 *
 * At this time we only support certain types of interfaces,
 * and we don't allow the length of the address to change.
 */
int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifreq ifr;

	sdl = IF_LLSOCKADDR(ifp);
	if (sdl == NULL)
		return (EINVAL);
	if (len != sdl->sdl_alen)	/* don't allow length to change */
		return (EINVAL);
	switch (ifp->if_type) {
	case IFT_ETHER:			/* these types use struct arpcom */
	case IFT_XETHER:
	case IFT_L2VLAN:
		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
		bcopy(lladdr, LLADDR(sdl), len);
		break;
	default:
		return (ENODEV);
	}
	/*
	 * If the interface is already up, we need
	 * to re-init it in order to reprogram its
	 * address filter.
	 */
	ifnet_serialize_all(ifp);
	if ((ifp->if_flags & IFF_UP) != 0) {
#ifdef INET
		struct ifaddr_container *ifac;
#endif

		ifp->if_flags &= ~IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
		ifp->if_flags |= IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
#ifdef INET
		/*
		 * Also send gratuitous ARPs to notify other nodes about
		 * the address change.
		 */
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET)
				arp_gratuitous(ifp, ifa);
		}
#endif
	}
	ifnet_deserialize_all(ifp);
	return (0);
}

struct ifmultiaddr *
ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(ifma->ifma_addr, sa))
			break;

	return ifma;
}

/*
 * This function locates the first real ethernet MAC from a network
 * card and loads it into node, returning 0 on success or ENOENT if
 * no suitable interface is found.  It is used by the uuid code to
 * generate a unique 6-byte number.
 */
int
if_getanyethermac(uint16_t *node, int minlen)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (ifp->if_type != IFT_ETHER)
			continue;
		sdl = IF_LLSOCKADDR(ifp);
		if (sdl->sdl_alen < minlen)
			continue;
		bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
		      minlen);
		return (0);
	}
	return (ENOENT);
}

/*
 * The name argument must be a pointer to storage which will last as
 * long as the interface does.  For physical devices, the result of
 * device_get_name(dev) is a good choice and for pseudo-devices a
 * static string works well.
 */
void
if_initname(struct ifnet *ifp, const char *name, int unit)
{
	ifp->if_dname = name;
	ifp->if_dunit = unit;
	if (unit != IF_DUNIT_NONE)
		ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
	else
		strlcpy(ifp->if_xname, name, IFNAMSIZ);
}
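
/*
 * Example: typical attach-time usage.  A physical driver names the
 * interface after its device, while a pseudo-device (the "lo" name
 * below is only illustrative) manages its own unit numbering.
 *
 *	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 *
 *	if_initname(ifp, "lo", unit);
 */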

int
if_printf(struct ifnet *ifp, const char *fmt, ...)
{
	__va_list ap;
	int retval;

	retval = kprintf("%s: ", ifp->if_xname);
	__va_start(ap, fmt);
	retval += kvprintf(fmt, ap);
	__va_end(ap);
	return (retval);
}

struct ifnet *
if_alloc(uint8_t type)
{
	struct ifnet *ifp;
	size_t size;

	/*
	 * XXX temporary hack until arpcom is set up in if_l2com
	 */
	if (type == IFT_ETHER)
		size = sizeof(struct arpcom);
	else
		size = sizeof(struct ifnet);

	ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);

	ifp->if_type = type;

	if (if_com_alloc[type] != NULL) {
		ifp->if_l2com = if_com_alloc[type](type, ifp);
		if (ifp->if_l2com == NULL) {
			kfree(ifp, M_IFNET);
			return (NULL);
		}
	}
	return (ifp);
}

void
if_free(struct ifnet *ifp)
{
	kfree(ifp, M_IFNET);
}

void
ifq_set_classic(struct ifaltq *ifq)
{
	ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq,
	    ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request);
}

void
ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq,
    ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request)
{
	int q;

	KASSERT(mapsubq != NULL, ("mapsubq is not specified"));
	KASSERT(enqueue != NULL, ("enqueue is not specified"));
	KASSERT(dequeue != NULL, ("dequeue is not specified"));
	KASSERT(request != NULL, ("request is not specified"));

	ifq->altq_mapsubq = mapsubq;
	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ifsq->ifsq_enqueue = enqueue;
		ifsq->ifsq_dequeue = dequeue;
		ifsq->ifsq_request = request;
	}
}

int
ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
    struct altq_pktattr *pa __unused)
{
	if (IF_QFULL(ifsq)) {
		m_freem(m);
		return (ENOBUFS);
	} else {
		IF_ENQUEUE(ifsq, m);
		return (0);
	}
}

struct mbuf *
ifsq_classic_dequeue(struct ifaltq_subque *ifsq, struct mbuf *mpolled, int op)
{
	struct mbuf *m;

	switch (op) {
	case ALTDQ_POLL:
		IF_POLL(ifsq, m);
		break;
	case ALTDQ_REMOVE:
		IF_DEQUEUE(ifsq, m);
		break;
	default:
		panic("unsupported ALTQ dequeue op: %d", op);
	}
	KKASSERT(mpolled == NULL || mpolled == m);
	return (m);
}

int
ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg)
{
	switch (req) {
	case ALTRQ_PURGE:
		IF_DRAIN(ifsq);
		break;
	default:
		panic("unsupported ALTQ request: %d", req);
	}
	return (0);
}
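
/*
 * Example (illustrative sketch): the poll/remove contract of the
 * classic dequeue method.  Drivers normally go through the ifsq_*
 * wrappers instead of calling the method directly, and hw_has_room()
 * below is hypothetical.
 *
 *	struct mbuf *m;
 *
 *	m = ifsq_classic_dequeue(ifsq, NULL, ALTDQ_POLL);
 *	if (m != NULL && hw_has_room(sc)) {
 *		m = ifsq_classic_dequeue(ifsq, m, ALTDQ_REMOVE);
 *		...hand m to the hardware TX ring...
 *	}
 *
 * Passing the polled mbuf back in for ALTDQ_REMOVE lets the KKASSERT in
 * ifsq_classic_dequeue() verify that the queue head did not change
 * between the poll and the removal.
 */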

static void
ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0, need_sched;

	/*
	 * Try a direct ifnet.if_start first.  If there is contention on
	 * the ifnet's serializer, ifnet.if_start will be scheduled on
	 * the ifnet's CPU instead.
	 */
	if (!ifnet_tryserialize_tx(ifp, ifsq)) {
		/*
		 * ifnet serializer contention happened; ifnet.if_start
		 * is scheduled on the ifnet's CPU, and we keep going.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		return;
	}

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);

	ifnet_deserialize_tx(ifp, ifsq);

	if (need_sched) {
		/*
		 * More data needs to be transmitted; ifnet.if_start is
		 * scheduled on the ifnet's CPU, and we keep going.
		 * NOTE: the ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, force_sched);
	}
}

/*
 * IFSUBQ packet staging mechanism:
 *
 * Packets enqueued into an IFSUBQ are staged until a certain amount has
 * accumulated before the ifnet's if_start is called.  In this way the
 * driver can avoid writing to the hardware registers on every packet;
 * instead, the registers are written once a batch of packets has been
 * put onto the hardware TX ring.  Measurement on several modern NICs
 * (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that this aggregation
 * of hardware register writes can save ~20% CPU time when 18-byte UDP
 * datagrams are transmitted at 1.48Mpps.  The performance improvement
 * from aggregating hardware register writes is also mentioned in Luigi
 * Rizzo's netmap paper (http://info.iet.unipi.it/~luigi/netmap/).
 *
 * IFSUBQ packet staging is performed at the two entry points into the
 * driver's transmission function:
 * - Direct ifnet.if_start calling, i.e. ifsq_ifstart_try()
 * - ifnet.if_start scheduling, i.e. ifsq_ifstart_schedule()
 *
 * IFSUBQ packet staging is stopped upon any of the following conditions:
 * - The count of packets enqueued on the current CPU is greater than or
 *   equal to ifsq_stage_cntmax.  (XXX this should be per-interface)
 * - The total length of packets enqueued on the current CPU is greater
 *   than or equal to the hardware's MTU minus max_protohdr.  max_protohdr
 *   is subtracted from the hardware's MTU mainly because a full TCP
 *   segment's size is usually less than the hardware's MTU.
 * - ifsq_ifstart_schedule() is not pending on the current CPU and the
 *   if_start interlock (if_snd.altq_started) is not released.
 * - if_start_rollup(), which is registered as a low priority netisr
 *   rollup function, is called; probably because no more work is pending
 *   for the netisr.
 *
 * NOTE:
 * Currently IFSUBQ packet staging is only performed in netisr threads.
 */
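
/*
 * Worked example for the staging thresholds above (the numbers are only
 * illustrative): assume a 1500-byte MTU, a max_protohdr of 60 and an
 * ifsq_stage_cntmax of 4.  Staging then ends as soon as either 4
 * packets are pending on this CPU or the pending bytes reach
 * 1500 - 60 = 1440, so a single full-sized TCP segment is never held
 * back waiting for more traffic.
 */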

int
ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
{
	struct ifaltq *ifq = &ifp->if_snd;
	struct ifaltq_subque *ifsq;
	int error, start = 0, len, mcast = 0, avoid_start = 0;
	struct ifsubq_stage_head *head = NULL;
	struct ifsubq_stage *stage = NULL;

	ifsq = ifq_map_subq(ifq, mycpuid);
	ASSERT_IFNET_NOT_SERIALIZED_TX(ifp, ifsq);

	len = m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		mcast = 1;

	if (curthread->td_type == TD_TYPE_NETISR) {
		head = &ifsubq_stage_heads[mycpuid];
		stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt++;
		stage->stg_len += len;
		if (stage->stg_cnt < ifsq_stage_cntmax &&
		    stage->stg_len < (ifp->if_mtu - max_protohdr))
			avoid_start = 1;
	}

	ALTQ_SQ_LOCK(ifsq);
	error = ifsq_enqueue_locked(ifsq, m, pa);
	if (error) {
		if (!ifsq_data_ready(ifsq)) {
			ALTQ_SQ_UNLOCK(ifsq);
			return error;
		}
		avoid_start = 0;
	}
	if (!ifsq_is_started(ifsq)) {
		if (avoid_start) {
			ALTQ_SQ_UNLOCK(ifsq);

			KKASSERT(!error);
			if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
				ifsq_stage_insert(head, stage);

			ifp->if_obytes += len;
			if (mcast)
				ifp->if_omcasts++;
			return error;
		}

		/*
		 * Hold the interlock of ifnet.if_start
		 */
		ifsq_set_started(ifsq);
		start = 1;
	}
	ALTQ_SQ_UNLOCK(ifsq);

	if (!error) {
		ifp->if_obytes += len;
		if (mcast)
			ifp->if_omcasts++;
	}

	if (stage != NULL) {
		if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
			KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
			if (!avoid_start) {
				ifsq_stage_remove(head, stage);
				ifsq_ifstart_schedule(ifsq, 1);
			}
			return error;
		}

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
			ifsq_stage_remove(head, stage);
		} else {
			stage->stg_cnt = 0;
			stage->stg_len = 0;
		}
	}

	if (!start)
		return error;

	ifsq_ifstart_try(ifsq, 0);
	return error;
}

void *
ifa_create(int size, int flags)
{
	struct ifaddr *ifa;
	int i;

	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));

	ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
	if (ifa == NULL)
		return NULL;

	ifa->ifa_containers = kmalloc(ncpus * sizeof(struct ifaddr_container),
	    M_IFADDR, M_WAITOK | M_ZERO);
	ifa->ifa_ncnt = ncpus;
	for (i = 0; i < ncpus; ++i) {
		struct ifaddr_container *ifac = &ifa->ifa_containers[i];

		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
		ifac->ifa = ifa;
		ifac->ifa_refcnt = 1;
	}
#ifdef IFADDR_DEBUG
	kprintf("alloc ifa %p %d\n", ifa, size);
#endif
	return ifa;
}
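
/*
 * Example (illustrative sketch): a protocol allocates its embedding
 * ifaddr through ifa_create() and gets the per-CPU reference containers
 * set up as a side effect; struct in_ifaddr stands in for whatever
 * structure embeds the struct ifaddr.
 *
 *	struct in_ifaddr *ia;
 *
 *	ia = ifa_create(sizeof(*ia), M_WAITOK);
 *
 * Afterwards the embedded struct ifaddr owns ncpus ifaddr_containers,
 * each with ifa_refcnt initialized to 1.
 */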

void
ifac_free(struct ifaddr_container *ifac, int cpu_id)
{
	struct ifaddr *ifa = ifac->ifa;

	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
	KKASSERT(ifac->ifa_refcnt == 0);
	KASSERT(ifac->ifa_listmask == 0,
	    ("ifa is still on %#x lists", ifac->ifa_listmask));

	ifac->ifa_magic = IFA_CONTAINER_DEAD;

#ifdef IFADDR_DEBUG_VERBOSE
	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
#endif

	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
	    ("invalid # of ifac, %d", ifa->ifa_ncnt));
	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
#ifdef IFADDR_DEBUG
		kprintf("free ifa %p\n", ifa);
#endif
		kfree(ifa->ifa_containers, M_IFADDR);
		kfree(ifa, M_IFADDR);
	}
}

static void
ifa_iflink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
	    ("ifaddr is on if_addrheads"));

	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
	if (msg->tail)
		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
	else
		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);

	crit_exit();

	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
}

void
ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_iflink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;
	msg.tail = tail;

	ifa_domsg(&msg.base.lmsg, 0);
}

static void
ifa_ifunlink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
	    ("ifaddr is not on if_addrhead"));

	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;

	crit_exit();

	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
}

void
ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_ifunlink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;

	ifa_domsg(&msg.base.lmsg, 0);
}

static void
ifa_destroy_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;

	IFAFREE(msg->ifa);
	ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
}

void
ifa_destroy(struct ifaddr *ifa)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_destroy_dispatch);
	msg.ifa = ifa;

	ifa_domsg(&msg.base.lmsg, 0);
}

struct lwkt_port *
ifnet_portfn(int cpu)
{
	return &ifnet_threads[cpu].td_msgport;
}

void
ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
{
	KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);

	if (next_cpu < ncpus)
		lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
	else
		lwkt_replymsg(lmsg, 0);
}

int
ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
{
	KKASSERT(cpu < ncpus);
	return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
}

void
ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
{
	KKASSERT(cpu < ncpus);
	lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
}
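
/*
 * The ifa_*() operations above all follow the same chained dispatch
 * pattern, sketched here with a hypothetical example_dispatch(): one
 * netmsg visits every CPU in order, mutates that CPU's state, and the
 * synchronous ifa_domsg() caller is only replied to after the last CPU
 * has run the handler.
 *
 *	static void
 *	example_dispatch(netmsg_t nmsg)
 *	{
 *		...touch this CPU's per-CPU state...
 *		ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
 *	}
 *
 * Like ifnet_forwardmsg() above, ifa_forwardmsg() forwards the message
 * to the next CPU's port and replies to the originator once the CPU
 * index reaches ncpus.
 */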

/*
 * Generic netmsg service loop.  Some protocols may roll their own, but
 * all must perform the basic command dispatch done here.
 */
static void
ifnet_service_loop(void *arg __unused)
{
	netmsg_t msg;

	while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
		KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
		msg->base.nm_dispatch(msg);
	}
}

static void
if_start_rollup(void)
{
	struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid];
	struct ifsubq_stage *stage;

	while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) {
		struct ifaltq_subque *ifsq = stage->stg_subq;
		int is_sched = 0;

		if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)
			is_sched = 1;
		ifsq_stage_remove(head, stage);

		if (is_sched) {
			ifsq_ifstart_schedule(ifsq, 1);
		} else {
			int start = 0;

			ALTQ_SQ_LOCK(ifsq);
			if (!ifsq_is_started(ifsq)) {
				/*
				 * Hold the interlock of ifnet.if_start
				 */
				ifsq_set_started(ifsq);
				start = 1;
			}
			ALTQ_SQ_UNLOCK(ifsq);

			if (start)
				ifsq_ifstart_try(ifsq, 1);
		}
		KKASSERT((stage->stg_flags &
		    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	}
}

static void
ifnetinit(void *dummy __unused)
{
	int i;

	for (i = 0; i < ncpus; ++i) {
		struct thread *thr = &ifnet_threads[i];

		lwkt_create(ifnet_service_loop, NULL, NULL,
		    thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
		    i, "ifnet %d", i);
		netmsg_service_port_init(&thr->td_msgport);
		lwkt_schedule(thr);
	}

	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifsubq_stage_heads[i].stg_head);
	netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
}

struct ifnet *
ifnet_byindex(unsigned short idx)
{
	if (idx > if_index)
		return NULL;
	return ifindex2ifnet[idx];
}

struct ifaddr *
ifaddr_byindex(unsigned short idx)
{
	struct ifnet *ifp;

	ifp = ifnet_byindex(idx);
	if (!ifp)
		return NULL;
	return TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
}

void
if_register_com_alloc(u_char type,
    if_com_alloc_t *a, if_com_free_t *f)
{
	KASSERT(if_com_alloc[type] == NULL,
	    ("if_register_com_alloc: %d already registered", type));
	KASSERT(if_com_free[type] == NULL,
	    ("if_register_com_alloc: %d free already registered", type));

	if_com_alloc[type] = a;
	if_com_free[type] = f;
}

void
if_deregister_com_alloc(u_char type)
{
	KASSERT(if_com_alloc[type] != NULL,
	    ("if_deregister_com_alloc: %d not registered", type));
	KASSERT(if_com_free[type] != NULL,
	    ("if_deregister_com_alloc: %d free not registered", type));
	if_com_alloc[type] = NULL;
	if_com_free[type] = NULL;
}

int
if_ring_count2(int cnt, int cnt_max)
{
	int shift = 0;

	KASSERT(cnt_max >= 1 && powerof2(cnt_max),
	    ("invalid ring count max %d", cnt_max));

	if (cnt <= 0)
		cnt = cnt_max;
	if (cnt > ncpus2)
		cnt = ncpus2;
	if (cnt > cnt_max)
		cnt = cnt_max;

	while ((1 << (shift + 1)) <= cnt)
		++shift;
	cnt = 1 << shift;

	KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
	    ("calculate cnt %d, ncpus2 %d, cnt max %d",
	     cnt, ncpus2, cnt_max));
	return cnt;
}

void
ifq_set_maxlen(struct ifaltq *ifq, int len)
{
	ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax);
}
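
/*
 * Worked example for if_ring_count2() above: the request is clamped to
 * ncpus2 and cnt_max, then rounded down to a power of 2.  Assuming
 * ncpus2 is 8:
 *
 *	if_ring_count2(0, 16) returns 8	(0 means "use cnt_max", clamped to ncpus2)
 *	if_ring_count2(5, 16) returns 4	(rounded down to a power of 2)
 *	if_ring_count2(3, 2)  returns 2	(clamped to cnt_max)
 */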

int
ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused)
{
	return ALTQ_SUBQ_INDEX_DEFAULT;
}