/*
 * Copyright (c) 1980, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)if.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
 */

#include "opt_compat.h"
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/thread.h>
#include <sys/serialize.h>
#include <sys/bus.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <sys/mutex2.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/ifq_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/if_clone.h>
#include <net/netisr.h>
#include <net/netmsg2.h>

#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <machine/smp.h>

#if defined(INET) || defined(INET6)
/*XXX*/
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif
#endif

#if defined(COMPAT_43)
#include <emulation/43bsd/43bsd_socket.h>
#endif /* COMPAT_43 */

struct netmsg_ifaddr {
	struct netmsg_base base;
	struct ifaddr	*ifa;
	struct ifnet	*ifp;
	int		tail;
};

struct ifsubq_stage_head {
	TAILQ_HEAD(, ifsubq_stage)	stg_head;
} __cachealign;

/*
 * System initialization
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);
static void	ifnetinit(void *);
static void	if_slowtimo(void *);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	if_rtdel(struct radix_node *, void *);

/* Helper functions */
static void	ifsq_watchdog_reset(struct ifsubq_watchdog *);

#ifdef INET6
/*
 * XXX: declared here to avoid pulling in many inet6-related headers;
 * this should probably be generalized.
 */
extern void	nd6_setmtu(struct ifnet *);
#endif
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

static int ifsq_stage_cntmax = 4;
TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
    &ifsq_stage_cntmax, 0, "ifq staging packet count max");

static int if_stats_compat = 0;
SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW,
    &if_stats_compat, 0, "Compatibility with the old ifnet stats");

SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL);
/* Must be after netisr_init */
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL);

static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];

MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");

int			ifqmaxlen = IFQ_MAXLEN;
struct ifnethead	ifnet = TAILQ_HEAD_INITIALIZER(ifnet);

struct callout		if_slowtimo_timer;

int			if_index = 0;
struct ifnet		**ifindex2ifnet = NULL;
static struct thread	ifnet_threads[MAXCPU];

static struct ifsubq_stage_head	ifsubq_stage_heads[MAXCPU];

#ifdef notyet
#define IFQ_KTR_STRING		"ifq=%p"
#define IFQ_KTR_ARGS		struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ			KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)

#define IF_START_KTR_STRING	"ifp=%p"
#define IF_START_KTR_ARGS	struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START		KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
#define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)
#endif

TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
/*
 * Network interface utility routines.
 *
 * Routines with ifa_ifwith* names take sockaddr *'s as
 * parameters.
 */
/* ARGSUSED*/
void
ifinit(void *dummy)
{
	struct ifnet *ifp;

	callout_init(&if_slowtimo_timer);

	crit_enter();
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (ifp->if_snd.altq_maxlen == 0) {
			if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
			ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
		}
	}
	crit_exit();

	if_slowtimo(0);
}

static void
ifsq_ifstart_ipifunc(void *arg)
{
	struct ifaltq_subque *ifsq = arg;
	struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);

	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg(netisr_portfn(mycpuid), lmsg);
	crit_exit();
}

static __inline void
ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
	TAILQ_REMOVE(&head->stg_head, stage, stg_link);
	stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
	stage->stg_cnt = 0;
	stage->stg_len = 0;
}

static __inline void
ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT((stage->stg_flags &
	    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
	TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
}

/*
 * Schedule ifnet.if_start on ifnet's CPU
 */
static void
ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
{
	int cpu;

	if (!force && curthread->td_type == TD_TYPE_NETISR &&
	    ifsq_stage_cntmax > 0) {
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt = 0;
		stage->stg_len = 0;
		if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
			ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
		stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
		return;
	}

	cpu = ifsq_get_cpuid(ifsq);
	if (cpu != mycpuid)
		lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
	else
		ifsq_ifstart_ipifunc(ifsq);
}
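/*
 * Illustrative sketch (not compiled; the flush path lives outside this
 * section): how the staging fast path above defers scheduling when we
 * are already running in a netisr thread.
 *
 *	ifq_dispatch(...)		<- enqueue from a netisr thread
 *	  ifsq_ifstart_schedule(ifsq, 0)
 *	    -> marks the per-cpu ifsubq_stage QUED+SCHED and returns;
 *	       no IPI and no message is sent yet.
 *	... the netisr keeps processing, batching packets, bounded by
 *	    the net.link.stage_cntmax tunable defined earlier ...
 *	ifsq_ifstart_schedule(ifsq, 1)	<- forced, or staging flushed
 *	    -> lwkt_send_ipiq()/ifsq_ifstart_ipifunc() actually kicks
 *	       ifnet.if_start on the subqueue's owner cpu.
 */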
/*
 * NOTE:
 * This function will release the ifnet.if_start interlock
 * if ifnet.if_start does not need to be scheduled.
 */
static __inline int
ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
{
	if (!running || ifsq_is_empty(ifsq)
#ifdef ALTQ
	    || ifsq->ifsq_altq->altq_tbr != NULL
#endif
	) {
		ALTQ_SQ_LOCK(ifsq);
		/*
		 * ifnet.if_start interlock is released, if:
		 * 1) Hardware cannot take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (ifq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    when hardware queue is ready
		 * 2) There are no packets in ifnet.if_snd.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    TBR callout will call ifnet.if_start
		 */
		if (!running || !ifsq_data_ready(ifsq)) {
			ifsq_clr_started(ifsq);
			ALTQ_SQ_UNLOCK(ifsq);
			return 0;
		}
		ALTQ_SQ_UNLOCK(ifsq);
	}
	return 1;
}

static void
ifsq_ifstart_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0, need_sched;

	crit_enter();
	lwkt_replymsg(lmsg, 0);	/* reply ASAP */
	crit_exit();

	if (mycpuid != ifsq_get_cpuid(ifsq)) {
		/*
		 * We need to chase the ifnet CPU change.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		return;
	}

	ifnet_serialize_tx(ifp, ifsq);
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
	ifnet_deserialize_tx(ifp, ifsq);

	if (need_sched) {
		/*
		 * More data needs to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}
}

/* Device driver ifnet.if_start helper function */
void
ifsq_devstart(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0;

	ASSERT_IFNET_SERIALIZED_TX(ifp, ifsq);

	ALTQ_SQ_LOCK(ifsq);
	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
		ALTQ_SQ_UNLOCK(ifsq);
		return;
	}
	ifsq_set_started(ifsq);
	ALTQ_SQ_UNLOCK(ifsq);

	ifp->if_start(ifp, ifsq);

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
		running = 1;

	if (ifsq_ifstart_need_schedule(ifsq, running)) {
		/*
		 * More data needs to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}
}

void
if_devstart(struct ifnet *ifp)
{
	ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
}

/* Device driver ifnet.if_start schedule helper function */
void
ifsq_devstart_sched(struct ifaltq_subque *ifsq)
{
	ifsq_ifstart_schedule(ifsq, 1);
}

void
if_devstart_sched(struct ifnet *ifp)
{
	ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
}

static void
if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_enter(ifp->if_serializer);
}

static void
if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_exit(ifp->if_serializer);
}

static int
if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	return lwkt_serialize_try(ifp->if_serializer);
}

#ifdef INVARIANTS
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (serialized)
		ASSERT_SERIALIZED(ifp->if_serializer);
	else
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
}
#endif
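/*
 * Illustrative sketch (hypothetical "foo" driver, not code from this
 * file): a driver that relies on the default serialize methods above
 * shares one lwkt_serialize between its interrupt interlock and the
 * TX path, so if_devstart() can be called directly from the handler:
 *
 *	static void
 *	foo_intr(void *xsc)	<- interrupt wired to the same serializer
 *	{
 *		struct foo_softc *sc = xsc;
 *		struct ifnet *ifp = &sc->arpcom.ac_if;
 *
 *		ASSERT_SERIALIZED(ifp->if_serializer);
 *		... reclaim finished TX descriptors ...
 *		if (!ifq_is_empty(&ifp->if_snd))
 *			if_devstart(ifp);	<- TX already serialized
 *	}
 */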
/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.  If non-NULL access to the interface
 * may be MPSAFE.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	int i, q;

	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	mtx_init(&ifp->if_ioctl_mtx);
	mtx_lock(&ifp->if_ioctl_mtx);

	TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
	ifp->if_index = ++if_index;

	/*
	 * XXX -
	 * The old code would work if the interface passed a pre-existing
	 * chain of ifaddrs to this code.  We don't trust our callers to
	 * properly initialize the tailq, however, so we no longer allow
	 * this unlikely case.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_prefixhead);
	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	if (ifindex2ifnet == NULL || if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		if_indexlim <<= 1;

		/* grow ifindex2ifnet */
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet) {
			bcopy(ifindex2ifnet, q, n/2);
			kfree(ifindex2ifnet, M_IFADDR);
		}
		ifindex2ifnet = q;
	}

	ifindex2ifnet[if_index] = ifp;

	/*
	 * create a Link Level name for this device
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1)))
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = ROUNDUP(socksize);
#undef ROUNDUP
	ifasize = sizeof(struct ifaddr) + 2 * socksize;
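	/*
	 * Worked example (illustrative; exact offsets depend on the
	 * struct layout): for "em0", namelen = 3, so masklen =
	 * offsetof(sdl_data) + 3.  With a 6-byte Ethernet if_addrlen,
	 * socksize = masklen + 6, padded up to at least
	 * sizeof(struct sockaddr_dl) and then rounded to a multiple
	 * of sizeof(long) by ROUNDUP.  The single ifasize allocation
	 * below carries the ifaddr plus two such sockaddrs: the
	 * AF_LINK address itself and, right behind it, the all-ones
	 * name mask built by the 0xff loop.
	 */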
	ifa = ifa_create(ifasize, M_WAITOK);
	sdl = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_index = ifp->if_index;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	ifp->if_data_pcpu = kmalloc_cachealign(
	    ncpus * sizeof(struct ifdata_pcpu), M_DEVBUF, M_WAITOK | M_ZERO);

	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	if (ifp->if_mapsubq == NULL)
		ifp->if_mapsubq = ifq_mapsubq_default;

	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;

	if (ifq->altq_subq_cnt <= 0)
		ifq->altq_subq_cnt = 1;
	ifq->altq_subq = kmalloc_cachealign(
	    ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	if (ifq->altq_maxlen == 0) {
		if_printf(ifp, "driver didn't set ifq_maxlen\n");
		ifq_set_maxlen(ifq, ifqmaxlen);
	}

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ALTQ_SQ_LOCK_INIT(ifsq);
		ifsq->ifsq_index = q;

		ifsq->ifsq_altq = ifq;
		ifsq->ifsq_ifp = ifp;

		ifsq->ifq_maxlen = ifq->altq_maxlen;
		ifsq->ifsq_prepended = NULL;
		ifsq->ifsq_started = 0;
		ifsq->ifsq_hw_oactive = 0;
		ifsq_set_cpuid(ifsq, 0);

		ifsq->ifsq_stage =
		    kmalloc_cachealign(ncpus * sizeof(struct ifsubq_stage),
		    M_DEVBUF, M_WAITOK | M_ZERO);
		for (i = 0; i < ncpus; ++i)
			ifsq->ifsq_stage[i].stg_subq = ifsq;

		ifsq->ifsq_ifstart_nmsg =
		    kmalloc(ncpus * sizeof(struct netmsg_base),
		    M_LWKTMSG, M_WAITOK);
		for (i = 0; i < ncpus; ++i) {
			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
			    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
		}
	}
	ifq_set_classic(ifq);

	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

	mtx_unlock(&ifp->if_ioctl_mtx);
}
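/*
 * Illustrative sketch (hypothetical "foo" driver): the usual attach
 * sequence that ends up in if_attach() above.  foo_softc, foo_init,
 * foo_ioctl and foo_start are assumptions, not code from this file;
 * Ethernet drivers normally reach if_attach() via ether_ifattach().
 *
 *	struct ifnet *ifp = &sc->arpcom.ac_if;
 *
 *	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 *	ifp->if_softc = sc;
 *	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 *	ifp->if_init = foo_init;
 *	ifp->if_ioctl = foo_ioctl;
 *	ifp->if_start = foo_start;
 *	ifq_set_maxlen(&ifp->if_snd, FOO_TXDESC_CNT - 1);
 *	ether_ifattach(ifp, sc->foo_eaddr, NULL);
 */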
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	crit_enter();
	TAILQ_FOREACH(ifp, &ifnet, if_list)
		if_attachdomain1(ifp);
	crit_exit();
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
	if_attachdomain, NULL);

static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
				(*dp->dom_ifattach)(ifp);
	crit_exit();
}

/*
 * Purge all addresses whose type is _not_ AF_LINK
 */
void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct ifaddr_container *ifac, *next;

	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
			      ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!!  ad hoc just for INET */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
				       NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}
}

static void
ifq_stage_detach_handler(netmsg_t nmsg)
{
	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
	int q;

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
			ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
	}
	lwkt_replymsg(&nmsg->lmsg, 0);
}

static void
ifq_stage_detach(struct ifaltq *ifq)
{
	struct netmsg_base base;
	int cpu;

	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
		    ifq_stage_detach_handler);
	base.lmsg.u.ms_resultp = ifq;

	for (cpu = 0; cpu < ncpus; ++cpu)
		lwkt_domsg(netisr_portfn(cpu), &base.lmsg, 0);
}
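/*
 * Illustrative sketch (assumption, not code from this file): other
 * subsystems observe the attach/detach announcements made above and
 * below through EVENTHANDLER(9).  A hypothetical consumer caching
 * ifnet pointers would register:
 *
 *	static void
 *	foo_ifdetach(void *arg, struct ifnet *ifp)
 *	{
 *		... drop any cached references to ifp ...
 *	}
 *
 *	EVENTHANDLER_REGISTER(ifnet_detach_event, foo_ifdetach, NULL,
 *			      EVENTHANDLER_PRI_ANY);
 */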
/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 */
void
if_detach(struct ifnet *ifp)
{
	struct radix_node_head *rnh;
	int i, q;
	int cpu, origcpu;
	struct domain *dp;

	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
			("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
			("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and complain.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface.
	 * Unfortunately the only way to do this is to slog through
	 * the entire routing table looking for routes which point
	 * to this interface...oh well...
	 */
	origcpu = mycpuid;
	for (cpu = 0; cpu < ncpus; cpu++) {
		lwkt_migratecpu(cpu);
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_tables[cpu][i]) == NULL)
				continue;
			rnh->rnh_walktree(rnh, if_rtdel, ifp);
		}
	}
	lwkt_migratecpu(origcpu);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
				ifp->if_afdata[dp->dom_family]);
	/*
	 * Remove interface from ifindex2ifnet[] and maybe decrement if_index.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	TAILQ_REMOVE(&ifnet, ifp, if_link);
	kfree(ifp->if_addrheads, M_IFADDR);

	lwkt_synchronize_ipiqs("if_detach");
	ifq_stage_detach(&ifp->if_snd);

	for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];

		kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
		kfree(ifsq->ifsq_stage, M_DEVBUF);
	}
	kfree(ifp->if_snd.altq_subq, M_DEVBUF);

	kfree(ifp->if_data_pcpu, M_DEVBUF);

	crit_exit();
}

/*
 * Create interface group without members
 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group *ifg = NULL;

	if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
	    M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}

/*
 * Add a group to an interface
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_group *ifg = NULL;
	struct ifg_member *ifgm;

	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		kfree(ifgl, M_TEMP);
		return (ENOMEM);
	}

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
		kfree(ifgl, M_TEMP);
		kfree(ifgm, M_TEMP);
		return (ENOMEM);
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}

/*
 * Remove a group from an interface
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		kfree(ifgm, M_TEMP);
	}

	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		kfree(ifgl->ifgl_group, M_TEMP);
	}

	kfree(ifgl, M_TEMP);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}
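/*
 * Illustrative sketch: group names may not end in a digit (if_addgroup
 * rejects them with EINVAL above) so that they can never collide with
 * "name + unit" interface names like "em0".  A hypothetical
 * pseudo-device tagging itself for pf(4) rules would do:
 *
 *	error = if_addgroup(ifp, "egress");
 *	...
 *	error = if_delgroup(ifp, "egress");	<- on detach
 */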
/*
 * Stores all groups from an interface in memory pointed
 * to by data
 */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int len, error;
	struct ifg_list *ifgl;
	struct ifg_req ifgrq, *ifgp;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
			sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Stores all members of a group in memory pointed to by data
 */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req ifgrq, *ifgp;
	int len, error;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
			sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
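/*
 * Illustrative sketch (userland side, hypothetical): both routines
 * above implement the usual two-call sizing protocol.  Call once with
 * ifgr_len == 0 to learn the required size, allocate, then call again
 * to have the entries copied out:
 *
 *	struct ifgroupreq ifgr;
 *
 *	memset(&ifgr, 0, sizeof(ifgr));
 *	strlcpy(ifgr.ifgr_name, "em0", sizeof(ifgr.ifgr_name));
 *	ioctl(s, SIOCGIFGROUP, &ifgr);		<- ifgr_len filled in
 *	ifgr.ifgr_groups = malloc(ifgr.ifgr_len);
 *	ioctl(s, SIOCGIFGROUP, &ifgr);		<- entries copied out
 */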
/*
 * Delete Routes for a Network Interface
 *
 * Called for each routing entry via the rnh->rnh_walktree() call above
 * to delete all route entries referencing a detaching network interface.
 *
 * Arguments:
 *	rn	pointer to node in the routing table
 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
 *
 * Returns:
 *	0	successful
 *	errno	failed - reason indicated
 *
 */
static int
if_rtdel(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct ifnet *ifp = arg;
	int err;

	if (rt->rt_ifp == ifp) {

		/*
		 * Protect (sorta) against walktree recursion problems
		 * with cloned routes
		 */
		if (!(rt->rt_flags & RTF_UP))
			return (0);

		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
				rt_mask(rt), rt->rt_flags,
				NULL);
		if (err) {
			log(LOG_WARNING, "if_rtdel: error %d\n", err);
		}
	}

	return (0);
}

/*
 * Locate an interface based on a complete address.
 */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr)
{
	struct ifnet *ifp;

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (sa_equal(addr, ifa->ifa_addr))
				return (ifa);
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    /* IPv6 doesn't have broadcast */
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    sa_equal(ifa->ifa_broadaddr, addr))
				return (ifa);
		}
	}
	return (NULL);
}

/*
 * Locate the point to point interface with a given destination address.
 */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr)
{
	struct ifnet *ifp;

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;

		if (!(ifp->if_flags & IFF_POINTOPOINT))
			continue;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (ifa->ifa_dstaddr &&
			    sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		}
	}
	return (NULL);
}

/*
 * Find an interface on a specific network.  If more than one match is
 * found, the most specific one is chosen.
 */
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
	struct ifnet *ifp;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	char *addr_data = addr->sa_data, *cplim;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;

		if (sdl->sdl_index && sdl->sdl_index <= if_index)
			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
	}

	/*
	 * Scan through each interface, looking for ones that have
	 * addresses in this address family.
	 */
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;
			char *cp, *cp2, *cp3;

			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr))
					return (ifa);
			} else {
				/*
				 * if we have a special address handler,
				 * then use it instead of the generic one.
				 */
				if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
						return (ifa);
					} else {
						continue;
					}
				}

				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == NULL)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len +
					(char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
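				/*
				 * Worked example (hypothetical numbers,
				 * simplified: the walk starts at sa_data,
				 * so the leading sin_port bytes are also
				 * compared, under a zero mask): looking
				 * for 192.168.1.5 against an ifa of
				 * 192.168.1.1 netmask 255.255.255.0, the
				 * only differing address byte (.5 vs .1)
				 * is masked by .0, so the loop completes
				 * and the ifa matches.  Against
				 * 10.0.0.1/255.0.0.0 the first address
				 * byte differs (192 ^ 10) under mask 255,
				 * so we jump to the next address.
				 */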
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search
				 * for an even better one.
				 */
				if (ifa_maybe == NULL ||
				    rn_refines((char *)ifa->ifa_netmask,
					       (char *)ifa_maybe->ifa_netmask))
					ifa_maybe = ifa;
			}
		}
	}
	return (ifa_maybe);
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr_container *ifac;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (NULL);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == NULL) {
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr != NULL &&
			     sa_equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		} else {
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len +
				(char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			if (cp3 == cplim)
				return (ifa);
		}
	}
	return (ifa_maybe);
}

/*
 * Default action when installing a route with a Link Level gateway.
 * Lookup an appropriate real ifa to point to.
 * This should be moved to /sys/net/link.c eventually.
 */
static void
link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
{
	struct ifaddr *ifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
		return;
	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa != NULL) {
		IFAFREE(rt->rt_ifa);
		IFAREF(ifa);
		rt->rt_ifa = ifa;
		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
			ifa->ifa_rtrequest(cmd, rt, info);
	}
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_unroute(struct ifnet *ifp, int flag, int fam)
{
	struct ifaddr_container *ifac;

	ifp->if_flags &= ~flag;
	getmicrotime(&ifp->if_lastchange);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
	}
	ifq_purge_all(&ifp->if_snd);
	rt_ifmsg(ifp);
}
/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_route(struct ifnet *ifp, int flag, int fam)
{
	struct ifaddr_container *ifac;

	ifq_purge_all(&ifp->if_snd);
	ifp->if_flags |= flag;
	getmicrotime(&ifp->if_lastchange);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			kpfctlinput(PRC_IFUP, ifa->ifa_addr);
	}
	rt_ifmsg(ifp);
#ifdef INET6
	in6_if_up(ifp);
#endif
}

/*
 * Mark an interface down and notify protocols of the transition.  An
 * interface going down is also considered to be a synchronizing event.
 * We must ensure that all packet processing related to the interface
 * has completed before we return so e.g. the caller can free the ifnet
 * structure that the mbufs may be referencing.
 *
 * NOTE: must be called at splnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{
	if_unroute(ifp, IFF_UP, AF_UNSPEC);
	netmsg_service_sync();
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{
	if_route(ifp, IFF_UP, AF_UNSPEC);
}

/*
 * Process a link state change.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	int link_state = ifp->if_link_state;

	rt_ifmsg(ifp);
	devctl_notify("IFNET", ifp->if_xname,
	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
}

/*
 * Handle interface watchdog timer routines.  Called
 * from softclock, we decrement timers (if set) and
 * call the appropriate interface routine on expiration.
 */
static void
if_slowtimo(void *arg)
{
	struct ifnet *ifp;

	crit_enter();

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (if_stats_compat) {
			IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets);
			IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors);
			IFNET_STAT_GET(ifp, opackets, ifp->if_opackets);
			IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors);
			IFNET_STAT_GET(ifp, collisions, ifp->if_collisions);
			IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes);
			IFNET_STAT_GET(ifp, obytes, ifp->if_obytes);
			IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts);
			IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts);
			IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops);
			IFNET_STAT_GET(ifp, noproto, ifp->if_noproto);
		}

		if (ifp->if_timer == 0 || --ifp->if_timer)
			continue;
		if (ifp->if_watchdog) {
			if (ifnet_tryserialize_all(ifp)) {
				(*ifp->if_watchdog)(ifp);
				ifnet_deserialize_all(ifp);
			} else {
				/* try again next timeout */
				++ifp->if_timer;
			}
		}
	}

	crit_exit();

	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
}
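/*
 * Illustrative sketch (hypothetical driver): the if_timer/if_watchdog
 * contract driven by if_slowtimo() above.  A driver arms if_timer (in
 * seconds) when it queues a transmit and clears it on TX completion;
 * if the countdown reaches zero, the watchdog runs with the ifnet
 * fully serialized:
 *
 *	sc->foo_ifp->if_timer = 5;	<- armed in foo_start()
 *	...
 *	static void
 *	foo_watchdog(struct ifnet *ifp)
 *	{
 *		if_printf(ifp, "watchdog timeout -- resetting\n");
 *		foo_init(ifp->if_softc);
 *		if (!ifq_is_empty(&ifp->if_snd))
 *			if_devstart(ifp);
 *	}
 */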
/*
 * Map interface name to
 * interface structure pointer.
 */
struct ifnet *
ifunit(const char *name)
{
	struct ifnet *ifp;

	/*
	 * Search all the interfaces for this name/number
	 */

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
			break;
	}
	return (ifp);
}

/*
 * Map interface name in a sockaddr_dl to
 * interface structure pointer.
 */
struct ifnet *
if_withname(struct sockaddr *sa)
{
	char ifname[IFNAMSIZ+1];
	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sa;

	if ( (sa->sa_family != AF_LINK) || (sdl->sdl_nlen == 0) ||
	     (sdl->sdl_nlen > IFNAMSIZ) )
		return NULL;

	/*
	 * ifunit wants a null-terminated name.  It may not be null-terminated
	 * in the sockaddr.  We don't want to change the caller's sockaddr,
	 * and there might not be room to put the trailing null anyway, so we
	 * make a local copy that we know we can null terminate safely.
	 */

	bcopy(sdl->sdl_data, ifname, sdl->sdl_nlen);
	ifname[sdl->sdl_nlen] = '\0';
	return ifunit(ifname);
}

/*
 * Interface ioctls.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifnet *ifp;
	struct ifreq *ifr;
	struct ifstat *ifs;
	int error;
	short oif_flags;
	int new_flags;
#ifdef COMPAT_43
	int ocmd;
#endif
	size_t namelen, onamelen;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	switch (cmd) {
	case SIOCGIFCONF:
	case OSIOCGIFCONF:
		return (ifconf(cmd, data, cred));
	default:
		break;
	}

	ifr = (struct ifreq *)data;

	switch (cmd) {
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
	case SIOCIFDESTROY:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_destroy(ifr->ifr_name));
	case SIOCIFGCLONERS:
		return (if_clone_list((struct if_clonereq *)data));
	default:
		break;
	}
	/*
	 * Nominal ioctl through interface, lookup the ifp and obtain a
	 * lock to serialize the ifconfig ioctl operation.
	 */
	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL)
		return (ENXIO);
	error = 0;
	mtx_lock(&ifp->if_ioctl_mtx);

	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		ifr->ifr_flagshigh = ifp->if_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFTSOLEN:
		ifr->ifr_tsolen = ifp->if_tsolen;
		break;

	case SIOCGIFDATA:
		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
				sizeof(ifp->if_data));
		break;

	case SIOCGIFPHYS:
		ifr->ifr_phys = ifp->if_physical;
		break;

	case SIOCGIFPOLLCPU:
		ifr->ifr_pollcpu = -1;
		break;

	case SIOCSIFPOLLCPU:
		break;

	case SIOCSIFFLAGS:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		if (ifp->if_flags & IFF_SMART) {
			/* Smart drivers twiddle their own routes */
		} else if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			crit_enter();
			if_down(ifp);
			crit_exit();
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			crit_enter();
			if_up(ifp);
			crit_exit();
		}

#ifdef IFPOLL_ENABLE
		if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
			if (new_flags & IFF_NPOLLING)
				ifpoll_register(ifp);
			else
				ifpoll_deregister(ifp);
		}
#endif

		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(new_flags &~ IFF_CANTCHANGE);
		if (new_flags & IFF_PPROMISC) {
			/* Permanently promiscuous mode requested */
			ifp->if_flags |= IFF_PROMISC;
		} else if (ifp->if_pcount == 0) {
			ifp->if_flags &= ~IFF_PROMISC;
		}
		if (ifp->if_ioctl) {
			ifnet_serialize_all(ifp);
			ifp->if_ioctl(ifp, cmd, data, cred);
			ifnet_deserialize_all(ifp);
		}
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
			error = EINVAL;
			break;
		}
		ifnet_serialize_all(ifp);
		ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		break;

	case SIOCSIFNAME:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
		if (error)
			break;
		if (new_name[0] == '\0') {
			error = EINVAL;
			break;
		}
		if (ifunit(new_name) != NULL) {
			error = EEXIST;
			break;
		}

		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

		/* Announce the departure of the interface. */
		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);

		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		/* XXX IFA_LOCK(ifa); */
		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		namelen = strlen(new_name);
		onamelen = sdl->sdl_nlen;
		/*
		 * Move the address if needed.  This is safe because we
		 * allocate space for a name of length IFNAMSIZ when we
		 * create this in if_attach().
		 */
		if (namelen != onamelen) {
			bcopy(sdl->sdl_data + onamelen,
			      sdl->sdl_data + namelen, sdl->sdl_alen);
		}
		bcopy(new_name, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
		bzero(sdl->sdl_data, onamelen);
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		/* XXX IFA_UNLOCK(ifa) */

		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);

		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
		break;

	case SIOCSIFMETRIC:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		u_long oldmtu = ifp->if_mtu;

		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
			error = EINVAL;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp);
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
		}
		break;
	}

	case SIOCSIFTSOLEN:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;

		/* XXX need driver supplied upper limit */
		if (ifr->ifr_tsolen <= 0) {
			error = EINVAL;
			break;
		}
		ifp->if_tsolen = ifr->ifr_tsolen;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EOPNOTSUPP;
			break;
		}

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK) {
			error = EINVAL;
			break;
		}

		if (cmd == SIOCADDMULTI) {
			struct ifmultiaddr *ifma;

			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSLIFPHYADDR:
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFSTATUS:
		ifs = (struct ifstat *)data;
		ifs->ascii[0] = '\0';
		/* fall through */
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGLIFPHYADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFGENERIC:
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		break;

	case SIOCSIFLLADDR:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
				     ifr->ifr_addr.sa_len);
		EVENTHANDLER_INVOKE(iflladdr_event, ifp);
		break;

	default:
		oif_flags = ifp->if_flags;
		if (so->so_proto == NULL) {
			error = EOPNOTSUPP;
			break;
		}
#ifndef COMPAT_43
		error = so_pru_control_direct(so, cmd, data, ifp);
#else
		ocmd = cmd;

		switch (cmd) {
		case SIOCSIFDSTADDR:
		case SIOCSIFADDR:
		case SIOCSIFBRDADDR:
		case SIOCSIFNETMASK:
#if BYTE_ORDER != BIG_ENDIAN
			if (ifr->ifr_addr.sa_family == 0 &&
			    ifr->ifr_addr.sa_len < 16) {
				ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
				ifr->ifr_addr.sa_len = 16;
			}
#else
			if (ifr->ifr_addr.sa_len == 0)
				ifr->ifr_addr.sa_len = 16;
#endif
			break;
		case OSIOCGIFADDR:
			cmd = SIOCGIFADDR;
			break;
		case OSIOCGIFDSTADDR:
			cmd = SIOCGIFDSTADDR;
			break;
		case OSIOCGIFBRDADDR:
			cmd = SIOCGIFBRDADDR;
			break;
		case OSIOCGIFNETMASK:
			cmd = SIOCGIFNETMASK;
			break;
		default:
			break;
		}

		error = so_pru_control_direct(so, cmd, data, ifp);

		switch (ocmd) {
		case OSIOCGIFADDR:
		case OSIOCGIFDSTADDR:
		case OSIOCGIFBRDADDR:
		case OSIOCGIFNETMASK:
			*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
			break;
		}
#endif /* COMPAT_43 */

		if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
			DELAY(100);/* XXX: temporary workaround for fxp issue*/
			if (ifp->if_flags & IFF_UP) {
				crit_enter();
				in6_if_up(ifp);
				crit_exit();
			}
#endif
		}
		break;
	}

	mtx_unlock(&ifp->if_ioctl_mtx);
	return (error);
}
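/*
 * Illustrative sketch: ifpromisc() below is reference counted, so
 * independent consumers can stack their requests.  bpf(4), for
 * example, effectively brackets a promiscuous capture with:
 *
 *	error = ifpromisc(ifp, 1);	<- first "on" sets IFF_PROMISC
 *	... capture ...
 *	error = ifpromisc(ifp, 0);	<- last "off" clears it
 *
 * Unbalanced on/off pairs leave the interface in an undefined state,
 * as the comment below warns.
 */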
/*
 * Set/clear promiscuous mode on interface ifp based on the truth value
 * of pswitch.  The calls are reference counted so that only the first
 * "on" request actually has an effect, as does the final "off" request.
 * Results are undefined if the "off" and "on" requests are not matched.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	struct ifreq ifr;
	int error;
	int oldflags;

	oldflags = ifp->if_flags;
	if (ifp->if_flags & IFF_PPROMISC) {
		/* Do nothing if device is in permanently promiscuous mode */
		ifp->if_pcount += pswitch ? 1 : -1;
		return (0);
	}
	if (pswitch) {
		/*
		 * If the device is not configured up, we cannot put it in
		 * promiscuous mode.
		 */
		if ((ifp->if_flags & IFF_UP) == 0)
			return (ENETDOWN);
		if (ifp->if_pcount++ != 0)
			return (0);
		ifp->if_flags |= IFF_PROMISC;
		log(LOG_INFO, "%s: promiscuous mode enabled\n",
		    ifp->if_xname);
	} else {
		if (--ifp->if_pcount > 0)
			return (0);
		ifp->if_flags &= ~IFF_PROMISC;
		log(LOG_INFO, "%s: promiscuous mode disabled\n",
		    ifp->if_xname);
	}
	ifr.ifr_flags = ifp->if_flags;
	ifr.ifr_flagshigh = ifp->if_flags >> 16;
	ifnet_serialize_all(ifp);
	error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
	ifnet_deserialize_all(ifp);
	if (error == 0)
		rt_ifmsg(ifp);
	else
		ifp->if_flags = oldflags;
	return error;
}

/*
 * Return the interface configuration of the system.  The returned
 * list may be used in later ioctls (above) to get other information.
 */
static int
ifconf(u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct sockaddr *sa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	ifrp = ifc->ifc_req;
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		struct ifaddr_container *ifac;
		int addrs;

		if (space <= sizeof ifr)
			break;

		/*
		 * Zero the stack declared structure first to prevent
		 * memory disclosure.
		 */
		bzero(&ifr, sizeof(ifr));
		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			error = ENAMETOOLONG;
			break;
		}

		addrs = 0;
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (space <= sizeof ifr)
				break;
			sa = ifa->ifa_addr;
			if (cred->cr_prison && prison_if(cred, sa))
				continue;
			addrs++;
#ifdef COMPAT_43
			if (cmd == OSIOCGIFCONF) {
				struct osockaddr *osa =
					 (struct osockaddr *)&ifr.ifr_addr;
				ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else
#endif
			if (sa->sa_len <= sizeof(*sa)) {
				ifr.ifr_addr = *sa;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else {
				if (space < (sizeof ifr) + sa->sa_len -
					    sizeof(*sa))
					break;
				space -= sa->sa_len - sizeof(*sa);
				error = copyout(&ifr, ifrp,
						sizeof ifr.ifr_name);
				if (error == 0)
					error = copyout(sa, &ifrp->ifr_addr,
							sa->sa_len);
				ifrp = (struct ifreq *)
					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
			}
			if (error)
				break;
			space -= sizeof ifr;
		}
		if (error)
			break;
		if (!addrs) {
			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
			error = copyout(&ifr, ifrp, sizeof ifr);
			if (error)
				break;
			space -= sizeof ifr;
			ifrp++;
		}
	}
	ifc->ifc_len -= space;
	return (error);
}
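/*
 * Illustrative sketch (userland side, hypothetical): SIOCGIFCONF
 * consumers pass a caller-sized buffer and read back ifc_len to see
 * how much was actually filled in; entries are variable length when
 * an address is longer than a plain struct sockaddr, matching the
 * copyout arithmetic in ifconf() above:
 *
 *	char buf[8192];
 *	struct ifconf ifc;
 *
 *	ifc.ifc_len = sizeof(buf);
 *	ifc.ifc_buf = buf;
 *	ioctl(s, SIOCGIFCONF, &ifc);
 *	for (char *p = buf; p < buf + ifc.ifc_len; ) {
 *		struct ifreq *ifr = (struct ifreq *)p;
 *		size_t salen = ifr->ifr_addr.sa_len;
 *
 *		if (salen < sizeof(struct sockaddr))
 *			salen = sizeof(struct sockaddr);
 *		p += sizeof(ifr->ifr_name) + salen;
 *	}
 */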
/*
 * Just like ifpromisc(), but for all-multicast-reception mode.
 */
int
if_allmulti(struct ifnet *ifp, int onswitch)
{
	int error = 0;
	struct ifreq ifr;

	crit_enter();

	if (onswitch) {
		if (ifp->if_amcount++ == 0) {
			ifp->if_flags |= IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
					      NULL);
			ifnet_deserialize_all(ifp);
		}
	} else {
		if (ifp->if_amcount > 1) {
			ifp->if_amcount--;
		} else {
			ifp->if_amcount = 0;
			ifp->if_flags &= ~IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
					      NULL);
			ifnet_deserialize_all(ifp);
		}
	}

	crit_exit();

	if (error == 0)
		rt_ifmsg(ifp);
	return error;
}

/*
 * Add a multicast listenership to the interface in question.
 * The link layer provides a routine (if_resolvemulti) which converts
 * the network-layer group address into the corresponding link-layer
 * address.
 */
int
if_addmulti(
	struct ifnet *ifp,	/* interface to manipulate */
	struct sockaddr *sa,	/* address to add */
	struct ifmultiaddr **retifma)
{
	struct sockaddr *llsa, *dupsa;
	int error;
	struct ifmultiaddr *ifma;

	/*
	 * If the matching multicast address already exists
	 * then don't add a new one, just add a reference
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa_equal(sa, ifma->ifma_addr)) {
			ifma->ifma_refcount++;
			if (retifma)
				*retifma = ifma;
			return 0;
		}
	}

	/*
	 * Give the link layer a chance to accept/reject it, and also
	 * find out which AF_LINK address this maps to, if it isn't one
	 * already.
	 */
	if (ifp->if_resolvemulti) {
		ifnet_serialize_all(ifp);
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		ifnet_deserialize_all(ifp);
		if (error)
			return error;
	} else {
		llsa = NULL;
	}

	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
	bcopy(sa, dupsa, sa->sa_len);

	ifma->ifma_addr = dupsa;
	ifma->ifma_lladdr = llsa;
	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = 0;
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);

	/*
	 * Some network interfaces can scan the address list at
	 * interrupt time; lock them out.
	 */
	crit_enter();
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
	crit_exit();
	if (retifma)
		*retifma = ifma;

	if (llsa != NULL) {
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (sa_equal(ifma->ifma_addr, llsa))
				break;
		}
		if (ifma) {
			ifma->ifma_refcount++;
		} else {
			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
			bcopy(llsa, dupsa, llsa->sa_len);
			ifma->ifma_addr = dupsa;
			ifma->ifma_ifp = ifp;
			ifma->ifma_refcount = 1;
			crit_enter();
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
			crit_exit();
		}
	}
	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.
	 */
	crit_enter();
	ifnet_serialize_all(ifp);
	if (ifp->if_ioctl)
		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
	ifnet_deserialize_all(ifp);
	crit_exit();

	return 0;
}
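/*
 * Illustrative sketch (hypothetical numbers): joining an IPv4 group
 * creates two chained records.  For 224.0.0.251 on an Ethernet
 * interface, if_resolvemulti() maps the group to the link-layer
 * address 01:00:5e:00:00:fb, so the list gains one AF_INET ifma
 * (its ifma_lladdr pointing at the mapped address) plus one AF_LINK
 * ifma, each reference counted independently.  A protocol joins and
 * leaves with:
 *
 *	struct ifmultiaddr *ifma;
 *
 *	error = if_addmulti(ifp, (struct sockaddr *)&group_sin, &ifma);
 *	...
 *	error = if_delmulti(ifp, (struct sockaddr *)&group_sin);
 */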
/*
 * Just like ifpromisc(), but for all-multicast-reception mode.
 */
int
if_allmulti(struct ifnet *ifp, int onswitch)
{
	int error = 0;
	struct ifreq ifr;

	crit_enter();

	if (onswitch) {
		if (ifp->if_amcount++ == 0) {
			ifp->if_flags |= IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
					      NULL);
			ifnet_deserialize_all(ifp);
		}
	} else {
		if (ifp->if_amcount > 1) {
			ifp->if_amcount--;
		} else {
			ifp->if_amcount = 0;
			ifp->if_flags &= ~IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
					      NULL);
			ifnet_deserialize_all(ifp);
		}
	}

	crit_exit();

	if (error == 0)
		rt_ifmsg(ifp);
	return error;
}

/*
 * Add a multicast listenership to the interface in question.
 * The link layer provides a routine (if_resolvemulti) which converts
 * the network-layer group address into the corresponding link-layer
 * address; both addresses are recorded, each with its own reference
 * count.
 */
int
if_addmulti(
	struct ifnet *ifp,	/* interface to manipulate */
	struct sockaddr *sa,	/* address to add */
	struct ifmultiaddr **retifma)
{
	struct sockaddr *llsa, *dupsa;
	int error;
	struct ifmultiaddr *ifma;

	/*
	 * If the matching multicast address already exists
	 * then don't add a new one, just add a reference.
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa_equal(sa, ifma->ifma_addr)) {
			ifma->ifma_refcount++;
			if (retifma)
				*retifma = ifma;
			return 0;
		}
	}

	/*
	 * Give the link layer a chance to accept/reject it, and also
	 * find out which AF_LINK address this maps to, if it isn't one
	 * already.
	 */
	if (ifp->if_resolvemulti) {
		ifnet_serialize_all(ifp);
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		ifnet_deserialize_all(ifp);
		if (error)
			return error;
	} else {
		llsa = NULL;
	}

	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK);
	bcopy(sa, dupsa, sa->sa_len);

	ifma->ifma_addr = dupsa;
	ifma->ifma_lladdr = llsa;
	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = 0;
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);

	/*
	 * Some network interfaces can scan the address list at
	 * interrupt time; lock them out.
	 */
	crit_enter();
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
	crit_exit();
	if (retifma)
		*retifma = ifma;

	if (llsa != NULL) {
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (sa_equal(ifma->ifma_addr, llsa))
				break;
		}
		if (ifma) {
			ifma->ifma_refcount++;
		} else {
			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK);
			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK);
			bcopy(llsa, dupsa, llsa->sa_len);
			ifma->ifma_addr = dupsa;
			ifma->ifma_ifp = ifp;
			ifma->ifma_refcount = 1;
			crit_enter();
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
			crit_exit();
		}
	}
	/*
	 * We are certain we have added something, so call down to the
	 * interface to let it know about it.
	 */
	crit_enter();
	ifnet_serialize_all(ifp);
	if (ifp->if_ioctl)
		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);
	ifnet_deserialize_all(ifp);
	crit_exit();

	return 0;
}

/*
 * Remove a reference to a multicast address on this interface.  Yell
 * if the request does not match an existing membership.
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return ENOENT;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	rt_newmaddrmsg(RTM_DELMADDR, ifma);
	sa = ifma->ifma_lladdr;
	crit_enter();
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	/*
	 * Make sure the interface driver is notified
	 * in the case of a link layer mcast group being left.
	 */
	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) {
		ifnet_serialize_all(ifp);
		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
		ifnet_deserialize_all(ifp);
	}
	crit_exit();
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(ifma, M_IFMADDR);
	if (sa == NULL)
		return 0;

	/*
	 * Now look for the link-layer address which corresponds to
	 * this network address.  It had been squirreled away in
	 * ifma->ifma_lladdr for this purpose (so we don't have
	 * to call ifp->if_resolvemulti() again), and we saved that
	 * value in sa above.  If some nasty deleted the
	 * link-layer address out from underneath us, we can deal because
	 * the address we stored is not the same as the one which was
	 * in the record for the link-layer address.  (So we don't complain
	 * in that case.)
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return 0;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	crit_enter();
	ifnet_serialize_all(ifp);
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	ifnet_deserialize_all(ifp);
	crit_exit();
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(sa, M_IFMADDR);
	kfree(ifma, M_IFMADDR);

	return 0;
}

/*
 * Delete all multicast group memberships for an interface.
 * Should be used to quickly flush all multicast filters.
 */
void
if_delallmulti(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	TAILQ_FOREACH_MUTABLE(ifma, &ifp->if_multiaddrs, ifma_link, next)
		if_delmulti(ifp, ifma->ifma_addr);
}
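/*
 * Illustrative protocol-side usage (a sketch; the real consumers live in
 * the per-protocol group management code):
 *
 *	struct ifmultiaddr *ifma;
 *
 *	error = if_addmulti(ifp, (struct sockaddr *)&group_sin, &ifma);
 *	...
 *	error = if_delmulti(ifp, (struct sockaddr *)&group_sin);
 *
 * Joining the same group twice merely bumps ifma_refcount; the driver
 * only sees SIOCADDMULTI on the first join and SIOCDELMULTI on the last
 * leave.  group_sin here is a hypothetical struct sockaddr_in naming the
 * multicast group.
 */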
/*
 * Set the link layer address on an interface.
 *
 * At this time we only support certain types of interfaces,
 * and we don't allow the length of the address to change.
 */
int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifreq ifr;

	sdl = IF_LLSOCKADDR(ifp);
	if (sdl == NULL)
		return (EINVAL);
	if (len != sdl->sdl_alen)	/* don't allow length to change */
		return (EINVAL);
	switch (ifp->if_type) {
	case IFT_ETHER:			/* these types use struct arpcom */
	case IFT_XETHER:
	case IFT_L2VLAN:
		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
		bcopy(lladdr, LLADDR(sdl), len);
		break;
	default:
		return (ENODEV);
	}
	/*
	 * If the interface is already up, we need
	 * to re-init it in order to reprogram its
	 * address filter.
	 */
	ifnet_serialize_all(ifp);
	if ((ifp->if_flags & IFF_UP) != 0) {
#ifdef INET
		struct ifaddr_container *ifac;
#endif

		ifp->if_flags &= ~IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
		ifp->if_flags |= IFF_UP;
		ifr.ifr_flags = ifp->if_flags;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL);
#ifdef INET
		/*
		 * Also send gratuitous ARPs to notify other nodes about
		 * the address change.
		 */
		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr != NULL &&
			    ifa->ifa_addr->sa_family == AF_INET)
				arp_gratuitous(ifp, ifa);
		}
#endif
	}
	ifnet_deserialize_all(ifp);
	return (0);
}

struct ifmultiaddr *
ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(ifma->ifma_addr, sa))
			break;

	return ifma;
}

/*
 * This function locates the first real Ethernet MAC from a network
 * card and loads it into node, returning 0 on success or ENOENT if
 * no suitable interfaces were found.  It is used by the uuid code to
 * generate a unique 6-byte number.
 */
int
if_getanyethermac(uint16_t *node, int minlen)
{
	struct ifnet *ifp;
	struct sockaddr_dl *sdl;

	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (ifp->if_type != IFT_ETHER)
			continue;
		sdl = IF_LLSOCKADDR(ifp);
		if (sdl->sdl_alen < minlen)
			continue;
		bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node,
		    minlen);
		return(0);
	}
	return (ENOENT);
}

/*
 * The name argument must be a pointer to storage which will last as
 * long as the interface does.  For physical devices, the result of
 * device_get_name(dev) is a good choice and for pseudo-devices a
 * static string works well.
 */
void
if_initname(struct ifnet *ifp, const char *name, int unit)
{
	ifp->if_dname = name;
	ifp->if_dunit = unit;
	if (unit != IF_DUNIT_NONE)
		ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
	else
		strlcpy(ifp->if_xname, name, IFNAMSIZ);
}
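/*
 * Illustrative attach-time usage (hypothetical driver "foo"): a physical
 * device driver typically derives both arguments from its device_t, while
 * a pseudo-device passes a static name:
 *
 *	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 *		-> if_xname becomes e.g. "foo0"
 *
 *	if_initname(ifp, "pseudo", IF_DUNIT_NONE);
 *		-> if_xname becomes "pseudo", with no unit suffix
 */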
int
if_printf(struct ifnet *ifp, const char *fmt, ...)
{
	__va_list ap;
	int retval;

	retval = kprintf("%s: ", ifp->if_xname);
	__va_start(ap, fmt);
	retval += kvprintf(fmt, ap);
	__va_end(ap);
	return (retval);
}

struct ifnet *
if_alloc(uint8_t type)
{
	struct ifnet *ifp;
	size_t size;

	/*
	 * XXX temporary hack until arpcom is setup in if_l2com
	 */
	if (type == IFT_ETHER)
		size = sizeof(struct arpcom);
	else
		size = sizeof(struct ifnet);

	ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO);

	ifp->if_type = type;

	if (if_com_alloc[type] != NULL) {
		ifp->if_l2com = if_com_alloc[type](type, ifp);
		if (ifp->if_l2com == NULL) {
			kfree(ifp, M_IFNET);
			return (NULL);
		}
	}
	return (ifp);
}

void
if_free(struct ifnet *ifp)
{
	kfree(ifp, M_IFNET);
}

void
ifq_set_classic(struct ifaltq *ifq)
{
	ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq,
	    ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request);
}

void
ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq,
    ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request)
{
	int q;

	KASSERT(mapsubq != NULL, ("mapsubq is not specified"));
	KASSERT(enqueue != NULL, ("enqueue is not specified"));
	KASSERT(dequeue != NULL, ("dequeue is not specified"));
	KASSERT(request != NULL, ("request is not specified"));

	ifq->altq_mapsubq = mapsubq;
	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ifsq->ifsq_enqueue = enqueue;
		ifsq->ifsq_dequeue = dequeue;
		ifsq->ifsq_request = request;
	}
}

int
ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
    struct altq_pktattr *pa __unused)
{
	if (IF_QFULL(ifsq)) {
		m_freem(m);
		return(ENOBUFS);
	} else {
		IF_ENQUEUE(ifsq, m);
		return(0);
	}
}

struct mbuf *
ifsq_classic_dequeue(struct ifaltq_subque *ifsq, struct mbuf *mpolled, int op)
{
	struct mbuf *m;

	switch (op) {
	case ALTDQ_POLL:
		IF_POLL(ifsq, m);
		break;
	case ALTDQ_REMOVE:
		IF_DEQUEUE(ifsq, m);
		break;
	default:
		panic("unsupported ALTQ dequeue op: %d", op);
	}
	KKASSERT(mpolled == NULL || mpolled == m);
	return(m);
}

int
ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg)
{
	switch (req) {
	case ALTRQ_PURGE:
		IF_DRAIN(ifsq);
		break;
	default:
		panic("unsupported ALTQ request: %d", req);
	}
	return(0);
}
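/*
 * Illustrative sketch: a driver with no special queueing needs simply
 * installs the classic FIFO operations on its send queue, typically right
 * after attaching the interface.  A hypothetical attach path, where
 * TX_RING_SIZE is an assumed driver constant:
 *
 *	ifq_set_maxlen(&ifp->if_snd, TX_RING_SIZE - 1);
 *	ifq_set_classic(&ifp->if_snd);
 *
 * A packet scheduler replaces all three subqueue operations at once via
 * ifq_set_methods(); partial replacement is not possible, which is why
 * the KASSERTs above insist on every hook being supplied.
 */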
static void
ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0, need_sched;

	/*
	 * Try a direct ifnet.if_start call first.  If there is
	 * contention on the ifnet's serializer, ifnet.if_start
	 * will be scheduled on the ifnet's CPU instead.
	 */
	if (!ifnet_tryserialize_tx(ifp, ifsq)) {
		/*
		 * ifnet serializer contention happened;
		 * ifnet.if_start is scheduled on the ifnet's
		 * CPU, which will do the work for us.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		return;
	}

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);

	ifnet_deserialize_tx(ifp, ifsq);

	if (need_sched) {
		/*
		 * More data needs to be transmitted; ifnet.if_start is
		 * scheduled on the ifnet's CPU to keep going.
		 * NOTE: the ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, force_sched);
	}
}

/*
 * IFSUBQ packet staging mechanism:
 *
 * Packets enqueued into an IFSUBQ are staged to a certain amount before
 * the ifnet's if_start is called.  In this way, the driver can avoid
 * writing to hardware registers upon every packet; instead, the hardware
 * registers can be written once a certain number of packets have been
 * put onto the hardware TX ring.  Measurement on several modern NICs
 * (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that this hardware
 * register write aggregation can save ~20% CPU time when 18-byte UDP
 * datagrams are transmitted at 1.48Mpps.  The performance improvement
 * from hardware register write aggregation is also mentioned in Luigi
 * Rizzo's netmap paper (http://info.iet.unipi.it/~luigi/netmap/).
 *
 * IFSUBQ packet staging is performed for the two entry points into a
 * driver's transmission function:
 * - Direct ifnet if_start calling, i.e. ifsq_ifstart_try()
 * - ifnet if_start scheduling, i.e. ifsq_ifstart_schedule()
 *
 * IFSUBQ packet staging is stopped upon any of the following conditions:
 * - The count of packets enqueued on the current CPU is greater than or
 *   equal to ifsq_stage_cntmax.  (XXX this should be per-interface)
 * - The total length of packets enqueued on the current CPU is greater
 *   than or equal to the hardware's MTU minus max_protohdr.  max_protohdr
 *   is subtracted from the hardware's MTU mainly because a full TCP
 *   segment's size is usually less than the hardware's MTU.
 * - ifsq_ifstart_schedule() is not pending on the current CPU and the
 *   if_start interlock (if_snd.altq_started) is not released.
 * - if_start_rollup(), which is registered as a low priority netisr
 *   rollup function, is called; probably because no more work is pending
 *   for the netisr.
 *
 * NOTE:
 * Currently IFSUBQ packet staging is only performed in netisr threads.
 */
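/*
 * Worked example of the staging cut-off (illustrative figures only):
 * with a hypothetical ifsq_stage_cntmax of 4, an if_mtu of 1500 and a
 * max_protohdr of 60, staging in ifq_dispatch() below continues while
 *
 *	stg_cnt < 4  &&  stg_len < 1500 - 60 = 1440
 *
 * so four small UDP packets, or a single near-MTU TCP segment, are
 * enough to trigger an immediate if_start instead of further staging.
 */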
int
ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
{
	struct ifaltq *ifq = &ifp->if_snd;
	struct ifaltq_subque *ifsq;
	int error, start = 0, len, mcast = 0, avoid_start = 0;
	struct ifsubq_stage_head *head = NULL;
	struct ifsubq_stage *stage = NULL;

	ifsq = ifq_map_subq(ifq, mycpuid);
	ASSERT_IFNET_NOT_SERIALIZED_TX(ifp, ifsq);

	len = m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		mcast = 1;

	if (curthread->td_type == TD_TYPE_NETISR) {
		head = &ifsubq_stage_heads[mycpuid];
		stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt++;
		stage->stg_len += len;
		if (stage->stg_cnt < ifsq_stage_cntmax &&
		    stage->stg_len < (ifp->if_mtu - max_protohdr))
			avoid_start = 1;
	}

	ALTQ_SQ_LOCK(ifsq);
	error = ifsq_enqueue_locked(ifsq, m, pa);
	if (error) {
		if (!ifsq_data_ready(ifsq)) {
			ALTQ_SQ_UNLOCK(ifsq);
			return error;
		}
		avoid_start = 0;
	}
	if (!ifsq_is_started(ifsq)) {
		if (avoid_start) {
			ALTQ_SQ_UNLOCK(ifsq);

			KKASSERT(!error);
			if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
				ifsq_stage_insert(head, stage);

			IFNET_STAT_INC(ifp, obytes, len);
			if (mcast)
				IFNET_STAT_INC(ifp, omcasts, 1);
			return error;
		}

		/*
		 * Hold the interlock of ifnet.if_start
		 */
		ifsq_set_started(ifsq);
		start = 1;
	}
	ALTQ_SQ_UNLOCK(ifsq);

	if (!error) {
		IFNET_STAT_INC(ifp, obytes, len);
		if (mcast)
			IFNET_STAT_INC(ifp, omcasts, 1);
	}

	if (stage != NULL) {
		if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
			KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
			if (!avoid_start) {
				ifsq_stage_remove(head, stage);
				ifsq_ifstart_schedule(ifsq, 1);
			}
			return error;
		}

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
			ifsq_stage_remove(head, stage);
		} else {
			stage->stg_cnt = 0;
			stage->stg_len = 0;
		}
	}

	if (!start)
		return error;

	ifsq_ifstart_try(ifsq, 0);
	return error;
}

void *
ifa_create(int size, int flags)
{
	struct ifaddr *ifa;
	int i;

	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));

	ifa = kmalloc(size, M_IFADDR, flags | M_ZERO);
	if (ifa == NULL)
		return NULL;

	ifa->ifa_containers =
	    kmalloc_cachealign(ncpus * sizeof(struct ifaddr_container),
		M_IFADDR, M_WAITOK | M_ZERO);
	ifa->ifa_ncnt = ncpus;
	for (i = 0; i < ncpus; ++i) {
		struct ifaddr_container *ifac = &ifa->ifa_containers[i];

		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
		ifac->ifa = ifa;
		ifac->ifa_refcnt = 1;
	}
#ifdef IFADDR_DEBUG
	kprintf("alloc ifa %p %d\n", ifa, size);
#endif
	return ifa;
}

void
ifac_free(struct ifaddr_container *ifac, int cpu_id)
{
	struct ifaddr *ifa = ifac->ifa;

	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
	KKASSERT(ifac->ifa_refcnt == 0);
	KASSERT(ifac->ifa_listmask == 0,
	    ("ifa is still on %#x lists", ifac->ifa_listmask));

	ifac->ifa_magic = IFA_CONTAINER_DEAD;

#ifdef IFADDR_DEBUG_VERBOSE
	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
#endif

	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
	    ("invalid # of ifac, %d", ifa->ifa_ncnt));
	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
#ifdef IFADDR_DEBUG
		kprintf("free ifa %p\n", ifa);
#endif
		kfree(ifa->ifa_containers, M_IFADDR);
		kfree(ifa, M_IFADDR);
	}
}
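/*
 * Illustrative usage (a sketch; the real consumers are the per-protocol
 * address attach paths): a protocol embeds struct ifaddr at the start of
 * its own address structure and allocates it through ifa_create() so the
 * per-CPU reference containers are set up:
 *
 *	struct in_ifaddr *ia;
 *
 *	ia = ifa_create(sizeof(*ia), M_WAITOK);
 *	...initialize the protocol fields, then
 *	ifa_iflink(&ia->ia_ifa, ifp, 1);
 *
 * ia_ifa here names the embedded struct ifaddr member, following the
 * conventional BSD in_ifaddr layout.
 */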
static void
ifa_iflink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
	    ("ifaddr is on if_addrheads"));

	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
	if (msg->tail)
		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
	else
		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);

	crit_exit();

	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
}

void
ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_iflink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;
	msg.tail = tail;

	ifa_domsg(&msg.base.lmsg, 0);
}

static void
ifa_ifunlink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
	    ("ifaddr is not on if_addrhead"));

	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;

	crit_exit();

	ifa_forwardmsg(&nmsg->lmsg, cpu + 1);
}

void
ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_ifunlink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;

	ifa_domsg(&msg.base.lmsg, 0);
}

static void
ifa_destroy_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;

	IFAFREE(msg->ifa);
	ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
}

void
ifa_destroy(struct ifaddr *ifa)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_destroy_dispatch);
	msg.ifa = ifa;

	ifa_domsg(&msg.base.lmsg, 0);
}

struct lwkt_port *
ifnet_portfn(int cpu)
{
	return &ifnet_threads[cpu].td_msgport;
}

void
ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu)
{
	KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus);

	if (next_cpu < ncpus)
		lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg);
	else
		lwkt_replymsg(lmsg, 0);
}

int
ifnet_domsg(struct lwkt_msg *lmsg, int cpu)
{
	KKASSERT(cpu < ncpus);
	return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0);
}

void
ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu)
{
	KKASSERT(cpu < ncpus);
	lwkt_sendmsg(ifnet_portfn(cpu), lmsg);
}
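/*
 * The dispatch functions above all follow the same per-CPU replication
 * pattern: a message is started on CPU0 via ifa_domsg(), each CPU applies
 * the change to its local copy of the data and hands the message to the
 * next CPU, and the last CPU replies to unblock the caller.  A minimal
 * sketch of such a dispatch handler (hypothetical example_dispatch):
 *
 *	static void
 *	example_dispatch(netmsg_t nmsg)
 *	{
 *		...update this CPU's replica under crit_enter/crit_exit...
 *		ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1);
 *	}
 *
 * ifa_forwardmsg() (like ifnet_forwardmsg() above) replies to the message
 * instead of forwarding it once mycpuid + 1 reaches ncpus.
 */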
/*
 * Generic netmsg service loop.  Some protocols may roll their own but all
 * must do the basic command dispatch function call done here.
 */
static void
ifnet_service_loop(void *arg __unused)
{
	netmsg_t msg;

	while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) {
		KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg"));
		msg->base.nm_dispatch(msg);
	}
}

static void
if_start_rollup(void)
{
	struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid];
	struct ifsubq_stage *stage;

	while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) {
		struct ifaltq_subque *ifsq = stage->stg_subq;
		int is_sched = 0;

		if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)
			is_sched = 1;
		ifsq_stage_remove(head, stage);

		if (is_sched) {
			ifsq_ifstart_schedule(ifsq, 1);
		} else {
			int start = 0;

			ALTQ_SQ_LOCK(ifsq);
			if (!ifsq_is_started(ifsq)) {
				/*
				 * Hold the interlock of ifnet.if_start
				 */
				ifsq_set_started(ifsq);
				start = 1;
			}
			ALTQ_SQ_UNLOCK(ifsq);

			if (start)
				ifsq_ifstart_try(ifsq, 1);
		}
		KKASSERT((stage->stg_flags &
		    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	}
}

static void
ifnetinit(void *dummy __unused)
{
	int i;

	for (i = 0; i < ncpus; ++i) {
		struct thread *thr = &ifnet_threads[i];

		lwkt_create(ifnet_service_loop, NULL, NULL,
		    thr, TDF_NOSTART|TDF_FORCE_SPINPORT,
		    i, "ifnet %d", i);
		netmsg_service_port_init(&thr->td_msgport);
		lwkt_schedule(thr);
	}

	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifsubq_stage_heads[i].stg_head);
	netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
}

struct ifnet *
ifnet_byindex(unsigned short idx)
{
	if (idx > if_index)
		return NULL;
	return ifindex2ifnet[idx];
}

struct ifaddr *
ifaddr_byindex(unsigned short idx)
{
	struct ifnet *ifp;
	struct ifaddr_container *ifac;

	ifp = ifnet_byindex(idx);
	if (!ifp)
		return NULL;
	/* Guard against an interface with an empty address list. */
	ifac = TAILQ_FIRST(&ifp->if_addrheads[mycpuid]);
	if (ifac == NULL)
		return NULL;
	return ifac->ifa;
}

void
if_register_com_alloc(u_char type,
    if_com_alloc_t *a, if_com_free_t *f)
{

	KASSERT(if_com_alloc[type] == NULL,
	    ("if_register_com_alloc: %d already registered", type));
	KASSERT(if_com_free[type] == NULL,
	    ("if_register_com_alloc: %d free already registered", type));

	if_com_alloc[type] = a;
	if_com_free[type] = f;
}

void
if_deregister_com_alloc(u_char type)
{

	KASSERT(if_com_alloc[type] != NULL,
	    ("if_deregister_com_alloc: %d not registered", type));
	KASSERT(if_com_free[type] != NULL,
	    ("if_deregister_com_alloc: %d free not registered", type));
	if_com_alloc[type] = NULL;
	if_com_free[type] = NULL;
}

int
if_ring_count2(int cnt, int cnt_max)
{
	int shift = 0;

	KASSERT(cnt_max >= 1 && powerof2(cnt_max),
	    ("invalid ring count max %d", cnt_max));

	if (cnt <= 0)
		cnt = cnt_max;
	if (cnt > ncpus2)
		cnt = ncpus2;
	if (cnt > cnt_max)
		cnt = cnt_max;

	while ((1 << (shift + 1)) <= cnt)
		++shift;
	cnt = 1 << shift;

	KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max,
	    ("calculate cnt %d, ncpus2 %d, cnt max %d",
	     cnt, ncpus2, cnt_max));
	return cnt;
}

void
ifq_set_maxlen(struct ifaltq *ifq, int len)
{
	ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax);
}
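/*
 * Worked examples for if_ring_count2() above (assuming a hypothetical
 * machine where ncpus2 == 8):
 *
 *	if_ring_count2(0, 4)  -> 4	(cnt <= 0 defaults to cnt_max)
 *	if_ring_count2(3, 8)  -> 2	(rounded down to a power of 2)
 *	if_ring_count2(16, 8) -> 8	(clamped by ncpus2 and cnt_max)
 *
 * The result is always a power of 2, which lets callers spread work
 * across rings with simple masking, as ifq_mapsubq_mask() below does.
 */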
int
ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused)
{
	return ALTQ_SUBQ_INDEX_DEFAULT;
}

int
ifq_mapsubq_mask(struct ifaltq *ifq, int cpuid)
{
	return (cpuid & ifq->altq_subq_mask);
}

static void
ifsq_watchdog(void *arg)
{
	struct ifsubq_watchdog *wd = arg;
	struct ifnet *ifp;

	if (__predict_true(wd->wd_timer == 0 || --wd->wd_timer))
		goto done;

	ifp = ifsq_get_ifp(wd->wd_subq);
	if (ifnet_tryserialize_all(ifp)) {
		wd->wd_watchdog(wd->wd_subq);
		ifnet_deserialize_all(ifp);
	} else {
		/* try again next timeout */
		wd->wd_timer = 1;
	}
done:
	ifsq_watchdog_reset(wd);
}

static void
ifsq_watchdog_reset(struct ifsubq_watchdog *wd)
{
	callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd,
	    ifsq_get_cpuid(wd->wd_subq));
}

void
ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq,
    ifsq_watchdog_t watchdog)
{
	callout_init_mp(&wd->wd_callout);
	wd->wd_timer = 0;
	wd->wd_subq = ifsq;
	wd->wd_watchdog = watchdog;
}

void
ifsq_watchdog_start(struct ifsubq_watchdog *wd)
{
	wd->wd_timer = 0;
	ifsq_watchdog_reset(wd);
}

void
ifsq_watchdog_stop(struct ifsubq_watchdog *wd)
{
	wd->wd_timer = 0;
	callout_stop(&wd->wd_callout);
}
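/*
 * Illustrative driver-side usage of the subqueue watchdog above
 * (hypothetical driver "foo"): the callout ticks once per second and
 * only invokes the callback when wd_timer counts down to zero, so the
 * driver arms wd_timer when it queues a transmit and clears it on TX
 * completion; the callback then only runs if the hardware stalls:
 *
 *	attach:	ifsq_watchdog_init(&sc->foo_wd, ifsq, foo_watchdog);
 *	init:	ifsq_watchdog_start(&sc->foo_wd);
 *	start:	sc->foo_wd.wd_timer = 5;	(5 second TX timeout)
 *	txeof:	sc->foo_wd.wd_timer = 0;
 *	stop:	ifsq_watchdog_stop(&sc->foo_wd);
 *
 * foo_watchdog() is called with the ifnet fully serialized and would
 * typically reset the hardware.
 */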