/*
 * Copyright (c) 1980, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)if.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
 */

#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/thread.h>
#include <sys/serialize.h>
#include <sys/bus.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <sys/mutex2.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/ifq_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/if_clone.h>
#include <net/netisr2.h>
#include <net/netmsg2.h>

#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <machine/smp.h>

#if defined(INET) || defined(INET6)
/*XXX*/
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif
#endif

struct netmsg_ifaddr {
	struct netmsg_base	base;
	struct ifaddr		*ifa;
	struct ifnet		*ifp;
	int			tail;
};

struct ifsubq_stage_head {
	TAILQ_HEAD(, ifsubq_stage)	stg_head;
} __cachealign;

struct if_ringmap {
	int		rm_cnt;
	int		rm_grid;
	int		rm_cpumap[];
};
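/*
 * Layout sketch for if_ringmap (illustrative, assumed semantics): for a
 * NIC exposing rm_cnt = 4 rings on an 8-CPU machine, rm_cpumap[] is a
 * flexible array member with one entry per ring, e.g. {0, 2, 4, 6}, so
 * ring i is serviced on CPU rm_cpumap[i]; rm_grid records the grid size
 * used when the map was spread across the CPUs.
 */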
/*
 * System initialization
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);
static void	ifnetinit(void *);
static void	if_slowtimo(void *);
static void	link_rtrequest(int, struct rtentry *);
static int	if_rtdel(struct radix_node *, void *);
static void	if_slowtimo_dispatch(netmsg_t);

/* Helper functions */
static void	ifsq_watchdog_reset(struct ifsubq_watchdog *);
static int	if_delmulti_serialized(struct ifnet *, struct sockaddr *);
static struct ifnet_array *ifnet_array_alloc(int);
static void	ifnet_array_free(struct ifnet_array *);
static struct ifnet_array *ifnet_array_add(struct ifnet *,
		    const struct ifnet_array *);
static struct ifnet_array *ifnet_array_del(struct ifnet *,
		    const struct ifnet_array *);

#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related files;
 * this should be generalized.
 */
extern void	nd6_setmtu(struct ifnet *);
#endif

SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap");

static int ifsq_stage_cntmax = 4;
TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
    &ifsq_stage_cntmax, 0, "ifq staging packet count max");

static int if_stats_compat = 0;
SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW,
    &if_stats_compat, 0, "Compatibility with the old ifnet stats");

static int if_ringmap_dumprdr = 0;
SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW,
    &if_ringmap_dumprdr, 0, "dump redirect table");

SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL);
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL);

static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];

MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");

int			ifqmaxlen = IFQ_MAXLEN;
struct ifnethead	ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);

static struct ifnet_array	ifnet_array0;
static struct ifnet_array	*ifnet_array = &ifnet_array0;

static struct callout		if_slowtimo_timer;
static struct netmsg_base	if_slowtimo_netmsg;

int			if_index = 0;
struct ifnet		**ifindex2ifnet = NULL;
static struct mtx	ifnet_mtx = MTX_INITIALIZER("ifnet");

static struct ifsubq_stage_head	ifsubq_stage_heads[MAXCPU];

#ifdef notyet
#define IFQ_KTR_STRING		"ifq=%p"
#define IFQ_KTR_ARGS		struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ			KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)

#define IF_START_KTR_STRING	"ifp=%p"
#define IF_START_KTR_ARGS	struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START		KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
#define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)
#endif

TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/*
 * Network interface utility routines.
 *
 * Routines with ifa_ifwith* names take sockaddr *'s as
 * parameters.
 */
/* ARGSUSED */
static void
ifinit(void *dummy)
{
	struct ifnet *ifp;

	callout_init_mp(&if_slowtimo_timer);
	netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport,
	    MSGF_PRIORITY, if_slowtimo_dispatch);

	/* XXX is this necessary? */
	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
		if (ifp->if_snd.altq_maxlen == 0) {
			if_printf(ifp, "XXX: driver didn't set altq_maxlen\n");
			ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
		}
	}
	ifnet_unlock();

	/* Start if_slowtimo */
	lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg);
}

static void
ifsq_ifstart_ipifunc(void *arg)
{
	struct ifaltq_subque *ifsq = arg;
	struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);

	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg);
	crit_exit();
}

static __inline void
ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
	TAILQ_REMOVE(&head->stg_head, stage, stg_link);
	stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
	stage->stg_cnt = 0;
	stage->stg_len = 0;
}

static __inline void
ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT((stage->stg_flags &
	    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
	TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
}

/*
 * Schedule ifnet.if_start on the subqueue owner CPU.
 */
static void
ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
{
	int cpu;

	if (!force && curthread->td_type == TD_TYPE_NETISR &&
	    ifsq_stage_cntmax > 0) {
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt = 0;
		stage->stg_len = 0;
		if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
			ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
		stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
		return;
	}

	cpu = ifsq_get_cpuid(ifsq);
	if (cpu != mycpuid)
		lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
	else
		ifsq_ifstart_ipifunc(ifsq);
}
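/*
 * Staging sketch (descriptive, based on the function above): when a
 * netisr thread asks for a non-forced start, the request is only
 * recorded on this CPU's stage list with IFSQ_STAGE_FLAG_SCHED set;
 * flushing the stage list later performs the real schedule, so a burst
 * of enqueues from one netisr collapses into a single if_start dispatch
 * instead of one IPI or message per packet.
 */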
/*
 * NOTE:
 * This function will release the ifnet.if_start subqueue interlock,
 * if ifnet.if_start for the subqueue does not need to be scheduled.
 */
static __inline int
ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
{
	if (!running || ifsq_is_empty(ifsq)
#ifdef ALTQ
	    || ifsq->ifsq_altq->altq_tbr != NULL
#endif
	) {
		ALTQ_SQ_LOCK(ifsq);
		/*
		 * The ifnet.if_start subqueue interlock is released, if:
		 * 1) Hardware cannot take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (ifsq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    on the subqueue when the hardware queue is ready.
		 * 2) There is no packet in the subqueue.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start on the subqueue.
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    The TBR callout will call ifnet.if_start on the
		 *    subqueue.
		 */
		if (!running || !ifsq_data_ready(ifsq)) {
			ifsq_clr_started(ifsq);
			ALTQ_SQ_UNLOCK(ifsq);
			return 0;
		}
		ALTQ_SQ_UNLOCK(ifsq);
	}
	return 1;
}

static void
ifsq_ifstart_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct globaldata *gd = mycpu;
	int running = 0, need_sched;

	crit_enter_gd(gd);

	lwkt_replymsg(lmsg, 0);	/* reply ASAP */

	if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) {
		/*
		 * We need to chase the subqueue owner CPU change.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		crit_exit_gd(gd);
		return;
	}

	ifsq_serialize_hw(ifsq);
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data needs to be transmitted, so ifnet.if_start is
		 * scheduled on the subqueue owner CPU and we keep going.
		 * NOTE: The ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}

	crit_exit_gd(gd);
}

/* Device driver ifnet.if_start helper function */
void
ifsq_devstart(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0;

	ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);

	ALTQ_SQ_LOCK(ifsq);
	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
		ALTQ_SQ_UNLOCK(ifsq);
		return;
	}
	ifsq_set_started(ifsq);
	ALTQ_SQ_UNLOCK(ifsq);

	ifp->if_start(ifp, ifsq);

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
		running = 1;

	if (ifsq_ifstart_need_schedule(ifsq, running)) {
		/*
		 * More data needs to be transmitted, so ifnet.if_start is
		 * scheduled on ifnet's CPU and we keep going.
		 * NOTE: The ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}
}
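/*
 * Typical driver-side usage (a sketch under assumptions, with a
 * hypothetical foo(4) driver; not taken from this file): after a TX
 * completion interrupt frees descriptors, the driver clears the
 * oactive state and kicks the transmit path again while its hardware
 * serializer is still held, e.g.
 *
 *	ifsq_clr_oactive(ifsq);
 *	if_devstart(ifp);		(single TX queue drivers)
 *	ifsq_devstart(ifsq);		(multi-queue drivers)
 */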
void
if_devstart(struct ifnet *ifp)
{
	ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
}

/* Device driver ifnet.if_start schedule helper function */
void
ifsq_devstart_sched(struct ifaltq_subque *ifsq)
{
	ifsq_ifstart_schedule(ifsq, 1);
}

void
if_devstart_sched(struct ifnet *ifp)
{
	ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
}

static void
if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_enter(ifp->if_serializer);
}

static void
if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_exit(ifp->if_serializer);
}

static int
if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	return lwkt_serialize_try(ifp->if_serializer);
}

#ifdef INVARIANTS
static void
if_default_serialize_assert(struct ifnet *ifp,
			    enum ifnet_serialize slz __unused,
			    boolean_t serialized)
{
	if (serialized)
		ASSERT_SERIALIZED(ifp->if_serializer);
	else
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
}
#endif

/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl, *sdl_addr;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	struct ifnet **old_ifindex2ifnet = NULL;
	struct ifnet_array *old_ifnet_array;
	int i, q;

	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}
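	/*
	 * In other words (a sketch, not from this file; names are
	 * illustrative): a simple NIC driver usually passes its interrupt
	 * serializer down here, e.g. via ether_ifattach(ifp, eaddr,
	 * &sc->sc_serializer), and inherits the if_default_* methods
	 * above, while a multi-queue driver instead fills in its own
	 * if_serialize/if_deserialize/if_tryserialize methods before
	 * attaching and leaves the serializer argument NULL.
	 */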
	/*
	 * XXX -
	 * The old code would work if the interface passed a pre-existing
	 * chain of ifaddrs to this code.  We don't trust our callers to
	 * properly initialize the tailq, however, so we no longer allow
	 * this unlikely case.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);

	/*
	 * Create a link-level name for this device.
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = RT_ROUNDUP(socksize);
	ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize);
	sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);
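	/*
	 * Worked example of the sizing above (illustrative only): for
	 * "em0" (namelen = 3) on Ethernet (if_addrlen = 6), masklen is
	 * offsetof(sdl_data) + 3, socksize adds the 6-byte link address,
	 * is raised to sizeof(struct sockaddr_dl) if smaller, then
	 * RT_ROUNDUP()'d for the routing code.  One allocation holds the
	 * ifaddr plus two such sockaddrs: the address itself and a
	 * netmask whose name bytes are all 0xff, so the mask matches
	 * exactly the "em0" portion of AF_LINK keys.
	 */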
	ifp->if_data_pcpu = kmalloc_cachealign(
	    ncpus * sizeof(struct ifdata_pcpu), M_DEVBUF, M_WAITOK | M_ZERO);

	if (ifp->if_mapsubq == NULL)
		ifp->if_mapsubq = ifq_mapsubq_default;

	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;

	if (ifq->altq_subq_cnt <= 0)
		ifq->altq_subq_cnt = 1;
	ifq->altq_subq = kmalloc_cachealign(
	    ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	if (ifq->altq_maxlen == 0) {
		if_printf(ifp, "driver didn't set altq_maxlen\n");
		ifq_set_maxlen(ifq, ifqmaxlen);
	}

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ALTQ_SQ_LOCK_INIT(ifsq);
		ifsq->ifsq_index = q;

		ifsq->ifsq_altq = ifq;
		ifsq->ifsq_ifp = ifp;

		ifsq->ifsq_maxlen = ifq->altq_maxlen;
		ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES;
		ifsq->ifsq_prepended = NULL;
		ifsq->ifsq_started = 0;
		ifsq->ifsq_hw_oactive = 0;
		ifsq_set_cpuid(ifsq, 0);
		if (ifp->if_serializer != NULL)
			ifsq_set_hw_serialize(ifsq, ifp->if_serializer);

		ifsq->ifsq_stage =
		    kmalloc_cachealign(ncpus * sizeof(struct ifsubq_stage),
		    M_DEVBUF, M_WAITOK | M_ZERO);
		for (i = 0; i < ncpus; ++i)
			ifsq->ifsq_stage[i].stg_subq = ifsq;

		ifsq->ifsq_ifstart_nmsg =
		    kmalloc(ncpus * sizeof(struct netmsg_base),
		    M_LWKTMSG, M_WAITOK);
		for (i = 0; i < ncpus; ++i) {
			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
			    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
		}
	}
	ifq_set_classic(ifq);

	/*
	 * Increase mbuf cluster/jcluster limits for the mbufs that
	 * could sit on the device queues for quite some time.
	 */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(ifp->if_nmbjclusters);

	/*
	 * Install this ifp into ifindex2ifnet, the ifnet queue and the
	 * ifnet array after it is set up.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * with the ifnet lock, so that non-netisr threads can get a
	 * consistent view.
	 */
	ifnet_lock();

	/* Don't update if_index until ifindex2ifnet is setup */
	ifp->if_index = if_index + 1;
	sdl_addr->sdl_index = ifp->if_index;

	/*
	 * Install this ifp into ifindex2ifnet
	 */
	if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		/*
		 * Grow ifindex2ifnet
		 */
		if_indexlim <<= 1;
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet != NULL) {
			bcopy(ifindex2ifnet, q, n/2);
			/* Free old ifindex2ifnet after sync all netisrs */
			old_ifindex2ifnet = ifindex2ifnet;
		}
		ifindex2ifnet = q;
	}
	ifindex2ifnet[ifp->if_index] = ifp;
	/*
	 * Update if_index after this ifp is installed into ifindex2ifnet,
	 * so that netisrs can get a consistent view of ifindex2ifnet.
	 */
	cpu_sfence();
	if_index = ifp->if_index;

	/*
	 * Install this ifp into the ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_add(ifp, old_ifnet_array);

	/*
	 * Install this ifp into the ifnet queue.
	 */
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link);

	ifnet_unlock();

	/*
	 * Sync all netisrs so that the old ifindex2ifnet and ifnet array
	 * are no longer accessed and we can free them safely later on.
	 */
	netmsg_service_sync();
	if (old_ifindex2ifnet != NULL)
		kfree(old_ifindex2ifnet, M_IFADDR);
	ifnet_array_free(old_ifnet_array);

	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}
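/*
 * The update pattern used above is worth spelling out (informal sketch
 * of what the code already does):
 *
 *	old = shared_ptr;		(under ifnet_lock)
 *	shared_ptr = new_copy;
 *	ifnet_unlock();
 *	netmsg_service_sync();		(every netisr passes a quiescent point)
 *	kfree(old, ...);		(no reader can still hold 'old')
 *
 * Readers in the netisrs use ifnet_array_get()/ifindex2ifnet without a
 * lock; the sync-then-free step is what makes that safe.
 */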
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list)
		if_attachdomain1(ifp);
	ifnet_unlock();
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
	if_attachdomain, NULL);

static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
				(*dp->dom_ifattach)(ifp);
	crit_exit();
}

/*
 * Purge all addresses whose type is _not_ AF_LINK
 */
static void
if_purgeaddrs_nolink_dispatch(netmsg_t nmsg)
{
	struct lwkt_msg *lmsg = &nmsg->lmsg;
	struct ifnet *ifp = lmsg->u.ms_resultp;
	struct ifaddr_container *ifac, *next;

	ASSERT_IN_NETISR(0);

	/*
	 * The ifaddr processing in the following loop will block;
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so the blocking here is not
	 * a problem.
	 */
	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
			      ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!!  ad hoc just for INET */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp,
				       NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}

	lwkt_replymsg(lmsg, 0);
}

void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct netmsg_base nmsg;
	struct lwkt_msg *lmsg = &nmsg.lmsg;

	ASSERT_CANDOMSG_NETISR0(curthread);

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
	    if_purgeaddrs_nolink_dispatch);
	lmsg->u.ms_resultp = ifp;
	lwkt_domsg(netisr_cpuport(0), lmsg, 0);
}

static void
ifq_stage_detach_handler(netmsg_t nmsg)
{
	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
	int q;

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
			ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
	}
	lwkt_replymsg(&nmsg->lmsg, 0);
}

static void
ifq_stage_detach(struct ifaltq *ifq)
{
	struct netmsg_base base;
	int cpu;

	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
	    ifq_stage_detach_handler);
	base.lmsg.u.ms_resultp = ifq;

	for (cpu = 0; cpu < ncpus; ++cpu)
		lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0);
}

struct netmsg_if_rtdel {
	struct netmsg_base	base;
	struct ifnet		*ifp;
};

static void
if_rtdel_dispatch(netmsg_t msg)
{
	struct netmsg_if_rtdel *rmsg = (void *)msg;
	int i, nextcpu, cpu;

	cpu = mycpuid;
	for (i = 1; i <= AF_MAX; i++) {
		struct radix_node_head *rnh;

		if ((rnh = rt_tables[cpu][i]) == NULL)
			continue;
		rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp);
	}

	nextcpu = cpu + 1;
	if (nextcpu < ncpus)
		lwkt_forwardmsg(netisr_cpuport(nextcpu), &rmsg->base.lmsg);
	else
		lwkt_replymsg(&rmsg->base.lmsg, 0);
}

/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 */
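/*
 * The per-CPU route tables are walked by daisy-chaining one message
 * through the netisrs rather than broadcasting: cpu0 runs the walk on
 * its table, then lwkt_forwardmsg() passes the same message to cpu1,
 * and so on; only the last CPU replies, which is what the
 * rt_domsg_global() call in if_detach() below waits for.
 */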
void
if_detach(struct ifnet *ifp)
{
	struct ifnet_array *old_ifnet_array;
	struct netmsg_if_rtdel msg;
	struct domain *dp;
	int q;

	/* Announce that the interface is gone. */
	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	/*
	 * Remove this ifp from ifindex2ifnet, the ifnet queue and the
	 * ifnet array before it is whacked.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * with the ifnet lock, so that non-netisr threads can get a
	 * consistent view.
	 */
	ifnet_lock();

	/*
	 * Remove this ifp from ifindex2ifnet and maybe decrement if_index.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	/*
	 * Remove this ifp from the ifnet queue.
	 */
	TAILQ_REMOVE(&ifnetlist, ifp, if_link);

	/*
	 * Remove this ifp from the ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_del(ifp, old_ifnet_array);

	ifnet_unlock();

	/*
	 * Sync all netisrs so that the old ifnet array is no longer
	 * accessed and we can free it safely later on.
	 */
	netmsg_service_sync();
	ifnet_array_free(old_ifnet_array);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

	/* Decrease the mbuf clusters/jclusters limits increased by us */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(-ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(-ifp->if_nmbjclusters);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
			("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
			("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif
#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be
	 * done before removing the routing entries below, since IPv6
	 * interface direct routes are expected to be removed by the
	 * IPv6-specific kernel API.  Otherwise, the kernel will detect
	 * some inconsistency and complain.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface
	 */
	netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    if_rtdel_dispatch);
	msg.ifp = ifp;
	rt_domsg_global(&msg.base);

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
				ifp->if_afdata[dp->dom_family]);

	kfree(ifp->if_addrheads, M_IFADDR);

	lwkt_synchronize_ipiqs("if_detach");
	ifq_stage_detach(&ifp->if_snd);

	for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];

		kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
		kfree(ifsq->ifsq_stage, M_DEVBUF);
	}
	kfree(ifp->if_snd.altq_subq, M_DEVBUF);

	kfree(ifp->if_data_pcpu, M_DEVBUF);

	crit_exit();
}

/*
 * Create interface group without members
 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group *ifg = NULL;

	if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
	    M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}

/*
 * Add a group to an interface
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_group *ifg = NULL;
	struct ifg_member *ifgm;

	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		kfree(ifgl, M_TEMP);
		return (ENOMEM);
	}

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
		kfree(ifgl, M_TEMP);
		kfree(ifgm, M_TEMP);
		return (ENOMEM);
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}
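/*
 * Note on the digit check above (descriptive sketch): group names may
 * not end in a digit, so that they can never collide with interface
 * names; e.g. if_addgroup(ifp, "egress") is accepted while
 * if_addgroup(ifp, "vlan0") fails with EINVAL.
 */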
/*
 * Remove a group from an interface
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		kfree(ifgm, M_TEMP);
	}

	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		kfree(ifgl->ifgl_group, M_TEMP);
	}

	kfree(ifgl, M_TEMP);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}

/*
 * Store all groups of an interface in the memory pointed to by data.
 */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int len, error;
	struct ifg_list *ifgl;
	struct ifg_req ifgrq, *ifgp;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Store all members of a group in the memory pointed to by data.
 */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req ifgrq, *ifgp;
	int len, error;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}
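/*
 * Both functions above implement the usual two-phase ioctl sizing
 * protocol.  Userland sketch (illustrative; error handling omitted and
 * the SIOCGIFGROUP command assumed):
 *
 *	struct ifgroupreq ifgr;
 *	bzero(&ifgr, sizeof(ifgr));
 *	strlcpy(ifgr.ifgr_name, "em0", sizeof(ifgr.ifgr_name));
 *	ioctl(s, SIOCGIFGROUP, &ifgr);		(first pass: sets ifgr_len)
 *	ifgr.ifgr_groups = malloc(ifgr.ifgr_len);
 *	ioctl(s, SIOCGIFGROUP, &ifgr);		(second pass: fills array)
 */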
/*
 * Delete Routes for a Network Interface
 *
 * Called for each routing entry via the rnh->rnh_walktree() call above
 * to delete all route entries referencing a detaching network interface.
 *
 * Arguments:
 *	rn	pointer to node in the routing table
 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
 *
 * Returns:
 *	0	successful
 *	errno	failed - reason indicated
 */
static int
if_rtdel(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct ifnet *ifp = arg;
	int err;

	if (rt->rt_ifp == ifp) {
		/*
		 * Protect (sorta) against walktree recursion problems
		 * with cloned routes
		 */
		if (!(rt->rt_flags & RTF_UP))
			return (0);

		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
				rt_mask(rt), rt->rt_flags,
				NULL);
		if (err) {
			log(LOG_WARNING, "if_rtdel: error %d\n", err);
		}
	}

	return (0);
}

static __inline boolean_t
ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa)
{
	if (old_ifa == NULL)
		return TRUE;

	if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 &&
	    (cur_ifa->ifa_ifp->if_flags & IFF_UP))
		return TRUE;
	if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 &&
	    (cur_ifa->ifa_flags & IFA_ROUTE))
		return TRUE;
	return FALSE;
}

/*
 * Locate an interface based on a complete address.
 */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr)
{
	const struct ifnet_array *arr;
	int i;

	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (sa_equal(addr, ifa->ifa_addr))
				return (ifa);
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    /* IPv6 doesn't have broadcast */
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    sa_equal(ifa->ifa_broadaddr, addr))
				return (ifa);
		}
	}
	return (NULL);
}

/*
 * Locate the point-to-point interface with a given destination address.
 */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr)
{
	const struct ifnet_array *arr;
	int i;

	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaddr_container *ifac;

		if (!(ifp->if_flags & IFF_POINTOPOINT))
			continue;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (ifa->ifa_dstaddr &&
			    sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		}
	}
	return (NULL);
}
/*
 * Find an interface on a specific network.  If many match, the most
 * specific one found is chosen.
 */
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	char *addr_data = addr->sa_data, *cplim;
	const struct ifnet_array *arr;
	int i;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;

		if (sdl->sdl_index && sdl->sdl_index <= if_index)
			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
	}

	/*
	 * Scan through each interface, looking for ones that have
	 * addresses in this address family.
	 */
	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;
			char *cp, *cp2, *cp3;

			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr))
					return (ifa);
			} else {
				/*
				 * If we have a special address handler,
				 * then use it instead of the generic one.
				 */
				if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
						return (ifa);
					} else {
						continue;
					}
				}

				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len +
					(char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search for an even
				 * better one.  If the netmasks are equal,
				 * we prefer this ifa based on the result
				 * of ifa_prefer().
				 */
				if (ifa_maybe == NULL ||
				    rn_refines((char *)ifa->ifa_netmask,
					       (char *)ifa_maybe->ifa_netmask) ||
				    (sa_equal(ifa_maybe->ifa_netmask,
					      ifa->ifa_netmask) &&
				     ifa_prefer(ifa, ifa_maybe)))
					ifa_maybe = ifa;
			}
		}
	}
	return (ifa_maybe);
}
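/*
 * Worked example of the masked comparison above (illustrative):
 * matching addr 192.0.2.77 against an ifa with address 192.0.2.1 and
 * netmask 255.255.255.0, the per-byte test (addr ^ ifaddr) & mask gives
 * 0 for the first three bytes and (77 ^ 1) & 0 == 0 for the last, so
 * the address is on the ifa's network; with addr 192.0.3.77 the third
 * byte yields (3 ^ 2) & 255 != 0 and the candidate is skipped.
 */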
/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr_container *ifac;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (0);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == NULL) {
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr != NULL &&
			     sa_equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		} else {
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len +
				(char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			if (cp3 == cplim)
				return (ifa);
		}
	}
	return (ifa_maybe);
}

/*
 * Default action when installing a route with a Link Level gateway.
 * Lookup an appropriate real ifa to point to.
 * This should be moved to /sys/net/link.c eventually.
 */
static void
link_rtrequest(int cmd, struct rtentry *rt)
{
	struct ifaddr *ifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
		return;
	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa != NULL) {
		IFAFREE(rt->rt_ifa);
		IFAREF(ifa);
		rt->rt_ifa = ifa;
		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
			ifa->ifa_rtrequest(cmd, rt);
	}
}

struct netmsg_ifroute {
	struct netmsg_base	base;
	struct ifnet		*ifp;
	int			flag;
	int			fam;
};

/*
 * Mark an interface down and notify protocols of the transition.
 */
static void
if_unroute_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifroute *msg = (struct netmsg_ifroute *)nmsg;
	struct ifnet *ifp = msg->ifp;
	int flag = msg->flag, fam = msg->fam;
	struct ifaddr_container *ifac;

	ifp->if_flags &= ~flag;
	getmicrotime(&ifp->if_lastchange);
	/*
	 * The ifaddr processing in the following loop will block;
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so the blocking here is not
	 * a problem.
	 */
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
	}
	ifq_purge_all(&ifp->if_snd);
	rt_ifmsg(ifp);

	lwkt_replymsg(&nmsg->lmsg, 0);
}

void
if_unroute(struct ifnet *ifp, int flag, int fam)
{
	struct netmsg_ifroute msg;

	ASSERT_CANDOMSG_NETISR0(curthread);

	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
	    if_unroute_dispatch);
	msg.ifp = ifp;
	msg.flag = flag;
	msg.fam = fam;
	lwkt_domsg(netisr_cpuport(0), &msg.base.lmsg, 0);
}
/*
 * Mark an interface up and notify protocols of the transition.
 */
static void
if_route_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifroute *msg = (struct netmsg_ifroute *)nmsg;
	struct ifnet *ifp = msg->ifp;
	int flag = msg->flag, fam = msg->fam;
	struct ifaddr_container *ifac;

	ifq_purge_all(&ifp->if_snd);
	ifp->if_flags |= flag;
	getmicrotime(&ifp->if_lastchange);
	/*
	 * The ifaddr processing in the following loop will block;
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so the blocking here is not
	 * a problem.
	 */
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			kpfctlinput(PRC_IFUP, ifa->ifa_addr);
	}
	rt_ifmsg(ifp);
#ifdef INET6
	in6_if_up(ifp);
#endif

	lwkt_replymsg(&nmsg->lmsg, 0);
}

void
if_route(struct ifnet *ifp, int flag, int fam)
{
	struct netmsg_ifroute msg;

	ASSERT_CANDOMSG_NETISR0(curthread);

	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
	    if_route_dispatch);
	msg.ifp = ifp;
	msg.flag = flag;
	msg.fam = fam;
	lwkt_domsg(netisr_cpuport(0), &msg.base.lmsg, 0);
}

/*
 * Mark an interface down and notify protocols of the transition.  An
 * interface going down is also considered to be a synchronizing event.
 * We must ensure that all packet processing related to the interface
 * has completed before we return, so e.g. the caller can free the ifnet
 * structure that the mbufs may be referencing.
 *
 * NOTE: must be called at splnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{
	if_unroute(ifp, IFF_UP, AF_UNSPEC);
	netmsg_service_sync();
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{
	if_route(ifp, IFF_UP, AF_UNSPEC);
}

/*
 * Process a link state change.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	int link_state = ifp->if_link_state;

	rt_ifmsg(ifp);
	devctl_notify("IFNET", ifp->if_xname,
	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);
}

/*
 * Handle interface watchdog timer routines.  Called
 * from softclock, we decrement timers (if set) and
 * call the appropriate interface routine on expiration.
 */
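/*
 * Driver-side sketch of the watchdog contract implied above (with a
 * hypothetical foo(4) driver, not from this file): foo_start() sets
 * ifp->if_timer = 5 when it hands frames to the hardware, foo_txeof()
 * sets it back to 0 once the TX ring drains, and if five slow-timeout
 * ticks pass without that happening, foo_watchdog() is invoked with
 * the serializers held so it can reset the stuck hardware.
 */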
static void
if_slowtimo_dispatch(netmsg_t nmsg)
{
	struct globaldata *gd = mycpu;
	const struct ifnet_array *arr;
	int i;

	ASSERT_IN_NETISR(0);

	crit_enter_gd(gd);
	lwkt_replymsg(&nmsg->lmsg, 0);	/* reply ASAP */
	crit_exit_gd(gd);

	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];

		crit_enter_gd(gd);

		if (if_stats_compat) {
			IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets);
			IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors);
			IFNET_STAT_GET(ifp, opackets, ifp->if_opackets);
			IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors);
			IFNET_STAT_GET(ifp, collisions, ifp->if_collisions);
			IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes);
			IFNET_STAT_GET(ifp, obytes, ifp->if_obytes);
			IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts);
			IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts);
			IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops);
			IFNET_STAT_GET(ifp, noproto, ifp->if_noproto);
			IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops);
		}

		if (ifp->if_timer == 0 || --ifp->if_timer) {
			crit_exit_gd(gd);
			continue;
		}
		if (ifp->if_watchdog) {
			if (ifnet_tryserialize_all(ifp)) {
				(*ifp->if_watchdog)(ifp);
				ifnet_deserialize_all(ifp);
			} else {
				/* try again next timeout */
				++ifp->if_timer;
			}
		}

		crit_exit_gd(gd);
	}

	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
}

static void
if_slowtimo(void *arg __unused)
{
	struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg;

	KASSERT(mycpuid == 0, ("not on cpu0"));
	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg);
	crit_exit();
}

/*
 * Map an interface name to its interface structure pointer.
 */
struct ifnet *
ifunit(const char *name)
{
	struct ifnet *ifp;

	/*
	 * Search all the interfaces for this name/number
	 */
	KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked"));

	TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
			break;
	}
	return (ifp);
}
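/*
 * Callers of ifunit() must hold the ifnet lock (the KASSERT above
 * enforces this).  A minimal sketch of correct usage, with "em0" as an
 * illustrative name:
 *
 *	ifnet_lock();
 *	ifp = ifunit("em0");
 *	if (ifp != NULL)
 *		... use ifp while the lock is held ...
 *	ifnet_unlock();
 *
 * Code running in a netisr can use ifunit_netisr() below instead,
 * which relies on the synchronized ifnet array rather than the lock.
 */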
struct ifnet *
ifunit_netisr(const char *name)
{
	const struct ifnet_array *arr;
	int i;

	/*
	 * Search all the interfaces for this name/number
	 */

	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];

		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
			return ifp;
	}
	return NULL;
}

/*
 * Interface ioctls.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifnet *ifp;
	struct ifreq *ifr;
	struct ifstat *ifs;
	int error, do_ifup = 0;
	short oif_flags;
	int new_flags;
	size_t namelen, onamelen;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	switch (cmd) {
	case SIOCGIFCONF:
	case OSIOCGIFCONF:
		return (ifconf(cmd, data, cred));
	default:
		break;
	}

	ifr = (struct ifreq *)data;

	switch (cmd) {
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
			cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
	case SIOCIFDESTROY:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_destroy(ifr->ifr_name));
	case SIOCIFGCLONERS:
		return (if_clone_list((struct if_clonereq *)data));
	default:
		break;
	}

	/*
	 * Nominal ioctl through the interface: look up the ifp and obtain
	 * a lock to serialize the ifconfig ioctl operation.
	 */
	ifnet_lock();

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL) {
		ifnet_unlock();
		return (ENXIO);
	}
	error = 0;

	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		ifr->ifr_flagshigh = ifp->if_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFTSOLEN:
		ifr->ifr_tsolen = ifp->if_tsolen;
		break;

	case SIOCGIFDATA:
		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
				sizeof(ifp->if_data));
		break;

	case SIOCGIFPHYS:
		ifr->ifr_phys = ifp->if_physical;
		break;

	case SIOCGIFPOLLCPU:
		ifr->ifr_pollcpu = -1;
		break;

	case SIOCSIFPOLLCPU:
		break;

	case SIOCSIFFLAGS:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		if (ifp->if_flags & IFF_SMART) {
			/* Smart drivers twiddle their own routes */
		} else if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			if_down(ifp);
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			do_ifup = 1;
		}

#ifdef IFPOLL_ENABLE
		if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
			if (new_flags & IFF_NPOLLING)
				ifpoll_register(ifp);
			else
				ifpoll_deregister(ifp);
		}
#endif

		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
		    (new_flags &~ IFF_CANTCHANGE);
		if (new_flags & IFF_PPROMISC) {
			/* Permanently promiscuous mode requested */
			ifp->if_flags |= IFF_PROMISC;
		} else if (ifp->if_pcount == 0) {
			ifp->if_flags &= ~IFF_PROMISC;
		}
		if (ifp->if_ioctl) {
			ifnet_serialize_all(ifp);
			ifp->if_ioctl(ifp, cmd, data, cred);
			ifnet_deserialize_all(ifp);
		}
		if (do_ifup)
			if_up(ifp);
		getmicrotime(&ifp->if_lastchange);
		break;
	case SIOCSIFCAP:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifr->ifr_reqcap & ~ifp->if_capabilities) {
			error = EINVAL;
			break;
		}
		ifnet_serialize_all(ifp);
		ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		break;

	case SIOCSIFNAME:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
		if (error)
			break;
		if (new_name[0] == '\0') {
			error = EINVAL;
			break;
		}
		if (ifunit(new_name) != NULL) {
			error = EEXIST;
			break;
		}

		EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);

		/* Announce the departure of the interface. */
		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);

		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		namelen = strlen(new_name);
		onamelen = sdl->sdl_nlen;
		/*
		 * Move the address if needed.  This is safe because we
		 * allocate space for a name of length IFNAMSIZ when we
		 * create this in if_attach().
		 */
		if (namelen != onamelen) {
			bcopy(sdl->sdl_data + onamelen,
			      sdl->sdl_data + namelen, sdl->sdl_alen);
		}
		bcopy(new_name, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
		bzero(sdl->sdl_data, onamelen);
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;

		EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);

		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
		break;

	case SIOCSIFMETRIC:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		u_long oldmtu = ifp->if_mtu;

		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) {
			error = EINVAL;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp);
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
		}
		break;
	}

	case SIOCSIFTSOLEN:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;

		/* XXX need driver supplied upper limit */
		if (ifr->ifr_tsolen <= 0) {
			error = EINVAL;
			break;
		}
		ifp->if_tsolen = ifr->ifr_tsolen;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;

		/* Don't allow group membership on non-multicast interfaces. */
*/ 2060 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2061 error = EOPNOTSUPP; 2062 break; 2063 } 2064 2065 /* Don't let users screw up protocols' entries. */ 2066 if (ifr->ifr_addr.sa_family != AF_LINK) { 2067 error = EINVAL; 2068 break; 2069 } 2070 2071 if (cmd == SIOCADDMULTI) { 2072 struct ifmultiaddr *ifma; 2073 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); 2074 } else { 2075 error = if_delmulti(ifp, &ifr->ifr_addr); 2076 } 2077 if (error == 0) 2078 getmicrotime(&ifp->if_lastchange); 2079 break; 2080 2081 case SIOCSIFPHYADDR: 2082 case SIOCDIFPHYADDR: 2083 #ifdef INET6 2084 case SIOCSIFPHYADDR_IN6: 2085 #endif 2086 case SIOCSLIFPHYADDR: 2087 case SIOCSIFMEDIA: 2088 case SIOCSIFGENERIC: 2089 error = priv_check_cred(cred, PRIV_ROOT, 0); 2090 if (error) 2091 break; 2092 if (ifp->if_ioctl == 0) { 2093 error = EOPNOTSUPP; 2094 break; 2095 } 2096 ifnet_serialize_all(ifp); 2097 error = ifp->if_ioctl(ifp, cmd, data, cred); 2098 ifnet_deserialize_all(ifp); 2099 if (error == 0) 2100 getmicrotime(&ifp->if_lastchange); 2101 break; 2102 2103 case SIOCGIFSTATUS: 2104 ifs = (struct ifstat *)data; 2105 ifs->ascii[0] = '\0'; 2106 /* fall through */ 2107 case SIOCGIFPSRCADDR: 2108 case SIOCGIFPDSTADDR: 2109 case SIOCGLIFPHYADDR: 2110 case SIOCGIFMEDIA: 2111 case SIOCGIFGENERIC: 2112 if (ifp->if_ioctl == NULL) { 2113 error = EOPNOTSUPP; 2114 break; 2115 } 2116 ifnet_serialize_all(ifp); 2117 error = ifp->if_ioctl(ifp, cmd, data, cred); 2118 ifnet_deserialize_all(ifp); 2119 break; 2120 2121 case SIOCSIFLLADDR: 2122 error = priv_check_cred(cred, PRIV_ROOT, 0); 2123 if (error) 2124 break; 2125 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, 2126 ifr->ifr_addr.sa_len); 2127 EVENTHANDLER_INVOKE(iflladdr_event, ifp); 2128 break; 2129 2130 default: 2131 oif_flags = ifp->if_flags; 2132 if (so->so_proto == 0) { 2133 error = EOPNOTSUPP; 2134 break; 2135 } 2136 error = so_pru_control_direct(so, cmd, data, ifp); 2137 2138 if ((oif_flags ^ ifp->if_flags) & IFF_UP) { 2139 #ifdef INET6 2140 DELAY(100);/* XXX: temporary workaround for fxp issue*/ 2141 if (ifp->if_flags & IFF_UP) { 2142 crit_enter(); 2143 in6_if_up(ifp); 2144 crit_exit(); 2145 } 2146 #endif 2147 } 2148 break; 2149 } 2150 2151 ifnet_unlock(); 2152 return (error); 2153 } 2154 2155 /* 2156 * Set/clear promiscuous mode on interface ifp based on the truth value 2157 * of pswitch. The calls are reference counted so that only the first 2158 * "on" request actually has an effect, as does the final "off" request. 2159 * Results are undefined if the "off" and "on" requests are not matched. 2160 */ 2161 int 2162 ifpromisc(struct ifnet *ifp, int pswitch) 2163 { 2164 struct ifreq ifr; 2165 int error; 2166 int oldflags; 2167 2168 oldflags = ifp->if_flags; 2169 if (ifp->if_flags & IFF_PPROMISC) { 2170 /* Do nothing if device is in permanently promiscuous mode */ 2171 ifp->if_pcount += pswitch ? 1 : -1; 2172 return (0); 2173 } 2174 if (pswitch) { 2175 /* 2176 * If the device is not configured up, we cannot put it in 2177 * promiscuous mode. 
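In that case ENETDOWN is returned below and the reference count is left untouched, so the caller may simply retry once the interface has been brought up.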
2178 */ 2179 if ((ifp->if_flags & IFF_UP) == 0) 2180 return (ENETDOWN); 2181 if (ifp->if_pcount++ != 0) 2182 return (0); 2183 ifp->if_flags |= IFF_PROMISC; 2184 log(LOG_INFO, "%s: promiscuous mode enabled\n", 2185 ifp->if_xname); 2186 } else { 2187 if (--ifp->if_pcount > 0) 2188 return (0); 2189 ifp->if_flags &= ~IFF_PROMISC; 2190 log(LOG_INFO, "%s: promiscuous mode disabled\n", 2191 ifp->if_xname); 2192 } 2193 ifr.ifr_flags = ifp->if_flags; 2194 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2195 ifnet_serialize_all(ifp); 2196 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL); 2197 ifnet_deserialize_all(ifp); 2198 if (error == 0) 2199 rt_ifmsg(ifp); 2200 else 2201 ifp->if_flags = oldflags; 2202 return error; 2203 } 2204 2205 /* 2206 * Return interface configuration 2207 * of system. List may be used 2208 * in later ioctl's (above) to get 2209 * other information. 2210 */ 2211 static int 2212 ifconf(u_long cmd, caddr_t data, struct ucred *cred) 2213 { 2214 struct ifconf *ifc = (struct ifconf *)data; 2215 struct ifnet *ifp; 2216 struct sockaddr *sa; 2217 struct ifreq ifr, *ifrp; 2218 int space = ifc->ifc_len, error = 0; 2219 2220 ifrp = ifc->ifc_req; 2221 2222 ifnet_lock(); 2223 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2224 struct ifaddr_container *ifac, *ifac_mark; 2225 struct ifaddr_marker mark; 2226 struct ifaddrhead *head; 2227 int addrs; 2228 2229 if (space <= sizeof ifr) 2230 break; 2231 2232 /* 2233 * Zero the stack declared structure first to prevent 2234 * memory disclosure. 2235 */ 2236 bzero(&ifr, sizeof(ifr)); 2237 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) 2238 >= sizeof(ifr.ifr_name)) { 2239 error = ENAMETOOLONG; 2240 break; 2241 } 2242 2243 /* 2244 * Add a marker, since copyout() could block and during that 2245 * period the list could be changed. Inserting the marker to 2246 * the header of the list will not cause trouble for the code 2247 * assuming that the first element of the list is AF_LINK; the 2248 * marker will be moved to the next position w/o blocking. 2249 */ 2250 ifa_marker_init(&mark, ifp); 2251 ifac_mark = &mark.ifac; 2252 head = &ifp->if_addrheads[mycpuid]; 2253 2254 addrs = 0; 2255 TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link); 2256 while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) { 2257 struct ifaddr *ifa = ifac->ifa; 2258 2259 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2260 TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); 2261 2262 /* Ignore marker */ 2263 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 2264 continue; 2265 2266 if (space <= sizeof ifr) 2267 break; 2268 sa = ifa->ifa_addr; 2269 if (cred->cr_prison && 2270 prison_if(cred, sa)) 2271 continue; 2272 addrs++; 2273 /* 2274 * Keep a reference on this ifaddr, so that it will 2275 * not be destroyed when its address is copied to 2276 * the userland, which could block. 
2277 */ 2278 IFAREF(ifa); 2279 if (sa->sa_len <= sizeof(*sa)) { 2280 ifr.ifr_addr = *sa; 2281 error = copyout(&ifr, ifrp, sizeof ifr); 2282 ifrp++; 2283 } else { 2284 if (space < (sizeof ifr) + sa->sa_len - 2285 sizeof(*sa)) { 2286 IFAFREE(ifa); 2287 break; 2288 } 2289 space -= sa->sa_len - sizeof(*sa); 2290 error = copyout(&ifr, ifrp, 2291 sizeof ifr.ifr_name); 2292 if (error == 0) 2293 error = copyout(sa, &ifrp->ifr_addr, 2294 sa->sa_len); 2295 ifrp = (struct ifreq *) 2296 (sa->sa_len + (caddr_t)&ifrp->ifr_addr); 2297 } 2298 IFAFREE(ifa); 2299 if (error) 2300 break; 2301 space -= sizeof ifr; 2302 } 2303 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2304 if (error) 2305 break; 2306 if (!addrs) { 2307 bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr); 2308 error = copyout(&ifr, ifrp, sizeof ifr); 2309 if (error) 2310 break; 2311 space -= sizeof ifr; 2312 ifrp++; 2313 } 2314 } 2315 ifnet_unlock(); 2316 2317 ifc->ifc_len -= space; 2318 return (error); 2319 } 2320 2321 /* 2322 * Just like ifpromisc(), but for all-multicast-reception mode. 2323 */ 2324 int 2325 if_allmulti(struct ifnet *ifp, int onswitch) 2326 { 2327 int error = 0; 2328 struct ifreq ifr; 2329 2330 crit_enter(); 2331 2332 if (onswitch) { 2333 if (ifp->if_amcount++ == 0) { 2334 ifp->if_flags |= IFF_ALLMULTI; 2335 ifr.ifr_flags = ifp->if_flags; 2336 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2337 ifnet_serialize_all(ifp); 2338 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2339 NULL); 2340 ifnet_deserialize_all(ifp); 2341 } 2342 } else { 2343 if (ifp->if_amcount > 1) { 2344 ifp->if_amcount--; 2345 } else { 2346 ifp->if_amcount = 0; 2347 ifp->if_flags &= ~IFF_ALLMULTI; 2348 ifr.ifr_flags = ifp->if_flags; 2349 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2350 ifnet_serialize_all(ifp); 2351 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2352 NULL); 2353 ifnet_deserialize_all(ifp); 2354 } 2355 } 2356 2357 crit_exit(); 2358 2359 if (error == 0) 2360 rt_ifmsg(ifp); 2361 return error; 2362 } 2363 2364 /* 2365 * Add a multicast listenership to the interface in question. 2366 * The link layer provides a routine (ifnet.if_resolvemulti) which converts the network-layer group address into the matching link-layer (AF_LINK) address, if one is needed. 2367 */ 2368 int 2369 if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa, 2370 struct ifmultiaddr **retifma) 2371 { 2372 struct sockaddr *llsa, *dupsa; 2373 int error; 2374 struct ifmultiaddr *ifma; 2375 2376 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2377 2378 /* 2379 * If the matching multicast address already exists 2380 * then don't add a new one; just add a reference. 2381 */ 2382 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2383 if (sa_equal(sa, ifma->ifma_addr)) { 2384 ifma->ifma_refcount++; 2385 if (retifma) 2386 *retifma = ifma; 2387 return 0; 2388 } 2389 } 2390 2391 /* 2392 * Give the link layer a chance to accept/reject it, and also 2393 * find out which AF_LINK address this maps to, if it isn't one 2394 * already.
2395 */ 2396 if (ifp->if_resolvemulti) { 2397 error = ifp->if_resolvemulti(ifp, &llsa, sa); 2398 if (error) 2399 return error; 2400 } else { 2401 llsa = NULL; 2402 } 2403 2404 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2405 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT); 2406 bcopy(sa, dupsa, sa->sa_len); 2407 2408 ifma->ifma_addr = dupsa; 2409 ifma->ifma_lladdr = llsa; 2410 ifma->ifma_ifp = ifp; 2411 ifma->ifma_refcount = 1; 2412 ifma->ifma_protospec = NULL; 2413 rt_newmaddrmsg(RTM_NEWMADDR, ifma); 2414 2415 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2416 if (retifma) 2417 *retifma = ifma; 2418 2419 if (llsa != NULL) { 2420 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2421 if (sa_equal(ifma->ifma_addr, llsa)) 2422 break; 2423 } 2424 if (ifma) { 2425 ifma->ifma_refcount++; 2426 } else { 2427 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2428 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT); 2429 bcopy(llsa, dupsa, llsa->sa_len); 2430 ifma->ifma_addr = dupsa; 2431 ifma->ifma_ifp = ifp; 2432 ifma->ifma_refcount = 1; 2433 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2434 } 2435 } 2436 /* 2437 * We are certain we have added something, so call down to the 2438 * interface to let it know about it. 2439 */ 2440 if (ifp->if_ioctl) 2441 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL); 2442 2443 return 0; 2444 } 2445 2446 int 2447 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, 2448 struct ifmultiaddr **retifma) 2449 { 2450 int error; 2451 2452 ifnet_serialize_all(ifp); 2453 error = if_addmulti_serialized(ifp, sa, retifma); 2454 ifnet_deserialize_all(ifp); 2455 2456 return error; 2457 } 2458 2459 /* 2460 * Remove a reference to a multicast address on this interface. Yell 2461 * if the request does not match an existing membership. 2462 */ 2463 static int 2464 if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa) 2465 { 2466 struct ifmultiaddr *ifma; 2467 2468 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2469 2470 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2471 if (sa_equal(sa, ifma->ifma_addr)) 2472 break; 2473 if (ifma == NULL) 2474 return ENOENT; 2475 2476 if (ifma->ifma_refcount > 1) { 2477 ifma->ifma_refcount--; 2478 return 0; 2479 } 2480 2481 rt_newmaddrmsg(RTM_DELMADDR, ifma); 2482 sa = ifma->ifma_lladdr; 2483 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2484 /* 2485 * Make sure the interface driver is notified 2486 * in the case of a link layer mcast group being left. 2487 */ 2488 if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) 2489 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2490 kfree(ifma->ifma_addr, M_IFMADDR); 2491 kfree(ifma, M_IFMADDR); 2492 if (sa == NULL) 2493 return 0; 2494 2495 /* 2496 * Now look for the link-layer address which corresponds to 2497 * this network address. It had been squirreled away in 2498 * ifma->ifma_lladdr for this purpose (so we don't have 2499 * to call ifp->if_resolvemulti() again), and we saved that 2500 * value in sa above. If some nasty deleted the 2501 * link-layer address out from underneath us, we can deal because 2502 * the address we stored is not the same as the one which was 2503 * in the record for the link-layer address. (So we don't complain 2504 * in that case.)
2505 */ 2506 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2507 if (sa_equal(sa, ifma->ifma_addr)) 2508 break; 2509 if (ifma == NULL) 2510 return 0; 2511 2512 if (ifma->ifma_refcount > 1) { 2513 ifma->ifma_refcount--; 2514 return 0; 2515 } 2516 2517 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2518 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2519 kfree(ifma->ifma_addr, M_IFMADDR); 2520 kfree(sa, M_IFMADDR); 2521 kfree(ifma, M_IFMADDR); 2522 2523 return 0; 2524 } 2525 2526 int 2527 if_delmulti(struct ifnet *ifp, struct sockaddr *sa) 2528 { 2529 int error; 2530 2531 ifnet_serialize_all(ifp); 2532 error = if_delmulti_serialized(ifp, sa); 2533 ifnet_deserialize_all(ifp); 2534 2535 return error; 2536 } 2537 2538 /* 2539 * Delete all multicast group membership for an interface. 2540 * Should be used to quickly flush all multicast filters. 2541 */ 2542 void 2543 if_delallmulti_serialized(struct ifnet *ifp) 2544 { 2545 struct ifmultiaddr *ifma, mark; 2546 struct sockaddr sa; 2547 2548 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2549 2550 bzero(&sa, sizeof(sa)); 2551 sa.sa_family = AF_UNSPEC; 2552 sa.sa_len = sizeof(sa); 2553 2554 bzero(&mark, sizeof(mark)); 2555 mark.ifma_addr = &sa; 2556 2557 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link); 2558 while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) { 2559 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2560 TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark, 2561 ifma_link); 2562 2563 if (ifma->ifma_addr->sa_family == AF_UNSPEC) 2564 continue; 2565 2566 if_delmulti_serialized(ifp, ifma->ifma_addr); 2567 } 2568 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2569 } 2570 2571 2572 /* 2573 * Set the link layer address on an interface. 2574 * 2575 * At this time we only support certain types of interfaces, 2576 * and we don't allow the length of the address to change. 2577 */ 2578 int 2579 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) 2580 { 2581 struct sockaddr_dl *sdl; 2582 struct ifreq ifr; 2583 2584 sdl = IF_LLSOCKADDR(ifp); 2585 if (sdl == NULL) 2586 return (EINVAL); 2587 if (len != sdl->sdl_alen) /* don't allow length to change */ 2588 return (EINVAL); 2589 switch (ifp->if_type) { 2590 case IFT_ETHER: /* these types use struct arpcom */ 2591 case IFT_XETHER: 2592 case IFT_L2VLAN: 2593 case IFT_IEEE8023ADLAG: 2594 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len); 2595 bcopy(lladdr, LLADDR(sdl), len); 2596 break; 2597 default: 2598 return (ENODEV); 2599 } 2600 /* 2601 * If the interface is already up, we need 2602 * to re-init it in order to reprogram its 2603 * address filter. 2604 */ 2605 ifnet_serialize_all(ifp); 2606 if ((ifp->if_flags & IFF_UP) != 0) { 2607 #ifdef INET 2608 struct ifaddr_container *ifac; 2609 #endif 2610 2611 ifp->if_flags &= ~IFF_UP; 2612 ifr.ifr_flags = ifp->if_flags; 2613 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2614 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2615 NULL); 2616 ifp->if_flags |= IFF_UP; 2617 ifr.ifr_flags = ifp->if_flags; 2618 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2619 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2620 NULL); 2621 #ifdef INET 2622 /* 2623 * Also send gratuitous ARPs to notify other nodes about 2624 * the address change. 
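One gratuitous ARP is sent for each AF_INET address configured on the interface.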
2625 */ 2626 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 2627 struct ifaddr *ifa = ifac->ifa; 2628 2629 if (ifa->ifa_addr != NULL && 2630 ifa->ifa_addr->sa_family == AF_INET) 2631 arp_gratuitous(ifp, ifa); 2632 } 2633 #endif 2634 } 2635 ifnet_deserialize_all(ifp); 2636 return (0); 2637 } 2638 2639 struct ifmultiaddr * 2640 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp) 2641 { 2642 struct ifmultiaddr *ifma; 2643 2644 /* TODO: need ifnet_serialize_main */ 2645 ifnet_serialize_all(ifp); 2646 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2647 if (sa_equal(ifma->ifma_addr, sa)) 2648 break; 2649 ifnet_deserialize_all(ifp); 2650 2651 return ifma; 2652 } 2653 2654 /* 2655 * This function locates the first real ethernet MAC from a network 2656 * card and loads it into node, returning 0 on success or ENOENT if 2657 * no suitable interfaces were found. It is used by the uuid code to 2658 * generate a unique 6-byte number. 2659 */ 2660 int 2661 if_getanyethermac(uint16_t *node, int minlen) 2662 { 2663 struct ifnet *ifp; 2664 struct sockaddr_dl *sdl; 2665 2666 ifnet_lock(); 2667 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2668 if (ifp->if_type != IFT_ETHER) 2669 continue; 2670 sdl = IF_LLSOCKADDR(ifp); 2671 if (sdl->sdl_alen < minlen) 2672 continue; 2673 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node, 2674 minlen); 2675 ifnet_unlock(); 2676 return(0); 2677 } 2678 ifnet_unlock(); 2679 return (ENOENT); 2680 } 2681 2682 /* 2683 * The name argument must be a pointer to storage which will last as 2684 * long as the interface does. For physical devices, the result of 2685 * device_get_name(dev) is a good choice and for pseudo-devices a 2686 * static string works well. 2687 */ 2688 void 2689 if_initname(struct ifnet *ifp, const char *name, int unit) 2690 { 2691 ifp->if_dname = name; 2692 ifp->if_dunit = unit; 2693 if (unit != IF_DUNIT_NONE) 2694 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); 2695 else 2696 strlcpy(ifp->if_xname, name, IFNAMSIZ); 2697 } 2698 2699 int 2700 if_printf(struct ifnet *ifp, const char *fmt, ...) 
2701 { 2702 __va_list ap; 2703 int retval; 2704 2705 retval = kprintf("%s: ", ifp->if_xname); 2706 __va_start(ap, fmt); 2707 retval += kvprintf(fmt, ap); 2708 __va_end(ap); 2709 return (retval); 2710 } 2711 2712 struct ifnet * 2713 if_alloc(uint8_t type) 2714 { 2715 struct ifnet *ifp; 2716 size_t size; 2717 2718 /* 2719 * XXX temporary hack until arpcom is setup in if_l2com 2720 */ 2721 if (type == IFT_ETHER) 2722 size = sizeof(struct arpcom); 2723 else 2724 size = sizeof(struct ifnet); 2725 2726 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO); 2727 2728 ifp->if_type = type; 2729 2730 if (if_com_alloc[type] != NULL) { 2731 ifp->if_l2com = if_com_alloc[type](type, ifp); 2732 if (ifp->if_l2com == NULL) { 2733 kfree(ifp, M_IFNET); 2734 return (NULL); 2735 } 2736 } 2737 return (ifp); 2738 } 2739 2740 void 2741 if_free(struct ifnet *ifp) 2742 { 2743 kfree(ifp, M_IFNET); 2744 } 2745 2746 void 2747 ifq_set_classic(struct ifaltq *ifq) 2748 { 2749 ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq, 2750 ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request); 2751 } 2752 2753 void 2754 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq, 2755 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request) 2756 { 2757 int q; 2758 2759 KASSERT(mapsubq != NULL, ("mapsubq is not specified")); 2760 KASSERT(enqueue != NULL, ("enqueue is not specified")); 2761 KASSERT(dequeue != NULL, ("dequeue is not specified")); 2762 KASSERT(request != NULL, ("request is not specified")); 2763 2764 ifq->altq_mapsubq = mapsubq; 2765 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 2766 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 2767 2768 ifsq->ifsq_enqueue = enqueue; 2769 ifsq->ifsq_dequeue = dequeue; 2770 ifsq->ifsq_request = request; 2771 } 2772 } 2773 2774 static void 2775 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 2776 { 2777 2778 classq_add(&ifsq->ifsq_norm, m); 2779 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 2780 } 2781 2782 static void 2783 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 2784 { 2785 2786 classq_add(&ifsq->ifsq_prio, m); 2787 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 2788 ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len); 2789 } 2790 2791 static struct mbuf * 2792 ifsq_norm_dequeue(struct ifaltq_subque *ifsq) 2793 { 2794 struct mbuf *m; 2795 2796 m = classq_get(&ifsq->ifsq_norm); 2797 if (m != NULL) 2798 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 2799 return (m); 2800 } 2801 2802 static struct mbuf * 2803 ifsq_prio_dequeue(struct ifaltq_subque *ifsq) 2804 { 2805 struct mbuf *m; 2806 2807 m = classq_get(&ifsq->ifsq_prio); 2808 if (m != NULL) { 2809 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 2810 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len); 2811 } 2812 return (m); 2813 } 2814 2815 int 2816 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m, 2817 struct altq_pktattr *pa __unused) 2818 { 2819 2820 M_ASSERTPKTHDR(m); 2821 again: 2822 if (ifsq->ifsq_len >= ifsq->ifsq_maxlen || 2823 ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) { 2824 struct mbuf *m_drop; 2825 2826 if (m->m_flags & M_PRIO) { 2827 m_drop = NULL; 2828 if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) && 2829 ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) { 2830 /* Try dropping some from normal queue. 
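A prio packet may displace a normal packet only while the prio queue is consuming less than half of the subqueue's packet and byte limits; otherwise, or if the normal queue is empty, the oldest prio packet is dropped instead.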
*/ 2831 m_drop = ifsq_norm_dequeue(ifsq); 2832 } 2833 if (m_drop == NULL) 2834 m_drop = ifsq_prio_dequeue(ifsq); 2835 } else { 2836 m_drop = ifsq_norm_dequeue(ifsq); 2837 } 2838 if (m_drop != NULL) { 2839 IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1); 2840 m_freem(m_drop); 2841 goto again; 2842 } 2843 /* 2844 * No old packets could be dropped! 2845 * NOTE: Caller increases oqdrops. 2846 */ 2847 m_freem(m); 2848 return (ENOBUFS); 2849 } else { 2850 if (m->m_flags & M_PRIO) 2851 ifsq_prio_enqueue(ifsq, m); 2852 else 2853 ifsq_norm_enqueue(ifsq, m); 2854 return (0); 2855 } 2856 } 2857 2858 struct mbuf * 2859 ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op) 2860 { 2861 struct mbuf *m; 2862 2863 switch (op) { 2864 case ALTDQ_POLL: 2865 m = classq_head(&ifsq->ifsq_prio); 2866 if (m == NULL) 2867 m = classq_head(&ifsq->ifsq_norm); 2868 break; 2869 2870 case ALTDQ_REMOVE: 2871 m = ifsq_prio_dequeue(ifsq); 2872 if (m == NULL) 2873 m = ifsq_norm_dequeue(ifsq); 2874 break; 2875 2876 default: 2877 panic("unsupported ALTQ dequeue op: %d", op); 2878 } 2879 return m; 2880 } 2881 2882 int 2883 ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg) 2884 { 2885 switch (req) { 2886 case ALTRQ_PURGE: 2887 for (;;) { 2888 struct mbuf *m; 2889 2890 m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE); 2891 if (m == NULL) 2892 break; 2893 m_freem(m); 2894 } 2895 break; 2896 2897 default: 2898 panic("unsupported ALTQ request: %d", req); 2899 } 2900 return 0; 2901 } 2902 2903 static void 2904 ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched) 2905 { 2906 struct ifnet *ifp = ifsq_get_ifp(ifsq); 2907 int running = 0, need_sched; 2908 2909 /* 2910 * Try a direct ifnet.if_start on the subqueue first; if there is 2911 * contention on the subqueue hardware serializer, ifnet.if_start on 2912 * the subqueue will be scheduled on the subqueue owner CPU instead. 2913 */ 2914 if (!ifsq_tryserialize_hw(ifsq)) { 2915 /* 2916 * Subqueue hardware serializer contention happened; 2917 * ifnet.if_start on the subqueue is scheduled on 2918 * the subqueue owner CPU, and we keep going. 2919 */ 2920 ifsq_ifstart_schedule(ifsq, 1); 2921 return; 2922 } 2923 2924 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 2925 ifp->if_start(ifp, ifsq); 2926 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 2927 running = 1; 2928 } 2929 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 2930 2931 ifsq_deserialize_hw(ifsq); 2932 2933 if (need_sched) { 2934 /* 2935 * More data needs to be transmitted; ifnet.if_start on the 2936 * subqueue is scheduled on the subqueue owner CPU, and we 2937 * keep going. 2938 * NOTE: ifnet.if_start subqueue interlock is not released. 2939 */ 2940 ifsq_ifstart_schedule(ifsq, force_sched); 2941 } 2942 } 2943 2944 /* 2945 * Subqueue packet staging mechanism: 2946 * 2947 * Packets enqueued into a subqueue are staged until a certain amount has 2948 * accumulated before ifnet.if_start on the subqueue is called. In this way, 2949 * the driver can avoid writing to hardware registers for every packet; 2950 * instead, hardware registers need only be written once a batch of 2951 * packets has been put onto the hardware TX ring. Measurements on several 2952 * modern NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) show that this 2953 * aggregation of hardware register writes can save ~20% CPU time when 18-byte 2954 * UDP datagrams are transmitted at 1.48Mpps.
The performance improvement from 2955 * aggregating hardware register writes is also mentioned in Luigi Rizzo's 2956 * netmap paper (http://info.iet.unipi.it/~luigi/netmap/). 2957 * 2958 * Subqueue packet staging is performed for two entry points into drivers' 2959 * transmission function: 2960 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try() 2961 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule() 2962 * 2963 * Subqueue packet staging will be stopped upon any of the following 2964 * conditions: 2965 * - If the count of packets enqueued on the current CPU is greater than or 2966 * equal to ifsq_stage_cntmax. (XXX this should be per-interface) 2967 * - If the total length of packets enqueued on the current CPU is greater 2968 * than or equal to the hardware MTU minus max_protohdr. max_protohdr is 2969 * subtracted from the hardware MTU mainly because a full TCP segment's size 2970 * is usually less than the hardware MTU. 2971 * - ifsq_ifstart_schedule() is not pending on the current CPU and 2972 * ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not 2973 * released. 2974 * - if_start_rollup(), which is registered as a low priority netisr 2975 * rollup function, is called, probably because no more work is pending 2976 * for the netisr. 2977 * 2978 * NOTE: 2979 * Currently subqueue packet staging is only performed in netisr threads. 2980 */ 2981 int 2982 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa) 2983 { 2984 struct ifaltq *ifq = &ifp->if_snd; 2985 struct ifaltq_subque *ifsq; 2986 int error, start = 0, len, mcast = 0, avoid_start = 0; 2987 struct ifsubq_stage_head *head = NULL; 2988 struct ifsubq_stage *stage = NULL; 2989 struct globaldata *gd = mycpu; 2990 struct thread *td = gd->gd_curthread; 2991 2992 crit_enter_quick(td); 2993 2994 ifsq = ifq_map_subq(ifq, gd->gd_cpuid); 2995 ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq); 2996 2997 len = m->m_pkthdr.len; 2998 if (m->m_flags & M_MCAST) 2999 mcast = 1; 3000 3001 if (td->td_type == TD_TYPE_NETISR) { 3002 head = &ifsubq_stage_heads[mycpuid]; 3003 stage = ifsq_get_stage(ifsq, mycpuid); 3004 3005 stage->stg_cnt++; 3006 stage->stg_len += len; 3007 if (stage->stg_cnt < ifsq_stage_cntmax && 3008 stage->stg_len < (ifp->if_mtu - max_protohdr)) 3009 avoid_start = 1; 3010 } 3011 3012 ALTQ_SQ_LOCK(ifsq); 3013 error = ifsq_enqueue_locked(ifsq, m, pa); 3014 if (error) { 3015 IFNET_STAT_INC(ifp, oqdrops, 1); 3016 if (!ifsq_data_ready(ifsq)) { 3017 ALTQ_SQ_UNLOCK(ifsq); 3018 crit_exit_quick(td); 3019 return error; 3020 } 3021 avoid_start = 0; 3022 } 3023 if (!ifsq_is_started(ifsq)) { 3024 if (avoid_start) { 3025 ALTQ_SQ_UNLOCK(ifsq); 3026 3027 KKASSERT(!error); 3028 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 3029 ifsq_stage_insert(head, stage); 3030 3031 IFNET_STAT_INC(ifp, obytes, len); 3032 if (mcast) 3033 IFNET_STAT_INC(ifp, omcasts, 1); 3034 crit_exit_quick(td); 3035 return error; 3036 } 3037 3038 /* 3039 * Hold the subqueue interlock of ifnet.if_start 3040 */ 3041 ifsq_set_started(ifsq); 3042 start = 1; 3043 } 3044 ALTQ_SQ_UNLOCK(ifsq); 3045 3046 if (!error) { 3047 IFNET_STAT_INC(ifp, obytes, len); 3048 if (mcast) 3049 IFNET_STAT_INC(ifp, omcasts, 1); 3050 } 3051 3052 if (stage != NULL) { 3053 if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) { 3054 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 3055 if (!avoid_start) { 3056 ifsq_stage_remove(head, stage); 3057 ifsq_ifstart_schedule(ifsq, 1); 3058 } 3059 crit_exit_quick(td); 3060 return error; 3061 } 3062
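/* The packet was not staged; retire this CPU's staging state: dequeue it if it is still queued, else just reset its counters. */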
3063 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) { 3064 ifsq_stage_remove(head, stage); 3065 } else { 3066 stage->stg_cnt = 0; 3067 stage->stg_len = 0; 3068 } 3069 } 3070 3071 if (!start) { 3072 crit_exit_quick(td); 3073 return error; 3074 } 3075 3076 ifsq_ifstart_try(ifsq, 0); 3077 3078 crit_exit_quick(td); 3079 return error; 3080 } 3081 3082 void * 3083 ifa_create(int size) 3084 { 3085 struct ifaddr *ifa; 3086 int i; 3087 3088 KASSERT(size >= sizeof(*ifa), ("ifaddr size too small")); 3089 3090 ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO); 3091 ifa->ifa_containers = 3092 kmalloc_cachealign(ncpus * sizeof(struct ifaddr_container), 3093 M_IFADDR, M_INTWAIT | M_ZERO); 3094 3095 ifa->ifa_ncnt = ncpus; 3096 for (i = 0; i < ncpus; ++i) { 3097 struct ifaddr_container *ifac = &ifa->ifa_containers[i]; 3098 3099 ifac->ifa_magic = IFA_CONTAINER_MAGIC; 3100 ifac->ifa = ifa; 3101 ifac->ifa_refcnt = 1; 3102 } 3103 #ifdef IFADDR_DEBUG 3104 kprintf("alloc ifa %p %d\n", ifa, size); 3105 #endif 3106 return ifa; 3107 } 3108 3109 void 3110 ifac_free(struct ifaddr_container *ifac, int cpu_id) 3111 { 3112 struct ifaddr *ifa = ifac->ifa; 3113 3114 KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC); 3115 KKASSERT(ifac->ifa_refcnt == 0); 3116 KASSERT(ifac->ifa_listmask == 0, 3117 ("ifa is still on %#x lists", ifac->ifa_listmask)); 3118 3119 ifac->ifa_magic = IFA_CONTAINER_DEAD; 3120 3121 #ifdef IFADDR_DEBUG_VERBOSE 3122 kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id); 3123 #endif 3124 3125 KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus, 3126 ("invalid # of ifac, %d", ifa->ifa_ncnt)); 3127 if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) { 3128 #ifdef IFADDR_DEBUG 3129 kprintf("free ifa %p\n", ifa); 3130 #endif 3131 kfree(ifa->ifa_containers, M_IFADDR); 3132 kfree(ifa, M_IFADDR); 3133 } 3134 } 3135 3136 static void 3137 ifa_iflink_dispatch(netmsg_t nmsg) 3138 { 3139 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3140 struct ifaddr *ifa = msg->ifa; 3141 struct ifnet *ifp = msg->ifp; 3142 int cpu = mycpuid; 3143 struct ifaddr_container *ifac; 3144 3145 crit_enter(); 3146 3147 ifac = &ifa->ifa_containers[cpu]; 3148 ASSERT_IFAC_VALID(ifac); 3149 KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0, 3150 ("ifaddr is on if_addrheads")); 3151 3152 ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD; 3153 if (msg->tail) 3154 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link); 3155 else 3156 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link); 3157 3158 crit_exit(); 3159 3160 netisr_forwardmsg(&nmsg->base, cpu + 1); 3161 } 3162 3163 void 3164 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail) 3165 { 3166 struct netmsg_ifaddr msg; 3167 3168 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3169 0, ifa_iflink_dispatch); 3170 msg.ifa = ifa; 3171 msg.ifp = ifp; 3172 msg.tail = tail; 3173 3174 netisr_domsg(&msg.base, 0); 3175 } 3176 3177 static void 3178 ifa_ifunlink_dispatch(netmsg_t nmsg) 3179 { 3180 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3181 struct ifaddr *ifa = msg->ifa; 3182 struct ifnet *ifp = msg->ifp; 3183 int cpu = mycpuid; 3184 struct ifaddr_container *ifac; 3185 3186 crit_enter(); 3187 3188 ifac = &ifa->ifa_containers[cpu]; 3189 ASSERT_IFAC_VALID(ifac); 3190 KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD, 3191 ("ifaddr is not on if_addrhead")); 3192 3193 TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link); 3194 ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD; 3195 3196 crit_exit(); 3197 3198 netisr_forwardmsg(&nmsg->base, cpu + 1); 3199 } 3200 3201 
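/*
 * ifa_iflink() above and ifa_ifunlink() below follow the usual DragonFly
 * pattern for updating replicated per-cpu state: a netmsg is initialized
 * on the caller's stack, handed to netisr0 via netisr_domsg(), and each
 * netisr's dispatch routine updates its cpu-local copy before forwarding
 * the message to the next cpu with netisr_forwardmsg().  netisr_domsg()
 * does not return until the message has traversed every netisr, so keeping
 * the message on the stack is safe.  A minimal sketch of the same pattern
 * for a hypothetical per-cpu counter (netmsg_cnt, cnt_dispatch, cnt_add
 * and pcpu_cnt are illustrative names, not part of this file):
 *
 *	struct netmsg_cnt {
 *		struct netmsg_base base;
 *		int delta;
 *	};
 *
 *	static int pcpu_cnt[MAXCPU];
 *
 *	static void
 *	cnt_dispatch(netmsg_t nmsg)
 *	{
 *		struct netmsg_cnt *msg = (struct netmsg_cnt *)nmsg;
 *
 *		pcpu_cnt[mycpuid] += msg->delta;
 *		netisr_forwardmsg(&nmsg->base, mycpuid + 1);
 *	}
 *
 *	static void
 *	cnt_add(int delta)
 *	{
 *		struct netmsg_cnt msg;
 *
 *		netmsg_init(&msg.base, NULL, &curthread->td_msgport,
 *		    0, cnt_dispatch);
 *		msg.delta = delta;
 *		netisr_domsg(&msg.base, 0);
 *	}
 */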
void 3202 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp) 3203 { 3204 struct netmsg_ifaddr msg; 3205 3206 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3207 0, ifa_ifunlink_dispatch); 3208 msg.ifa = ifa; 3209 msg.ifp = ifp; 3210 3211 netisr_domsg(&msg.base, 0); 3212 } 3213 3214 static void 3215 ifa_destroy_dispatch(netmsg_t nmsg) 3216 { 3217 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3218 3219 IFAFREE(msg->ifa); 3220 netisr_forwardmsg(&nmsg->base, mycpuid + 1); 3221 } 3222 3223 void 3224 ifa_destroy(struct ifaddr *ifa) 3225 { 3226 struct netmsg_ifaddr msg; 3227 3228 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3229 0, ifa_destroy_dispatch); 3230 msg.ifa = ifa; 3231 3232 netisr_domsg(&msg.base, 0); 3233 } 3234 3235 static void 3236 if_start_rollup(void) 3237 { 3238 struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid]; 3239 struct ifsubq_stage *stage; 3240 3241 crit_enter(); 3242 3243 while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) { 3244 struct ifaltq_subque *ifsq = stage->stg_subq; 3245 int is_sched = 0; 3246 3247 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED) 3248 is_sched = 1; 3249 ifsq_stage_remove(head, stage); 3250 3251 if (is_sched) { 3252 ifsq_ifstart_schedule(ifsq, 1); 3253 } else { 3254 int start = 0; 3255 3256 ALTQ_SQ_LOCK(ifsq); 3257 if (!ifsq_is_started(ifsq)) { 3258 /* 3259 * Hold the subqueue interlock of 3260 * ifnet.if_start 3261 */ 3262 ifsq_set_started(ifsq); 3263 start = 1; 3264 } 3265 ALTQ_SQ_UNLOCK(ifsq); 3266 3267 if (start) 3268 ifsq_ifstart_try(ifsq, 1); 3269 } 3270 KKASSERT((stage->stg_flags & 3271 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 3272 } 3273 3274 crit_exit(); 3275 } 3276 3277 static void 3278 ifnetinit(void *dummy __unused) 3279 { 3280 int i; 3281 3282 for (i = 0; i < ncpus; ++i) 3283 TAILQ_INIT(&ifsubq_stage_heads[i].stg_head); 3284 netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART); 3285 } 3286 3287 void 3288 if_register_com_alloc(u_char type, 3289 if_com_alloc_t *a, if_com_free_t *f) 3290 { 3291 3292 KASSERT(if_com_alloc[type] == NULL, 3293 ("if_register_com_alloc: %d already registered", type)); 3294 KASSERT(if_com_free[type] == NULL, 3295 ("if_register_com_alloc: %d free already registered", type)); 3296 3297 if_com_alloc[type] = a; 3298 if_com_free[type] = f; 3299 } 3300 3301 void 3302 if_deregister_com_alloc(u_char type) 3303 { 3304 3305 KASSERT(if_com_alloc[type] != NULL, 3306 ("if_deregister_com_alloc: %d not registered", type)); 3307 KASSERT(if_com_free[type] != NULL, 3308 ("if_deregister_com_alloc: %d free not registered", type)); 3309 if_com_alloc[type] = NULL; 3310 if_com_free[type] = NULL; 3311 } 3312 3313 void 3314 ifq_set_maxlen(struct ifaltq *ifq, int len) 3315 { 3316 ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax); 3317 } 3318 3319 int 3320 ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused) 3321 { 3322 return ALTQ_SUBQ_INDEX_DEFAULT; 3323 } 3324 3325 int 3326 ifq_mapsubq_mask(struct ifaltq *ifq, int cpuid) 3327 { 3328 3329 return (cpuid & ifq->altq_subq_mappriv); 3330 } 3331 3332 int 3333 ifq_mapsubq_modulo(struct ifaltq *ifq, int cpuid) 3334 { 3335 3336 return (cpuid % ifq->altq_subq_mappriv); 3337 } 3338 3339 static void 3340 ifsq_watchdog(void *arg) 3341 { 3342 struct ifsubq_watchdog *wd = arg; 3343 struct ifnet *ifp; 3344 3345 if (__predict_true(wd->wd_timer == 0 || --wd->wd_timer)) 3346 goto done; 3347 3348 ifp = ifsq_get_ifp(wd->wd_subq); 3349 if (ifnet_tryserialize_all(ifp)) { 3350 wd->wd_watchdog(wd->wd_subq); 3351 
ifnet_deserialize_all(ifp); 3352 } else { 3353 /* try again next timeout */ 3354 wd->wd_timer = 1; 3355 } 3356 done: 3357 ifsq_watchdog_reset(wd); 3358 } 3359 3360 static void 3361 ifsq_watchdog_reset(struct ifsubq_watchdog *wd) 3362 { 3363 callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd, 3364 ifsq_get_cpuid(wd->wd_subq)); 3365 } 3366 3367 void 3368 ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq, 3369 ifsq_watchdog_t watchdog) 3370 { 3371 callout_init_mp(&wd->wd_callout); 3372 wd->wd_timer = 0; 3373 wd->wd_subq = ifsq; 3374 wd->wd_watchdog = watchdog; 3375 } 3376 3377 void 3378 ifsq_watchdog_start(struct ifsubq_watchdog *wd) 3379 { 3380 wd->wd_timer = 0; 3381 ifsq_watchdog_reset(wd); 3382 } 3383 3384 void 3385 ifsq_watchdog_stop(struct ifsubq_watchdog *wd) 3386 { 3387 wd->wd_timer = 0; 3388 callout_stop(&wd->wd_callout); 3389 } 3390 3391 void 3392 ifnet_lock(void) 3393 { 3394 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3395 ("try holding ifnet lock in netisr")); 3396 mtx_lock(&ifnet_mtx); 3397 } 3398 3399 void 3400 ifnet_unlock(void) 3401 { 3402 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3403 ("try holding ifnet lock in netisr")); 3404 mtx_unlock(&ifnet_mtx); 3405 } 3406 3407 static struct ifnet_array * 3408 ifnet_array_alloc(int count) 3409 { 3410 struct ifnet_array *arr; 3411 3412 arr = kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]), 3413 M_IFNET, M_WAITOK); 3414 arr->ifnet_count = count; 3415 3416 return arr; 3417 } 3418 3419 static void 3420 ifnet_array_free(struct ifnet_array *arr) 3421 { 3422 if (arr == &ifnet_array0) 3423 return; 3424 kfree(arr, M_IFNET); 3425 } 3426 3427 static struct ifnet_array * 3428 ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr) 3429 { 3430 struct ifnet_array *arr; 3431 int count, i; 3432 3433 KASSERT(old_arr->ifnet_count >= 0, 3434 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3435 count = old_arr->ifnet_count + 1; 3436 arr = ifnet_array_alloc(count); 3437 3438 /* 3439 * Save the old ifnet array and append this ifp to the end of 3440 * the new ifnet array. 3441 */ 3442 for (i = 0; i < old_arr->ifnet_count; ++i) { 3443 KASSERT(old_arr->ifnet_arr[i] != ifp, 3444 ("%s is already in ifnet array", ifp->if_xname)); 3445 arr->ifnet_arr[i] = old_arr->ifnet_arr[i]; 3446 } 3447 KASSERT(i == count - 1, 3448 ("add %s, ifnet array index mismatch, should be %d, but got %d", 3449 ifp->if_xname, count - 1, i)); 3450 arr->ifnet_arr[i] = ifp; 3451 3452 return arr; 3453 } 3454 3455 static struct ifnet_array * 3456 ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr) 3457 { 3458 struct ifnet_array *arr; 3459 int count, i, idx, found = 0; 3460 3461 KASSERT(old_arr->ifnet_count > 0, 3462 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3463 count = old_arr->ifnet_count - 1; 3464 arr = ifnet_array_alloc(count); 3465 3466 /* 3467 * Save the old ifnet array, but skip this ifp. 
3468 */ 3469 idx = 0; 3470 for (i = 0; i < old_arr->ifnet_count; ++i) { 3471 if (old_arr->ifnet_arr[i] == ifp) { 3472 KASSERT(!found, 3473 ("dup %s is in ifnet array", ifp->if_xname)); 3474 found = 1; 3475 continue; 3476 } 3477 KASSERT(idx < count, 3478 ("invalid ifnet array index %d, count %d", idx, count)); 3479 arr->ifnet_arr[idx] = old_arr->ifnet_arr[i]; 3480 ++idx; 3481 } 3482 KASSERT(found, ("%s is not in ifnet array", ifp->if_xname)); 3483 KASSERT(idx == count, 3484 ("del %s, ifnet array count mismatch, should be %d, but got %d ", 3485 ifp->if_xname, count, idx)); 3486 3487 return arr; 3488 } 3489 3490 const struct ifnet_array * 3491 ifnet_array_get(void) 3492 { 3493 const struct ifnet_array *ret; 3494 3495 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3496 ret = ifnet_array; 3497 /* Make sure 'ret' is really used. */ 3498 cpu_ccfence(); 3499 return (ret); 3500 } 3501 3502 int 3503 ifnet_array_isempty(void) 3504 { 3505 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3506 if (ifnet_array->ifnet_count == 0) 3507 return 1; 3508 else 3509 return 0; 3510 } 3511 3512 void 3513 ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp) 3514 { 3515 struct ifaddr *ifa; 3516 3517 memset(mark, 0, sizeof(*mark)); 3518 ifa = &mark->ifa; 3519 3520 mark->ifac.ifa = ifa; 3521 3522 ifa->ifa_addr = &mark->addr; 3523 ifa->ifa_dstaddr = &mark->dstaddr; 3524 ifa->ifa_netmask = &mark->netmask; 3525 ifa->ifa_ifp = ifp; 3526 } 3527 3528 static int 3529 if_ringcnt_fixup(int ring_cnt, int ring_cntmax) 3530 { 3531 3532 KASSERT(ring_cntmax > 0, ("invalid ring count max %d", ring_cntmax)); 3533 if (ring_cnt == 1 || ring_cntmax == 1 || netisr_ncpus == 1) 3534 return (1); 3535 3536 if (ring_cnt <= 0 || ring_cnt > ring_cntmax) 3537 ring_cnt = ring_cntmax; 3538 if (ring_cnt > netisr_ncpus) 3539 ring_cnt = netisr_ncpus; 3540 return (ring_cnt); 3541 } 3542 3543 static void 3544 if_ringmap_set_grid(device_t dev, struct if_ringmap *rm, int grid) 3545 { 3546 int i, offset; 3547 3548 KASSERT(grid > 0, ("invalid if_ringmap grid %d", grid)); 3549 rm->rm_grid = grid; 3550 3551 offset = (rm->rm_grid * device_get_unit(dev)) % netisr_ncpus; 3552 for (i = 0; i < rm->rm_cnt; ++i) 3553 rm->rm_cpumap[i] = (offset + i) % netisr_ncpus; 3554 } 3555 3556 struct if_ringmap * 3557 if_ringmap_alloc(device_t dev, int ring_cnt, int ring_cntmax) 3558 { 3559 struct if_ringmap *rm; 3560 int i, grid = 0; 3561 3562 ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax); 3563 rm = kmalloc(__offsetof(struct if_ringmap, rm_cpumap[ring_cnt]), 3564 M_DEVBUF, M_WAITOK | M_ZERO); 3565 3566 rm->rm_cnt = ring_cnt; 3567 for (i = 0; i < netisr_ncpus; ++i) { 3568 if (netisr_ncpus % (i + 1) != 0) 3569 continue; 3570 3571 if (rm->rm_cnt > netisr_ncpus / (i + 2)) { 3572 grid = netisr_ncpus / (i + 1); 3573 if (rm->rm_cnt > grid) 3574 rm->rm_cnt = grid; 3575 break; 3576 } 3577 } 3578 if_ringmap_set_grid(dev, rm, grid); 3579 3580 return (rm); 3581 } 3582 3583 void 3584 if_ringmap_free(struct if_ringmap *rm) 3585 { 3586 3587 kfree(rm, M_DEVBUF); 3588 } 3589 3590 void 3591 if_ringmap_align(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3592 { 3593 3594 if (rm0->rm_grid > rm1->rm_grid) 3595 if_ringmap_set_grid(dev, rm1, rm0->rm_grid); 3596 else if (rm0->rm_grid < rm1->rm_grid) 3597 if_ringmap_set_grid(dev, rm0, rm1->rm_grid); 3598 } 3599 3600 void 3601 if_ringmap_match(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3602 { 3603 3604 if (rm0->rm_grid == netisr_ncpus || rm1->rm_grid == netisr_ncpus) 
3605 return; 3606 if_ringmap_align(dev, rm0, rm1); 3607 } 3608 3609 int 3610 if_ringmap_count(const struct if_ringmap *rm) 3611 { 3612 3613 return (rm->rm_cnt); 3614 } 3615 3616 int 3617 if_ringmap_cpumap(const struct if_ringmap *rm, int ring) 3618 { 3619 3620 KASSERT(ring >= 0 && ring < rm->rm_cnt, ("invalid ring %d", ring)); 3621 return (rm->rm_cpumap[ring]); 3622 } 3623 3624 void 3625 if_ringmap_rdrtable(const struct if_ringmap *rm, int table[], int table_nent) 3626 { 3627 int i, grid_idx, grid_cnt, patch_off, patch_cnt, ncopy; 3628 3629 KASSERT(table_nent > 0 && (table_nent & NETISR_CPUMASK) == 0, 3630 ("invalid redirect table entries %d", table_nent)); 3631 3632 grid_idx = 0; 3633 for (i = 0; i < NETISR_CPUMAX; ++i) { 3634 table[i] = grid_idx++ % rm->rm_cnt; 3635 3636 if (grid_idx == rm->rm_grid) 3637 grid_idx = 0; 3638 } 3639 3640 /* 3641 * Make the ring distributed more evenly for the remainder of each 3642 * grid. 3643 */ 3644 patch_cnt = rm->rm_grid % rm->rm_cnt; 3645 if (patch_cnt == 0) 3646 goto done; 3647 patch_off = rm->rm_grid - (rm->rm_grid % rm->rm_cnt); 3648 3649 grid_cnt = roundup(NETISR_CPUMAX, rm->rm_grid) / rm->rm_grid; 3650 grid_idx = 0; 3651 for (i = 0; i < grid_cnt; ++i) { 3652 int j; 3653 3654 for (j = 0; j < patch_cnt; ++j) { 3655 int fix_idx; 3656 3657 fix_idx = (i * rm->rm_grid) + patch_off + j; 3658 if (fix_idx >= NETISR_CPUMAX) 3659 goto done; 3660 table[fix_idx] = grid_idx++ % rm->rm_cnt; 3661 } 3662 } 3663 done: 3664 ncopy = table_nent / NETISR_CPUMAX; 3665 for (i = 1; i < ncopy; ++i) { 3666 memcpy(&table[i * NETISR_CPUMAX], table, 3667 NETISR_CPUMAX * sizeof(table[0])); 3668 } 3669 if (if_ringmap_dumprdr) { 3670 for (i = 0; i < table_nent; ++i) { 3671 if (i != 0 && i % 16 == 0) 3672 kprintf("\n"); 3673 kprintf("%03d ", table[i]); 3674 } 3675 kprintf("\n"); 3676 } 3677 } 3678 3679 int 3680 if_ringmap_cpumap_sysctl(SYSCTL_HANDLER_ARGS) 3681 { 3682 struct if_ringmap *rm = arg1; 3683 int i, error = 0; 3684 3685 for (i = 0; i < rm->rm_cnt; ++i) { 3686 int cpu = rm->rm_cpumap[i]; 3687 3688 error = SYSCTL_OUT(req, &cpu, sizeof(cpu)); 3689 if (error) 3690 break; 3691 } 3692 return (error); 3693 } 3694 3695 int 3696 if_ring_count2(int ring_cnt, int ring_cntmax) 3697 { 3698 3699 ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax); 3700 return (1 << (fls(ring_cnt) - 1)); 3701 } 3702
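/*
 * Example if_ringmap usage from a driver's attach path (a sketch only;
 * sc, MYDRV_NRX, MYDRV_NRX_MAX, MYDRV_NTX, MYDRV_NTX_MAX and
 * MYDRV_RDR_NENT are hypothetical driver-side names, not APIs defined
 * in this file):
 *
 *	sc->rx_rmap = if_ringmap_alloc(dev, MYDRV_NRX, MYDRV_NRX_MAX);
 *	sc->tx_rmap = if_ringmap_alloc(dev, MYDRV_NTX, MYDRV_NTX_MAX);
 *	if_ringmap_match(dev, sc->rx_rmap, sc->tx_rmap);
 *
 *	nrx = if_ringmap_count(sc->rx_rmap);
 *	for (i = 0; i < nrx; ++i) {
 *		cpu = if_ringmap_cpumap(sc->rx_rmap, i);
 *		(bind RX ring i's interrupt/polling to this cpu)
 *	}
 *
 *	(fill the NIC's RSS redirect table; the entry count must be a
 *	multiple of NETISR_CPUMAX, see the KASSERT in if_ringmap_rdrtable())
 *	int table[MYDRV_RDR_NENT];
 *	if_ringmap_rdrtable(sc->rx_rmap, table, MYDRV_RDR_NENT);
 *
 * and on detach:
 *
 *	if_ringmap_free(sc->rx_rmap);
 *	if_ringmap_free(sc->tx_rmap);
 */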