1 /* 2 * Copyright (c) 1980, 1986, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)if.c 8.3 (Berkeley) 1/4/94 30 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $ 31 */ 32 33 #include "opt_inet6.h" 34 #include "opt_inet.h" 35 #include "opt_ifpoll.h" 36 37 #include <sys/param.h> 38 #include <sys/malloc.h> 39 #include <sys/mbuf.h> 40 #include <sys/systm.h> 41 #include <sys/proc.h> 42 #include <sys/caps.h> 43 #include <sys/protosw.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/socketops.h> 47 #include <sys/kernel.h> 48 #include <sys/ktr.h> 49 #include <sys/mutex.h> 50 #include <sys/lock.h> 51 #include <sys/sockio.h> 52 #include <sys/syslog.h> 53 #include <sys/sysctl.h> 54 #include <sys/domain.h> 55 #include <sys/thread.h> 56 #include <sys/serialize.h> 57 #include <sys/bus.h> 58 #include <sys/jail.h> 59 60 #include <sys/thread2.h> 61 #include <sys/msgport2.h> 62 #include <sys/mutex2.h> 63 64 #include <net/if.h> 65 #include <net/if_arp.h> 66 #include <net/if_dl.h> 67 #include <net/if_types.h> 68 #include <net/if_var.h> 69 #include <net/if_ringmap.h> 70 #include <net/ifq_var.h> 71 #include <net/radix.h> 72 #include <net/route.h> 73 #include <net/if_clone.h> 74 #include <net/netisr2.h> 75 #include <net/netmsg2.h> 76 77 #include <machine/atomic.h> 78 #include <machine/stdarg.h> 79 #include <machine/smp.h> 80 81 #if defined(INET) || defined(INET6) 82 #include <netinet/in.h> 83 #include <netinet/in_var.h> 84 #include <netinet/if_ether.h> 85 #ifdef INET6 86 #include <netinet6/in6_var.h> 87 #include <netinet6/in6_ifattach.h> 88 #endif /* INET6 */ 89 #endif /* INET || INET6 */ 90 91 struct netmsg_ifaddr { 92 struct netmsg_base base; 93 struct ifaddr *ifa; 94 struct ifnet *ifp; 95 int tail; 96 }; 97 98 struct ifsubq_stage_head { 99 TAILQ_HEAD(, ifsubq_stage) stg_head; 100 } __cachealign; 101 102 struct if_ringmap { 103 int rm_cnt; 104 int rm_grid; 105 int rm_cpumap[]; 106 }; 107 108 #define RINGMAP_FLAG_NONE 0x0 109 #define 
RINGMAP_FLAG_POWEROF2 0x1 110 111 /* 112 * System initialization 113 */ 114 static void if_attachdomain(void *); 115 static void if_attachdomain1(struct ifnet *); 116 static int ifconf(u_long, caddr_t, struct ucred *); 117 static void ifinit(void *); 118 static void ifnetinit(void *); 119 static void if_slowtimo(void *); 120 static int if_rtdel(struct radix_node *, void *); 121 static void if_slowtimo_dispatch(netmsg_t); 122 123 /* Helper functions */ 124 static void ifsq_watchdog_reset(struct ifsubq_watchdog *); 125 static int if_delmulti_serialized(struct ifnet *, struct sockaddr *); 126 static struct ifnet_array *ifnet_array_alloc(int); 127 static void ifnet_array_free(struct ifnet_array *); 128 static struct ifnet_array *ifnet_array_add(struct ifnet *, 129 const struct ifnet_array *); 130 static struct ifnet_array *ifnet_array_del(struct ifnet *, 131 const struct ifnet_array *); 132 static struct ifg_group *if_creategroup(const char *); 133 static int if_destroygroup(struct ifg_group *); 134 static int if_delgroup_locked(struct ifnet *, const char *); 135 static int if_getgroups(struct ifgroupreq *, struct ifnet *); 136 static int if_getgroupmembers(struct ifgroupreq *); 137 138 #ifdef INET6 139 /* 140 * XXX: declare here to avoid to include many inet6 related files.. 141 * should be more generalized? 142 */ 143 extern void nd6_setmtu(struct ifnet *); 144 #endif 145 146 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); 147 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); 148 SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap"); 149 150 static int ifsq_stage_cntmax = 16; 151 TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax); 152 SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW, 153 &ifsq_stage_cntmax, 0, "ifq staging packet count max"); 154 155 static int if_stats_compat = 0; 156 SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW, 157 &if_stats_compat, 0, "Compat the old ifnet stats"); 158 159 static int if_ringmap_dumprdr = 0; 160 SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW, 161 &if_ringmap_dumprdr, 0, "dump redirect table"); 162 163 /* Interface description */ 164 static unsigned int ifdescr_maxlen = 1024; 165 SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, 166 &ifdescr_maxlen, 0, 167 "administrative maximum length for interface description"); 168 169 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL); 170 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL); 171 172 static if_com_alloc_t *if_com_alloc[256]; 173 static if_com_free_t *if_com_free[256]; 174 175 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); 176 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); 177 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure"); 178 MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); 179 180 int ifqmaxlen = IFQ_MAXLEN; 181 struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet); 182 struct ifgrouphead ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head); 183 static struct lock ifgroup_lock; 184 185 static struct ifnet_array ifnet_array0; 186 static struct ifnet_array *ifnet_array = &ifnet_array0; 187 188 static struct callout if_slowtimo_timer; 189 static struct netmsg_base if_slowtimo_netmsg; 190 191 int if_index = 0; 192 struct ifnet **ifindex2ifnet = NULL; 193 static struct mtx ifnet_mtx = MTX_INITIALIZER("ifnet"); 194 195 static struct ifsubq_stage_head ifsubq_stage_heads[MAXCPU]; 196 197 #ifdef notyet 198 #define IFQ_KTR_STRING 
"ifq=%p" 199 #define IFQ_KTR_ARGS struct ifaltq *ifq 200 #ifndef KTR_IFQ 201 #define KTR_IFQ KTR_ALL 202 #endif 203 KTR_INFO_MASTER(ifq); 204 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS); 205 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS); 206 #define logifq(name, arg) KTR_LOG(ifq_ ## name, arg) 207 208 #define IF_START_KTR_STRING "ifp=%p" 209 #define IF_START_KTR_ARGS struct ifnet *ifp 210 #ifndef KTR_IF_START 211 #define KTR_IF_START KTR_ALL 212 #endif 213 KTR_INFO_MASTER(if_start); 214 KTR_INFO(KTR_IF_START, if_start, run, 0, 215 IF_START_KTR_STRING, IF_START_KTR_ARGS); 216 KTR_INFO(KTR_IF_START, if_start, sched, 1, 217 IF_START_KTR_STRING, IF_START_KTR_ARGS); 218 KTR_INFO(KTR_IF_START, if_start, avoid, 2, 219 IF_START_KTR_STRING, IF_START_KTR_ARGS); 220 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3, 221 IF_START_KTR_STRING, IF_START_KTR_ARGS); 222 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4, 223 IF_START_KTR_STRING, IF_START_KTR_ARGS); 224 #define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg) 225 #endif /* notyet */ 226 227 /* 228 * Network interface utility routines. 229 * 230 * Routines with ifa_ifwith* names take sockaddr *'s as 231 * parameters. 232 */ 233 /* ARGSUSED */ 234 static void 235 ifinit(void *dummy) 236 { 237 lockinit(&ifgroup_lock, "ifgroup", 0, 0); 238 239 callout_init_mp(&if_slowtimo_timer); 240 netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport, 241 MSGF_PRIORITY, if_slowtimo_dispatch); 242 243 /* Start if_slowtimo */ 244 lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg); 245 } 246 247 static void 248 ifsq_ifstart_ipifunc(void *arg) 249 { 250 struct ifaltq_subque *ifsq = arg; 251 struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid); 252 253 crit_enter(); 254 if (lmsg->ms_flags & MSGF_DONE) 255 lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg); 256 crit_exit(); 257 } 258 259 static __inline void 260 ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 261 { 262 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 263 TAILQ_REMOVE(&head->stg_head, stage, stg_link); 264 stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED); 265 stage->stg_cnt = 0; 266 stage->stg_len = 0; 267 } 268 269 static __inline void 270 ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 271 { 272 KKASSERT((stage->stg_flags & 273 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 274 stage->stg_flags |= IFSQ_STAGE_FLAG_QUED; 275 TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link); 276 } 277 278 /* 279 * Schedule ifnet.if_start on the subqueue owner CPU 280 */ 281 static void 282 ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force) 283 { 284 int cpu; 285 286 if (!force && curthread->td_type == TD_TYPE_NETISR && 287 ifsq_stage_cntmax > 0) { 288 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); 289 290 stage->stg_cnt = 0; 291 stage->stg_len = 0; 292 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 293 ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage); 294 stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED; 295 return; 296 } 297 298 cpu = ifsq_get_cpuid(ifsq); 299 if (cpu != mycpuid) 300 lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq); 301 else 302 ifsq_ifstart_ipifunc(ifsq); 303 } 304 305 /* 306 * NOTE: 307 * This function will release ifnet.if_start subqueue interlock, 308 * if ifnet.if_start for the subqueue does not need to be scheduled 309 */ 310 static __inline int 311 ifsq_ifstart_need_schedule(struct ifaltq_subque 
*ifsq, int running) 312 { 313 if (!running || ifsq_is_empty(ifsq) 314 #ifdef ALTQ 315 || ifsq->ifsq_altq->altq_tbr != NULL 316 #endif 317 ) { 318 ALTQ_SQ_LOCK(ifsq); 319 /* 320 * ifnet.if_start subqueue interlock is released, if: 321 * 1) Hardware can not take any packets, due to 322 * o interface is marked down 323 * o hardware queue is full (ifsq_is_oactive) 324 * Under the second situation, hardware interrupt 325 * or polling(4) will call/schedule ifnet.if_start 326 * on the subqueue when hardware queue is ready 327 * 2) There is no packet in the subqueue. 328 * Further ifq_dispatch or ifq_handoff will call/ 329 * schedule ifnet.if_start on the subqueue. 330 * 3) TBR is used and it does not allow further 331 * dequeueing. 332 * TBR callout will call ifnet.if_start on the 333 * subqueue. 334 */ 335 if (!running || !ifsq_data_ready(ifsq)) { 336 ifsq_clr_started(ifsq); 337 ALTQ_SQ_UNLOCK(ifsq); 338 return 0; 339 } 340 ALTQ_SQ_UNLOCK(ifsq); 341 } 342 return 1; 343 } 344 345 static void 346 ifsq_ifstart_dispatch(netmsg_t msg) 347 { 348 struct lwkt_msg *lmsg = &msg->base.lmsg; 349 struct ifaltq_subque *ifsq = lmsg->u.ms_resultp; 350 struct ifnet *ifp = ifsq_get_ifp(ifsq); 351 struct globaldata *gd = mycpu; 352 int running = 0, need_sched; 353 354 crit_enter_gd(gd); 355 356 lwkt_replymsg(lmsg, 0); /* reply ASAP */ 357 358 if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) { 359 /* 360 * We need to chase the subqueue owner CPU change. 361 */ 362 ifsq_ifstart_schedule(ifsq, 1); 363 crit_exit_gd(gd); 364 return; 365 } 366 367 ifsq_serialize_hw(ifsq); 368 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 369 ifp->if_start(ifp, ifsq); 370 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 371 running = 1; 372 } 373 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 374 ifsq_deserialize_hw(ifsq); 375 376 if (need_sched) { 377 /* 378 * More data need to be transmitted, ifnet.if_start is 379 * scheduled on the subqueue owner CPU, and we keep going. 380 * NOTE: ifnet.if_start subqueue interlock is not released. 381 */ 382 ifsq_ifstart_schedule(ifsq, 0); 383 } 384 385 crit_exit_gd(gd); 386 } 387 388 /* Device driver ifnet.if_start helper function */ 389 void 390 ifsq_devstart(struct ifaltq_subque *ifsq) 391 { 392 struct ifnet *ifp = ifsq_get_ifp(ifsq); 393 int running = 0; 394 395 ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq); 396 397 ALTQ_SQ_LOCK(ifsq); 398 if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) { 399 ALTQ_SQ_UNLOCK(ifsq); 400 return; 401 } 402 ifsq_set_started(ifsq); 403 ALTQ_SQ_UNLOCK(ifsq); 404 405 ifp->if_start(ifp, ifsq); 406 407 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 408 running = 1; 409 410 if (ifsq_ifstart_need_schedule(ifsq, running)) { 411 /* 412 * More data need to be transmitted, ifnet.if_start is 413 * scheduled on ifnet's CPU, and we keep going. 414 * NOTE: ifnet.if_start interlock is not released. 
415 */ 416 ifsq_ifstart_schedule(ifsq, 0); 417 } 418 } 419 420 void 421 if_devstart(struct ifnet *ifp) 422 { 423 ifsq_devstart(ifq_get_subq_default(&ifp->if_snd)); 424 } 425 426 /* Device driver ifnet.if_start schedule helper function */ 427 void 428 ifsq_devstart_sched(struct ifaltq_subque *ifsq) 429 { 430 ifsq_ifstart_schedule(ifsq, 1); 431 } 432 433 void 434 if_devstart_sched(struct ifnet *ifp) 435 { 436 ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd)); 437 } 438 439 static void 440 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 441 { 442 lwkt_serialize_enter(ifp->if_serializer); 443 } 444 445 static void 446 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 447 { 448 lwkt_serialize_exit(ifp->if_serializer); 449 } 450 451 static int 452 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 453 { 454 return lwkt_serialize_try(ifp->if_serializer); 455 } 456 457 #ifdef INVARIANTS 458 static void 459 if_default_serialize_assert(struct ifnet *ifp, 460 enum ifnet_serialize slz __unused, 461 boolean_t serialized) 462 { 463 if (serialized) 464 ASSERT_SERIALIZED(ifp->if_serializer); 465 else 466 ASSERT_NOT_SERIALIZED(ifp->if_serializer); 467 } 468 #endif 469 470 /* 471 * Attach an interface to the list of "active" interfaces. 472 * 473 * The serializer is optional. 474 */ 475 void 476 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer) 477 { 478 unsigned socksize; 479 int namelen, masklen; 480 struct sockaddr_dl *sdl, *sdl_addr; 481 struct ifaddr *ifa; 482 struct ifaltq *ifq; 483 struct ifnet **old_ifindex2ifnet = NULL; 484 struct ifnet_array *old_ifnet_array; 485 int i, q, qlen; 486 char qlenname[64]; 487 488 static int if_indexlim = 8; 489 490 if (ifp->if_serialize != NULL) { 491 KASSERT(ifp->if_deserialize != NULL && 492 ifp->if_tryserialize != NULL && 493 ifp->if_serialize_assert != NULL, 494 ("serialize functions are partially setup")); 495 496 /* 497 * If the device supplies serialize functions, 498 * then clear if_serializer to catch any invalid 499 * usage of this field. 500 */ 501 KASSERT(serializer == NULL, 502 ("both serialize functions and default serializer " 503 "are supplied")); 504 ifp->if_serializer = NULL; 505 } else { 506 KASSERT(ifp->if_deserialize == NULL && 507 ifp->if_tryserialize == NULL && 508 ifp->if_serialize_assert == NULL, 509 ("serialize functions are partially setup")); 510 ifp->if_serialize = if_default_serialize; 511 ifp->if_deserialize = if_default_deserialize; 512 ifp->if_tryserialize = if_default_tryserialize; 513 #ifdef INVARIANTS 514 ifp->if_serialize_assert = if_default_serialize_assert; 515 #endif 516 517 /* 518 * The serializer can be passed in from the device, 519 * allowing the same serializer to be used for both 520 * the interrupt interlock and the device queue. 521 * If not specified, the netif structure will use an 522 * embedded serializer. 523 */ 524 if (serializer == NULL) { 525 serializer = &ifp->if_default_serializer; 526 lwkt_serialize_init(serializer); 527 } 528 ifp->if_serializer = serializer; 529 } 530 531 /* 532 * Make if_addrhead available on all CPUs, since they 533 * could be accessed by any threads. 
534 */ 535 ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead), 536 M_IFADDR, M_WAITOK | M_ZERO); 537 for (i = 0; i < ncpus; ++i) 538 TAILQ_INIT(&ifp->if_addrheads[i]); 539 540 TAILQ_INIT(&ifp->if_multiaddrs); 541 TAILQ_INIT(&ifp->if_groups); 542 getmicrotime(&ifp->if_lastchange); 543 if_addgroup(ifp, IFG_ALL); 544 545 /* 546 * create a Link Level name for this device 547 */ 548 namelen = strlen(ifp->if_xname); 549 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; 550 socksize = masklen + ifp->if_addrlen; 551 if (socksize < sizeof(*sdl)) 552 socksize = sizeof(*sdl); 553 socksize = RT_ROUNDUP(socksize); 554 ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize); 555 sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1); 556 sdl->sdl_len = socksize; 557 sdl->sdl_family = AF_LINK; 558 bcopy(ifp->if_xname, sdl->sdl_data, namelen); 559 sdl->sdl_nlen = namelen; 560 sdl->sdl_type = ifp->if_type; 561 ifp->if_lladdr = ifa; 562 ifa->ifa_ifp = ifp; 563 ifa->ifa_addr = (struct sockaddr *)sdl; 564 sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); 565 ifa->ifa_netmask = (struct sockaddr *)sdl; 566 sdl->sdl_len = masklen; 567 while (namelen != 0) 568 sdl->sdl_data[--namelen] = 0xff; 569 ifa_iflink(ifa, ifp, 0 /* Insert head */); 570 571 /* 572 * Make if_data available on all CPUs, since they could 573 * be updated by hardware interrupt routing, which could 574 * be bound to any CPU. 575 */ 576 ifp->if_data_pcpu = kmalloc(ncpus * sizeof(struct ifdata_pcpu), 577 M_DEVBUF, 578 M_WAITOK | M_ZERO | M_CACHEALIGN); 579 580 if (ifp->if_mapsubq == NULL) 581 ifp->if_mapsubq = ifq_mapsubq_default; 582 583 ifq = &ifp->if_snd; 584 ifq->altq_type = 0; 585 ifq->altq_disc = NULL; 586 ifq->altq_flags &= ALTQF_CANTCHANGE; 587 ifq->altq_tbr = NULL; 588 ifq->altq_ifp = ifp; 589 590 if (ifq->altq_subq_cnt <= 0) 591 ifq->altq_subq_cnt = 1; 592 ifq->altq_subq = 593 kmalloc(ifq->altq_subq_cnt * sizeof(struct ifaltq_subque), 594 M_DEVBUF, 595 M_WAITOK | M_ZERO | M_CACHEALIGN); 596 597 if (ifq->altq_maxlen == 0) { 598 if_printf(ifp, "driver didn't set altq_maxlen\n"); 599 ifq_set_maxlen(ifq, ifqmaxlen); 600 } 601 602 /* Allow user to override driver's setting. */ 603 ksnprintf(qlenname, sizeof(qlenname), "net.%s.qlenmax", ifp->if_xname); 604 qlen = -1; 605 TUNABLE_INT_FETCH(qlenname, &qlen); 606 if (qlen > 0) { 607 if_printf(ifp, "qlenmax -> %d\n", qlen); 608 ifq_set_maxlen(ifq, qlen); 609 } 610 611 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 612 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 613 614 ALTQ_SQ_LOCK_INIT(ifsq); 615 ifsq->ifsq_index = q; 616 617 ifsq->ifsq_altq = ifq; 618 ifsq->ifsq_ifp = ifp; 619 620 ifsq->ifsq_maxlen = ifq->altq_maxlen; 621 ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES; 622 ifsq->ifsq_prepended = NULL; 623 ifsq->ifsq_started = 0; 624 ifsq->ifsq_hw_oactive = 0; 625 ifsq_set_cpuid(ifsq, 0); 626 if (ifp->if_serializer != NULL) 627 ifsq_set_hw_serialize(ifsq, ifp->if_serializer); 628 629 /* XXX: netisr_ncpus */ 630 ifsq->ifsq_stage = 631 kmalloc(ncpus * sizeof(struct ifsubq_stage), 632 M_DEVBUF, 633 M_WAITOK | M_ZERO | M_CACHEALIGN); 634 for (i = 0; i < ncpus; ++i) 635 ifsq->ifsq_stage[i].stg_subq = ifsq; 636 637 /* 638 * Allocate one if_start message for each CPU, since 639 * the hardware TX ring could be assigned to any CPU. 640 * 641 * NOTE: 642 * If the hardware TX ring polling CPU and the hardware 643 * TX ring interrupt CPU are same, one if_start message 644 * should be enough. 
645 */ 646 ifsq->ifsq_ifstart_nmsg = 647 kmalloc(ncpus * sizeof(struct netmsg_base), 648 M_LWKTMSG, M_WAITOK); 649 for (i = 0; i < ncpus; ++i) { 650 netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL, 651 &netisr_adone_rport, 0, ifsq_ifstart_dispatch); 652 ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq; 653 } 654 } 655 ifq_set_classic(ifq); 656 657 /* 658 * Increase mbuf cluster/jcluster limits for the mbufs that 659 * could sit on the device queues for quite some time. 660 */ 661 if (ifp->if_nmbclusters > 0) 662 mcl_inclimit(ifp->if_nmbclusters); 663 if (ifp->if_nmbjclusters > 0) 664 mjcl_inclimit(ifp->if_nmbjclusters); 665 666 /* 667 * Install this ifp into ifindex2inet, ifnet queue and ifnet 668 * array after it is setup. 669 * 670 * Protect ifindex2ifnet, ifnet queue and ifnet array changes 671 * by ifnet lock, so that non-netisr threads could get a 672 * consistent view. 673 */ 674 ifnet_lock(); 675 676 /* Don't update if_index until ifindex2ifnet is setup */ 677 ifp->if_index = if_index + 1; 678 sdl_addr->sdl_index = ifp->if_index; 679 680 /* 681 * Install this ifp into ifindex2ifnet 682 */ 683 if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) { 684 unsigned int n; 685 struct ifnet **q; 686 687 /* 688 * Grow ifindex2ifnet 689 */ 690 if_indexlim <<= 1; 691 n = if_indexlim * sizeof(*q); 692 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO); 693 if (ifindex2ifnet != NULL) { 694 bcopy(ifindex2ifnet, q, n/2); 695 /* Free old ifindex2ifnet after sync all netisrs */ 696 old_ifindex2ifnet = ifindex2ifnet; 697 } 698 ifindex2ifnet = q; 699 } 700 ifindex2ifnet[ifp->if_index] = ifp; 701 /* 702 * Update if_index after this ifp is installed into ifindex2ifnet, 703 * so that netisrs could get a consistent view of ifindex2ifnet. 704 */ 705 cpu_sfence(); 706 if_index = ifp->if_index; 707 708 /* 709 * Install this ifp into ifnet array. 710 */ 711 /* Free old ifnet array after sync all netisrs */ 712 old_ifnet_array = ifnet_array; 713 ifnet_array = ifnet_array_add(ifp, old_ifnet_array); 714 715 /* 716 * Install this ifp into ifnet queue. 717 */ 718 TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link); 719 720 ifnet_unlock(); 721 722 /* 723 * Sync all netisrs so that the old ifindex2ifnet and ifnet array 724 * are no longer accessed and we can free them safely later on. 725 */ 726 netmsg_service_sync(); 727 if (old_ifindex2ifnet != NULL) 728 kfree(old_ifindex2ifnet, M_IFADDR); 729 ifnet_array_free(old_ifnet_array); 730 731 if (!SLIST_EMPTY(&domains)) 732 if_attachdomain1(ifp); 733 734 /* Announce the interface. 
*/ 735 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 736 devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); 737 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 738 } 739 740 static void 741 if_attachdomain(void *dummy) 742 { 743 struct ifnet *ifp; 744 745 ifnet_lock(); 746 TAILQ_FOREACH(ifp, &ifnetlist, if_list) 747 if_attachdomain1(ifp); 748 ifnet_unlock(); 749 } 750 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, 751 if_attachdomain, NULL); 752 753 static void 754 if_attachdomain1(struct ifnet *ifp) 755 { 756 struct domain *dp; 757 758 crit_enter(); 759 760 /* address family dependent data region */ 761 bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); 762 SLIST_FOREACH(dp, &domains, dom_next) 763 if (dp->dom_ifattach) 764 ifp->if_afdata[dp->dom_family] = 765 (*dp->dom_ifattach)(ifp); 766 crit_exit(); 767 } 768 769 /* 770 * Purge all addresses whose type is _not_ AF_LINK 771 */ 772 static void 773 if_purgeaddrs_nolink_dispatch(netmsg_t nmsg) 774 { 775 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp; 776 struct ifaddr_container *ifac, *next; 777 778 ASSERT_NETISR0; 779 780 /* 781 * The ifaddr processing in the following loop will block, 782 * however, this function is called in netisr0, in which 783 * ifaddr list changes happen, so we don't care about the 784 * blockness of the ifaddr processing here. 785 */ 786 TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid], 787 ifa_link, next) { 788 struct ifaddr *ifa = ifac->ifa; 789 790 /* Ignore marker */ 791 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 792 continue; 793 794 /* Leave link ifaddr as it is */ 795 if (ifa->ifa_addr->sa_family == AF_LINK) 796 continue; 797 #ifdef INET 798 /* XXX: Ugly!! ad hoc just for INET */ 799 if (ifa->ifa_addr->sa_family == AF_INET) { 800 struct ifaliasreq ifr; 801 struct sockaddr_in saved_addr, saved_dst; 802 #ifdef IFADDR_DEBUG_VERBOSE 803 int i; 804 805 kprintf("purge in4 addr %p: ", ifa); 806 for (i = 0; i < ncpus; ++i) { 807 kprintf("%d ", 808 ifa->ifa_containers[i].ifa_refcnt); 809 } 810 kprintf("\n"); 811 #endif 812 813 /* Save information for panic. 
*/ 814 memcpy(&saved_addr, ifa->ifa_addr, sizeof(saved_addr)); 815 if (ifa->ifa_dstaddr != NULL) { 816 memcpy(&saved_dst, ifa->ifa_dstaddr, 817 sizeof(saved_dst)); 818 } else { 819 memset(&saved_dst, 0, sizeof(saved_dst)); 820 } 821 822 bzero(&ifr, sizeof ifr); 823 ifr.ifra_addr = *ifa->ifa_addr; 824 if (ifa->ifa_dstaddr) 825 ifr.ifra_broadaddr = *ifa->ifa_dstaddr; 826 if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp, 827 NULL) == 0) 828 continue; 829 830 /* MUST NOT HAPPEN */ 831 panic("%s: in_control failed %x, dst %x", ifp->if_xname, 832 ntohl(saved_addr.sin_addr.s_addr), 833 ntohl(saved_dst.sin_addr.s_addr)); 834 } 835 #endif /* INET */ 836 #ifdef INET6 837 if (ifa->ifa_addr->sa_family == AF_INET6) { 838 #ifdef IFADDR_DEBUG_VERBOSE 839 int i; 840 841 kprintf("purge in6 addr %p: ", ifa); 842 for (i = 0; i < ncpus; ++i) { 843 kprintf("%d ", 844 ifa->ifa_containers[i].ifa_refcnt); 845 } 846 kprintf("\n"); 847 #endif 848 849 in6_purgeaddr(ifa); 850 /* ifp_addrhead is already updated */ 851 continue; 852 } 853 #endif /* INET6 */ 854 if_printf(ifp, "destroy ifaddr family %d\n", 855 ifa->ifa_addr->sa_family); 856 ifa_ifunlink(ifa, ifp); 857 ifa_destroy(ifa); 858 } 859 860 netisr_replymsg(&nmsg->base, 0); 861 } 862 863 void 864 if_purgeaddrs_nolink(struct ifnet *ifp) 865 { 866 struct netmsg_base nmsg; 867 868 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 869 if_purgeaddrs_nolink_dispatch); 870 nmsg.lmsg.u.ms_resultp = ifp; 871 netisr_domsg(&nmsg, 0); 872 } 873 874 static void 875 ifq_stage_detach_handler(netmsg_t nmsg) 876 { 877 struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp; 878 int q; 879 880 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 881 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 882 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); 883 884 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) 885 ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage); 886 } 887 lwkt_replymsg(&nmsg->lmsg, 0); 888 } 889 890 static void 891 ifq_stage_detach(struct ifaltq *ifq) 892 { 893 struct netmsg_base base; 894 int cpu; 895 896 netmsg_init(&base, NULL, &curthread->td_msgport, 0, 897 ifq_stage_detach_handler); 898 base.lmsg.u.ms_resultp = ifq; 899 900 /* XXX netisr_ncpus */ 901 for (cpu = 0; cpu < ncpus; ++cpu) 902 lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0); 903 } 904 905 struct netmsg_if_rtdel { 906 struct netmsg_base base; 907 struct ifnet *ifp; 908 }; 909 910 static void 911 if_rtdel_dispatch(netmsg_t msg) 912 { 913 struct netmsg_if_rtdel *rmsg = (void *)msg; 914 int i, cpu; 915 916 cpu = mycpuid; 917 ASSERT_NETISR_NCPUS(cpu); 918 919 for (i = 1; i <= AF_MAX; i++) { 920 struct radix_node_head *rnh; 921 922 if ((rnh = rt_tables[cpu][i]) == NULL) 923 continue; 924 rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp); 925 } 926 netisr_forwardmsg(&msg->base, cpu + 1); 927 } 928 929 /* 930 * Detach an interface, removing it from the 931 * list of "active" interfaces. 932 */ 933 void 934 if_detach(struct ifnet *ifp) 935 { 936 struct ifnet_array *old_ifnet_array; 937 struct ifg_list *ifgl; 938 struct netmsg_if_rtdel msg; 939 struct domain *dp; 940 int q; 941 942 /* Announce that the interface is gone. */ 943 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 944 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 945 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); 946 947 /* 948 * Remove this ifp from ifindex2inet, ifnet queue and ifnet 949 * array before it is whacked. 
950 * 951 * Protect ifindex2ifnet, ifnet queue and ifnet array changes 952 * by ifnet lock, so that non-netisr threads could get a 953 * consistent view. 954 */ 955 ifnet_lock(); 956 957 /* 958 * Remove this ifp from ifindex2ifnet and maybe decrement if_index. 959 */ 960 ifindex2ifnet[ifp->if_index] = NULL; 961 while (if_index > 0 && ifindex2ifnet[if_index] == NULL) 962 if_index--; 963 964 /* 965 * Remove this ifp from ifnet queue. 966 */ 967 TAILQ_REMOVE(&ifnetlist, ifp, if_link); 968 969 /* 970 * Remove this ifp from ifnet array. 971 */ 972 /* Free old ifnet array after sync all netisrs */ 973 old_ifnet_array = ifnet_array; 974 ifnet_array = ifnet_array_del(ifp, old_ifnet_array); 975 976 ifnet_unlock(); 977 978 ifgroup_lockmgr(LK_EXCLUSIVE); 979 while ((ifgl = TAILQ_FIRST(&ifp->if_groups)) != NULL) 980 if_delgroup_locked(ifp, ifgl->ifgl_group->ifg_group); 981 ifgroup_lockmgr(LK_RELEASE); 982 983 /* 984 * Sync all netisrs so that the old ifnet array is no longer 985 * accessed and we can free it safely later on. 986 */ 987 netmsg_service_sync(); 988 ifnet_array_free(old_ifnet_array); 989 990 /* 991 * Remove routes and flush queues. 992 */ 993 crit_enter(); 994 #ifdef IFPOLL_ENABLE 995 if (ifp->if_flags & IFF_NPOLLING) 996 ifpoll_deregister(ifp); 997 #endif 998 if_down(ifp); 999 1000 /* Decrease the mbuf clusters/jclusters limits increased by us */ 1001 if (ifp->if_nmbclusters > 0) 1002 mcl_inclimit(-ifp->if_nmbclusters); 1003 if (ifp->if_nmbjclusters > 0) 1004 mjcl_inclimit(-ifp->if_nmbjclusters); 1005 1006 #ifdef ALTQ 1007 if (ifq_is_enabled(&ifp->if_snd)) 1008 altq_disable(&ifp->if_snd); 1009 if (ifq_is_attached(&ifp->if_snd)) 1010 altq_detach(&ifp->if_snd); 1011 #endif 1012 1013 /* 1014 * Clean up all addresses. 1015 */ 1016 ifp->if_lladdr = NULL; 1017 1018 if_purgeaddrs_nolink(ifp); 1019 if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) { 1020 struct ifaddr *ifa; 1021 1022 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 1023 KASSERT(ifa->ifa_addr->sa_family == AF_LINK, 1024 ("non-link ifaddr is left on if_addrheads")); 1025 1026 ifa_ifunlink(ifa, ifp); 1027 ifa_destroy(ifa); 1028 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]), 1029 ("there are still ifaddrs left on if_addrheads")); 1030 } 1031 1032 #ifdef INET 1033 /* 1034 * Remove all IPv4 kernel structures related to ifp. 1035 */ 1036 in_ifdetach(ifp); 1037 #endif 1038 1039 #ifdef INET6 1040 /* 1041 * Remove all IPv6 kernel structs related to ifp. This should be done 1042 * before removing routing entries below, since IPv6 interface direct 1043 * routes are expected to be removed by the IPv6-specific kernel API. 1044 * Otherwise, the kernel will detect some inconsistency and bark it. 
1045 */ 1046 in6_ifdetach(ifp); 1047 #endif 1048 1049 /* 1050 * Delete all remaining routes using this interface 1051 */ 1052 netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 1053 if_rtdel_dispatch); 1054 msg.ifp = ifp; 1055 netisr_domsg_global(&msg.base); 1056 1057 SLIST_FOREACH(dp, &domains, dom_next) { 1058 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) 1059 (*dp->dom_ifdetach)(ifp, 1060 ifp->if_afdata[dp->dom_family]); 1061 } 1062 1063 kfree(ifp->if_addrheads, M_IFADDR); 1064 1065 lwkt_synchronize_ipiqs("if_detach"); 1066 ifq_stage_detach(&ifp->if_snd); 1067 1068 for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) { 1069 struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q]; 1070 1071 kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG); 1072 kfree(ifsq->ifsq_stage, M_DEVBUF); 1073 } 1074 kfree(ifp->if_snd.altq_subq, M_DEVBUF); 1075 1076 kfree(ifp->if_data_pcpu, M_DEVBUF); 1077 1078 crit_exit(); 1079 } 1080 1081 int 1082 ifgroup_lockmgr(u_int flags) 1083 { 1084 return lockmgr(&ifgroup_lock, flags); 1085 } 1086 1087 /* 1088 * Create an empty interface group. 1089 */ 1090 static struct ifg_group * 1091 if_creategroup(const char *groupname) 1092 { 1093 struct ifg_group *ifg; 1094 1095 ifg = kmalloc(sizeof(*ifg), M_IFNET, M_WAITOK); 1096 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); 1097 ifg->ifg_refcnt = 0; 1098 ifg->ifg_carp_demoted = 0; 1099 TAILQ_INIT(&ifg->ifg_members); 1100 1101 ifgroup_lockmgr(LK_EXCLUSIVE); 1102 TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next); 1103 ifgroup_lockmgr(LK_RELEASE); 1104 1105 EVENTHANDLER_INVOKE(group_attach_event, ifg); 1106 1107 return (ifg); 1108 } 1109 1110 /* 1111 * Destroy an empty interface group. 1112 */ 1113 static int 1114 if_destroygroup(struct ifg_group *ifg) 1115 { 1116 KASSERT(ifg->ifg_refcnt == 0, 1117 ("trying to delete a non-empty interface group")); 1118 1119 ifgroup_lockmgr(LK_EXCLUSIVE); 1120 TAILQ_REMOVE(&ifg_head, ifg, ifg_next); 1121 ifgroup_lockmgr(LK_RELEASE); 1122 1123 EVENTHANDLER_INVOKE(group_detach_event, ifg); 1124 kfree(ifg, M_IFNET); 1125 1126 return (0); 1127 } 1128 1129 /* 1130 * Add the interface to a group. 1131 * The target group will be created if it doesn't exist. 1132 */ 1133 int 1134 if_addgroup(struct ifnet *ifp, const char *groupname) 1135 { 1136 struct ifg_list *ifgl; 1137 struct ifg_group *ifg; 1138 struct ifg_member *ifgm; 1139 1140 if (groupname[0] && 1141 groupname[strlen(groupname) - 1] >= '0' && 1142 groupname[strlen(groupname) - 1] <= '9') 1143 return (EINVAL); 1144 1145 ifgroup_lockmgr(LK_SHARED); 1146 1147 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1148 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) { 1149 ifgroup_lockmgr(LK_RELEASE); 1150 return (EEXIST); 1151 } 1152 } 1153 1154 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1155 if (strcmp(ifg->ifg_group, groupname) == 0) 1156 break; 1157 } 1158 1159 ifgroup_lockmgr(LK_RELEASE); 1160 1161 if (ifg == NULL) 1162 ifg = if_creategroup(groupname); 1163 1164 ifgl = kmalloc(sizeof(*ifgl), M_IFNET, M_WAITOK); 1165 ifgm = kmalloc(sizeof(*ifgm), M_IFNET, M_WAITOK); 1166 ifgl->ifgl_group = ifg; 1167 ifgm->ifgm_ifp = ifp; 1168 ifg->ifg_refcnt++; 1169 1170 ifgroup_lockmgr(LK_EXCLUSIVE); 1171 TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); 1172 TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); 1173 ifgroup_lockmgr(LK_RELEASE); 1174 1175 EVENTHANDLER_INVOKE(group_change_event, groupname); 1176 1177 return (0); 1178 } 1179 1180 /* 1181 * Remove the interface from a group. 
1182 * The group will be destroyed if it becomes empty. 1183 * 1184 * The 'ifgroup_lock' must be hold exclusively when calling this. 1185 */ 1186 static int 1187 if_delgroup_locked(struct ifnet *ifp, const char *groupname) 1188 { 1189 struct ifg_list *ifgl; 1190 struct ifg_member *ifgm; 1191 1192 KKASSERT(lockstatus(&ifgroup_lock, curthread) == LK_EXCLUSIVE); 1193 1194 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1195 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) 1196 break; 1197 } 1198 if (ifgl == NULL) 1199 return (ENOENT); 1200 1201 TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); 1202 1203 TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) { 1204 if (ifgm->ifgm_ifp == ifp) 1205 break; 1206 } 1207 1208 if (ifgm != NULL) { 1209 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); 1210 1211 ifgroup_lockmgr(LK_RELEASE); 1212 EVENTHANDLER_INVOKE(group_change_event, groupname); 1213 ifgroup_lockmgr(LK_EXCLUSIVE); 1214 1215 kfree(ifgm, M_IFNET); 1216 ifgl->ifgl_group->ifg_refcnt--; 1217 } 1218 1219 if (ifgl->ifgl_group->ifg_refcnt == 0) { 1220 ifgroup_lockmgr(LK_RELEASE); 1221 if_destroygroup(ifgl->ifgl_group); 1222 ifgroup_lockmgr(LK_EXCLUSIVE); 1223 } 1224 1225 kfree(ifgl, M_IFNET); 1226 1227 return (0); 1228 } 1229 1230 int 1231 if_delgroup(struct ifnet *ifp, const char *groupname) 1232 { 1233 int error; 1234 1235 ifgroup_lockmgr(LK_EXCLUSIVE); 1236 error = if_delgroup_locked(ifp, groupname); 1237 ifgroup_lockmgr(LK_RELEASE); 1238 1239 return (error); 1240 } 1241 1242 /* 1243 * Store all the groups that the interface belongs to in memory 1244 * pointed to by data. 1245 */ 1246 static int 1247 if_getgroups(struct ifgroupreq *ifgr, struct ifnet *ifp) 1248 { 1249 struct ifg_list *ifgl; 1250 struct ifg_req *ifgrq, *p; 1251 int len, error; 1252 1253 len = 0; 1254 ifgroup_lockmgr(LK_SHARED); 1255 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 1256 len += sizeof(struct ifg_req); 1257 ifgroup_lockmgr(LK_RELEASE); 1258 1259 if (ifgr->ifgr_len == 0) { 1260 /* 1261 * Caller is asking how much memory should be allocated in 1262 * the next request in order to hold all the groups. 1263 */ 1264 ifgr->ifgr_len = len; 1265 return (0); 1266 } else if (ifgr->ifgr_len != len) { 1267 return (EINVAL); 1268 } 1269 1270 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO); 1271 if (ifgrq == NULL) 1272 return (ENOMEM); 1273 1274 ifgroup_lockmgr(LK_SHARED); 1275 p = ifgrq; 1276 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1277 if (len < sizeof(struct ifg_req)) { 1278 ifgroup_lockmgr(LK_RELEASE); 1279 error = EINVAL; 1280 goto failed; 1281 } 1282 1283 strlcpy(p->ifgrq_group, ifgl->ifgl_group->ifg_group, 1284 sizeof(ifgrq->ifgrq_group)); 1285 len -= sizeof(struct ifg_req); 1286 p++; 1287 } 1288 ifgroup_lockmgr(LK_RELEASE); 1289 1290 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len); 1291 failed: 1292 kfree(ifgrq, M_TEMP); 1293 return error; 1294 } 1295 1296 /* 1297 * Store all the members of a group in memory pointed to by data. 
1298 */ 1299 static int 1300 if_getgroupmembers(struct ifgroupreq *ifgr) 1301 { 1302 struct ifg_group *ifg; 1303 struct ifg_member *ifgm; 1304 struct ifg_req *ifgrq, *p; 1305 int len, error; 1306 1307 ifgroup_lockmgr(LK_SHARED); 1308 1309 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1310 if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0) 1311 break; 1312 } 1313 if (ifg == NULL) { 1314 ifgroup_lockmgr(LK_RELEASE); 1315 return (ENOENT); 1316 } 1317 1318 len = 0; 1319 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) 1320 len += sizeof(struct ifg_req); 1321 1322 ifgroup_lockmgr(LK_RELEASE); 1323 1324 if (ifgr->ifgr_len == 0) { 1325 ifgr->ifgr_len = len; 1326 return (0); 1327 } else if (ifgr->ifgr_len != len) { 1328 return (EINVAL); 1329 } 1330 1331 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO); 1332 if (ifgrq == NULL) 1333 return (ENOMEM); 1334 1335 ifgroup_lockmgr(LK_SHARED); 1336 p = ifgrq; 1337 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { 1338 if (len < sizeof(struct ifg_req)) { 1339 ifgroup_lockmgr(LK_RELEASE); 1340 error = EINVAL; 1341 goto failed; 1342 } 1343 1344 strlcpy(p->ifgrq_member, ifgm->ifgm_ifp->if_xname, 1345 sizeof(p->ifgrq_member)); 1346 len -= sizeof(struct ifg_req); 1347 p++; 1348 } 1349 ifgroup_lockmgr(LK_RELEASE); 1350 1351 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len); 1352 failed: 1353 kfree(ifgrq, M_TEMP); 1354 return error; 1355 } 1356 1357 static int 1358 ifa_maintain_loopback_route(int cmd, struct ifaddr *ifa, struct sockaddr *ia) 1359 { 1360 struct sockaddr_dl null_sdl; 1361 struct rt_addrinfo info; 1362 struct ifaddr *rti_ifa; 1363 struct ifnet *ifp; 1364 int error; 1365 1366 /* RTM_CHANGE is unsupported in rtrequest1() yet. */ 1367 KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD); 1368 1369 rti_ifa = NULL; 1370 ifp = ifa->ifa_ifp; 1371 1372 bzero(&null_sdl, sizeof(null_sdl)); 1373 null_sdl.sdl_len = sizeof(null_sdl); 1374 null_sdl.sdl_family = AF_LINK; 1375 null_sdl.sdl_index = ifp->if_index; 1376 null_sdl.sdl_type = ifp->if_type; 1377 1378 bzero(&info, sizeof(info)); 1379 if (cmd != RTM_DELETE) 1380 info.rti_ifp = loif; 1381 if (cmd == RTM_ADD) { 1382 /* 1383 * Explicitly specify the loopback IFA. 1384 */ 1385 rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp); 1386 if (rti_ifa != NULL) { 1387 /* 1388 * The loopback IFA wouldn't disappear, but ref it 1389 * for safety. 1390 */ 1391 IFAREF(rti_ifa); 1392 info.rti_ifa = rti_ifa; 1393 } 1394 } 1395 info.rti_info[RTAX_DST] = ia; 1396 info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; 1397 /* 1398 * Manually set RTF_LOCAL so that the IFA and IFP wouldn't be 1399 * overrided to be the owner of the destination address (ia) 1400 * by in_addroute(). 1401 */ 1402 info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_LOCAL; 1403 1404 error = rtrequest1_global(cmd, &info, NULL, NULL, RTREQ_PRIO_NORM); 1405 1406 if (rti_ifa != NULL) 1407 IFAFREE(rti_ifa); 1408 1409 if (error == 0 || 1410 (cmd == RTM_ADD && error == EEXIST) || 1411 (cmd == RTM_DELETE && (error == ESRCH || error == ENOENT))) 1412 return (error); 1413 1414 log(LOG_DEBUG, "%s: %s failed for interface %s: %d\n", 1415 __func__, (cmd == RTM_ADD ? 
"insertion" : "deletion"), 1416 ifp->if_xname, error); 1417 return (error); 1418 } 1419 1420 int 1421 ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) 1422 { 1423 return ifa_maintain_loopback_route(RTM_ADD, ifa, ia); 1424 } 1425 1426 int 1427 ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) 1428 { 1429 return ifa_maintain_loopback_route(RTM_DELETE, ifa, ia); 1430 } 1431 1432 /* 1433 * Delete Routes for a Network Interface 1434 * 1435 * Called for each routing entry via the rnh->rnh_walktree() call above 1436 * to delete all route entries referencing a detaching network interface. 1437 * 1438 * Arguments: 1439 * rn pointer to node in the routing table 1440 * arg argument passed to rnh->rnh_walktree() - detaching interface 1441 * 1442 * Returns: 1443 * 0 successful 1444 * errno failed - reason indicated 1445 * 1446 */ 1447 static int 1448 if_rtdel(struct radix_node *rn, void *arg) 1449 { 1450 struct rtentry *rt = (struct rtentry *)rn; 1451 struct ifnet *ifp = arg; 1452 int err; 1453 1454 if (rt->rt_ifp == ifp) { 1455 1456 /* 1457 * Protect (sorta) against walktree recursion problems 1458 * with cloned routes 1459 */ 1460 if (!(rt->rt_flags & RTF_UP)) 1461 return (0); 1462 1463 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 1464 rt_mask(rt), rt->rt_flags, 1465 NULL); 1466 if (err) { 1467 log(LOG_WARNING, "if_rtdel: error %d\n", err); 1468 } 1469 } 1470 1471 return (0); 1472 } 1473 1474 static __inline boolean_t 1475 ifa_match_withmask(const struct ifaddr *ifa, const struct sockaddr *addr) 1476 { 1477 const char *cp, *cp2, *cp3, *cplim; 1478 1479 KKASSERT(ifa->ifa_addr->sa_family == addr->sa_family); 1480 1481 cp = addr->sa_data; 1482 cp2 = ifa->ifa_addr->sa_data; 1483 cp3 = ifa->ifa_netmask->sa_data; 1484 cplim = (const char *)ifa->ifa_netmask + ifa->ifa_netmask->sa_len; 1485 1486 while (cp3 < cplim) { 1487 if ((*cp++ ^ *cp2++) & *cp3++) 1488 return (FALSE); 1489 } 1490 1491 return (TRUE); 1492 } 1493 1494 static __inline boolean_t 1495 ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa) 1496 { 1497 if (old_ifa == NULL) 1498 return (TRUE); 1499 1500 if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 && 1501 (cur_ifa->ifa_ifp->if_flags & IFF_UP)) 1502 return (TRUE); 1503 if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 && 1504 (cur_ifa->ifa_flags & IFA_ROUTE)) 1505 return (TRUE); 1506 1507 return (FALSE); 1508 } 1509 1510 /* 1511 * Locate an interface based on a complete address. 1512 */ 1513 struct ifaddr * 1514 ifa_ifwithaddr(struct sockaddr *addr) 1515 { 1516 const struct ifnet_array *arr; 1517 int i; 1518 1519 arr = ifnet_array_get(); 1520 for (i = 0; i < arr->ifnet_count; ++i) { 1521 struct ifnet *ifp = arr->ifnet_arr[i]; 1522 struct ifaddr_container *ifac; 1523 1524 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1525 struct ifaddr *ifa = ifac->ifa; 1526 1527 if (ifa->ifa_addr->sa_family != addr->sa_family) 1528 continue; 1529 if (sa_equal(addr, ifa->ifa_addr)) 1530 return (ifa); 1531 if ((ifp->if_flags & IFF_BROADCAST) && 1532 ifa->ifa_broadaddr && 1533 /* IPv6 doesn't have broadcast */ 1534 ifa->ifa_broadaddr->sa_len != 0 && 1535 sa_equal(ifa->ifa_broadaddr, addr)) 1536 return (ifa); 1537 } 1538 } 1539 return (NULL); 1540 } 1541 1542 /* 1543 * Locate the point to point interface with a given destination address. 
1544 */ 1545 struct ifaddr * 1546 ifa_ifwithdstaddr(struct sockaddr *addr) 1547 { 1548 const struct ifnet_array *arr; 1549 int i; 1550 1551 arr = ifnet_array_get(); 1552 for (i = 0; i < arr->ifnet_count; ++i) { 1553 struct ifnet *ifp = arr->ifnet_arr[i]; 1554 struct ifaddr_container *ifac; 1555 1556 if (!(ifp->if_flags & IFF_POINTOPOINT)) 1557 continue; 1558 1559 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1560 struct ifaddr *ifa = ifac->ifa; 1561 1562 if (ifa->ifa_addr->sa_family != addr->sa_family) 1563 continue; 1564 if (ifa->ifa_dstaddr && 1565 sa_equal(addr, ifa->ifa_dstaddr)) 1566 return (ifa); 1567 } 1568 } 1569 return (NULL); 1570 } 1571 1572 /* 1573 * Find an interface on a specific network. If many, choice 1574 * is most specific found. 1575 */ 1576 struct ifaddr * 1577 ifa_ifwithnet(struct sockaddr *addr) 1578 { 1579 struct ifaddr *ifa_maybe = NULL; 1580 u_int af = addr->sa_family; 1581 const struct ifnet_array *arr; 1582 int i; 1583 1584 /* 1585 * AF_LINK addresses can be looked up directly by their index number, 1586 * so do that if we can. 1587 */ 1588 if (af == AF_LINK) { 1589 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; 1590 1591 if (sdl->sdl_index && sdl->sdl_index <= if_index) 1592 return (ifindex2ifnet[sdl->sdl_index]->if_lladdr); 1593 } 1594 1595 /* 1596 * Scan though each interface, looking for ones that have 1597 * addresses in this address family. 1598 */ 1599 arr = ifnet_array_get(); 1600 for (i = 0; i < arr->ifnet_count; ++i) { 1601 struct ifnet *ifp = arr->ifnet_arr[i]; 1602 struct ifaddr_container *ifac; 1603 1604 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1605 struct ifaddr *ifa = ifac->ifa; 1606 1607 if (ifa->ifa_addr->sa_family != af) 1608 continue; 1609 if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) { 1610 /* 1611 * This is a bit broken as it doesn't 1612 * take into account that the remote end may 1613 * be a single node in the network we are 1614 * looking for. 1615 * The trouble is that we don't know the 1616 * netmask for the remote end. 1617 */ 1618 if (ifa->ifa_dstaddr != NULL && 1619 sa_equal(addr, ifa->ifa_dstaddr)) 1620 return (ifa); 1621 } else { 1622 /* 1623 * If we have a special address handler, 1624 * then use it instead of the generic one. 1625 */ 1626 if (ifa->ifa_claim_addr) { 1627 if ((*ifa->ifa_claim_addr)(ifa, addr)) { 1628 return (ifa); 1629 } else { 1630 continue; 1631 } 1632 } 1633 1634 if (ifa->ifa_netmask == NULL || 1635 !ifa_match_withmask(ifa, addr)) 1636 continue; 1637 1638 /* 1639 * If the netmask of what we just found 1640 * is more specific than what we had before 1641 * (if we had one) then remember the new one 1642 * before continuing to search for an even 1643 * better one. If the netmasks are equal, 1644 * we prefer the this ifa based on the result 1645 * of ifa_prefer(). 1646 */ 1647 if (ifa_maybe == NULL || 1648 rn_refines(ifa->ifa_netmask, 1649 ifa_maybe->ifa_netmask) || 1650 (sa_equal(ifa_maybe->ifa_netmask, 1651 ifa->ifa_netmask) && 1652 ifa_prefer(ifa, ifa_maybe))) 1653 ifa_maybe = ifa; 1654 } 1655 } 1656 } 1657 1658 return (ifa_maybe); 1659 } 1660 1661 /* 1662 * Find an interface address specific to an interface best matching 1663 * a given address. 
1664 */ 1665 struct ifaddr * 1666 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp) 1667 { 1668 struct ifaddr_container *ifac; 1669 struct ifaddr *ifa_maybe = NULL; 1670 u_int af = addr->sa_family; 1671 1672 if (af >= AF_MAX) 1673 return (NULL); 1674 1675 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1676 struct ifaddr *ifa = ifac->ifa; 1677 1678 if (ifa->ifa_addr->sa_family != af) 1679 continue; 1680 if (ifa_maybe == NULL) 1681 ifa_maybe = ifa; 1682 if (ifa->ifa_netmask == NULL) { 1683 if (sa_equal(addr, ifa->ifa_addr) || 1684 (ifa->ifa_dstaddr != NULL && 1685 sa_equal(addr, ifa->ifa_dstaddr))) 1686 return (ifa); 1687 continue; 1688 } 1689 if (ifp->if_flags & IFF_POINTOPOINT) { 1690 if (sa_equal(addr, ifa->ifa_dstaddr)) 1691 return (ifa); 1692 } else { 1693 if (ifa_match_withmask(ifa, addr)) 1694 return (ifa); 1695 } 1696 } 1697 1698 return (ifa_maybe); 1699 } 1700 1701 struct netmsg_if { 1702 struct netmsg_base base; 1703 struct ifnet *ifp; 1704 }; 1705 1706 /* 1707 * Mark an interface down and notify protocols of the transition. 1708 */ 1709 static void 1710 if_down_dispatch(netmsg_t nmsg) 1711 { 1712 struct netmsg_if *msg = (struct netmsg_if *)nmsg; 1713 struct ifnet *ifp = msg->ifp; 1714 struct ifaddr_container *ifac; 1715 struct domain *dp; 1716 1717 ASSERT_NETISR0; 1718 1719 ifp->if_flags &= ~IFF_UP; 1720 getmicrotime(&ifp->if_lastchange); 1721 rt_ifmsg(ifp); 1722 1723 /* 1724 * The ifaddr processing in the following loop will block, 1725 * however, this function is called in netisr0, in which 1726 * ifaddr list changes happen, so we don't care about the 1727 * blockness of the ifaddr processing here. 1728 */ 1729 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1730 struct ifaddr *ifa = ifac->ifa; 1731 1732 /* Ignore marker */ 1733 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 1734 continue; 1735 1736 kpfctlinput(PRC_IFDOWN, ifa->ifa_addr); 1737 } 1738 1739 SLIST_FOREACH(dp, &domains, dom_next) 1740 if (dp->dom_if_down != NULL) 1741 dp->dom_if_down(ifp); 1742 1743 ifq_purge_all(&ifp->if_snd); 1744 netisr_replymsg(&nmsg->base, 0); 1745 } 1746 1747 /* 1748 * Mark an interface up and notify protocols of the transition. 1749 */ 1750 static void 1751 if_up_dispatch(netmsg_t nmsg) 1752 { 1753 struct netmsg_if *msg = (struct netmsg_if *)nmsg; 1754 struct ifnet *ifp = msg->ifp; 1755 struct ifaddr_container *ifac; 1756 struct domain *dp; 1757 1758 ASSERT_NETISR0; 1759 1760 ifq_purge_all(&ifp->if_snd); 1761 ifp->if_flags |= IFF_UP; 1762 getmicrotime(&ifp->if_lastchange); 1763 rt_ifmsg(ifp); 1764 1765 /* 1766 * The ifaddr processing in the following loop will block, 1767 * however, this function is called in netisr0, in which 1768 * ifaddr list changes happen, so we don't care about the 1769 * blockness of the ifaddr processing here. 1770 */ 1771 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1772 struct ifaddr *ifa = ifac->ifa; 1773 1774 /* Ignore marker */ 1775 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 1776 continue; 1777 1778 kpfctlinput(PRC_IFUP, ifa->ifa_addr); 1779 } 1780 1781 SLIST_FOREACH(dp, &domains, dom_next) 1782 if (dp->dom_if_up != NULL) 1783 dp->dom_if_up(ifp); 1784 1785 netisr_replymsg(&nmsg->base, 0); 1786 } 1787 1788 /* 1789 * Mark an interface down and notify protocols of the transition. An 1790 * interface going down is also considered to be a synchronizing event. 1791 * We must ensure that all packet processing related to the interface 1792 * has completed before we return so e.g. 
the caller can free the ifnet 1793 * structure that the mbufs may be referencing. 1794 * 1795 * NOTE: must be called at splnet or eqivalent. 1796 */ 1797 void 1798 if_down(struct ifnet *ifp) 1799 { 1800 struct netmsg_if msg; 1801 1802 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); 1803 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, 1804 if_down_dispatch); 1805 msg.ifp = ifp; 1806 netisr_domsg(&msg.base, 0); 1807 netmsg_service_sync(); 1808 } 1809 1810 /* 1811 * Mark an interface up and notify protocols of 1812 * the transition. 1813 * NOTE: must be called at splnet or eqivalent. 1814 */ 1815 void 1816 if_up(struct ifnet *ifp) 1817 { 1818 struct netmsg_if msg; 1819 1820 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, 1821 if_up_dispatch); 1822 msg.ifp = ifp; 1823 netisr_domsg(&msg.base, 0); 1824 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); 1825 } 1826 1827 /* 1828 * Process a link state change. 1829 * NOTE: must be called at splsoftnet or equivalent. 1830 */ 1831 void 1832 if_link_state_change(struct ifnet *ifp) 1833 { 1834 int link_state = ifp->if_link_state; 1835 1836 rt_ifmsg(ifp); 1837 devctl_notify("IFNET", ifp->if_xname, 1838 (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); 1839 1840 EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); 1841 } 1842 1843 /* 1844 * Handle interface watchdog timer routines. Called 1845 * from softclock, we decrement timers (if set) and 1846 * call the appropriate interface routine on expiration. 1847 */ 1848 static void 1849 if_slowtimo_dispatch(netmsg_t nmsg) 1850 { 1851 struct globaldata *gd = mycpu; 1852 const struct ifnet_array *arr; 1853 int i; 1854 1855 ASSERT_NETISR0; 1856 1857 crit_enter_gd(gd); 1858 lwkt_replymsg(&nmsg->lmsg, 0); /* reply ASAP */ 1859 crit_exit_gd(gd); 1860 1861 arr = ifnet_array_get(); 1862 for (i = 0; i < arr->ifnet_count; ++i) { 1863 struct ifnet *ifp = arr->ifnet_arr[i]; 1864 1865 crit_enter_gd(gd); 1866 1867 if (if_stats_compat) { 1868 IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets); 1869 IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors); 1870 IFNET_STAT_GET(ifp, opackets, ifp->if_opackets); 1871 IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors); 1872 IFNET_STAT_GET(ifp, collisions, ifp->if_collisions); 1873 IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes); 1874 IFNET_STAT_GET(ifp, obytes, ifp->if_obytes); 1875 IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts); 1876 IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts); 1877 IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops); 1878 IFNET_STAT_GET(ifp, noproto, ifp->if_noproto); 1879 IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops); 1880 } 1881 1882 if (ifp->if_timer == 0 || --ifp->if_timer) { 1883 crit_exit_gd(gd); 1884 continue; 1885 } 1886 if (ifp->if_watchdog) { 1887 if (ifnet_tryserialize_all(ifp)) { 1888 (*ifp->if_watchdog)(ifp); 1889 ifnet_deserialize_all(ifp); 1890 } else { 1891 /* try again next timeout */ 1892 ++ifp->if_timer; 1893 } 1894 } 1895 1896 crit_exit_gd(gd); 1897 } 1898 1899 callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL); 1900 } 1901 1902 static void 1903 if_slowtimo(void *arg __unused) 1904 { 1905 struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg; 1906 1907 KASSERT(mycpuid == 0, ("not on cpu0")); 1908 crit_enter(); 1909 if (lmsg->ms_flags & MSGF_DONE) 1910 lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg); 1911 crit_exit(); 1912 } 1913 1914 /* 1915 * Map interface name to 1916 * interface structure pointer. 
1917 */ 1918 struct ifnet * 1919 ifunit(const char *name) 1920 { 1921 struct ifnet *ifp; 1922 1923 /* 1924 * Search all the interfaces for this name/number 1925 */ 1926 KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked")); 1927 1928 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 1929 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1930 break; 1931 } 1932 return (ifp); 1933 } 1934 1935 struct ifnet * 1936 ifunit_netisr(const char *name) 1937 { 1938 const struct ifnet_array *arr; 1939 int i; 1940 1941 /* 1942 * Search all the interfaces for this name/number 1943 */ 1944 1945 arr = ifnet_array_get(); 1946 for (i = 0; i < arr->ifnet_count; ++i) { 1947 struct ifnet *ifp = arr->ifnet_arr[i]; 1948 1949 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1950 return ifp; 1951 } 1952 return NULL; 1953 } 1954 1955 /* 1956 * Interface ioctls. 1957 */ 1958 int 1959 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred) 1960 { 1961 struct ifnet *ifp; 1962 struct ifgroupreq *ifgr; 1963 struct ifreq *ifr; 1964 struct ifstat *ifs; 1965 int error, do_ifup = 0; 1966 short oif_flags; 1967 int new_flags; 1968 size_t namelen, onamelen; 1969 size_t descrlen; 1970 char *descrbuf, *odescrbuf; 1971 char new_name[IFNAMSIZ]; 1972 struct ifaddr *ifa; 1973 struct sockaddr_dl *sdl; 1974 1975 switch (cmd) { 1976 case SIOCGIFCONF: 1977 return (ifconf(cmd, data, cred)); 1978 default: 1979 break; 1980 } 1981 1982 ifr = (struct ifreq *)data; 1983 1984 switch (cmd) { 1985 case SIOCIFCREATE: 1986 case SIOCIFCREATE2: 1987 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 1988 if (error) 1989 return (error); 1990 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), 1991 (cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL), NULL)); 1992 case SIOCIFDESTROY: 1993 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 1994 if (error) 1995 return (error); 1996 return (if_clone_destroy(ifr->ifr_name)); 1997 case SIOCIFGCLONERS: 1998 return (if_clone_list((struct if_clonereq *)data)); 1999 case SIOCGIFGMEMB: 2000 return (if_getgroupmembers((struct ifgroupreq *)data)); 2001 default: 2002 break; 2003 } 2004 2005 /* 2006 * Nominal ioctl through interface, lookup the ifp and obtain a 2007 * lock to serialize the ifconfig ioctl operation. 
2008 */ 2009 ifnet_lock(); 2010 2011 ifp = ifunit(ifr->ifr_name); 2012 if (ifp == NULL) { 2013 ifnet_unlock(); 2014 return (ENXIO); 2015 } 2016 error = 0; 2017 2018 switch (cmd) { 2019 case SIOCGIFINDEX: 2020 ifr->ifr_index = ifp->if_index; 2021 break; 2022 2023 case SIOCGIFFLAGS: 2024 ifr->ifr_flags = ifp->if_flags; 2025 ifr->ifr_flagshigh = ifp->if_flags >> 16; 2026 break; 2027 2028 case SIOCGIFCAP: 2029 ifr->ifr_reqcap = ifp->if_capabilities; 2030 ifr->ifr_curcap = ifp->if_capenable; 2031 break; 2032 2033 case SIOCGIFMETRIC: 2034 ifr->ifr_metric = ifp->if_metric; 2035 break; 2036 2037 case SIOCGIFMTU: 2038 ifr->ifr_mtu = ifp->if_mtu; 2039 break; 2040 2041 case SIOCGIFTSOLEN: 2042 ifr->ifr_tsolen = ifp->if_tsolen; 2043 break; 2044 2045 case SIOCGIFDATA: 2046 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data, 2047 sizeof(ifp->if_data)); 2048 break; 2049 2050 case SIOCGIFPHYS: 2051 ifr->ifr_phys = ifp->if_physical; 2052 break; 2053 2054 case SIOCGIFPOLLCPU: 2055 ifr->ifr_pollcpu = -1; 2056 break; 2057 2058 case SIOCSIFPOLLCPU: 2059 break; 2060 2061 case SIOCGIFDESCR: 2062 error = 0; 2063 ifnet_lock(); 2064 if (ifp->if_description == NULL) { 2065 ifr->ifr_buffer.length = 0; 2066 error = ENOMSG; 2067 } else { 2068 /* space for terminating nul */ 2069 descrlen = strlen(ifp->if_description) + 1; 2070 if (ifr->ifr_buffer.length < descrlen) 2071 error = ENAMETOOLONG; 2072 else 2073 error = copyout(ifp->if_description, 2074 ifr->ifr_buffer.buffer, descrlen); 2075 ifr->ifr_buffer.length = descrlen; 2076 } 2077 ifnet_unlock(); 2078 break; 2079 2080 case SIOCSIFDESCR: 2081 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2082 if (error) 2083 break; 2084 2085 /* 2086 * Copy only (length-1) bytes to make sure that 2087 * if_description is always nul terminated. The 2088 * length parameter is supposed to count the 2089 * terminating nul in. 
2090 */ 2091 if (ifr->ifr_buffer.length > ifdescr_maxlen) 2092 return (ENAMETOOLONG); 2093 else if (ifr->ifr_buffer.length == 0) 2094 descrbuf = NULL; 2095 else { 2096 descrbuf = kmalloc(ifr->ifr_buffer.length, M_IFDESCR, 2097 M_WAITOK | M_ZERO); 2098 error = copyin(ifr->ifr_buffer.buffer, descrbuf, 2099 ifr->ifr_buffer.length - 1); 2100 if (error) { 2101 kfree(descrbuf, M_IFDESCR); 2102 break; 2103 } 2104 } 2105 2106 ifnet_lock(); 2107 odescrbuf = ifp->if_description; 2108 ifp->if_description = descrbuf; 2109 ifnet_unlock(); 2110 2111 if (odescrbuf) 2112 kfree(odescrbuf, M_IFDESCR); 2113 2114 case SIOCSIFFLAGS: 2115 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2116 if (error) 2117 break; 2118 new_flags = (ifr->ifr_flags & 0xffff) | 2119 (ifr->ifr_flagshigh << 16); 2120 if (ifp->if_flags & IFF_SMART) { 2121 /* Smart drivers twiddle their own routes */ 2122 } else if (ifp->if_flags & IFF_UP && 2123 (new_flags & IFF_UP) == 0) { 2124 if_down(ifp); 2125 } else if (new_flags & IFF_UP && 2126 (ifp->if_flags & IFF_UP) == 0) { 2127 do_ifup = 1; 2128 } 2129 2130 #ifdef IFPOLL_ENABLE 2131 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) { 2132 if (new_flags & IFF_NPOLLING) 2133 ifpoll_register(ifp); 2134 else 2135 ifpoll_deregister(ifp); 2136 } 2137 #endif 2138 2139 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | 2140 (new_flags &~ IFF_CANTCHANGE); 2141 if (new_flags & IFF_PPROMISC) { 2142 /* Permanently promiscuous mode requested */ 2143 ifp->if_flags |= IFF_PROMISC; 2144 } else if (ifp->if_pcount == 0) { 2145 ifp->if_flags &= ~IFF_PROMISC; 2146 } 2147 if (ifp->if_ioctl) { 2148 ifnet_serialize_all(ifp); 2149 ifp->if_ioctl(ifp, cmd, data, cred); 2150 ifnet_deserialize_all(ifp); 2151 } 2152 if (do_ifup) 2153 if_up(ifp); 2154 getmicrotime(&ifp->if_lastchange); 2155 break; 2156 2157 case SIOCSIFCAP: 2158 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2159 if (error) 2160 break; 2161 if (ifr->ifr_reqcap & ~ifp->if_capabilities) { 2162 error = EINVAL; 2163 break; 2164 } 2165 ifnet_serialize_all(ifp); 2166 ifp->if_ioctl(ifp, cmd, data, cred); 2167 ifnet_deserialize_all(ifp); 2168 break; 2169 2170 case SIOCSIFNAME: 2171 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2172 if (error) 2173 break; 2174 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); 2175 if (error) 2176 break; 2177 if (new_name[0] == '\0') { 2178 error = EINVAL; 2179 break; 2180 } 2181 if (ifunit(new_name) != NULL) { 2182 error = EEXIST; 2183 break; 2184 } 2185 2186 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 2187 2188 /* Announce the departure of the interface. */ 2189 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 2190 2191 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); 2192 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 2193 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 2194 namelen = strlen(new_name); 2195 onamelen = sdl->sdl_nlen; 2196 /* 2197 * Move the address if needed. This is safe because we 2198 * allocate space for a name of length IFNAMSIZ when we 2199 * create this in if_attach(). 2200 */ 2201 if (namelen != onamelen) { 2202 bcopy(sdl->sdl_data + onamelen, 2203 sdl->sdl_data + namelen, sdl->sdl_alen); 2204 } 2205 bcopy(new_name, sdl->sdl_data, namelen); 2206 sdl->sdl_nlen = namelen; 2207 sdl = (struct sockaddr_dl *)ifa->ifa_netmask; 2208 bzero(sdl->sdl_data, onamelen); 2209 while (namelen != 0) 2210 sdl->sdl_data[--namelen] = 0xff; 2211 2212 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 2213 2214 /* Announce the return of the interface. 
*/ 2215 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 2216 break; 2217 2218 case SIOCSIFMETRIC: 2219 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2220 if (error) 2221 break; 2222 ifp->if_metric = ifr->ifr_metric; 2223 getmicrotime(&ifp->if_lastchange); 2224 break; 2225 2226 case SIOCSIFPHYS: 2227 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2228 if (error) 2229 break; 2230 if (ifp->if_ioctl == NULL) { 2231 error = EOPNOTSUPP; 2232 break; 2233 } 2234 ifnet_serialize_all(ifp); 2235 error = ifp->if_ioctl(ifp, cmd, data, cred); 2236 ifnet_deserialize_all(ifp); 2237 if (error == 0) 2238 getmicrotime(&ifp->if_lastchange); 2239 break; 2240 2241 case SIOCSIFMTU: 2242 { 2243 u_long oldmtu = ifp->if_mtu; 2244 2245 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2246 if (error) 2247 break; 2248 if (ifp->if_ioctl == NULL) { 2249 error = EOPNOTSUPP; 2250 break; 2251 } 2252 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) { 2253 error = EINVAL; 2254 break; 2255 } 2256 ifnet_serialize_all(ifp); 2257 error = ifp->if_ioctl(ifp, cmd, data, cred); 2258 ifnet_deserialize_all(ifp); 2259 if (error == 0) { 2260 getmicrotime(&ifp->if_lastchange); 2261 rt_ifmsg(ifp); 2262 } 2263 /* 2264 * If the link MTU changed, do network layer specific procedure. 2265 */ 2266 if (ifp->if_mtu != oldmtu) { 2267 #ifdef INET6 2268 nd6_setmtu(ifp); 2269 #endif 2270 } 2271 break; 2272 } 2273 2274 case SIOCSIFTSOLEN: 2275 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2276 if (error) 2277 break; 2278 2279 /* XXX need driver supplied upper limit */ 2280 if (ifr->ifr_tsolen <= 0) { 2281 error = EINVAL; 2282 break; 2283 } 2284 ifp->if_tsolen = ifr->ifr_tsolen; 2285 break; 2286 2287 case SIOCADDMULTI: 2288 case SIOCDELMULTI: 2289 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2290 if (error) 2291 break; 2292 2293 /* Don't allow group membership on non-multicast interfaces. */ 2294 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2295 error = EOPNOTSUPP; 2296 break; 2297 } 2298 2299 /* Don't let users screw up protocols' entries. 
*/ 2300 if (ifr->ifr_addr.sa_family != AF_LINK) { 2301 error = EINVAL; 2302 break; 2303 } 2304 2305 if (cmd == SIOCADDMULTI) { 2306 struct ifmultiaddr *ifma; 2307 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); 2308 } else { 2309 error = if_delmulti(ifp, &ifr->ifr_addr); 2310 } 2311 if (error == 0) 2312 getmicrotime(&ifp->if_lastchange); 2313 break; 2314 2315 case SIOCSIFPHYADDR: 2316 case SIOCDIFPHYADDR: 2317 #ifdef INET6 2318 case SIOCSIFPHYADDR_IN6: 2319 #endif 2320 case SIOCSLIFPHYADDR: 2321 case SIOCSIFMEDIA: 2322 case SIOCSIFGENERIC: 2323 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2324 if (error) 2325 break; 2326 if (ifp->if_ioctl == NULL) { 2327 error = EOPNOTSUPP; 2328 break; 2329 } 2330 ifnet_serialize_all(ifp); 2331 error = ifp->if_ioctl(ifp, cmd, data, cred); 2332 ifnet_deserialize_all(ifp); 2333 if (error == 0) 2334 getmicrotime(&ifp->if_lastchange); 2335 break; 2336 2337 case SIOCGIFSTATUS: 2338 ifs = (struct ifstat *)data; 2339 ifs->ascii[0] = '\0'; 2340 /* fall through */ 2341 case SIOCGIFPSRCADDR: 2342 case SIOCGIFPDSTADDR: 2343 case SIOCGLIFPHYADDR: 2344 case SIOCGIFMEDIA: 2345 case SIOCGIFXMEDIA: 2346 case SIOCGIFGENERIC: 2347 if (ifp->if_ioctl == NULL) { 2348 error = EOPNOTSUPP; 2349 break; 2350 } 2351 ifnet_serialize_all(ifp); 2352 error = ifp->if_ioctl(ifp, cmd, data, cred); 2353 ifnet_deserialize_all(ifp); 2354 break; 2355 2356 case SIOCSIFLLADDR: 2357 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2358 if (error) 2359 break; 2360 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, 2361 ifr->ifr_addr.sa_len); 2362 EVENTHANDLER_INVOKE(iflladdr_event, ifp); 2363 break; 2364 2365 case SIOCAIFGROUP: 2366 ifgr = (struct ifgroupreq *)ifr; 2367 error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG); 2368 if (error) 2369 return (error); 2370 if ((error = if_addgroup(ifp, ifgr->ifgr_group))) 2371 return (error); 2372 break; 2373 2374 case SIOCDIFGROUP: 2375 ifgr = (struct ifgroupreq *)ifr; 2376 error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG); 2377 if (error) 2378 return (error); 2379 if ((error = if_delgroup(ifp, ifgr->ifgr_group))) 2380 return (error); 2381 break; 2382 2383 case SIOCGIFGROUP: 2384 ifgr = (struct ifgroupreq *)ifr; 2385 if ((error = if_getgroups(ifgr, ifp))) 2386 return (error); 2387 break; 2388 2389 default: 2390 oif_flags = ifp->if_flags; 2391 if (so->so_proto == 0) { 2392 error = EOPNOTSUPP; 2393 break; 2394 } 2395 error = so_pru_control_direct(so, cmd, data, ifp); 2396 2397 /* 2398 * If the socket control method returns EOPNOTSUPP, pass the 2399 * request directly to the interface. 2400 * 2401 * Exclude the SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} ioctls, 2402 * because drivers may trust these ioctls to come from an 2403 * already privileged layer and thus do not perform credentials 2404 * checks or input validation. 
2405 */ 2406 if (error == EOPNOTSUPP && 2407 ifp->if_ioctl != NULL && 2408 cmd != SIOCSIFADDR && 2409 cmd != SIOCSIFBRDADDR && 2410 cmd != SIOCSIFDSTADDR && 2411 cmd != SIOCSIFNETMASK) { 2412 ifnet_serialize_all(ifp); 2413 error = ifp->if_ioctl(ifp, cmd, data, cred); 2414 ifnet_deserialize_all(ifp); 2415 } 2416 2417 if ((oif_flags ^ ifp->if_flags) & IFF_UP) { 2418 #ifdef INET6 2419 DELAY(100);/* XXX: temporary workaround for fxp issue*/ 2420 if (ifp->if_flags & IFF_UP) { 2421 crit_enter(); 2422 in6_if_up(ifp); 2423 crit_exit(); 2424 } 2425 #endif 2426 } 2427 break; 2428 } 2429 2430 ifnet_unlock(); 2431 return (error); 2432 } 2433 2434 /* 2435 * Set/clear promiscuous mode on interface ifp based on the truth value 2436 * of pswitch. The calls are reference counted so that only the first 2437 * "on" request actually has an effect, as does the final "off" request. 2438 * Results are undefined if the "off" and "on" requests are not matched. 2439 */ 2440 int 2441 ifpromisc(struct ifnet *ifp, int pswitch) 2442 { 2443 struct ifreq ifr; 2444 int error; 2445 int oldflags; 2446 2447 oldflags = ifp->if_flags; 2448 if (ifp->if_flags & IFF_PPROMISC) { 2449 /* Do nothing if device is in permanently promiscuous mode */ 2450 ifp->if_pcount += pswitch ? 1 : -1; 2451 return (0); 2452 } 2453 if (pswitch) { 2454 /* 2455 * If the device is not configured up, we cannot put it in 2456 * promiscuous mode. 2457 */ 2458 if ((ifp->if_flags & IFF_UP) == 0) 2459 return (ENETDOWN); 2460 if (ifp->if_pcount++ != 0) 2461 return (0); 2462 ifp->if_flags |= IFF_PROMISC; 2463 log(LOG_INFO, "%s: promiscuous mode enabled\n", 2464 ifp->if_xname); 2465 } else { 2466 if (--ifp->if_pcount > 0) 2467 return (0); 2468 ifp->if_flags &= ~IFF_PROMISC; 2469 log(LOG_INFO, "%s: promiscuous mode disabled\n", 2470 ifp->if_xname); 2471 } 2472 ifr.ifr_flags = ifp->if_flags; 2473 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2474 ifnet_serialize_all(ifp); 2475 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL); 2476 ifnet_deserialize_all(ifp); 2477 if (error == 0) 2478 rt_ifmsg(ifp); 2479 else 2480 ifp->if_flags = oldflags; 2481 return error; 2482 } 2483 2484 /* 2485 * Return interface configuration 2486 * of system. List may be used 2487 * in later ioctl's (above) to get 2488 * other information. 2489 */ 2490 static int 2491 ifconf(u_long cmd, caddr_t data, struct ucred *cred) 2492 { 2493 struct ifconf *ifc = (struct ifconf *)data; 2494 struct ifnet *ifp; 2495 struct sockaddr *sa; 2496 struct ifreq ifr, *ifrp; 2497 int space = ifc->ifc_len, error = 0; 2498 2499 ifrp = ifc->ifc_req; 2500 2501 ifnet_lock(); 2502 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2503 struct ifaddr_container *ifac, *ifac_mark; 2504 struct ifaddr_marker mark; 2505 struct ifaddrhead *head; 2506 int addrs; 2507 2508 if (space <= sizeof ifr) 2509 break; 2510 2511 /* 2512 * Zero the stack declared structure first to prevent 2513 * memory disclosure. 2514 */ 2515 bzero(&ifr, sizeof(ifr)); 2516 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) 2517 >= sizeof(ifr.ifr_name)) { 2518 error = ENAMETOOLONG; 2519 break; 2520 } 2521 2522 /* 2523 * Add a marker, since copyout() could block and during that 2524 * period the list could be changed. Inserting the marker to 2525 * the header of the list will not cause trouble for the code 2526 * assuming that the first element of the list is AF_LINK; the 2527 * marker will be moved to the next position w/o blocking. 
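 *
 * The marker's ifa_addr has sa_family AF_UNSPEC (ifa_marker_init()
 * zeroes the whole structure), which is how the loop below tells it
 * apart from real addresses and skips it.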
2528 */ 2529 ifa_marker_init(&mark, ifp); 2530 ifac_mark = &mark.ifac; 2531 head = &ifp->if_addrheads[mycpuid]; 2532 2533 addrs = 0; 2534 TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link); 2535 while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) { 2536 struct ifaddr *ifa = ifac->ifa; 2537 2538 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2539 TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); 2540 2541 /* Ignore marker */ 2542 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 2543 continue; 2544 2545 if (space <= sizeof ifr) 2546 break; 2547 sa = ifa->ifa_addr; 2548 if (cred->cr_prison && prison_if(cred, sa)) 2549 continue; 2550 addrs++; 2551 /* 2552 * Keep a reference on this ifaddr, so that it will 2553 * not be destroyed when its address is copied to 2554 * the userland, which could block. 2555 */ 2556 IFAREF(ifa); 2557 if (sa->sa_len <= sizeof(*sa)) { 2558 ifr.ifr_addr = *sa; 2559 error = copyout(&ifr, ifrp, sizeof ifr); 2560 ifrp++; 2561 } else { 2562 if (space < (sizeof ifr) + sa->sa_len - 2563 sizeof(*sa)) { 2564 IFAFREE(ifa); 2565 break; 2566 } 2567 space -= sa->sa_len - sizeof(*sa); 2568 error = copyout(&ifr, ifrp, 2569 sizeof ifr.ifr_name); 2570 if (error == 0) 2571 error = copyout(sa, &ifrp->ifr_addr, 2572 sa->sa_len); 2573 ifrp = (struct ifreq *) 2574 (sa->sa_len + (caddr_t)&ifrp->ifr_addr); 2575 } 2576 IFAFREE(ifa); 2577 if (error) 2578 break; 2579 space -= sizeof ifr; 2580 } 2581 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2582 if (error) 2583 break; 2584 if (!addrs) { 2585 bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr); 2586 error = copyout(&ifr, ifrp, sizeof ifr); 2587 if (error) 2588 break; 2589 space -= sizeof ifr; 2590 ifrp++; 2591 } 2592 } 2593 ifnet_unlock(); 2594 2595 ifc->ifc_len -= space; 2596 return (error); 2597 } 2598 2599 /* 2600 * Just like if_promisc(), but for all-multicast-reception mode. 2601 */ 2602 int 2603 if_allmulti(struct ifnet *ifp, int onswitch) 2604 { 2605 int error = 0; 2606 struct ifreq ifr; 2607 2608 crit_enter(); 2609 2610 if (onswitch) { 2611 if (ifp->if_amcount++ == 0) { 2612 ifp->if_flags |= IFF_ALLMULTI; 2613 ifr.ifr_flags = ifp->if_flags; 2614 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2615 ifnet_serialize_all(ifp); 2616 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2617 NULL); 2618 ifnet_deserialize_all(ifp); 2619 } 2620 } else { 2621 if (ifp->if_amcount > 1) { 2622 ifp->if_amcount--; 2623 } else { 2624 ifp->if_amcount = 0; 2625 ifp->if_flags &= ~IFF_ALLMULTI; 2626 ifr.ifr_flags = ifp->if_flags; 2627 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2628 ifnet_serialize_all(ifp); 2629 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2630 NULL); 2631 ifnet_deserialize_all(ifp); 2632 } 2633 } 2634 2635 crit_exit(); 2636 2637 if (error == 0) 2638 rt_ifmsg(ifp); 2639 return error; 2640 } 2641 2642 /* 2643 * Add a multicast listenership to the interface in question. 
2644 * The link layer provides a routine which converts 2645 */ 2646 int 2647 if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa, 2648 struct ifmultiaddr **retifma) 2649 { 2650 struct sockaddr *llsa, *dupsa; 2651 int error; 2652 struct ifmultiaddr *ifma; 2653 2654 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2655 2656 /* 2657 * If the matching multicast address already exists 2658 * then don't add a new one, just add a reference 2659 */ 2660 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2661 if (sa_equal(sa, ifma->ifma_addr)) { 2662 ifma->ifma_refcount++; 2663 if (retifma) 2664 *retifma = ifma; 2665 return 0; 2666 } 2667 } 2668 2669 /* 2670 * Give the link layer a chance to accept/reject it, and also 2671 * find out which AF_LINK address this maps to, if it isn't one 2672 * already. 2673 */ 2674 if (ifp->if_resolvemulti) { 2675 error = ifp->if_resolvemulti(ifp, &llsa, sa); 2676 if (error) 2677 return error; 2678 } else { 2679 llsa = NULL; 2680 } 2681 2682 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2683 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT); 2684 bcopy(sa, dupsa, sa->sa_len); 2685 2686 ifma->ifma_addr = dupsa; 2687 ifma->ifma_lladdr = llsa; 2688 ifma->ifma_ifp = ifp; 2689 ifma->ifma_refcount = 1; 2690 ifma->ifma_protospec = NULL; 2691 rt_newmaddrmsg(RTM_NEWMADDR, ifma); 2692 2693 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2694 if (retifma) 2695 *retifma = ifma; 2696 2697 if (llsa != NULL) { 2698 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2699 if (sa_equal(ifma->ifma_addr, llsa)) 2700 break; 2701 } 2702 if (ifma) { 2703 ifma->ifma_refcount++; 2704 } else { 2705 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2706 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT); 2707 bcopy(llsa, dupsa, llsa->sa_len); 2708 ifma->ifma_addr = dupsa; 2709 ifma->ifma_ifp = ifp; 2710 ifma->ifma_refcount = 1; 2711 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2712 } 2713 } 2714 /* 2715 * We are certain we have added something, so call down to the 2716 * interface to let them know about it. 2717 */ 2718 if (ifp->if_ioctl) 2719 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL); 2720 2721 return 0; 2722 } 2723 2724 int 2725 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, 2726 struct ifmultiaddr **retifma) 2727 { 2728 int error; 2729 2730 ifnet_serialize_all(ifp); 2731 error = if_addmulti_serialized(ifp, sa, retifma); 2732 ifnet_deserialize_all(ifp); 2733 2734 return error; 2735 } 2736 2737 /* 2738 * Remove a reference to a multicast address on this interface. Yell 2739 * if the request does not match an existing membership. 2740 */ 2741 static int 2742 if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa) 2743 { 2744 struct ifmultiaddr *ifma; 2745 2746 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2747 2748 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2749 if (sa_equal(sa, ifma->ifma_addr)) 2750 break; 2751 if (ifma == NULL) 2752 return ENOENT; 2753 2754 if (ifma->ifma_refcount > 1) { 2755 ifma->ifma_refcount--; 2756 return 0; 2757 } 2758 2759 rt_newmaddrmsg(RTM_DELMADDR, ifma); 2760 sa = ifma->ifma_lladdr; 2761 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2762 /* 2763 * Make sure the interface driver is notified 2764 * in the case of a link layer mcast group being left. 
2765 */ 2766 if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) 2767 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2768 kfree(ifma->ifma_addr, M_IFMADDR); 2769 kfree(ifma, M_IFMADDR); 2770 if (sa == NULL) 2771 return 0; 2772 2773 /* 2774 * Now look for the link-layer address which corresponds to 2775 * this network address. It had been squirreled away in 2776 * ifma->ifma_lladdr for this purpose (so we don't have 2777 * to call ifp->if_resolvemulti() again), and we saved that 2778 * value in sa above. If some nasty deleted the 2779 * link-layer address out from underneath us, we can deal because 2780 * the address we stored was is not the same as the one which was 2781 * in the record for the link-layer address. (So we don't complain 2782 * in that case.) 2783 */ 2784 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2785 if (sa_equal(sa, ifma->ifma_addr)) 2786 break; 2787 if (ifma == NULL) 2788 return 0; 2789 2790 if (ifma->ifma_refcount > 1) { 2791 ifma->ifma_refcount--; 2792 return 0; 2793 } 2794 2795 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2796 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2797 kfree(ifma->ifma_addr, M_IFMADDR); 2798 kfree(sa, M_IFMADDR); 2799 kfree(ifma, M_IFMADDR); 2800 2801 return 0; 2802 } 2803 2804 int 2805 if_delmulti(struct ifnet *ifp, struct sockaddr *sa) 2806 { 2807 int error; 2808 2809 ifnet_serialize_all(ifp); 2810 error = if_delmulti_serialized(ifp, sa); 2811 ifnet_deserialize_all(ifp); 2812 2813 return error; 2814 } 2815 2816 /* 2817 * Delete all multicast group membership for an interface. 2818 * Should be used to quickly flush all multicast filters. 2819 */ 2820 void 2821 if_delallmulti_serialized(struct ifnet *ifp) 2822 { 2823 struct ifmultiaddr *ifma, mark; 2824 struct sockaddr sa; 2825 2826 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2827 2828 bzero(&sa, sizeof(sa)); 2829 sa.sa_family = AF_UNSPEC; 2830 sa.sa_len = sizeof(sa); 2831 2832 bzero(&mark, sizeof(mark)); 2833 mark.ifma_addr = &sa; 2834 2835 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link); 2836 while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) { 2837 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2838 TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark, 2839 ifma_link); 2840 2841 if (ifma->ifma_addr->sa_family == AF_UNSPEC) 2842 continue; 2843 2844 if_delmulti_serialized(ifp, ifma->ifma_addr); 2845 } 2846 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2847 } 2848 2849 2850 /* 2851 * Set the link layer address on an interface. 2852 * 2853 * At this time we only support certain types of interfaces, 2854 * and we don't allow the length of the address to change. 2855 */ 2856 int 2857 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) 2858 { 2859 struct sockaddr_dl *sdl; 2860 struct ifreq ifr; 2861 2862 sdl = IF_LLSOCKADDR(ifp); 2863 if (sdl == NULL) 2864 return (EINVAL); 2865 if (len != sdl->sdl_alen) /* don't allow length to change */ 2866 return (EINVAL); 2867 switch (ifp->if_type) { 2868 case IFT_ETHER: /* these types use struct arpcom */ 2869 case IFT_XETHER: 2870 case IFT_L2VLAN: 2871 case IFT_IEEE8023ADLAG: 2872 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len); 2873 bcopy(lladdr, LLADDR(sdl), len); 2874 break; 2875 default: 2876 return (ENODEV); 2877 } 2878 /* 2879 * If the interface is already up, we need 2880 * to re-init it in order to reprogram its 2881 * address filter. 
2882 */ 2883 ifnet_serialize_all(ifp); 2884 if ((ifp->if_flags & IFF_UP) != 0) { 2885 #ifdef INET 2886 struct ifaddr_container *ifac; 2887 #endif 2888 2889 ifp->if_flags &= ~IFF_UP; 2890 ifr.ifr_flags = ifp->if_flags; 2891 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2892 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2893 NULL); 2894 ifp->if_flags |= IFF_UP; 2895 ifr.ifr_flags = ifp->if_flags; 2896 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2897 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2898 NULL); 2899 #ifdef INET 2900 /* 2901 * Also send gratuitous ARPs to notify other nodes about 2902 * the address change. 2903 */ 2904 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 2905 struct ifaddr *ifa = ifac->ifa; 2906 2907 if (ifa->ifa_addr != NULL && 2908 ifa->ifa_addr->sa_family == AF_INET) 2909 arp_gratuitous(ifp, ifa); 2910 } 2911 #endif 2912 } 2913 ifnet_deserialize_all(ifp); 2914 return (0); 2915 } 2916 2917 2918 /* 2919 * Tunnel interfaces can nest, also they may cause infinite recursion 2920 * calls when misconfigured. Introduce an upper limit to prevent infinite 2921 * recursions, as well as to constrain the nesting depth. 2922 * 2923 * Return 0, if tunnel nesting count is equal or less than limit. 2924 */ 2925 int 2926 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, 2927 int limit) 2928 { 2929 struct m_tag *mtag; 2930 int count; 2931 2932 count = 1; 2933 mtag = m_tag_locate(m, cookie, 0 /* type */, NULL); 2934 if (mtag != NULL) 2935 count += *(int *)(mtag + 1); 2936 if (count > limit) { 2937 log(LOG_NOTICE, 2938 "%s: packet looped too many times (%d), limit %d\n", 2939 ifp->if_xname, count, limit); 2940 return (ELOOP); 2941 } 2942 2943 if (mtag == NULL) { 2944 mtag = m_tag_alloc(cookie, 0, sizeof(int), M_NOWAIT); 2945 if (mtag == NULL) 2946 return (ENOMEM); 2947 m_tag_prepend(m, mtag); 2948 } 2949 2950 *(int *)(mtag + 1) = count; 2951 return (0); 2952 } 2953 2954 2955 /* 2956 * Locate an interface based on a complete address. 2957 */ 2958 struct ifnet * 2959 if_bylla(const void *lla, unsigned char lla_len) 2960 { 2961 const struct ifnet_array *arr; 2962 struct ifnet *ifp; 2963 struct sockaddr_dl *sdl; 2964 int i; 2965 2966 arr = ifnet_array_get(); 2967 for (i = 0; i < arr->ifnet_count; ++i) { 2968 ifp = arr->ifnet_arr[i]; 2969 if (ifp->if_addrlen != lla_len) 2970 continue; 2971 2972 sdl = IF_LLSOCKADDR(ifp); 2973 if (memcmp(lla, LLADDR(sdl), lla_len) == 0) 2974 return (ifp); 2975 } 2976 return (NULL); 2977 } 2978 2979 struct ifmultiaddr * 2980 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp) 2981 { 2982 struct ifmultiaddr *ifma; 2983 2984 /* TODO: need ifnet_serialize_main */ 2985 ifnet_serialize_all(ifp); 2986 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2987 if (sa_equal(ifma->ifma_addr, sa)) 2988 break; 2989 ifnet_deserialize_all(ifp); 2990 2991 return ifma; 2992 } 2993 2994 /* 2995 * This function locates the first real ethernet MAC from a network 2996 * card and loads it into node, returning 0 on success or ENOENT if 2997 * no suitable interfaces were found. It is used by the uuid code to 2998 * generate a unique 6-byte number. 
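 *
 * A minimal sketch of a caller, modelled on that uuid usage (the
 * fallback shown is illustrative only):
 *
 *	uint16_t node[3];	six bytes, i.e. one Ethernet MAC
 *
 *	if (if_getanyethermac(node, sizeof(node)) != 0)
 *		... fall back to a randomly generated node address ...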
2999 */ 3000 int 3001 if_getanyethermac(uint16_t *node, int minlen) 3002 { 3003 struct ifnet *ifp; 3004 struct sockaddr_dl *sdl; 3005 3006 ifnet_lock(); 3007 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 3008 if (ifp->if_type != IFT_ETHER) 3009 continue; 3010 sdl = IF_LLSOCKADDR(ifp); 3011 if (sdl->sdl_alen < minlen) 3012 continue; 3013 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node, 3014 minlen); 3015 ifnet_unlock(); 3016 return(0); 3017 } 3018 ifnet_unlock(); 3019 return (ENOENT); 3020 } 3021 3022 /* 3023 * The name argument must be a pointer to storage which will last as 3024 * long as the interface does. For physical devices, the result of 3025 * device_get_name(dev) is a good choice and for pseudo-devices a 3026 * static string works well. 3027 */ 3028 void 3029 if_initname(struct ifnet *ifp, const char *name, int unit) 3030 { 3031 ifp->if_dname = name; 3032 ifp->if_dunit = unit; 3033 if (unit != IF_DUNIT_NONE) 3034 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); 3035 else 3036 strlcpy(ifp->if_xname, name, IFNAMSIZ); 3037 } 3038 3039 int 3040 if_printf(struct ifnet *ifp, const char *fmt, ...) 3041 { 3042 __va_list ap; 3043 int retval; 3044 3045 retval = kprintf("%s: ", ifp->if_xname); 3046 __va_start(ap, fmt); 3047 retval += kvprintf(fmt, ap); 3048 __va_end(ap); 3049 return (retval); 3050 } 3051 3052 struct ifnet * 3053 if_alloc(uint8_t type) 3054 { 3055 struct ifnet *ifp; 3056 size_t size; 3057 3058 /* 3059 * XXX temporary hack until arpcom is setup in if_l2com 3060 */ 3061 if (type == IFT_ETHER) 3062 size = sizeof(struct arpcom); 3063 else 3064 size = sizeof(struct ifnet); 3065 3066 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO); 3067 3068 ifp->if_type = type; 3069 3070 if (if_com_alloc[type] != NULL) { 3071 ifp->if_l2com = if_com_alloc[type](type, ifp); 3072 if (ifp->if_l2com == NULL) { 3073 kfree(ifp, M_IFNET); 3074 return (NULL); 3075 } 3076 } 3077 return (ifp); 3078 } 3079 3080 void 3081 if_free(struct ifnet *ifp) 3082 { 3083 if (ifp->if_description != NULL) 3084 kfree(ifp->if_description, M_IFDESCR); 3085 kfree(ifp, M_IFNET); 3086 } 3087 3088 void 3089 ifq_set_classic(struct ifaltq *ifq) 3090 { 3091 ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq, 3092 ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request); 3093 } 3094 3095 void 3096 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq, 3097 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request) 3098 { 3099 int q; 3100 3101 KASSERT(mapsubq != NULL, ("mapsubq is not specified")); 3102 KASSERT(enqueue != NULL, ("enqueue is not specified")); 3103 KASSERT(dequeue != NULL, ("dequeue is not specified")); 3104 KASSERT(request != NULL, ("request is not specified")); 3105 3106 ifq->altq_mapsubq = mapsubq; 3107 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 3108 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 3109 3110 ifsq->ifsq_enqueue = enqueue; 3111 ifsq->ifsq_dequeue = dequeue; 3112 ifsq->ifsq_request = request; 3113 } 3114 } 3115 3116 static void 3117 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 3118 { 3119 3120 classq_add(&ifsq->ifsq_norm, m); 3121 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 3122 } 3123 3124 static void 3125 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 3126 { 3127 3128 classq_add(&ifsq->ifsq_prio, m); 3129 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 3130 ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len); 3131 } 3132 3133 static struct mbuf * 3134 ifsq_norm_dequeue(struct ifaltq_subque *ifsq) 3135 { 3136 struct mbuf *m; 3137 3138 m = 
classq_get(&ifsq->ifsq_norm); 3139 if (m != NULL) 3140 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 3141 return (m); 3142 } 3143 3144 static struct mbuf * 3145 ifsq_prio_dequeue(struct ifaltq_subque *ifsq) 3146 { 3147 struct mbuf *m; 3148 3149 m = classq_get(&ifsq->ifsq_prio); 3150 if (m != NULL) { 3151 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 3152 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len); 3153 } 3154 return (m); 3155 } 3156 3157 int 3158 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m, 3159 struct altq_pktattr *pa __unused) 3160 { 3161 3162 M_ASSERTPKTHDR(m); 3163 again: 3164 if (ifsq->ifsq_len >= ifsq->ifsq_maxlen || 3165 ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) { 3166 struct mbuf *m_drop; 3167 3168 if (m->m_flags & M_PRIO) { 3169 m_drop = NULL; 3170 if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) && 3171 ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) { 3172 /* Try dropping some from normal queue. */ 3173 m_drop = ifsq_norm_dequeue(ifsq); 3174 } 3175 if (m_drop == NULL) 3176 m_drop = ifsq_prio_dequeue(ifsq); 3177 } else { 3178 m_drop = ifsq_norm_dequeue(ifsq); 3179 } 3180 if (m_drop != NULL) { 3181 IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1); 3182 m_freem(m_drop); 3183 goto again; 3184 } 3185 /* 3186 * No old packets could be dropped! 3187 * NOTE: Caller increases oqdrops. 3188 */ 3189 m_freem(m); 3190 return (ENOBUFS); 3191 } else { 3192 if (m->m_flags & M_PRIO) 3193 ifsq_prio_enqueue(ifsq, m); 3194 else 3195 ifsq_norm_enqueue(ifsq, m); 3196 return (0); 3197 } 3198 } 3199 3200 struct mbuf * 3201 ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op) 3202 { 3203 struct mbuf *m; 3204 3205 switch (op) { 3206 case ALTDQ_POLL: 3207 m = classq_head(&ifsq->ifsq_prio); 3208 if (m == NULL) 3209 m = classq_head(&ifsq->ifsq_norm); 3210 break; 3211 3212 case ALTDQ_REMOVE: 3213 m = ifsq_prio_dequeue(ifsq); 3214 if (m == NULL) 3215 m = ifsq_norm_dequeue(ifsq); 3216 break; 3217 3218 default: 3219 panic("unsupported ALTQ dequeue op: %d", op); 3220 } 3221 return m; 3222 } 3223 3224 int 3225 ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg) 3226 { 3227 switch (req) { 3228 case ALTRQ_PURGE: 3229 for (;;) { 3230 struct mbuf *m; 3231 3232 m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE); 3233 if (m == NULL) 3234 break; 3235 m_freem(m); 3236 } 3237 break; 3238 3239 default: 3240 panic("unsupported ALTQ request: %d", req); 3241 } 3242 return 0; 3243 } 3244 3245 static void 3246 ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched) 3247 { 3248 struct ifnet *ifp = ifsq_get_ifp(ifsq); 3249 int running = 0, need_sched; 3250 3251 /* 3252 * Try to do direct ifnet.if_start on the subqueue first, if there is 3253 * contention on the subqueue hardware serializer, ifnet.if_start on 3254 * the subqueue will be scheduled on the subqueue owner CPU. 3255 */ 3256 if (!ifsq_tryserialize_hw(ifsq)) { 3257 /* 3258 * Subqueue hardware serializer contention happened, 3259 * ifnet.if_start on the subqueue is scheduled on 3260 * the subqueue owner CPU, and we keep going. 
3261 */ 3262 ifsq_ifstart_schedule(ifsq, 1); 3263 return; 3264 } 3265 3266 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 3267 ifp->if_start(ifp, ifsq); 3268 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 3269 running = 1; 3270 } 3271 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 3272 3273 ifsq_deserialize_hw(ifsq); 3274 3275 if (need_sched) { 3276 /* 3277 * More data needs to be transmitted; ifnet.if_start on the 3278 * subqueue is scheduled on the subqueue owner CPU, and we 3279 * keep going. 3280 * NOTE: ifnet.if_start subqueue interlock is not released. 3281 */ 3282 ifsq_ifstart_schedule(ifsq, force_sched); 3283 } 3284 } 3285 3286 /* 3287 * Subqueue packet staging mechanism: 3288 * 3289 * Packets enqueued into the subqueue are staged up to a certain amount 3290 * before ifnet.if_start on the subqueue is called. This way the 3291 * driver can avoid writing to hardware registers upon every packet; 3292 * instead, hardware registers need only be written once a certain amount 3293 * of packets has been put onto the hardware TX ring. Measurements on 3294 * several modern NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) show that 3295 * this hardware register write aggregation can save ~20% CPU time when 3296 * 18-byte UDP datagrams are transmitted at 1.48Mpps. The performance 3297 * improvement from hardware register write aggregation is also mentioned 3298 * in Luigi Rizzo's netmap paper (http://info.iet.unipi.it/~luigi/netmap/). 3299 * 3300 * Subqueue packet staging is performed for two entry points into drivers' 3301 * transmission function: 3302 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try() 3303 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule() 3304 * 3305 * Subqueue packet staging is stopped upon any of the following 3306 * conditions: 3307 * - The count of packets enqueued on the current CPU is greater than or 3308 * equal to ifsq_stage_cntmax. (XXX this should be per-interface) 3309 * - The total length of packets enqueued on the current CPU is greater 3310 * than or equal to the hardware's MTU - max_protohdr. max_protohdr is 3311 * subtracted from the hardware's MTU mainly because a full TCP segment's 3312 * size is usually less than the hardware's MTU. 3313 * - ifsq_ifstart_schedule() is not pending on the current CPU and the 3314 * ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not 3315 * released. 3316 * - if_start_rollup(), which is registered as a low priority netisr 3317 * rollup function, is called; probably because no more work is pending 3318 * for netisr. 3319 * 3320 * NOTE: 3321 * Currently subqueue packet staging is only performed in netisr threads.
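 *
 * For example (illustrative numbers only): with net.link.stage_cntmax at
 * its default of 16 and a 1500-byte MTU, a netisr thread accumulates at
 * most 16 packets, or roughly 1500 - max_protohdr bytes of data, on its
 * local staging queue before ifnet.if_start is actually dispatched for
 * the subqueue; whichever stop condition above is hit first ends the
 * staging.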
3322 */ 3323 int 3324 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa) 3325 { 3326 struct ifaltq *ifq = &ifp->if_snd; 3327 struct ifaltq_subque *ifsq; 3328 int error, start = 0, len, mcast = 0, avoid_start = 0; 3329 struct ifsubq_stage_head *head = NULL; 3330 struct ifsubq_stage *stage = NULL; 3331 struct globaldata *gd = mycpu; 3332 struct thread *td = gd->gd_curthread; 3333 3334 crit_enter_quick(td); 3335 3336 ifsq = ifq_map_subq(ifq, gd->gd_cpuid); 3337 ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq); 3338 3339 len = m->m_pkthdr.len; 3340 if (m->m_flags & M_MCAST) 3341 mcast = 1; 3342 3343 if (td->td_type == TD_TYPE_NETISR) { 3344 head = &ifsubq_stage_heads[mycpuid]; 3345 stage = ifsq_get_stage(ifsq, mycpuid); 3346 3347 stage->stg_cnt++; 3348 stage->stg_len += len; 3349 if (stage->stg_cnt < ifsq_stage_cntmax && 3350 stage->stg_len < (ifp->if_mtu - max_protohdr)) 3351 avoid_start = 1; 3352 } 3353 3354 ALTQ_SQ_LOCK(ifsq); 3355 error = ifsq_enqueue_locked(ifsq, m, pa); 3356 if (error) { 3357 IFNET_STAT_INC(ifp, oqdrops, 1); 3358 if (!ifsq_data_ready(ifsq)) { 3359 ALTQ_SQ_UNLOCK(ifsq); 3360 goto done; 3361 } 3362 avoid_start = 0; 3363 } else { 3364 IFNET_STAT_INC(ifp, obytes, len); 3365 if (mcast) 3366 IFNET_STAT_INC(ifp, omcasts, 1); 3367 } 3368 if (!ifsq_is_started(ifsq)) { 3369 if (avoid_start) { 3370 ALTQ_SQ_UNLOCK(ifsq); 3371 3372 KKASSERT(!error); 3373 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 3374 ifsq_stage_insert(head, stage); 3375 3376 goto done; 3377 } 3378 3379 /* 3380 * Hold the subqueue interlock of ifnet.if_start 3381 */ 3382 ifsq_set_started(ifsq); 3383 start = 1; 3384 } 3385 ALTQ_SQ_UNLOCK(ifsq); 3386 3387 if (stage != NULL) { 3388 if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) { 3389 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 3390 if (!avoid_start) { 3391 ifsq_stage_remove(head, stage); 3392 ifsq_ifstart_schedule(ifsq, 1); 3393 } 3394 goto done; 3395 } 3396 3397 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) { 3398 ifsq_stage_remove(head, stage); 3399 } else { 3400 stage->stg_cnt = 0; 3401 stage->stg_len = 0; 3402 } 3403 } 3404 3405 if (start) 3406 ifsq_ifstart_try(ifsq, 0); 3407 3408 done: 3409 crit_exit_quick(td); 3410 return error; 3411 } 3412 3413 void * 3414 ifa_create(int size) 3415 { 3416 struct ifaddr *ifa; 3417 int i; 3418 3419 KASSERT(size >= sizeof(*ifa), ("ifaddr size too small")); 3420 3421 ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO); 3422 3423 /* 3424 * Make ifa_container availabel on all CPUs, since they 3425 * could be accessed by any threads. 
3426 */ 3427 ifa->ifa_containers = 3428 kmalloc(ncpus * sizeof(struct ifaddr_container), 3429 M_IFADDR, 3430 M_INTWAIT | M_ZERO | M_CACHEALIGN); 3431 3432 ifa->ifa_ncnt = ncpus; 3433 for (i = 0; i < ncpus; ++i) { 3434 struct ifaddr_container *ifac = &ifa->ifa_containers[i]; 3435 3436 ifac->ifa_magic = IFA_CONTAINER_MAGIC; 3437 ifac->ifa = ifa; 3438 ifac->ifa_refcnt = 1; 3439 } 3440 #ifdef IFADDR_DEBUG 3441 kprintf("alloc ifa %p %d\n", ifa, size); 3442 #endif 3443 return ifa; 3444 } 3445 3446 void 3447 ifac_free(struct ifaddr_container *ifac, int cpu_id) 3448 { 3449 struct ifaddr *ifa = ifac->ifa; 3450 3451 KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC); 3452 KKASSERT(ifac->ifa_refcnt == 0); 3453 KASSERT(ifac->ifa_listmask == 0, 3454 ("ifa is still on %#x lists", ifac->ifa_listmask)); 3455 3456 ifac->ifa_magic = IFA_CONTAINER_DEAD; 3457 3458 #ifdef IFADDR_DEBUG_VERBOSE 3459 kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id); 3460 #endif 3461 3462 KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus, 3463 ("invalid # of ifac, %d", ifa->ifa_ncnt)); 3464 if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) { 3465 #ifdef IFADDR_DEBUG 3466 kprintf("free ifa %p\n", ifa); 3467 #endif 3468 kfree(ifa->ifa_containers, M_IFADDR); 3469 kfree(ifa, M_IFADDR); 3470 } 3471 } 3472 3473 static void 3474 ifa_iflink_dispatch(netmsg_t nmsg) 3475 { 3476 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3477 struct ifaddr *ifa = msg->ifa; 3478 struct ifnet *ifp = msg->ifp; 3479 int cpu = mycpuid; 3480 struct ifaddr_container *ifac; 3481 3482 crit_enter(); 3483 3484 ifac = &ifa->ifa_containers[cpu]; 3485 ASSERT_IFAC_VALID(ifac); 3486 KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0, 3487 ("ifaddr is on if_addrheads")); 3488 3489 ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD; 3490 if (msg->tail) 3491 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link); 3492 else 3493 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link); 3494 3495 crit_exit(); 3496 3497 netisr_forwardmsg_all(&nmsg->base, cpu + 1); 3498 } 3499 3500 void 3501 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail) 3502 { 3503 struct netmsg_ifaddr msg; 3504 3505 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3506 0, ifa_iflink_dispatch); 3507 msg.ifa = ifa; 3508 msg.ifp = ifp; 3509 msg.tail = tail; 3510 3511 netisr_domsg(&msg.base, 0); 3512 } 3513 3514 static void 3515 ifa_ifunlink_dispatch(netmsg_t nmsg) 3516 { 3517 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3518 struct ifaddr *ifa = msg->ifa; 3519 struct ifnet *ifp = msg->ifp; 3520 int cpu = mycpuid; 3521 struct ifaddr_container *ifac; 3522 3523 crit_enter(); 3524 3525 ifac = &ifa->ifa_containers[cpu]; 3526 ASSERT_IFAC_VALID(ifac); 3527 KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD, 3528 ("ifaddr is not on if_addrhead")); 3529 3530 TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link); 3531 ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD; 3532 3533 crit_exit(); 3534 3535 netisr_forwardmsg_all(&nmsg->base, cpu + 1); 3536 } 3537 3538 void 3539 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp) 3540 { 3541 struct netmsg_ifaddr msg; 3542 3543 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3544 0, ifa_ifunlink_dispatch); 3545 msg.ifa = ifa; 3546 msg.ifp = ifp; 3547 3548 netisr_domsg(&msg.base, 0); 3549 } 3550 3551 static void 3552 ifa_destroy_dispatch(netmsg_t nmsg) 3553 { 3554 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3555 3556 IFAFREE(msg->ifa); 3557 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1); 3558 } 3559 3560 void 3561 
ifa_destroy(struct ifaddr *ifa) 3562 { 3563 struct netmsg_ifaddr msg; 3564 3565 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3566 0, ifa_destroy_dispatch); 3567 msg.ifa = ifa; 3568 3569 netisr_domsg(&msg.base, 0); 3570 } 3571 3572 static void 3573 if_start_rollup(void) 3574 { 3575 struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid]; 3576 struct ifsubq_stage *stage; 3577 3578 crit_enter(); 3579 3580 while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) { 3581 struct ifaltq_subque *ifsq = stage->stg_subq; 3582 int is_sched = 0; 3583 3584 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED) 3585 is_sched = 1; 3586 ifsq_stage_remove(head, stage); 3587 3588 if (is_sched) { 3589 ifsq_ifstart_schedule(ifsq, 1); 3590 } else { 3591 int start = 0; 3592 3593 ALTQ_SQ_LOCK(ifsq); 3594 if (!ifsq_is_started(ifsq)) { 3595 /* 3596 * Hold the subqueue interlock of 3597 * ifnet.if_start 3598 */ 3599 ifsq_set_started(ifsq); 3600 start = 1; 3601 } 3602 ALTQ_SQ_UNLOCK(ifsq); 3603 3604 if (start) 3605 ifsq_ifstart_try(ifsq, 1); 3606 } 3607 KKASSERT((stage->stg_flags & 3608 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 3609 } 3610 3611 crit_exit(); 3612 } 3613 3614 static void 3615 ifnetinit(void *dummy __unused) 3616 { 3617 int i; 3618 3619 /* XXX netisr_ncpus */ 3620 for (i = 0; i < ncpus; ++i) 3621 TAILQ_INIT(&ifsubq_stage_heads[i].stg_head); 3622 netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART); 3623 } 3624 3625 void 3626 if_register_com_alloc(u_char type, 3627 if_com_alloc_t *a, if_com_free_t *f) 3628 { 3629 3630 KASSERT(if_com_alloc[type] == NULL, 3631 ("if_register_com_alloc: %d already registered", type)); 3632 KASSERT(if_com_free[type] == NULL, 3633 ("if_register_com_alloc: %d free already registered", type)); 3634 3635 if_com_alloc[type] = a; 3636 if_com_free[type] = f; 3637 } 3638 3639 void 3640 if_deregister_com_alloc(u_char type) 3641 { 3642 3643 KASSERT(if_com_alloc[type] != NULL, 3644 ("if_deregister_com_alloc: %d not registered", type)); 3645 KASSERT(if_com_free[type] != NULL, 3646 ("if_deregister_com_alloc: %d free not registered", type)); 3647 if_com_alloc[type] = NULL; 3648 if_com_free[type] = NULL; 3649 } 3650 3651 void 3652 ifq_set_maxlen(struct ifaltq *ifq, int len) 3653 { 3654 ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax); 3655 } 3656 3657 int 3658 ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused) 3659 { 3660 return ALTQ_SUBQ_INDEX_DEFAULT; 3661 } 3662 3663 int 3664 ifq_mapsubq_modulo(struct ifaltq *ifq, int cpuid) 3665 { 3666 3667 return (cpuid % ifq->altq_subq_mappriv); 3668 } 3669 3670 /* 3671 * Watchdog timeout. Process callback as appropriate. If we cannot 3672 * serialize the ifnet just try again on the next timeout. 3673 * 3674 * NOTE: The ifnet can adjust wd_timer while holding the serializer. We 3675 * can only safely adjust it under the same circumstances. 3676 */ 3677 static void 3678 ifsq_watchdog(void *arg) 3679 { 3680 struct ifsubq_watchdog *wd = arg; 3681 struct ifnet *ifp; 3682 int count; 3683 3684 /* 3685 * Fast track. Try to avoid acquiring the serializer when not 3686 * near the terminal count, unless asked to. If the atomic op 3687 * to decrement the count fails just retry on the next callout. 
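 *
 * (The timer itself is armed by the driver, typically by calling
 * ifsq_watchdog_set_count() when packets are handed to the hardware;
 * see the helpers further below.)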
3688 */ 3689 count = wd->wd_timer; 3690 cpu_ccfence(); 3691 if (count == 0) 3692 goto done; 3693 if (count > 2 && (wd->wd_flags & IF_WDOG_ALLTICKS) == 0) { 3694 (void)atomic_cmpset_int(&wd->wd_timer, count, count - 1); 3695 goto done; 3696 } 3697 3698 /* 3699 * Obtain the serializer and then re-test all wd_timer conditions 3700 * as it may have changed. NICs do not mess with wd_timer without 3701 * holding the serializer. 3702 * 3703 * If we are unable to obtain the serializer just retry the same 3704 * count on the next callout. 3705 * 3706 * - call watchdog in terminal count (0) 3707 * - call watchdog on last tick (1) if requested 3708 * - call watchdog on all ticks if requested 3709 */ 3710 ifp = ifsq_get_ifp(wd->wd_subq); 3711 if (ifnet_tryserialize_all(ifp) == 0) 3712 goto done; 3713 if (atomic_cmpset_int(&wd->wd_timer, count, count - 1)) { 3714 --count; 3715 if (count == 0 || 3716 (wd->wd_flags & IF_WDOG_ALLTICKS) || 3717 ((wd->wd_flags & IF_WDOG_LASTTICK) && count == 1)) { 3718 wd->wd_watchdog(wd->wd_subq); 3719 } 3720 } 3721 ifnet_deserialize_all(ifp); 3722 done: 3723 ifsq_watchdog_reset(wd); 3724 } 3725 3726 static void 3727 ifsq_watchdog_reset(struct ifsubq_watchdog *wd) 3728 { 3729 callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd, 3730 ifsq_get_cpuid(wd->wd_subq)); 3731 } 3732 3733 void 3734 ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq, 3735 ifsq_watchdog_t watchdog, int flags) 3736 { 3737 callout_init_mp(&wd->wd_callout); 3738 wd->wd_timer = 0; 3739 wd->wd_flags = flags; 3740 wd->wd_subq = ifsq; 3741 wd->wd_watchdog = watchdog; 3742 } 3743 3744 void 3745 ifsq_watchdog_start(struct ifsubq_watchdog *wd) 3746 { 3747 atomic_swap_int(&wd->wd_timer, 0); 3748 ifsq_watchdog_reset(wd); 3749 } 3750 3751 void 3752 ifsq_watchdog_stop(struct ifsubq_watchdog *wd) 3753 { 3754 atomic_swap_int(&wd->wd_timer, 0); 3755 callout_stop(&wd->wd_callout); 3756 } 3757 3758 void 3759 ifsq_watchdog_set_count(struct ifsubq_watchdog *wd, int count) 3760 { 3761 atomic_swap_int(&wd->wd_timer, count); 3762 } 3763 3764 void 3765 ifnet_lock(void) 3766 { 3767 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3768 ("try holding ifnet lock in netisr")); 3769 mtx_lock(&ifnet_mtx); 3770 } 3771 3772 void 3773 ifnet_unlock(void) 3774 { 3775 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3776 ("try holding ifnet lock in netisr")); 3777 mtx_unlock(&ifnet_mtx); 3778 } 3779 3780 static struct ifnet_array * 3781 ifnet_array_alloc(int count) 3782 { 3783 struct ifnet_array *arr; 3784 3785 arr = kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]), 3786 M_IFNET, M_WAITOK); 3787 arr->ifnet_count = count; 3788 3789 return arr; 3790 } 3791 3792 static void 3793 ifnet_array_free(struct ifnet_array *arr) 3794 { 3795 if (arr == &ifnet_array0) 3796 return; 3797 kfree(arr, M_IFNET); 3798 } 3799 3800 static struct ifnet_array * 3801 ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr) 3802 { 3803 struct ifnet_array *arr; 3804 int count, i; 3805 3806 KASSERT(old_arr->ifnet_count >= 0, 3807 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3808 count = old_arr->ifnet_count + 1; 3809 arr = ifnet_array_alloc(count); 3810 3811 /* 3812 * Save the old ifnet array and append this ifp to the end of 3813 * the new ifnet array. 
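 *
 * A brand new array is built instead of modifying the old one in place,
 * so that netisr threads which obtained the previous array through
 * ifnet_array_get() can keep iterating it without any locking.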
3814 */ 3815 for (i = 0; i < old_arr->ifnet_count; ++i) { 3816 KASSERT(old_arr->ifnet_arr[i] != ifp, 3817 ("%s is already in ifnet array", ifp->if_xname)); 3818 arr->ifnet_arr[i] = old_arr->ifnet_arr[i]; 3819 } 3820 KASSERT(i == count - 1, 3821 ("add %s, ifnet array index mismatch, should be %d, but got %d", 3822 ifp->if_xname, count - 1, i)); 3823 arr->ifnet_arr[i] = ifp; 3824 3825 return arr; 3826 } 3827 3828 static struct ifnet_array * 3829 ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr) 3830 { 3831 struct ifnet_array *arr; 3832 int count, i, idx, found = 0; 3833 3834 KASSERT(old_arr->ifnet_count > 0, 3835 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3836 count = old_arr->ifnet_count - 1; 3837 arr = ifnet_array_alloc(count); 3838 3839 /* 3840 * Save the old ifnet array, but skip this ifp. 3841 */ 3842 idx = 0; 3843 for (i = 0; i < old_arr->ifnet_count; ++i) { 3844 if (old_arr->ifnet_arr[i] == ifp) { 3845 KASSERT(!found, 3846 ("dup %s is in ifnet array", ifp->if_xname)); 3847 found = 1; 3848 continue; 3849 } 3850 KASSERT(idx < count, 3851 ("invalid ifnet array index %d, count %d", idx, count)); 3852 arr->ifnet_arr[idx] = old_arr->ifnet_arr[i]; 3853 ++idx; 3854 } 3855 KASSERT(found, ("%s is not in ifnet array", ifp->if_xname)); 3856 KASSERT(idx == count, 3857 ("del %s, ifnet array count mismatch, should be %d, but got %d ", 3858 ifp->if_xname, count, idx)); 3859 3860 return arr; 3861 } 3862 3863 const struct ifnet_array * 3864 ifnet_array_get(void) 3865 { 3866 const struct ifnet_array *ret; 3867 3868 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3869 ret = ifnet_array; 3870 /* Make sure 'ret' is really used. */ 3871 cpu_ccfence(); 3872 return (ret); 3873 } 3874 3875 int 3876 ifnet_array_isempty(void) 3877 { 3878 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3879 if (ifnet_array->ifnet_count == 0) 3880 return 1; 3881 else 3882 return 0; 3883 } 3884 3885 void 3886 ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp) 3887 { 3888 struct ifaddr *ifa; 3889 3890 memset(mark, 0, sizeof(*mark)); 3891 ifa = &mark->ifa; 3892 3893 mark->ifac.ifa = ifa; 3894 3895 ifa->ifa_addr = &mark->addr; 3896 ifa->ifa_dstaddr = &mark->dstaddr; 3897 ifa->ifa_netmask = &mark->netmask; 3898 ifa->ifa_ifp = ifp; 3899 } 3900 3901 static int 3902 if_ringcnt_fixup(int ring_cnt, int ring_cntmax) 3903 { 3904 3905 KASSERT(ring_cntmax > 0, ("invalid ring count max %d", ring_cntmax)); 3906 3907 if (ring_cnt <= 0 || ring_cnt > ring_cntmax) 3908 ring_cnt = ring_cntmax; 3909 if (ring_cnt > netisr_ncpus) 3910 ring_cnt = netisr_ncpus; 3911 return (ring_cnt); 3912 } 3913 3914 static void 3915 if_ringmap_set_grid(device_t dev, struct if_ringmap *rm, int grid) 3916 { 3917 int i, offset; 3918 3919 KASSERT(grid > 0, ("invalid if_ringmap grid %d", grid)); 3920 KASSERT(grid >= rm->rm_cnt, ("invalid if_ringmap grid %d, count %d", 3921 grid, rm->rm_cnt)); 3922 rm->rm_grid = grid; 3923 3924 offset = (rm->rm_grid * device_get_unit(dev)) % netisr_ncpus; 3925 for (i = 0; i < rm->rm_cnt; ++i) { 3926 rm->rm_cpumap[i] = offset + i; 3927 KASSERT(rm->rm_cpumap[i] < netisr_ncpus, 3928 ("invalid cpumap[%d] = %d, offset %d", i, 3929 rm->rm_cpumap[i], offset)); 3930 } 3931 } 3932 3933 static struct if_ringmap * 3934 if_ringmap_alloc_flags(device_t dev, int ring_cnt, int ring_cntmax, 3935 uint32_t flags) 3936 { 3937 struct if_ringmap *rm; 3938 int i, grid = 0, prev_grid; 3939 3940 ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax); 3941 rm = kmalloc(__offsetof(struct 
if_ringmap, rm_cpumap[ring_cnt]), 3942 M_DEVBUF, M_WAITOK | M_ZERO); 3943 3944 rm->rm_cnt = ring_cnt; 3945 if (flags & RINGMAP_FLAG_POWEROF2) 3946 rm->rm_cnt = 1 << (fls(rm->rm_cnt) - 1); 3947 3948 prev_grid = netisr_ncpus; 3949 for (i = 0; i < netisr_ncpus; ++i) { 3950 if (netisr_ncpus % (i + 1) != 0) 3951 continue; 3952 3953 grid = netisr_ncpus / (i + 1); 3954 if (rm->rm_cnt > grid) { 3955 grid = prev_grid; 3956 break; 3957 } 3958 3959 if (rm->rm_cnt > netisr_ncpus / (i + 2)) 3960 break; 3961 prev_grid = grid; 3962 } 3963 if_ringmap_set_grid(dev, rm, grid); 3964 3965 return (rm); 3966 } 3967 3968 struct if_ringmap * 3969 if_ringmap_alloc(device_t dev, int ring_cnt, int ring_cntmax) 3970 { 3971 3972 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3973 RINGMAP_FLAG_NONE)); 3974 } 3975 3976 struct if_ringmap * 3977 if_ringmap_alloc2(device_t dev, int ring_cnt, int ring_cntmax) 3978 { 3979 3980 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3981 RINGMAP_FLAG_POWEROF2)); 3982 } 3983 3984 void 3985 if_ringmap_free(struct if_ringmap *rm) 3986 { 3987 3988 kfree(rm, M_DEVBUF); 3989 } 3990 3991 /* 3992 * Align the two ringmaps. 3993 * 3994 * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 3995 * 3996 * Before: 3997 * 3998 * CPU 0 1 2 3 4 5 6 7 3999 * NIC_RX n0 n1 n2 n3 4000 * NIC_TX N0 N1 4001 * 4002 * After: 4003 * 4004 * CPU 0 1 2 3 4 5 6 7 4005 * NIC_RX n0 n1 n2 n3 4006 * NIC_TX N0 N1 4007 */ 4008 void 4009 if_ringmap_align(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 4010 { 4011 4012 if (rm0->rm_grid > rm1->rm_grid) 4013 if_ringmap_set_grid(dev, rm1, rm0->rm_grid); 4014 else if (rm0->rm_grid < rm1->rm_grid) 4015 if_ringmap_set_grid(dev, rm0, rm1->rm_grid); 4016 } 4017 4018 void 4019 if_ringmap_match(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 4020 { 4021 int subset_grid, cnt, divisor, mod, offset, i; 4022 struct if_ringmap *subset_rm, *rm; 4023 int old_rm0_grid, old_rm1_grid; 4024 4025 if (rm0->rm_grid == rm1->rm_grid) 4026 return; 4027 4028 /* Save grid for later use */ 4029 old_rm0_grid = rm0->rm_grid; 4030 old_rm1_grid = rm1->rm_grid; 4031 4032 if_ringmap_align(dev, rm0, rm1); 4033 4034 /* 4035 * Re-shuffle rings to get more even distribution. 4036 * 4037 * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 4038 * 4039 * CPU 0 1 2 3 4 5 6 7 8 9 10 11 4040 * 4041 * NIC_RX a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3 4042 * NIC_TX A0 A1 B0 B1 C0 C1 4043 * 4044 * NIC_RX d0 d1 d2 d3 e0 e1 e2 e3 f0 f1 f2 f3 4045 * NIC_TX D0 D1 E0 E1 F0 F1 4046 */ 4047 4048 if (rm0->rm_cnt >= (2 * old_rm1_grid)) { 4049 cnt = rm0->rm_cnt; 4050 subset_grid = old_rm1_grid; 4051 subset_rm = rm1; 4052 rm = rm0; 4053 } else if (rm1->rm_cnt > (2 * old_rm0_grid)) { 4054 cnt = rm1->rm_cnt; 4055 subset_grid = old_rm0_grid; 4056 subset_rm = rm0; 4057 rm = rm1; 4058 } else { 4059 /* No space to shuffle. 
*/ 4060 return; 4061 } 4062 4063 mod = cnt / subset_grid; 4064 KKASSERT(mod >= 2); 4065 divisor = netisr_ncpus / rm->rm_grid; 4066 offset = ((device_get_unit(dev) / divisor) % mod) * subset_grid; 4067 4068 for (i = 0; i < subset_rm->rm_cnt; ++i) { 4069 subset_rm->rm_cpumap[i] += offset; 4070 KASSERT(subset_rm->rm_cpumap[i] < netisr_ncpus, 4071 ("match: invalid cpumap[%d] = %d, offset %d", 4072 i, subset_rm->rm_cpumap[i], offset)); 4073 } 4074 #ifdef INVARIANTS 4075 for (i = 0; i < subset_rm->rm_cnt; ++i) { 4076 int j; 4077 4078 for (j = 0; j < rm->rm_cnt; ++j) { 4079 if (rm->rm_cpumap[j] == subset_rm->rm_cpumap[i]) 4080 break; 4081 } 4082 KASSERT(j < rm->rm_cnt, 4083 ("subset cpumap[%d] = %d not found in superset", 4084 i, subset_rm->rm_cpumap[i])); 4085 } 4086 #endif 4087 } 4088 4089 int 4090 if_ringmap_count(const struct if_ringmap *rm) 4091 { 4092 4093 return (rm->rm_cnt); 4094 } 4095 4096 int 4097 if_ringmap_cpumap(const struct if_ringmap *rm, int ring) 4098 { 4099 4100 KASSERT(ring >= 0 && ring < rm->rm_cnt, ("invalid ring %d", ring)); 4101 return (rm->rm_cpumap[ring]); 4102 } 4103 4104 void 4105 if_ringmap_rdrtable(const struct if_ringmap *rm, int table[], int table_nent) 4106 { 4107 int i, grid_idx, grid_cnt, patch_off, patch_cnt, ncopy; 4108 4109 KASSERT(table_nent > 0 && (table_nent & NETISR_CPUMASK) == 0, 4110 ("invalid redirect table entries %d", table_nent)); 4111 4112 grid_idx = 0; 4113 for (i = 0; i < NETISR_CPUMAX; ++i) { 4114 table[i] = grid_idx++ % rm->rm_cnt; 4115 4116 if (grid_idx == rm->rm_grid) 4117 grid_idx = 0; 4118 } 4119 4120 /* 4121 * Make the ring distributed more evenly for the remainder 4122 * of each grid. 4123 * 4124 * e.g. 12 netisrs, rm contains 8 rings. 4125 * 4126 * Redirect table before: 4127 * 4128 * 0 1 2 3 4 5 6 7 0 1 2 3 0 1 2 3 4129 * 4 5 6 7 0 1 2 3 0 1 2 3 4 5 6 7 4130 * 0 1 2 3 0 1 2 3 4 5 6 7 0 1 2 3 4131 * .... 4132 * 4133 * Redirect table after being patched (pX, patched entries): 4134 * 4135 * 0 1 2 3 4 5 6 7 p0 p1 p2 p3 0 1 2 3 4136 * 4 5 6 7 p4 p5 p6 p7 0 1 2 3 4 5 6 7 4137 * p0 p1 p2 p3 0 1 2 3 4 5 6 7 p4 p5 p6 p7 4138 * .... 
4139 */ 4140 patch_cnt = rm->rm_grid % rm->rm_cnt; 4141 if (patch_cnt == 0) 4142 goto done; 4143 patch_off = rm->rm_grid - (rm->rm_grid % rm->rm_cnt); 4144 4145 grid_cnt = roundup(NETISR_CPUMAX, rm->rm_grid) / rm->rm_grid; 4146 grid_idx = 0; 4147 for (i = 0; i < grid_cnt; ++i) { 4148 int j; 4149 4150 for (j = 0; j < patch_cnt; ++j) { 4151 int fix_idx; 4152 4153 fix_idx = (i * rm->rm_grid) + patch_off + j; 4154 if (fix_idx >= NETISR_CPUMAX) 4155 goto done; 4156 table[fix_idx] = grid_idx++ % rm->rm_cnt; 4157 } 4158 } 4159 done: 4160 /* 4161 * If the device supports larger redirect table, duplicate 4162 * the first NETISR_CPUMAX entries to the rest of the table, 4163 * so that it matches upper layer's expectation: 4164 * (hash & NETISR_CPUMASK) % netisr_ncpus 4165 */ 4166 ncopy = table_nent / NETISR_CPUMAX; 4167 for (i = 1; i < ncopy; ++i) { 4168 memcpy(&table[i * NETISR_CPUMAX], table, 4169 NETISR_CPUMAX * sizeof(table[0])); 4170 } 4171 if (if_ringmap_dumprdr) { 4172 for (i = 0; i < table_nent; ++i) { 4173 if (i != 0 && i % 16 == 0) 4174 kprintf("\n"); 4175 kprintf("%03d ", table[i]); 4176 } 4177 kprintf("\n"); 4178 } 4179 } 4180 4181 int 4182 if_ringmap_cpumap_sysctl(SYSCTL_HANDLER_ARGS) 4183 { 4184 struct if_ringmap *rm = arg1; 4185 int i, error = 0; 4186 4187 for (i = 0; i < rm->rm_cnt; ++i) { 4188 int cpu = rm->rm_cpumap[i]; 4189 4190 error = SYSCTL_OUT(req, &cpu, sizeof(cpu)); 4191 if (error) 4192 break; 4193 } 4194 return (error); 4195 } 4196
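
/*
 * Sketch of how a multi-ring NIC driver is expected to consume the
 * if_ringmap API above; the driver-side names (sc, rx_rings, table,
 * WANTED_RINGS, MAX_HW_RINGS) are hypothetical:
 *
 *	rm = if_ringmap_alloc(dev, WANTED_RINGS, MAX_HW_RINGS);
 *	for (i = 0; i < if_ringmap_count(rm); ++i)
 *		sc->rx_rings[i].cpuid = if_ringmap_cpumap(rm, i);
 *	if_ringmap_rdrtable(rm, table, NETISR_CPUMAX);
 *	(program the hardware RSS redirect table from table[], and call
 *	if_ringmap_free(rm) when the device detaches)
 */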