1 /* 2 * Copyright (c) 1980, 1986, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)if.c 8.3 (Berkeley) 1/4/94 30 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $ 31 */ 32 33 #include "opt_inet6.h" 34 #include "opt_inet.h" 35 #include "opt_ifpoll.h" 36 37 #include <sys/param.h> 38 #include <sys/malloc.h> 39 #include <sys/mbuf.h> 40 #include <sys/systm.h> 41 #include <sys/proc.h> 42 #include <sys/caps.h> 43 #include <sys/protosw.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/socketops.h> 47 #include <sys/kernel.h> 48 #include <sys/ktr.h> 49 #include <sys/mutex.h> 50 #include <sys/lock.h> 51 #include <sys/sockio.h> 52 #include <sys/syslog.h> 53 #include <sys/sysctl.h> 54 #include <sys/domain.h> 55 #include <sys/thread.h> 56 #include <sys/serialize.h> 57 #include <sys/bus.h> 58 #include <sys/jail.h> 59 60 #include <sys/thread2.h> 61 #include <sys/msgport2.h> 62 #include <sys/mutex2.h> 63 64 #include <net/if.h> 65 #include <net/if_arp.h> 66 #include <net/if_dl.h> 67 #include <net/if_types.h> 68 #include <net/if_var.h> 69 #include <net/if_ringmap.h> 70 #include <net/ifq_var.h> 71 #include <net/radix.h> 72 #include <net/route.h> 73 #include <net/if_clone.h> 74 #include <net/netisr2.h> 75 #include <net/netmsg2.h> 76 77 #include <machine/atomic.h> 78 #include <machine/stdarg.h> 79 #include <machine/smp.h> 80 81 #if defined(INET) || defined(INET6) 82 #include <netinet/in.h> 83 #include <netinet/in_var.h> 84 #include <netinet/if_ether.h> 85 #ifdef INET6 86 #include <netinet6/in6_var.h> 87 #include <netinet6/in6_ifattach.h> 88 #endif /* INET6 */ 89 #endif /* INET || INET6 */ 90 91 struct netmsg_ifaddr { 92 struct netmsg_base base; 93 struct ifaddr *ifa; 94 struct ifnet *ifp; 95 int tail; 96 }; 97 98 struct ifsubq_stage_head { 99 TAILQ_HEAD(, ifsubq_stage) stg_head; 100 } __cachealign; 101 102 struct if_ringmap { 103 int rm_cnt; 104 int rm_grid; 105 int rm_cpumap[]; 106 }; 107 108 #define RINGMAP_FLAG_NONE 0x0 109 #define 
RINGMAP_FLAG_POWEROF2 0x1 110 111 /* 112 * System initialization 113 */ 114 static void if_attachdomain(void *); 115 static void if_attachdomain1(struct ifnet *); 116 static int ifconf(u_long, caddr_t, struct ucred *); 117 static void ifinit(void *); 118 static void ifnetinit(void *); 119 static void if_slowtimo(void *); 120 static void link_rtrequest(int, struct rtentry *); 121 static int if_rtdel(struct radix_node *, void *); 122 static void if_slowtimo_dispatch(netmsg_t); 123 124 /* Helper functions */ 125 static void ifsq_watchdog_reset(struct ifsubq_watchdog *); 126 static int if_delmulti_serialized(struct ifnet *, struct sockaddr *); 127 static struct ifnet_array *ifnet_array_alloc(int); 128 static void ifnet_array_free(struct ifnet_array *); 129 static struct ifnet_array *ifnet_array_add(struct ifnet *, 130 const struct ifnet_array *); 131 static struct ifnet_array *ifnet_array_del(struct ifnet *, 132 const struct ifnet_array *); 133 static struct ifg_group *if_creategroup(const char *); 134 static int if_destroygroup(struct ifg_group *); 135 static int if_delgroup_locked(struct ifnet *, const char *); 136 static int if_getgroups(struct ifgroupreq *, struct ifnet *); 137 static int if_getgroupmembers(struct ifgroupreq *); 138 139 #ifdef INET6 140 /* 141 * XXX: declare here to avoid to include many inet6 related files.. 142 * should be more generalized? 143 */ 144 extern void nd6_setmtu(struct ifnet *); 145 #endif 146 147 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); 148 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); 149 SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap"); 150 151 static int ifsq_stage_cntmax = 16; 152 TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax); 153 SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW, 154 &ifsq_stage_cntmax, 0, "ifq staging packet count max"); 155 156 static int if_stats_compat = 0; 157 SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW, 158 &if_stats_compat, 0, "Compat the old ifnet stats"); 159 160 static int if_ringmap_dumprdr = 0; 161 SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW, 162 &if_ringmap_dumprdr, 0, "dump redirect table"); 163 164 /* Interface description */ 165 static unsigned int ifdescr_maxlen = 1024; 166 SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, 167 &ifdescr_maxlen, 0, 168 "administrative maximum length for interface description"); 169 170 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL); 171 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL); 172 173 static if_com_alloc_t *if_com_alloc[256]; 174 static if_com_free_t *if_com_free[256]; 175 176 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); 177 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); 178 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure"); 179 MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); 180 181 int ifqmaxlen = IFQ_MAXLEN; 182 struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet); 183 struct ifgrouphead ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head); 184 static struct lock ifgroup_lock; 185 186 static struct ifnet_array ifnet_array0; 187 static struct ifnet_array *ifnet_array = &ifnet_array0; 188 189 static struct callout if_slowtimo_timer; 190 static struct netmsg_base if_slowtimo_netmsg; 191 192 int if_index = 0; 193 struct ifnet **ifindex2ifnet = NULL; 194 static struct mtx ifnet_mtx = MTX_INITIALIZER("ifnet"); 195 196 static struct ifsubq_stage_head 
ifsubq_stage_heads[MAXCPU]; 197 198 #ifdef notyet 199 #define IFQ_KTR_STRING "ifq=%p" 200 #define IFQ_KTR_ARGS struct ifaltq *ifq 201 #ifndef KTR_IFQ 202 #define KTR_IFQ KTR_ALL 203 #endif 204 KTR_INFO_MASTER(ifq); 205 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS); 206 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS); 207 #define logifq(name, arg) KTR_LOG(ifq_ ## name, arg) 208 209 #define IF_START_KTR_STRING "ifp=%p" 210 #define IF_START_KTR_ARGS struct ifnet *ifp 211 #ifndef KTR_IF_START 212 #define KTR_IF_START KTR_ALL 213 #endif 214 KTR_INFO_MASTER(if_start); 215 KTR_INFO(KTR_IF_START, if_start, run, 0, 216 IF_START_KTR_STRING, IF_START_KTR_ARGS); 217 KTR_INFO(KTR_IF_START, if_start, sched, 1, 218 IF_START_KTR_STRING, IF_START_KTR_ARGS); 219 KTR_INFO(KTR_IF_START, if_start, avoid, 2, 220 IF_START_KTR_STRING, IF_START_KTR_ARGS); 221 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3, 222 IF_START_KTR_STRING, IF_START_KTR_ARGS); 223 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4, 224 IF_START_KTR_STRING, IF_START_KTR_ARGS); 225 #define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg) 226 #endif /* notyet */ 227 228 /* 229 * Network interface utility routines. 230 * 231 * Routines with ifa_ifwith* names take sockaddr *'s as 232 * parameters. 233 */ 234 /* ARGSUSED */ 235 static void 236 ifinit(void *dummy) 237 { 238 lockinit(&ifgroup_lock, "ifgroup", 0, 0); 239 240 callout_init_mp(&if_slowtimo_timer); 241 netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport, 242 MSGF_PRIORITY, if_slowtimo_dispatch); 243 244 /* Start if_slowtimo */ 245 lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg); 246 } 247 248 static void 249 ifsq_ifstart_ipifunc(void *arg) 250 { 251 struct ifaltq_subque *ifsq = arg; 252 struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid); 253 254 crit_enter(); 255 if (lmsg->ms_flags & MSGF_DONE) 256 lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg); 257 crit_exit(); 258 } 259 260 static __inline void 261 ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 262 { 263 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 264 TAILQ_REMOVE(&head->stg_head, stage, stg_link); 265 stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED); 266 stage->stg_cnt = 0; 267 stage->stg_len = 0; 268 } 269 270 static __inline void 271 ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 272 { 273 KKASSERT((stage->stg_flags & 274 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 275 stage->stg_flags |= IFSQ_STAGE_FLAG_QUED; 276 TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link); 277 } 278 279 /* 280 * Schedule ifnet.if_start on the subqueue owner CPU 281 */ 282 static void 283 ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force) 284 { 285 int cpu; 286 287 if (!force && curthread->td_type == TD_TYPE_NETISR && 288 ifsq_stage_cntmax > 0) { 289 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); 290 291 stage->stg_cnt = 0; 292 stage->stg_len = 0; 293 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 294 ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage); 295 stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED; 296 return; 297 } 298 299 cpu = ifsq_get_cpuid(ifsq); 300 if (cpu != mycpuid) 301 lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq); 302 else 303 ifsq_ifstart_ipifunc(ifsq); 304 } 305 306 /* 307 * NOTE: 308 * This function will release ifnet.if_start subqueue interlock, 309 * if ifnet.if_start for the subqueue does not need to be scheduled 310 */ 
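/*
 * Illustrative caller flow for the interlock described above (a condensed
 * sketch of ifsq_devstart() further below, not additional functionality):
 *
 *	ALTQ_SQ_LOCK(ifsq);
 *	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
 *		ALTQ_SQ_UNLOCK(ifsq);
 *		return;
 *	}
 *	ifsq_set_started(ifsq);
 *	ALTQ_SQ_UNLOCK(ifsq);
 *
 *	ifp->if_start(ifp, ifsq);
 *	running = (ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq);
 *	if (ifsq_ifstart_need_schedule(ifsq, running))
 *		ifsq_ifstart_schedule(ifsq, 0);
 */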
311 static __inline int 312 ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running) 313 { 314 if (!running || ifsq_is_empty(ifsq) 315 #ifdef ALTQ 316 || ifsq->ifsq_altq->altq_tbr != NULL 317 #endif 318 ) { 319 ALTQ_SQ_LOCK(ifsq); 320 /* 321 * ifnet.if_start subqueue interlock is released, if: 322 * 1) Hardware can not take any packets, due to 323 * o interface is marked down 324 * o hardware queue is full (ifsq_is_oactive) 325 * Under the second situation, hardware interrupt 326 * or polling(4) will call/schedule ifnet.if_start 327 * on the subqueue when hardware queue is ready 328 * 2) There is no packet in the subqueue. 329 * Further ifq_dispatch or ifq_handoff will call/ 330 * schedule ifnet.if_start on the subqueue. 331 * 3) TBR is used and it does not allow further 332 * dequeueing. 333 * TBR callout will call ifnet.if_start on the 334 * subqueue. 335 */ 336 if (!running || !ifsq_data_ready(ifsq)) { 337 ifsq_clr_started(ifsq); 338 ALTQ_SQ_UNLOCK(ifsq); 339 return 0; 340 } 341 ALTQ_SQ_UNLOCK(ifsq); 342 } 343 return 1; 344 } 345 346 static void 347 ifsq_ifstart_dispatch(netmsg_t msg) 348 { 349 struct lwkt_msg *lmsg = &msg->base.lmsg; 350 struct ifaltq_subque *ifsq = lmsg->u.ms_resultp; 351 struct ifnet *ifp = ifsq_get_ifp(ifsq); 352 struct globaldata *gd = mycpu; 353 int running = 0, need_sched; 354 355 crit_enter_gd(gd); 356 357 lwkt_replymsg(lmsg, 0); /* reply ASAP */ 358 359 if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) { 360 /* 361 * We need to chase the subqueue owner CPU change. 362 */ 363 ifsq_ifstart_schedule(ifsq, 1); 364 crit_exit_gd(gd); 365 return; 366 } 367 368 ifsq_serialize_hw(ifsq); 369 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 370 ifp->if_start(ifp, ifsq); 371 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 372 running = 1; 373 } 374 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 375 ifsq_deserialize_hw(ifsq); 376 377 if (need_sched) { 378 /* 379 * More data need to be transmitted, ifnet.if_start is 380 * scheduled on the subqueue owner CPU, and we keep going. 381 * NOTE: ifnet.if_start subqueue interlock is not released. 382 */ 383 ifsq_ifstart_schedule(ifsq, 0); 384 } 385 386 crit_exit_gd(gd); 387 } 388 389 /* Device driver ifnet.if_start helper function */ 390 void 391 ifsq_devstart(struct ifaltq_subque *ifsq) 392 { 393 struct ifnet *ifp = ifsq_get_ifp(ifsq); 394 int running = 0; 395 396 ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq); 397 398 ALTQ_SQ_LOCK(ifsq); 399 if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) { 400 ALTQ_SQ_UNLOCK(ifsq); 401 return; 402 } 403 ifsq_set_started(ifsq); 404 ALTQ_SQ_UNLOCK(ifsq); 405 406 ifp->if_start(ifp, ifsq); 407 408 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 409 running = 1; 410 411 if (ifsq_ifstart_need_schedule(ifsq, running)) { 412 /* 413 * More data need to be transmitted, ifnet.if_start is 414 * scheduled on ifnet's CPU, and we keep going. 415 * NOTE: ifnet.if_start interlock is not released. 
416 */ 417 ifsq_ifstart_schedule(ifsq, 0); 418 } 419 } 420 421 void 422 if_devstart(struct ifnet *ifp) 423 { 424 ifsq_devstart(ifq_get_subq_default(&ifp->if_snd)); 425 } 426 427 /* Device driver ifnet.if_start schedule helper function */ 428 void 429 ifsq_devstart_sched(struct ifaltq_subque *ifsq) 430 { 431 ifsq_ifstart_schedule(ifsq, 1); 432 } 433 434 void 435 if_devstart_sched(struct ifnet *ifp) 436 { 437 ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd)); 438 } 439 440 static void 441 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 442 { 443 lwkt_serialize_enter(ifp->if_serializer); 444 } 445 446 static void 447 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 448 { 449 lwkt_serialize_exit(ifp->if_serializer); 450 } 451 452 static int 453 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 454 { 455 return lwkt_serialize_try(ifp->if_serializer); 456 } 457 458 #ifdef INVARIANTS 459 static void 460 if_default_serialize_assert(struct ifnet *ifp, 461 enum ifnet_serialize slz __unused, 462 boolean_t serialized) 463 { 464 if (serialized) 465 ASSERT_SERIALIZED(ifp->if_serializer); 466 else 467 ASSERT_NOT_SERIALIZED(ifp->if_serializer); 468 } 469 #endif 470 471 /* 472 * Attach an interface to the list of "active" interfaces. 473 * 474 * The serializer is optional. 475 */ 476 void 477 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer) 478 { 479 unsigned socksize; 480 int namelen, masklen; 481 struct sockaddr_dl *sdl, *sdl_addr; 482 struct ifaddr *ifa; 483 struct ifaltq *ifq; 484 struct ifnet **old_ifindex2ifnet = NULL; 485 struct ifnet_array *old_ifnet_array; 486 int i, q, qlen; 487 char qlenname[64]; 488 489 static int if_indexlim = 8; 490 491 if (ifp->if_serialize != NULL) { 492 KASSERT(ifp->if_deserialize != NULL && 493 ifp->if_tryserialize != NULL && 494 ifp->if_serialize_assert != NULL, 495 ("serialize functions are partially setup")); 496 497 /* 498 * If the device supplies serialize functions, 499 * then clear if_serializer to catch any invalid 500 * usage of this field. 501 */ 502 KASSERT(serializer == NULL, 503 ("both serialize functions and default serializer " 504 "are supplied")); 505 ifp->if_serializer = NULL; 506 } else { 507 KASSERT(ifp->if_deserialize == NULL && 508 ifp->if_tryserialize == NULL && 509 ifp->if_serialize_assert == NULL, 510 ("serialize functions are partially setup")); 511 ifp->if_serialize = if_default_serialize; 512 ifp->if_deserialize = if_default_deserialize; 513 ifp->if_tryserialize = if_default_tryserialize; 514 #ifdef INVARIANTS 515 ifp->if_serialize_assert = if_default_serialize_assert; 516 #endif 517 518 /* 519 * The serializer can be passed in from the device, 520 * allowing the same serializer to be used for both 521 * the interrupt interlock and the device queue. 522 * If not specified, the netif structure will use an 523 * embedded serializer. 524 */ 525 if (serializer == NULL) { 526 serializer = &ifp->if_default_serializer; 527 lwkt_serialize_init(serializer); 528 } 529 ifp->if_serializer = serializer; 530 } 531 532 /* 533 * Make if_addrhead available on all CPUs, since they 534 * could be accessed by any threads. 
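	 *
	 * Illustrative read side (a sketch of the pattern used throughout
	 * this file): a thread only ever walks the list that belongs to its
	 * own CPU, so no extra interlock is needed for the traversal:
	 *
	 *	struct ifaddr_container *ifac;
	 *
	 *	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link)
	 *		(void)ifac->ifa;	/* inspect the ifaddr */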
535 */ 536 ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead), 537 M_IFADDR, M_WAITOK | M_ZERO); 538 for (i = 0; i < ncpus; ++i) 539 TAILQ_INIT(&ifp->if_addrheads[i]); 540 541 TAILQ_INIT(&ifp->if_multiaddrs); 542 TAILQ_INIT(&ifp->if_groups); 543 getmicrotime(&ifp->if_lastchange); 544 if_addgroup(ifp, IFG_ALL); 545 546 /* 547 * create a Link Level name for this device 548 */ 549 namelen = strlen(ifp->if_xname); 550 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; 551 socksize = masklen + ifp->if_addrlen; 552 if (socksize < sizeof(*sdl)) 553 socksize = sizeof(*sdl); 554 socksize = RT_ROUNDUP(socksize); 555 ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize); 556 sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1); 557 sdl->sdl_len = socksize; 558 sdl->sdl_family = AF_LINK; 559 bcopy(ifp->if_xname, sdl->sdl_data, namelen); 560 sdl->sdl_nlen = namelen; 561 sdl->sdl_type = ifp->if_type; 562 ifp->if_lladdr = ifa; 563 ifa->ifa_ifp = ifp; 564 ifa->ifa_rtrequest = link_rtrequest; 565 ifa->ifa_addr = (struct sockaddr *)sdl; 566 sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); 567 ifa->ifa_netmask = (struct sockaddr *)sdl; 568 sdl->sdl_len = masklen; 569 while (namelen != 0) 570 sdl->sdl_data[--namelen] = 0xff; 571 ifa_iflink(ifa, ifp, 0 /* Insert head */); 572 573 /* 574 * Make if_data available on all CPUs, since they could 575 * be updated by hardware interrupt routing, which could 576 * be bound to any CPU. 577 */ 578 ifp->if_data_pcpu = kmalloc(ncpus * sizeof(struct ifdata_pcpu), 579 M_DEVBUF, 580 M_WAITOK | M_ZERO | M_CACHEALIGN); 581 582 if (ifp->if_mapsubq == NULL) 583 ifp->if_mapsubq = ifq_mapsubq_default; 584 585 ifq = &ifp->if_snd; 586 ifq->altq_type = 0; 587 ifq->altq_disc = NULL; 588 ifq->altq_flags &= ALTQF_CANTCHANGE; 589 ifq->altq_tbr = NULL; 590 ifq->altq_ifp = ifp; 591 592 if (ifq->altq_subq_cnt <= 0) 593 ifq->altq_subq_cnt = 1; 594 ifq->altq_subq = 595 kmalloc(ifq->altq_subq_cnt * sizeof(struct ifaltq_subque), 596 M_DEVBUF, 597 M_WAITOK | M_ZERO | M_CACHEALIGN); 598 599 if (ifq->altq_maxlen == 0) { 600 if_printf(ifp, "driver didn't set altq_maxlen\n"); 601 ifq_set_maxlen(ifq, ifqmaxlen); 602 } 603 604 /* Allow user to override driver's setting. */ 605 ksnprintf(qlenname, sizeof(qlenname), "net.%s.qlenmax", ifp->if_xname); 606 qlen = -1; 607 TUNABLE_INT_FETCH(qlenname, &qlen); 608 if (qlen > 0) { 609 if_printf(ifp, "qlenmax -> %d\n", qlen); 610 ifq_set_maxlen(ifq, qlen); 611 } 612 613 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 614 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 615 616 ALTQ_SQ_LOCK_INIT(ifsq); 617 ifsq->ifsq_index = q; 618 619 ifsq->ifsq_altq = ifq; 620 ifsq->ifsq_ifp = ifp; 621 622 ifsq->ifsq_maxlen = ifq->altq_maxlen; 623 ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES; 624 ifsq->ifsq_prepended = NULL; 625 ifsq->ifsq_started = 0; 626 ifsq->ifsq_hw_oactive = 0; 627 ifsq_set_cpuid(ifsq, 0); 628 if (ifp->if_serializer != NULL) 629 ifsq_set_hw_serialize(ifsq, ifp->if_serializer); 630 631 /* XXX: netisr_ncpus */ 632 ifsq->ifsq_stage = 633 kmalloc(ncpus * sizeof(struct ifsubq_stage), 634 M_DEVBUF, 635 M_WAITOK | M_ZERO | M_CACHEALIGN); 636 for (i = 0; i < ncpus; ++i) 637 ifsq->ifsq_stage[i].stg_subq = ifsq; 638 639 /* 640 * Allocate one if_start message for each CPU, since 641 * the hardware TX ring could be assigned to any CPU. 642 * 643 * NOTE: 644 * If the hardware TX ring polling CPU and the hardware 645 * TX ring interrupt CPU are same, one if_start message 646 * should be enough. 
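		 *
		 * Consumer sketch (this is what ifsq_ifstart_ipifunc()
		 * above does with these per-CPU messages):
		 *
		 *	struct lwkt_msg *lmsg;
		 *
		 *	lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);
		 *	if (lmsg->ms_flags & MSGF_DONE)
		 *		lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid),
		 *		    lmsg);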
647 */ 648 ifsq->ifsq_ifstart_nmsg = 649 kmalloc(ncpus * sizeof(struct netmsg_base), 650 M_LWKTMSG, M_WAITOK); 651 for (i = 0; i < ncpus; ++i) { 652 netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL, 653 &netisr_adone_rport, 0, ifsq_ifstart_dispatch); 654 ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq; 655 } 656 } 657 ifq_set_classic(ifq); 658 659 /* 660 * Increase mbuf cluster/jcluster limits for the mbufs that 661 * could sit on the device queues for quite some time. 662 */ 663 if (ifp->if_nmbclusters > 0) 664 mcl_inclimit(ifp->if_nmbclusters); 665 if (ifp->if_nmbjclusters > 0) 666 mjcl_inclimit(ifp->if_nmbjclusters); 667 668 /* 669 * Install this ifp into ifindex2inet, ifnet queue and ifnet 670 * array after it is setup. 671 * 672 * Protect ifindex2ifnet, ifnet queue and ifnet array changes 673 * by ifnet lock, so that non-netisr threads could get a 674 * consistent view. 675 */ 676 ifnet_lock(); 677 678 /* Don't update if_index until ifindex2ifnet is setup */ 679 ifp->if_index = if_index + 1; 680 sdl_addr->sdl_index = ifp->if_index; 681 682 /* 683 * Install this ifp into ifindex2ifnet 684 */ 685 if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) { 686 unsigned int n; 687 struct ifnet **q; 688 689 /* 690 * Grow ifindex2ifnet 691 */ 692 if_indexlim <<= 1; 693 n = if_indexlim * sizeof(*q); 694 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO); 695 if (ifindex2ifnet != NULL) { 696 bcopy(ifindex2ifnet, q, n/2); 697 /* Free old ifindex2ifnet after sync all netisrs */ 698 old_ifindex2ifnet = ifindex2ifnet; 699 } 700 ifindex2ifnet = q; 701 } 702 ifindex2ifnet[ifp->if_index] = ifp; 703 /* 704 * Update if_index after this ifp is installed into ifindex2ifnet, 705 * so that netisrs could get a consistent view of ifindex2ifnet. 706 */ 707 cpu_sfence(); 708 if_index = ifp->if_index; 709 710 /* 711 * Install this ifp into ifnet array. 712 */ 713 /* Free old ifnet array after sync all netisrs */ 714 old_ifnet_array = ifnet_array; 715 ifnet_array = ifnet_array_add(ifp, old_ifnet_array); 716 717 /* 718 * Install this ifp into ifnet queue. 719 */ 720 TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link); 721 722 ifnet_unlock(); 723 724 /* 725 * Sync all netisrs so that the old ifindex2ifnet and ifnet array 726 * are no longer accessed and we can free them safely later on. 727 */ 728 netmsg_service_sync(); 729 if (old_ifindex2ifnet != NULL) 730 kfree(old_ifindex2ifnet, M_IFADDR); 731 ifnet_array_free(old_ifnet_array); 732 733 if (!SLIST_EMPTY(&domains)) 734 if_attachdomain1(ifp); 735 736 /* Announce the interface. 
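	 *
	 * Other subsystems can watch for this via EVENTHANDLER_REGISTER();
	 * a minimal sketch (the hook name is hypothetical):
	 *
	 *	static void
	 *	my_attach_hook(void *arg, struct ifnet *ifp)
	 *	{
	 *		...
	 *	}
	 *	EVENTHANDLER_REGISTER(ifnet_attach_event, my_attach_hook,
	 *	    NULL, EVENTHANDLER_PRI_ANY);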
*/ 737 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 738 devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); 739 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 740 } 741 742 static void 743 if_attachdomain(void *dummy) 744 { 745 struct ifnet *ifp; 746 747 ifnet_lock(); 748 TAILQ_FOREACH(ifp, &ifnetlist, if_list) 749 if_attachdomain1(ifp); 750 ifnet_unlock(); 751 } 752 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, 753 if_attachdomain, NULL); 754 755 static void 756 if_attachdomain1(struct ifnet *ifp) 757 { 758 struct domain *dp; 759 760 crit_enter(); 761 762 /* address family dependent data region */ 763 bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); 764 SLIST_FOREACH(dp, &domains, dom_next) 765 if (dp->dom_ifattach) 766 ifp->if_afdata[dp->dom_family] = 767 (*dp->dom_ifattach)(ifp); 768 crit_exit(); 769 } 770 771 /* 772 * Purge all addresses whose type is _not_ AF_LINK 773 */ 774 static void 775 if_purgeaddrs_nolink_dispatch(netmsg_t nmsg) 776 { 777 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp; 778 struct ifaddr_container *ifac, *next; 779 780 ASSERT_NETISR0; 781 782 /* 783 * The ifaddr processing in the following loop will block, 784 * however, this function is called in netisr0, in which 785 * ifaddr list changes happen, so we don't care about the 786 * blockness of the ifaddr processing here. 787 */ 788 TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid], 789 ifa_link, next) { 790 struct ifaddr *ifa = ifac->ifa; 791 792 /* Ignore marker */ 793 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 794 continue; 795 796 /* Leave link ifaddr as it is */ 797 if (ifa->ifa_addr->sa_family == AF_LINK) 798 continue; 799 #ifdef INET 800 /* XXX: Ugly!! ad hoc just for INET */ 801 if (ifa->ifa_addr->sa_family == AF_INET) { 802 struct ifaliasreq ifr; 803 struct sockaddr_in saved_addr, saved_dst; 804 #ifdef IFADDR_DEBUG_VERBOSE 805 int i; 806 807 kprintf("purge in4 addr %p: ", ifa); 808 for (i = 0; i < ncpus; ++i) { 809 kprintf("%d ", 810 ifa->ifa_containers[i].ifa_refcnt); 811 } 812 kprintf("\n"); 813 #endif 814 815 /* Save information for panic. 
*/ 816 memcpy(&saved_addr, ifa->ifa_addr, sizeof(saved_addr)); 817 if (ifa->ifa_dstaddr != NULL) { 818 memcpy(&saved_dst, ifa->ifa_dstaddr, 819 sizeof(saved_dst)); 820 } else { 821 memset(&saved_dst, 0, sizeof(saved_dst)); 822 } 823 824 bzero(&ifr, sizeof ifr); 825 ifr.ifra_addr = *ifa->ifa_addr; 826 if (ifa->ifa_dstaddr) 827 ifr.ifra_broadaddr = *ifa->ifa_dstaddr; 828 if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp, 829 NULL) == 0) 830 continue; 831 832 /* MUST NOT HAPPEN */ 833 panic("%s: in_control failed %x, dst %x", ifp->if_xname, 834 ntohl(saved_addr.sin_addr.s_addr), 835 ntohl(saved_dst.sin_addr.s_addr)); 836 } 837 #endif /* INET */ 838 #ifdef INET6 839 if (ifa->ifa_addr->sa_family == AF_INET6) { 840 #ifdef IFADDR_DEBUG_VERBOSE 841 int i; 842 843 kprintf("purge in6 addr %p: ", ifa); 844 for (i = 0; i < ncpus; ++i) { 845 kprintf("%d ", 846 ifa->ifa_containers[i].ifa_refcnt); 847 } 848 kprintf("\n"); 849 #endif 850 851 in6_purgeaddr(ifa); 852 /* ifp_addrhead is already updated */ 853 continue; 854 } 855 #endif /* INET6 */ 856 if_printf(ifp, "destroy ifaddr family %d\n", 857 ifa->ifa_addr->sa_family); 858 ifa_ifunlink(ifa, ifp); 859 ifa_destroy(ifa); 860 } 861 862 netisr_replymsg(&nmsg->base, 0); 863 } 864 865 void 866 if_purgeaddrs_nolink(struct ifnet *ifp) 867 { 868 struct netmsg_base nmsg; 869 870 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 871 if_purgeaddrs_nolink_dispatch); 872 nmsg.lmsg.u.ms_resultp = ifp; 873 netisr_domsg(&nmsg, 0); 874 } 875 876 static void 877 ifq_stage_detach_handler(netmsg_t nmsg) 878 { 879 struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp; 880 int q; 881 882 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 883 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 884 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); 885 886 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) 887 ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage); 888 } 889 lwkt_replymsg(&nmsg->lmsg, 0); 890 } 891 892 static void 893 ifq_stage_detach(struct ifaltq *ifq) 894 { 895 struct netmsg_base base; 896 int cpu; 897 898 netmsg_init(&base, NULL, &curthread->td_msgport, 0, 899 ifq_stage_detach_handler); 900 base.lmsg.u.ms_resultp = ifq; 901 902 /* XXX netisr_ncpus */ 903 for (cpu = 0; cpu < ncpus; ++cpu) 904 lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0); 905 } 906 907 struct netmsg_if_rtdel { 908 struct netmsg_base base; 909 struct ifnet *ifp; 910 }; 911 912 static void 913 if_rtdel_dispatch(netmsg_t msg) 914 { 915 struct netmsg_if_rtdel *rmsg = (void *)msg; 916 int i, cpu; 917 918 cpu = mycpuid; 919 ASSERT_NETISR_NCPUS(cpu); 920 921 for (i = 1; i <= AF_MAX; i++) { 922 struct radix_node_head *rnh; 923 924 if ((rnh = rt_tables[cpu][i]) == NULL) 925 continue; 926 rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp); 927 } 928 netisr_forwardmsg(&msg->base, cpu + 1); 929 } 930 931 /* 932 * Detach an interface, removing it from the 933 * list of "active" interfaces. 934 */ 935 void 936 if_detach(struct ifnet *ifp) 937 { 938 struct ifnet_array *old_ifnet_array; 939 struct ifg_list *ifgl; 940 struct netmsg_if_rtdel msg; 941 struct domain *dp; 942 int q; 943 944 /* Announce that the interface is gone. */ 945 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 946 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 947 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); 948 949 /* 950 * Remove this ifp from ifindex2inet, ifnet queue and ifnet 951 * array before it is whacked. 
952 * 953 * Protect ifindex2ifnet, ifnet queue and ifnet array changes 954 * by ifnet lock, so that non-netisr threads could get a 955 * consistent view. 956 */ 957 ifnet_lock(); 958 959 /* 960 * Remove this ifp from ifindex2ifnet and maybe decrement if_index. 961 */ 962 ifindex2ifnet[ifp->if_index] = NULL; 963 while (if_index > 0 && ifindex2ifnet[if_index] == NULL) 964 if_index--; 965 966 /* 967 * Remove this ifp from ifnet queue. 968 */ 969 TAILQ_REMOVE(&ifnetlist, ifp, if_link); 970 971 /* 972 * Remove this ifp from ifnet array. 973 */ 974 /* Free old ifnet array after sync all netisrs */ 975 old_ifnet_array = ifnet_array; 976 ifnet_array = ifnet_array_del(ifp, old_ifnet_array); 977 978 ifnet_unlock(); 979 980 ifgroup_lockmgr(LK_EXCLUSIVE); 981 while ((ifgl = TAILQ_FIRST(&ifp->if_groups)) != NULL) 982 if_delgroup_locked(ifp, ifgl->ifgl_group->ifg_group); 983 ifgroup_lockmgr(LK_RELEASE); 984 985 /* 986 * Sync all netisrs so that the old ifnet array is no longer 987 * accessed and we can free it safely later on. 988 */ 989 netmsg_service_sync(); 990 ifnet_array_free(old_ifnet_array); 991 992 /* 993 * Remove routes and flush queues. 994 */ 995 crit_enter(); 996 #ifdef IFPOLL_ENABLE 997 if (ifp->if_flags & IFF_NPOLLING) 998 ifpoll_deregister(ifp); 999 #endif 1000 if_down(ifp); 1001 1002 /* Decrease the mbuf clusters/jclusters limits increased by us */ 1003 if (ifp->if_nmbclusters > 0) 1004 mcl_inclimit(-ifp->if_nmbclusters); 1005 if (ifp->if_nmbjclusters > 0) 1006 mjcl_inclimit(-ifp->if_nmbjclusters); 1007 1008 #ifdef ALTQ 1009 if (ifq_is_enabled(&ifp->if_snd)) 1010 altq_disable(&ifp->if_snd); 1011 if (ifq_is_attached(&ifp->if_snd)) 1012 altq_detach(&ifp->if_snd); 1013 #endif 1014 1015 /* 1016 * Clean up all addresses. 1017 */ 1018 ifp->if_lladdr = NULL; 1019 1020 if_purgeaddrs_nolink(ifp); 1021 if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) { 1022 struct ifaddr *ifa; 1023 1024 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 1025 KASSERT(ifa->ifa_addr->sa_family == AF_LINK, 1026 ("non-link ifaddr is left on if_addrheads")); 1027 1028 ifa_ifunlink(ifa, ifp); 1029 ifa_destroy(ifa); 1030 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]), 1031 ("there are still ifaddrs left on if_addrheads")); 1032 } 1033 1034 #ifdef INET 1035 /* 1036 * Remove all IPv4 kernel structures related to ifp. 1037 */ 1038 in_ifdetach(ifp); 1039 #endif 1040 1041 #ifdef INET6 1042 /* 1043 * Remove all IPv6 kernel structs related to ifp. This should be done 1044 * before removing routing entries below, since IPv6 interface direct 1045 * routes are expected to be removed by the IPv6-specific kernel API. 1046 * Otherwise, the kernel will detect some inconsistency and bark it. 
1047 */ 1048 in6_ifdetach(ifp); 1049 #endif 1050 1051 /* 1052 * Delete all remaining routes using this interface 1053 */ 1054 netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 1055 if_rtdel_dispatch); 1056 msg.ifp = ifp; 1057 netisr_domsg_global(&msg.base); 1058 1059 SLIST_FOREACH(dp, &domains, dom_next) { 1060 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) 1061 (*dp->dom_ifdetach)(ifp, 1062 ifp->if_afdata[dp->dom_family]); 1063 } 1064 1065 kfree(ifp->if_addrheads, M_IFADDR); 1066 1067 lwkt_synchronize_ipiqs("if_detach"); 1068 ifq_stage_detach(&ifp->if_snd); 1069 1070 for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) { 1071 struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q]; 1072 1073 kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG); 1074 kfree(ifsq->ifsq_stage, M_DEVBUF); 1075 } 1076 kfree(ifp->if_snd.altq_subq, M_DEVBUF); 1077 1078 kfree(ifp->if_data_pcpu, M_DEVBUF); 1079 1080 crit_exit(); 1081 } 1082 1083 int 1084 ifgroup_lockmgr(u_int flags) 1085 { 1086 return lockmgr(&ifgroup_lock, flags); 1087 } 1088 1089 /* 1090 * Create an empty interface group. 1091 */ 1092 static struct ifg_group * 1093 if_creategroup(const char *groupname) 1094 { 1095 struct ifg_group *ifg; 1096 1097 ifg = kmalloc(sizeof(*ifg), M_IFNET, M_WAITOK); 1098 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); 1099 ifg->ifg_refcnt = 0; 1100 ifg->ifg_carp_demoted = 0; 1101 TAILQ_INIT(&ifg->ifg_members); 1102 1103 ifgroup_lockmgr(LK_EXCLUSIVE); 1104 TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next); 1105 ifgroup_lockmgr(LK_RELEASE); 1106 1107 EVENTHANDLER_INVOKE(group_attach_event, ifg); 1108 1109 return (ifg); 1110 } 1111 1112 /* 1113 * Destroy an empty interface group. 1114 */ 1115 static int 1116 if_destroygroup(struct ifg_group *ifg) 1117 { 1118 KASSERT(ifg->ifg_refcnt == 0, 1119 ("trying to delete a non-empty interface group")); 1120 1121 ifgroup_lockmgr(LK_EXCLUSIVE); 1122 TAILQ_REMOVE(&ifg_head, ifg, ifg_next); 1123 ifgroup_lockmgr(LK_RELEASE); 1124 1125 EVENTHANDLER_INVOKE(group_detach_event, ifg); 1126 kfree(ifg, M_IFNET); 1127 1128 return (0); 1129 } 1130 1131 /* 1132 * Add the interface to a group. 1133 * The target group will be created if it doesn't exist. 1134 */ 1135 int 1136 if_addgroup(struct ifnet *ifp, const char *groupname) 1137 { 1138 struct ifg_list *ifgl; 1139 struct ifg_group *ifg; 1140 struct ifg_member *ifgm; 1141 1142 if (groupname[0] && 1143 groupname[strlen(groupname) - 1] >= '0' && 1144 groupname[strlen(groupname) - 1] <= '9') 1145 return (EINVAL); 1146 1147 ifgroup_lockmgr(LK_SHARED); 1148 1149 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1150 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) { 1151 ifgroup_lockmgr(LK_RELEASE); 1152 return (EEXIST); 1153 } 1154 } 1155 1156 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1157 if (strcmp(ifg->ifg_group, groupname) == 0) 1158 break; 1159 } 1160 1161 ifgroup_lockmgr(LK_RELEASE); 1162 1163 if (ifg == NULL) 1164 ifg = if_creategroup(groupname); 1165 1166 ifgl = kmalloc(sizeof(*ifgl), M_IFNET, M_WAITOK); 1167 ifgm = kmalloc(sizeof(*ifgm), M_IFNET, M_WAITOK); 1168 ifgl->ifgl_group = ifg; 1169 ifgm->ifgm_ifp = ifp; 1170 ifg->ifg_refcnt++; 1171 1172 ifgroup_lockmgr(LK_EXCLUSIVE); 1173 TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); 1174 TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); 1175 ifgroup_lockmgr(LK_RELEASE); 1176 1177 EVENTHANDLER_INVOKE(group_change_event, groupname); 1178 1179 return (0); 1180 } 1181 1182 /* 1183 * Remove the interface from a group. 
 * The group will be destroyed if it becomes empty.
 *
 * The 'ifgroup_lock' must be held exclusively when calling this.
 */
static int
if_delgroup_locked(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	KKASSERT(lockstatus(&ifgroup_lock, curthread) == LK_EXCLUSIVE);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
			break;
	}
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
		if (ifgm->ifgm_ifp == ifp)
			break;
	}

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);

		ifgroup_lockmgr(LK_RELEASE);
		EVENTHANDLER_INVOKE(group_change_event, groupname);
		ifgroup_lockmgr(LK_EXCLUSIVE);

		kfree(ifgm, M_IFNET);
		ifgl->ifgl_group->ifg_refcnt--;
	}

	if (ifgl->ifgl_group->ifg_refcnt == 0) {
		ifgroup_lockmgr(LK_RELEASE);
		if_destroygroup(ifgl->ifgl_group);
		ifgroup_lockmgr(LK_EXCLUSIVE);
	}

	kfree(ifgl, M_IFNET);

	return (0);
}

int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	int error;

	ifgroup_lockmgr(LK_EXCLUSIVE);
	error = if_delgroup_locked(ifp, groupname);
	ifgroup_lockmgr(LK_RELEASE);

	return (error);
}

/*
 * Store all the groups that the interface belongs to in memory
 * pointed to by data.
 */
static int
if_getgroups(struct ifgroupreq *ifgr, struct ifnet *ifp)
{
	struct ifg_list *ifgl;
	struct ifg_req *ifgrq, *p;
	int len, error;

	len = 0;
	ifgroup_lockmgr(LK_SHARED);
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		len += sizeof(struct ifg_req);
	ifgroup_lockmgr(LK_RELEASE);

	if (ifgr->ifgr_len == 0) {
		/*
		 * Caller is asking how much memory should be allocated in
		 * the next request in order to hold all the groups.
		 */
		ifgr->ifgr_len = len;
		return (0);
	} else if (ifgr->ifgr_len != len) {
		return (EINVAL);
	}

	ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
	if (ifgrq == NULL)
		return (ENOMEM);

	ifgroup_lockmgr(LK_SHARED);
	p = ifgrq;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(struct ifg_req)) {
			ifgroup_lockmgr(LK_RELEASE);
			error = EINVAL;
			goto failed;
		}

		strlcpy(p->ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq->ifgrq_group));
		len -= sizeof(struct ifg_req);
		p++;
	}
	ifgroup_lockmgr(LK_RELEASE);

	error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len);
failed:
	kfree(ifgrq, M_TEMP);
	return error;
}

/*
 * Store all the members of a group in memory pointed to by data.
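 *
 * Userland normally sizes the buffer with a first call and then fetches
 * the members with a second one; a sketch (the socket fd "s" and the
 * group name are illustrative):
 *
 *	struct ifgroupreq ifgr;
 *
 *	memset(&ifgr, 0, sizeof(ifgr));
 *	strlcpy(ifgr.ifgr_name, "egress", sizeof(ifgr.ifgr_name));
 *	ioctl(s, SIOCGIFGMEMB, &ifgr);		/* sets ifgr_len */
 *	ifgr.ifgr_groups = malloc(ifgr.ifgr_len);
 *	ioctl(s, SIOCGIFGMEMB, &ifgr);		/* fills ifgrq_member[] */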
1300 */ 1301 static int 1302 if_getgroupmembers(struct ifgroupreq *ifgr) 1303 { 1304 struct ifg_group *ifg; 1305 struct ifg_member *ifgm; 1306 struct ifg_req *ifgrq, *p; 1307 int len, error; 1308 1309 ifgroup_lockmgr(LK_SHARED); 1310 1311 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1312 if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0) 1313 break; 1314 } 1315 if (ifg == NULL) { 1316 ifgroup_lockmgr(LK_RELEASE); 1317 return (ENOENT); 1318 } 1319 1320 len = 0; 1321 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) 1322 len += sizeof(struct ifg_req); 1323 1324 ifgroup_lockmgr(LK_RELEASE); 1325 1326 if (ifgr->ifgr_len == 0) { 1327 ifgr->ifgr_len = len; 1328 return (0); 1329 } else if (ifgr->ifgr_len != len) { 1330 return (EINVAL); 1331 } 1332 1333 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO); 1334 if (ifgrq == NULL) 1335 return (ENOMEM); 1336 1337 ifgroup_lockmgr(LK_SHARED); 1338 p = ifgrq; 1339 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { 1340 if (len < sizeof(struct ifg_req)) { 1341 ifgroup_lockmgr(LK_RELEASE); 1342 error = EINVAL; 1343 goto failed; 1344 } 1345 1346 strlcpy(p->ifgrq_member, ifgm->ifgm_ifp->if_xname, 1347 sizeof(p->ifgrq_member)); 1348 len -= sizeof(struct ifg_req); 1349 p++; 1350 } 1351 ifgroup_lockmgr(LK_RELEASE); 1352 1353 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len); 1354 failed: 1355 kfree(ifgrq, M_TEMP); 1356 return error; 1357 } 1358 1359 /* 1360 * Delete Routes for a Network Interface 1361 * 1362 * Called for each routing entry via the rnh->rnh_walktree() call above 1363 * to delete all route entries referencing a detaching network interface. 1364 * 1365 * Arguments: 1366 * rn pointer to node in the routing table 1367 * arg argument passed to rnh->rnh_walktree() - detaching interface 1368 * 1369 * Returns: 1370 * 0 successful 1371 * errno failed - reason indicated 1372 * 1373 */ 1374 static int 1375 if_rtdel(struct radix_node *rn, void *arg) 1376 { 1377 struct rtentry *rt = (struct rtentry *)rn; 1378 struct ifnet *ifp = arg; 1379 int err; 1380 1381 if (rt->rt_ifp == ifp) { 1382 1383 /* 1384 * Protect (sorta) against walktree recursion problems 1385 * with cloned routes 1386 */ 1387 if (!(rt->rt_flags & RTF_UP)) 1388 return (0); 1389 1390 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 1391 rt_mask(rt), rt->rt_flags, 1392 NULL); 1393 if (err) { 1394 log(LOG_WARNING, "if_rtdel: error %d\n", err); 1395 } 1396 } 1397 1398 return (0); 1399 } 1400 1401 static __inline boolean_t 1402 ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa) 1403 { 1404 if (old_ifa == NULL) 1405 return TRUE; 1406 1407 if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 && 1408 (cur_ifa->ifa_ifp->if_flags & IFF_UP)) 1409 return TRUE; 1410 if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 && 1411 (cur_ifa->ifa_flags & IFA_ROUTE)) 1412 return TRUE; 1413 return FALSE; 1414 } 1415 1416 /* 1417 * Locate an interface based on a complete address. 
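 *
 * Typical in-kernel use (a sketch): map a local sockaddr back to the
 * interface that owns it:
 *
 *	struct ifaddr *ifa;
 *
 *	ifa = ifa_ifwithaddr(sa);
 *	if (ifa != NULL)
 *		ifp = ifa->ifa_ifp;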
1418 */ 1419 struct ifaddr * 1420 ifa_ifwithaddr(struct sockaddr *addr) 1421 { 1422 const struct ifnet_array *arr; 1423 int i; 1424 1425 arr = ifnet_array_get(); 1426 for (i = 0; i < arr->ifnet_count; ++i) { 1427 struct ifnet *ifp = arr->ifnet_arr[i]; 1428 struct ifaddr_container *ifac; 1429 1430 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1431 struct ifaddr *ifa = ifac->ifa; 1432 1433 if (ifa->ifa_addr->sa_family != addr->sa_family) 1434 continue; 1435 if (sa_equal(addr, ifa->ifa_addr)) 1436 return (ifa); 1437 if ((ifp->if_flags & IFF_BROADCAST) && 1438 ifa->ifa_broadaddr && 1439 /* IPv6 doesn't have broadcast */ 1440 ifa->ifa_broadaddr->sa_len != 0 && 1441 sa_equal(ifa->ifa_broadaddr, addr)) 1442 return (ifa); 1443 } 1444 } 1445 return (NULL); 1446 } 1447 1448 /* 1449 * Locate the point to point interface with a given destination address. 1450 */ 1451 struct ifaddr * 1452 ifa_ifwithdstaddr(struct sockaddr *addr) 1453 { 1454 const struct ifnet_array *arr; 1455 int i; 1456 1457 arr = ifnet_array_get(); 1458 for (i = 0; i < arr->ifnet_count; ++i) { 1459 struct ifnet *ifp = arr->ifnet_arr[i]; 1460 struct ifaddr_container *ifac; 1461 1462 if (!(ifp->if_flags & IFF_POINTOPOINT)) 1463 continue; 1464 1465 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1466 struct ifaddr *ifa = ifac->ifa; 1467 1468 if (ifa->ifa_addr->sa_family != addr->sa_family) 1469 continue; 1470 if (ifa->ifa_dstaddr && 1471 sa_equal(addr, ifa->ifa_dstaddr)) 1472 return (ifa); 1473 } 1474 } 1475 return (NULL); 1476 } 1477 1478 /* 1479 * Find an interface on a specific network. If many, choice 1480 * is most specific found. 1481 */ 1482 struct ifaddr * 1483 ifa_ifwithnet(struct sockaddr *addr) 1484 { 1485 struct ifaddr *ifa_maybe = NULL; 1486 u_int af = addr->sa_family; 1487 char *addr_data = addr->sa_data, *cplim; 1488 const struct ifnet_array *arr; 1489 int i; 1490 1491 /* 1492 * AF_LINK addresses can be looked up directly by their index number, 1493 * so do that if we can. 1494 */ 1495 if (af == AF_LINK) { 1496 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; 1497 1498 if (sdl->sdl_index && sdl->sdl_index <= if_index) 1499 return (ifindex2ifnet[sdl->sdl_index]->if_lladdr); 1500 } 1501 1502 /* 1503 * Scan though each interface, looking for ones that have 1504 * addresses in this address family. 1505 */ 1506 arr = ifnet_array_get(); 1507 for (i = 0; i < arr->ifnet_count; ++i) { 1508 struct ifnet *ifp = arr->ifnet_arr[i]; 1509 struct ifaddr_container *ifac; 1510 1511 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1512 struct ifaddr *ifa = ifac->ifa; 1513 char *cp, *cp2, *cp3; 1514 1515 if (ifa->ifa_addr->sa_family != af) 1516 next: continue; 1517 if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) { 1518 /* 1519 * This is a bit broken as it doesn't 1520 * take into account that the remote end may 1521 * be a single node in the network we are 1522 * looking for. 1523 * The trouble is that we don't know the 1524 * netmask for the remote end. 1525 */ 1526 if (ifa->ifa_dstaddr != NULL && 1527 sa_equal(addr, ifa->ifa_dstaddr)) 1528 return (ifa); 1529 } else { 1530 /* 1531 * if we have a special address handler, 1532 * then use it instead of the generic one. 1533 */ 1534 if (ifa->ifa_claim_addr) { 1535 if ((*ifa->ifa_claim_addr)(ifa, addr)) { 1536 return (ifa); 1537 } else { 1538 continue; 1539 } 1540 } 1541 1542 /* 1543 * Scan all the bits in the ifa's address. 
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len +
				    (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search for an even
				 * better one.  If the netmasks are equal,
				 * we prefer this ifa based on the result
				 * of ifa_prefer().
				 */
				if (ifa_maybe == NULL ||
				    rn_refines((char *)ifa->ifa_netmask,
					(char *)ifa_maybe->ifa_netmask) ||
				    (sa_equal(ifa_maybe->ifa_netmask,
					ifa->ifa_netmask) &&
				     ifa_prefer(ifa, ifa_maybe)))
					ifa_maybe = ifa;
			}
		}
	}
	return (ifa_maybe);
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr_container *ifac;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (0);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == NULL) {
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr != NULL &&
			     sa_equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		} else {
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			if (cp3 == cplim)
				return (ifa);
		}
	}
	return (ifa_maybe);
}

/*
 * Default action when installing a route with a Link Level gateway.
 * Lookup an appropriate real ifa to point to.
 * This should be moved to /sys/net/link.c eventually.
 */
static void
link_rtrequest(int cmd, struct rtentry *rt)
{
	struct ifaddr *ifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
		return;
	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa != NULL) {
		IFAFREE(rt->rt_ifa);
		IFAREF(ifa);
		rt->rt_ifa = ifa;
		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
			ifa->ifa_rtrequest(cmd, rt);
	}
}

struct netmsg_if {
	struct netmsg_base base;
	struct ifnet *ifp;
};

/*
 * Mark an interface down and notify protocols of the transition.
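 *
 * This handler runs in netisr0; if_down() below hands it over as a
 * netmsg (a sketch of the dispatch done there):
 *
 *	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
 *	    if_down_dispatch);
 *	msg.ifp = ifp;
 *	netisr_domsg(&msg.base, 0);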
 */
static void
if_down_dispatch(netmsg_t nmsg)
{
	struct netmsg_if *msg = (struct netmsg_if *)nmsg;
	struct ifnet *ifp = msg->ifp;
	struct ifaddr_container *ifac;
	struct domain *dp;

	ASSERT_NETISR0;

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	rt_ifmsg(ifp);

	/*
	 * The ifaddr processing in the following loop may block.
	 * However, this function is called in netisr0, where ifaddr
	 * list changes happen, so blocking here is not a problem.
	 */
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
	}

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_if_down != NULL)
			dp->dom_if_down(ifp);

	ifq_purge_all(&ifp->if_snd);
	netisr_replymsg(&nmsg->base, 0);
}

/*
 * Mark an interface up and notify protocols of the transition.
 */
static void
if_up_dispatch(netmsg_t nmsg)
{
	struct netmsg_if *msg = (struct netmsg_if *)nmsg;
	struct ifnet *ifp = msg->ifp;
	struct ifaddr_container *ifac;
	struct domain *dp;

	ASSERT_NETISR0;

	ifq_purge_all(&ifp->if_snd);
	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	rt_ifmsg(ifp);

	/*
	 * The ifaddr processing in the following loop may block.
	 * However, this function is called in netisr0, where ifaddr
	 * list changes happen, so blocking here is not a problem.
	 */
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		kpfctlinput(PRC_IFUP, ifa->ifa_addr);
	}

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_if_up != NULL)
			dp->dom_if_up(ifp);

	netisr_replymsg(&nmsg->base, 0);
}

/*
 * Mark an interface down and notify protocols of the transition.  An
 * interface going down is also considered to be a synchronizing event.
 * We must ensure that all packet processing related to the interface
 * has completed before we return so e.g. the caller can free the ifnet
 * structure that the mbufs may be referencing.
 *
 * NOTE: must be called at splnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{
	struct netmsg_if msg;

	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
	    if_down_dispatch);
	msg.ifp = ifp;
	netisr_domsg(&msg.base, 0);
	netmsg_service_sync();
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{
	struct netmsg_if msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
	    if_up_dispatch);
	msg.ifp = ifp;
	netisr_domsg(&msg.base, 0);
	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}

/*
 * Process a link state change.
 * NOTE: must be called at splsoftnet or equivalent.
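 *
 * Driver-side usage sketch (the surrounding driver details are
 * hypothetical); the driver records the new state first and then
 * notifies:
 *
 *	ifp->if_link_state = LINK_STATE_UP;
 *	if_link_state_change(ifp);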
1782 */ 1783 void 1784 if_link_state_change(struct ifnet *ifp) 1785 { 1786 int link_state = ifp->if_link_state; 1787 1788 rt_ifmsg(ifp); 1789 devctl_notify("IFNET", ifp->if_xname, 1790 (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); 1791 1792 EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); 1793 } 1794 1795 /* 1796 * Handle interface watchdog timer routines. Called 1797 * from softclock, we decrement timers (if set) and 1798 * call the appropriate interface routine on expiration. 1799 */ 1800 static void 1801 if_slowtimo_dispatch(netmsg_t nmsg) 1802 { 1803 struct globaldata *gd = mycpu; 1804 const struct ifnet_array *arr; 1805 int i; 1806 1807 ASSERT_NETISR0; 1808 1809 crit_enter_gd(gd); 1810 lwkt_replymsg(&nmsg->lmsg, 0); /* reply ASAP */ 1811 crit_exit_gd(gd); 1812 1813 arr = ifnet_array_get(); 1814 for (i = 0; i < arr->ifnet_count; ++i) { 1815 struct ifnet *ifp = arr->ifnet_arr[i]; 1816 1817 crit_enter_gd(gd); 1818 1819 if (if_stats_compat) { 1820 IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets); 1821 IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors); 1822 IFNET_STAT_GET(ifp, opackets, ifp->if_opackets); 1823 IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors); 1824 IFNET_STAT_GET(ifp, collisions, ifp->if_collisions); 1825 IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes); 1826 IFNET_STAT_GET(ifp, obytes, ifp->if_obytes); 1827 IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts); 1828 IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts); 1829 IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops); 1830 IFNET_STAT_GET(ifp, noproto, ifp->if_noproto); 1831 IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops); 1832 } 1833 1834 if (ifp->if_timer == 0 || --ifp->if_timer) { 1835 crit_exit_gd(gd); 1836 continue; 1837 } 1838 if (ifp->if_watchdog) { 1839 if (ifnet_tryserialize_all(ifp)) { 1840 (*ifp->if_watchdog)(ifp); 1841 ifnet_deserialize_all(ifp); 1842 } else { 1843 /* try again next timeout */ 1844 ++ifp->if_timer; 1845 } 1846 } 1847 1848 crit_exit_gd(gd); 1849 } 1850 1851 callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL); 1852 } 1853 1854 static void 1855 if_slowtimo(void *arg __unused) 1856 { 1857 struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg; 1858 1859 KASSERT(mycpuid == 0, ("not on cpu0")); 1860 crit_enter(); 1861 if (lmsg->ms_flags & MSGF_DONE) 1862 lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg); 1863 crit_exit(); 1864 } 1865 1866 /* 1867 * Map interface name to 1868 * interface structure pointer. 1869 */ 1870 struct ifnet * 1871 ifunit(const char *name) 1872 { 1873 struct ifnet *ifp; 1874 1875 /* 1876 * Search all the interfaces for this name/number 1877 */ 1878 KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked")); 1879 1880 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 1881 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1882 break; 1883 } 1884 return (ifp); 1885 } 1886 1887 struct ifnet * 1888 ifunit_netisr(const char *name) 1889 { 1890 const struct ifnet_array *arr; 1891 int i; 1892 1893 /* 1894 * Search all the interfaces for this name/number 1895 */ 1896 1897 arr = ifnet_array_get(); 1898 for (i = 0; i < arr->ifnet_count; ++i) { 1899 struct ifnet *ifp = arr->ifnet_arr[i]; 1900 1901 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1902 return ifp; 1903 } 1904 return NULL; 1905 } 1906 1907 /* 1908 * Interface ioctls. 
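 *
 * These are reached from userland through ioctl(2) on any socket; a
 * minimal sketch (the interface name is illustrative):
 *
 *	struct ifreq ifr;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(s, SIOCGIFFLAGS, &ifr);	/* handled below */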
1909 */ 1910 int 1911 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred) 1912 { 1913 struct ifnet *ifp; 1914 struct ifgroupreq *ifgr; 1915 struct ifreq *ifr; 1916 struct ifstat *ifs; 1917 int error, do_ifup = 0; 1918 short oif_flags; 1919 int new_flags; 1920 size_t namelen, onamelen; 1921 size_t descrlen; 1922 char *descrbuf, *odescrbuf; 1923 char new_name[IFNAMSIZ]; 1924 struct ifaddr *ifa; 1925 struct sockaddr_dl *sdl; 1926 1927 switch (cmd) { 1928 case SIOCGIFCONF: 1929 return (ifconf(cmd, data, cred)); 1930 default: 1931 break; 1932 } 1933 1934 ifr = (struct ifreq *)data; 1935 1936 switch (cmd) { 1937 case SIOCIFCREATE: 1938 case SIOCIFCREATE2: 1939 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 1940 if (error) 1941 return (error); 1942 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), 1943 (cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL), NULL)); 1944 case SIOCIFDESTROY: 1945 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 1946 if (error) 1947 return (error); 1948 return (if_clone_destroy(ifr->ifr_name)); 1949 case SIOCIFGCLONERS: 1950 return (if_clone_list((struct if_clonereq *)data)); 1951 case SIOCGIFGMEMB: 1952 return (if_getgroupmembers((struct ifgroupreq *)data)); 1953 default: 1954 break; 1955 } 1956 1957 /* 1958 * Nominal ioctl through interface, lookup the ifp and obtain a 1959 * lock to serialize the ifconfig ioctl operation. 1960 */ 1961 ifnet_lock(); 1962 1963 ifp = ifunit(ifr->ifr_name); 1964 if (ifp == NULL) { 1965 ifnet_unlock(); 1966 return (ENXIO); 1967 } 1968 error = 0; 1969 1970 switch (cmd) { 1971 case SIOCGIFINDEX: 1972 ifr->ifr_index = ifp->if_index; 1973 break; 1974 1975 case SIOCGIFFLAGS: 1976 ifr->ifr_flags = ifp->if_flags; 1977 ifr->ifr_flagshigh = ifp->if_flags >> 16; 1978 break; 1979 1980 case SIOCGIFCAP: 1981 ifr->ifr_reqcap = ifp->if_capabilities; 1982 ifr->ifr_curcap = ifp->if_capenable; 1983 break; 1984 1985 case SIOCGIFMETRIC: 1986 ifr->ifr_metric = ifp->if_metric; 1987 break; 1988 1989 case SIOCGIFMTU: 1990 ifr->ifr_mtu = ifp->if_mtu; 1991 break; 1992 1993 case SIOCGIFTSOLEN: 1994 ifr->ifr_tsolen = ifp->if_tsolen; 1995 break; 1996 1997 case SIOCGIFDATA: 1998 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data, 1999 sizeof(ifp->if_data)); 2000 break; 2001 2002 case SIOCGIFPHYS: 2003 ifr->ifr_phys = ifp->if_physical; 2004 break; 2005 2006 case SIOCGIFPOLLCPU: 2007 ifr->ifr_pollcpu = -1; 2008 break; 2009 2010 case SIOCSIFPOLLCPU: 2011 break; 2012 2013 case SIOCGIFDESCR: 2014 error = 0; 2015 ifnet_lock(); 2016 if (ifp->if_description == NULL) { 2017 ifr->ifr_buffer.length = 0; 2018 error = ENOMSG; 2019 } else { 2020 /* space for terminating nul */ 2021 descrlen = strlen(ifp->if_description) + 1; 2022 if (ifr->ifr_buffer.length < descrlen) 2023 error = ENAMETOOLONG; 2024 else 2025 error = copyout(ifp->if_description, 2026 ifr->ifr_buffer.buffer, descrlen); 2027 ifr->ifr_buffer.length = descrlen; 2028 } 2029 ifnet_unlock(); 2030 break; 2031 2032 case SIOCSIFDESCR: 2033 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2034 if (error) 2035 break; 2036 2037 /* 2038 * Copy only (length-1) bytes to make sure that 2039 * if_description is always nul terminated. The 2040 * length parameter is supposed to count the 2041 * terminating nul in. 
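		 *
		 * For example (a sketch), a caller would pass:
		 *
		 *	ifr.ifr_buffer.buffer = descr;
		 *	ifr.ifr_buffer.length = strlen(descr) + 1;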
2042 */ 2043 if (ifr->ifr_buffer.length > ifdescr_maxlen) 2044 return (ENAMETOOLONG); 2045 else if (ifr->ifr_buffer.length == 0) 2046 descrbuf = NULL; 2047 else { 2048 descrbuf = kmalloc(ifr->ifr_buffer.length, M_IFDESCR, 2049 M_WAITOK | M_ZERO); 2050 error = copyin(ifr->ifr_buffer.buffer, descrbuf, 2051 ifr->ifr_buffer.length - 1); 2052 if (error) { 2053 kfree(descrbuf, M_IFDESCR); 2054 break; 2055 } 2056 } 2057 2058 ifnet_lock(); 2059 odescrbuf = ifp->if_description; 2060 ifp->if_description = descrbuf; 2061 ifnet_unlock(); 2062 2063 if (odescrbuf) 2064 kfree(odescrbuf, M_IFDESCR); 2065 break; 2066 case SIOCSIFFLAGS: 2067 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2068 if (error) 2069 break; 2070 new_flags = (ifr->ifr_flags & 0xffff) | 2071 (ifr->ifr_flagshigh << 16); 2072 if (ifp->if_flags & IFF_SMART) { 2073 /* Smart drivers twiddle their own routes */ 2074 } else if (ifp->if_flags & IFF_UP && 2075 (new_flags & IFF_UP) == 0) { 2076 if_down(ifp); 2077 } else if (new_flags & IFF_UP && 2078 (ifp->if_flags & IFF_UP) == 0) { 2079 do_ifup = 1; 2080 } 2081 2082 #ifdef IFPOLL_ENABLE 2083 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) { 2084 if (new_flags & IFF_NPOLLING) 2085 ifpoll_register(ifp); 2086 else 2087 ifpoll_deregister(ifp); 2088 } 2089 #endif 2090 2091 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | 2092 (new_flags &~ IFF_CANTCHANGE); 2093 if (new_flags & IFF_PPROMISC) { 2094 /* Permanently promiscuous mode requested */ 2095 ifp->if_flags |= IFF_PROMISC; 2096 } else if (ifp->if_pcount == 0) { 2097 ifp->if_flags &= ~IFF_PROMISC; 2098 } 2099 if (ifp->if_ioctl) { 2100 ifnet_serialize_all(ifp); 2101 ifp->if_ioctl(ifp, cmd, data, cred); 2102 ifnet_deserialize_all(ifp); 2103 } 2104 if (do_ifup) 2105 if_up(ifp); 2106 getmicrotime(&ifp->if_lastchange); 2107 break; 2108 2109 case SIOCSIFCAP: 2110 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2111 if (error) 2112 break; 2113 if (ifr->ifr_reqcap & ~ifp->if_capabilities) { 2114 error = EINVAL; 2115 break; 2116 } 2117 ifnet_serialize_all(ifp); 2118 ifp->if_ioctl(ifp, cmd, data, cred); 2119 ifnet_deserialize_all(ifp); 2120 break; 2121 2122 case SIOCSIFNAME: 2123 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2124 if (error) 2125 break; 2126 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); 2127 if (error) 2128 break; 2129 if (new_name[0] == '\0') { 2130 error = EINVAL; 2131 break; 2132 } 2133 if (ifunit(new_name) != NULL) { 2134 error = EEXIST; 2135 break; 2136 } 2137 2138 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 2139 2140 /* Announce the departure of the interface. */ 2141 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 2142 2143 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); 2144 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 2145 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 2146 namelen = strlen(new_name); 2147 onamelen = sdl->sdl_nlen; 2148 /* 2149 * Move the address if needed. This is safe because we 2150 * allocate space for a name of length IFNAMSIZ when we 2151 * create this in if_attach(). 2152 */ 2153 if (namelen != onamelen) { 2154 bcopy(sdl->sdl_data + onamelen, 2155 sdl->sdl_data + namelen, sdl->sdl_alen); 2156 } 2157 bcopy(new_name, sdl->sdl_data, namelen); 2158 sdl->sdl_nlen = namelen; 2159 sdl = (struct sockaddr_dl *)ifa->ifa_netmask; 2160 bzero(sdl->sdl_data, onamelen); 2161 while (namelen != 0) 2162 sdl->sdl_data[--namelen] = 0xff; 2163 2164 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 2165 2166 /* Announce the return of the interface.
*/ 2167 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 2168 break; 2169 2170 case SIOCSIFMETRIC: 2171 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2172 if (error) 2173 break; 2174 ifp->if_metric = ifr->ifr_metric; 2175 getmicrotime(&ifp->if_lastchange); 2176 break; 2177 2178 case SIOCSIFPHYS: 2179 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2180 if (error) 2181 break; 2182 if (ifp->if_ioctl == NULL) { 2183 error = EOPNOTSUPP; 2184 break; 2185 } 2186 ifnet_serialize_all(ifp); 2187 error = ifp->if_ioctl(ifp, cmd, data, cred); 2188 ifnet_deserialize_all(ifp); 2189 if (error == 0) 2190 getmicrotime(&ifp->if_lastchange); 2191 break; 2192 2193 case SIOCSIFMTU: 2194 { 2195 u_long oldmtu = ifp->if_mtu; 2196 2197 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2198 if (error) 2199 break; 2200 if (ifp->if_ioctl == NULL) { 2201 error = EOPNOTSUPP; 2202 break; 2203 } 2204 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) { 2205 error = EINVAL; 2206 break; 2207 } 2208 ifnet_serialize_all(ifp); 2209 error = ifp->if_ioctl(ifp, cmd, data, cred); 2210 ifnet_deserialize_all(ifp); 2211 if (error == 0) { 2212 getmicrotime(&ifp->if_lastchange); 2213 rt_ifmsg(ifp); 2214 } 2215 /* 2216 * If the link MTU changed, do network layer specific procedure. 2217 */ 2218 if (ifp->if_mtu != oldmtu) { 2219 #ifdef INET6 2220 nd6_setmtu(ifp); 2221 #endif 2222 } 2223 break; 2224 } 2225 2226 case SIOCSIFTSOLEN: 2227 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2228 if (error) 2229 break; 2230 2231 /* XXX need driver supplied upper limit */ 2232 if (ifr->ifr_tsolen <= 0) { 2233 error = EINVAL; 2234 break; 2235 } 2236 ifp->if_tsolen = ifr->ifr_tsolen; 2237 break; 2238 2239 case SIOCADDMULTI: 2240 case SIOCDELMULTI: 2241 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2242 if (error) 2243 break; 2244 2245 /* Don't allow group membership on non-multicast interfaces. */ 2246 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2247 error = EOPNOTSUPP; 2248 break; 2249 } 2250 2251 /* Don't let users screw up protocols' entries. 
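 * Memberships added or removed through this ioctl must be expressed as
 * AF_LINK addresses; anything else is rejected below.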
*/ 2252 if (ifr->ifr_addr.sa_family != AF_LINK) { 2253 error = EINVAL; 2254 break; 2255 } 2256 2257 if (cmd == SIOCADDMULTI) { 2258 struct ifmultiaddr *ifma; 2259 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); 2260 } else { 2261 error = if_delmulti(ifp, &ifr->ifr_addr); 2262 } 2263 if (error == 0) 2264 getmicrotime(&ifp->if_lastchange); 2265 break; 2266 2267 case SIOCSIFPHYADDR: 2268 case SIOCDIFPHYADDR: 2269 #ifdef INET6 2270 case SIOCSIFPHYADDR_IN6: 2271 #endif 2272 case SIOCSLIFPHYADDR: 2273 case SIOCSIFMEDIA: 2274 case SIOCSIFGENERIC: 2275 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2276 if (error) 2277 break; 2278 if (ifp->if_ioctl == NULL) { 2279 error = EOPNOTSUPP; 2280 break; 2281 } 2282 ifnet_serialize_all(ifp); 2283 error = ifp->if_ioctl(ifp, cmd, data, cred); 2284 ifnet_deserialize_all(ifp); 2285 if (error == 0) 2286 getmicrotime(&ifp->if_lastchange); 2287 break; 2288 2289 case SIOCGIFSTATUS: 2290 ifs = (struct ifstat *)data; 2291 ifs->ascii[0] = '\0'; 2292 /* fall through */ 2293 case SIOCGIFPSRCADDR: 2294 case SIOCGIFPDSTADDR: 2295 case SIOCGLIFPHYADDR: 2296 case SIOCGIFMEDIA: 2297 case SIOCGIFXMEDIA: 2298 case SIOCGIFGENERIC: 2299 if (ifp->if_ioctl == NULL) { 2300 error = EOPNOTSUPP; 2301 break; 2302 } 2303 ifnet_serialize_all(ifp); 2304 error = ifp->if_ioctl(ifp, cmd, data, cred); 2305 ifnet_deserialize_all(ifp); 2306 break; 2307 2308 case SIOCSIFLLADDR: 2309 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2310 if (error) 2311 break; 2312 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, 2313 ifr->ifr_addr.sa_len); 2314 EVENTHANDLER_INVOKE(iflladdr_event, ifp); 2315 break; 2316 2317 case SIOCAIFGROUP: 2318 ifgr = (struct ifgroupreq *)ifr; 2319 error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG); 2320 if (error) 2321 return (error); 2322 if ((error = if_addgroup(ifp, ifgr->ifgr_group))) 2323 return (error); 2324 break; 2325 2326 case SIOCDIFGROUP: 2327 ifgr = (struct ifgroupreq *)ifr; 2328 error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG); 2329 if (error) 2330 return (error); 2331 if ((error = if_delgroup(ifp, ifgr->ifgr_group))) 2332 return (error); 2333 break; 2334 2335 case SIOCGIFGROUP: 2336 ifgr = (struct ifgroupreq *)ifr; 2337 if ((error = if_getgroups(ifgr, ifp))) 2338 return (error); 2339 break; 2340 2341 default: 2342 oif_flags = ifp->if_flags; 2343 if (so->so_proto == 0) { 2344 error = EOPNOTSUPP; 2345 break; 2346 } 2347 error = so_pru_control_direct(so, cmd, data, ifp); 2348 2349 /* 2350 * If the socket control method returns EOPNOTSUPP, pass the 2351 * request directly to the interface. 2352 * 2353 * Exclude the SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} ioctls, 2354 * because drivers may trust these ioctls to come from an 2355 * already privileged layer and thus do not perform credentials 2356 * checks or input validation. 
2357 */ 2358 if (error == EOPNOTSUPP && 2359 ifp->if_ioctl != NULL && 2360 cmd != SIOCSIFADDR && 2361 cmd != SIOCSIFBRDADDR && 2362 cmd != SIOCSIFDSTADDR && 2363 cmd != SIOCSIFNETMASK) { 2364 ifnet_serialize_all(ifp); 2365 error = ifp->if_ioctl(ifp, cmd, data, cred); 2366 ifnet_deserialize_all(ifp); 2367 } 2368 2369 if ((oif_flags ^ ifp->if_flags) & IFF_UP) { 2370 #ifdef INET6 2371 DELAY(100);/* XXX: temporary workaround for fxp issue*/ 2372 if (ifp->if_flags & IFF_UP) { 2373 crit_enter(); 2374 in6_if_up(ifp); 2375 crit_exit(); 2376 } 2377 #endif 2378 } 2379 break; 2380 } 2381 2382 ifnet_unlock(); 2383 return (error); 2384 } 2385 2386 /* 2387 * Set/clear promiscuous mode on interface ifp based on the truth value 2388 * of pswitch. The calls are reference counted so that only the first 2389 * "on" request actually has an effect, as does the final "off" request. 2390 * Results are undefined if the "off" and "on" requests are not matched. 2391 */ 2392 int 2393 ifpromisc(struct ifnet *ifp, int pswitch) 2394 { 2395 struct ifreq ifr; 2396 int error; 2397 int oldflags; 2398 2399 oldflags = ifp->if_flags; 2400 if (ifp->if_flags & IFF_PPROMISC) { 2401 /* Do nothing if device is in permanently promiscuous mode */ 2402 ifp->if_pcount += pswitch ? 1 : -1; 2403 return (0); 2404 } 2405 if (pswitch) { 2406 /* 2407 * If the device is not configured up, we cannot put it in 2408 * promiscuous mode. 2409 */ 2410 if ((ifp->if_flags & IFF_UP) == 0) 2411 return (ENETDOWN); 2412 if (ifp->if_pcount++ != 0) 2413 return (0); 2414 ifp->if_flags |= IFF_PROMISC; 2415 log(LOG_INFO, "%s: promiscuous mode enabled\n", 2416 ifp->if_xname); 2417 } else { 2418 if (--ifp->if_pcount > 0) 2419 return (0); 2420 ifp->if_flags &= ~IFF_PROMISC; 2421 log(LOG_INFO, "%s: promiscuous mode disabled\n", 2422 ifp->if_xname); 2423 } 2424 ifr.ifr_flags = ifp->if_flags; 2425 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2426 ifnet_serialize_all(ifp); 2427 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL); 2428 ifnet_deserialize_all(ifp); 2429 if (error == 0) 2430 rt_ifmsg(ifp); 2431 else 2432 ifp->if_flags = oldflags; 2433 return error; 2434 } 2435 2436 /* 2437 * Return interface configuration 2438 * of system. List may be used 2439 * in later ioctl's (above) to get 2440 * other information. 2441 */ 2442 static int 2443 ifconf(u_long cmd, caddr_t data, struct ucred *cred) 2444 { 2445 struct ifconf *ifc = (struct ifconf *)data; 2446 struct ifnet *ifp; 2447 struct sockaddr *sa; 2448 struct ifreq ifr, *ifrp; 2449 int space = ifc->ifc_len, error = 0; 2450 2451 ifrp = ifc->ifc_req; 2452 2453 ifnet_lock(); 2454 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2455 struct ifaddr_container *ifac, *ifac_mark; 2456 struct ifaddr_marker mark; 2457 struct ifaddrhead *head; 2458 int addrs; 2459 2460 if (space <= sizeof ifr) 2461 break; 2462 2463 /* 2464 * Zero the stack declared structure first to prevent 2465 * memory disclosure. 2466 */ 2467 bzero(&ifr, sizeof(ifr)); 2468 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) 2469 >= sizeof(ifr.ifr_name)) { 2470 error = ENAMETOOLONG; 2471 break; 2472 } 2473 2474 /* 2475 * Add a marker, since copyout() could block and during that 2476 * period the list could be changed. Inserting the marker to 2477 * the header of the list will not cause trouble for the code 2478 * assuming that the first element of the list is AF_LINK; the 2479 * marker will be moved to the next position w/o blocking. 
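 * The marker itself carries an AF_UNSPEC address, so the loop below can
 * recognize and skip it instead of copying it out to userland.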
2480 */ 2481 ifa_marker_init(&mark, ifp); 2482 ifac_mark = &mark.ifac; 2483 head = &ifp->if_addrheads[mycpuid]; 2484 2485 addrs = 0; 2486 TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link); 2487 while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) { 2488 struct ifaddr *ifa = ifac->ifa; 2489 2490 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2491 TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); 2492 2493 /* Ignore marker */ 2494 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 2495 continue; 2496 2497 if (space <= sizeof ifr) 2498 break; 2499 sa = ifa->ifa_addr; 2500 if (cred->cr_prison && prison_if(cred, sa)) 2501 continue; 2502 addrs++; 2503 /* 2504 * Keep a reference on this ifaddr, so that it will 2505 * not be destroyed when its address is copied to 2506 * the userland, which could block. 2507 */ 2508 IFAREF(ifa); 2509 if (sa->sa_len <= sizeof(*sa)) { 2510 ifr.ifr_addr = *sa; 2511 error = copyout(&ifr, ifrp, sizeof ifr); 2512 ifrp++; 2513 } else { 2514 if (space < (sizeof ifr) + sa->sa_len - 2515 sizeof(*sa)) { 2516 IFAFREE(ifa); 2517 break; 2518 } 2519 space -= sa->sa_len - sizeof(*sa); 2520 error = copyout(&ifr, ifrp, 2521 sizeof ifr.ifr_name); 2522 if (error == 0) 2523 error = copyout(sa, &ifrp->ifr_addr, 2524 sa->sa_len); 2525 ifrp = (struct ifreq *) 2526 (sa->sa_len + (caddr_t)&ifrp->ifr_addr); 2527 } 2528 IFAFREE(ifa); 2529 if (error) 2530 break; 2531 space -= sizeof ifr; 2532 } 2533 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2534 if (error) 2535 break; 2536 if (!addrs) { 2537 bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr); 2538 error = copyout(&ifr, ifrp, sizeof ifr); 2539 if (error) 2540 break; 2541 space -= sizeof ifr; 2542 ifrp++; 2543 } 2544 } 2545 ifnet_unlock(); 2546 2547 ifc->ifc_len -= space; 2548 return (error); 2549 } 2550 2551 /* 2552 * Just like if_promisc(), but for all-multicast-reception mode. 2553 */ 2554 int 2555 if_allmulti(struct ifnet *ifp, int onswitch) 2556 { 2557 int error = 0; 2558 struct ifreq ifr; 2559 2560 crit_enter(); 2561 2562 if (onswitch) { 2563 if (ifp->if_amcount++ == 0) { 2564 ifp->if_flags |= IFF_ALLMULTI; 2565 ifr.ifr_flags = ifp->if_flags; 2566 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2567 ifnet_serialize_all(ifp); 2568 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2569 NULL); 2570 ifnet_deserialize_all(ifp); 2571 } 2572 } else { 2573 if (ifp->if_amcount > 1) { 2574 ifp->if_amcount--; 2575 } else { 2576 ifp->if_amcount = 0; 2577 ifp->if_flags &= ~IFF_ALLMULTI; 2578 ifr.ifr_flags = ifp->if_flags; 2579 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2580 ifnet_serialize_all(ifp); 2581 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2582 NULL); 2583 ifnet_deserialize_all(ifp); 2584 } 2585 } 2586 2587 crit_exit(); 2588 2589 if (error == 0) 2590 rt_ifmsg(ifp); 2591 return error; 2592 } 2593 2594 /* 2595 * Add a multicast listenership to the interface in question. 
2596 * The link layer provides a routine which converts 2597 */ 2598 int 2599 if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa, 2600 struct ifmultiaddr **retifma) 2601 { 2602 struct sockaddr *llsa, *dupsa; 2603 int error; 2604 struct ifmultiaddr *ifma; 2605 2606 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2607 2608 /* 2609 * If the matching multicast address already exists 2610 * then don't add a new one, just add a reference 2611 */ 2612 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2613 if (sa_equal(sa, ifma->ifma_addr)) { 2614 ifma->ifma_refcount++; 2615 if (retifma) 2616 *retifma = ifma; 2617 return 0; 2618 } 2619 } 2620 2621 /* 2622 * Give the link layer a chance to accept/reject it, and also 2623 * find out which AF_LINK address this maps to, if it isn't one 2624 * already. 2625 */ 2626 if (ifp->if_resolvemulti) { 2627 error = ifp->if_resolvemulti(ifp, &llsa, sa); 2628 if (error) 2629 return error; 2630 } else { 2631 llsa = NULL; 2632 } 2633 2634 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2635 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT); 2636 bcopy(sa, dupsa, sa->sa_len); 2637 2638 ifma->ifma_addr = dupsa; 2639 ifma->ifma_lladdr = llsa; 2640 ifma->ifma_ifp = ifp; 2641 ifma->ifma_refcount = 1; 2642 ifma->ifma_protospec = NULL; 2643 rt_newmaddrmsg(RTM_NEWMADDR, ifma); 2644 2645 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2646 if (retifma) 2647 *retifma = ifma; 2648 2649 if (llsa != NULL) { 2650 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2651 if (sa_equal(ifma->ifma_addr, llsa)) 2652 break; 2653 } 2654 if (ifma) { 2655 ifma->ifma_refcount++; 2656 } else { 2657 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2658 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT); 2659 bcopy(llsa, dupsa, llsa->sa_len); 2660 ifma->ifma_addr = dupsa; 2661 ifma->ifma_ifp = ifp; 2662 ifma->ifma_refcount = 1; 2663 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2664 } 2665 } 2666 /* 2667 * We are certain we have added something, so call down to the 2668 * interface to let them know about it. 2669 */ 2670 if (ifp->if_ioctl) 2671 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL); 2672 2673 return 0; 2674 } 2675 2676 int 2677 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, 2678 struct ifmultiaddr **retifma) 2679 { 2680 int error; 2681 2682 ifnet_serialize_all(ifp); 2683 error = if_addmulti_serialized(ifp, sa, retifma); 2684 ifnet_deserialize_all(ifp); 2685 2686 return error; 2687 } 2688 2689 /* 2690 * Remove a reference to a multicast address on this interface. Yell 2691 * if the request does not match an existing membership. 2692 */ 2693 static int 2694 if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa) 2695 { 2696 struct ifmultiaddr *ifma; 2697 2698 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2699 2700 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2701 if (sa_equal(sa, ifma->ifma_addr)) 2702 break; 2703 if (ifma == NULL) 2704 return ENOENT; 2705 2706 if (ifma->ifma_refcount > 1) { 2707 ifma->ifma_refcount--; 2708 return 0; 2709 } 2710 2711 rt_newmaddrmsg(RTM_DELMADDR, ifma); 2712 sa = ifma->ifma_lladdr; 2713 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2714 /* 2715 * Make sure the interface driver is notified 2716 * in the case of a link layer mcast group being left. 
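 * (the SIOCDELMULTI call below gives the driver a chance to drop the
 * group from its hardware filter).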
2717 */ 2718 if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) 2719 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2720 kfree(ifma->ifma_addr, M_IFMADDR); 2721 kfree(ifma, M_IFMADDR); 2722 if (sa == NULL) 2723 return 0; 2724 2725 /* 2726 * Now look for the link-layer address which corresponds to 2727 * this network address. It had been squirreled away in 2728 * ifma->ifma_lladdr for this purpose (so we don't have 2729 * to call ifp->if_resolvemulti() again), and we saved that 2730 * value in sa above. If some nasty deleted the 2731 * link-layer address out from underneath us, we can deal because 2732 * the address we stored is not the same as the one which was 2733 * in the record for the link-layer address. (So we don't complain 2734 * in that case.) 2735 */ 2736 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2737 if (sa_equal(sa, ifma->ifma_addr)) 2738 break; 2739 if (ifma == NULL) 2740 return 0; 2741 2742 if (ifma->ifma_refcount > 1) { 2743 ifma->ifma_refcount--; 2744 return 0; 2745 } 2746 2747 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2748 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2749 kfree(ifma->ifma_addr, M_IFMADDR); 2750 kfree(sa, M_IFMADDR); 2751 kfree(ifma, M_IFMADDR); 2752 2753 return 0; 2754 } 2755 2756 int 2757 if_delmulti(struct ifnet *ifp, struct sockaddr *sa) 2758 { 2759 int error; 2760 2761 ifnet_serialize_all(ifp); 2762 error = if_delmulti_serialized(ifp, sa); 2763 ifnet_deserialize_all(ifp); 2764 2765 return error; 2766 } 2767 2768 /* 2769 * Delete all multicast group membership for an interface. 2770 * Should be used to quickly flush all multicast filters. 2771 */ 2772 void 2773 if_delallmulti_serialized(struct ifnet *ifp) 2774 { 2775 struct ifmultiaddr *ifma, mark; 2776 struct sockaddr sa; 2777 2778 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2779 2780 bzero(&sa, sizeof(sa)); 2781 sa.sa_family = AF_UNSPEC; 2782 sa.sa_len = sizeof(sa); 2783 2784 bzero(&mark, sizeof(mark)); 2785 mark.ifma_addr = &sa; 2786 2787 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link); 2788 while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) { 2789 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2790 TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark, 2791 ifma_link); 2792 2793 if (ifma->ifma_addr->sa_family == AF_UNSPEC) 2794 continue; 2795 2796 if_delmulti_serialized(ifp, ifma->ifma_addr); 2797 } 2798 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2799 } 2800 2801 2802 /* 2803 * Set the link layer address on an interface. 2804 * 2805 * At this time we only support certain types of interfaces, 2806 * and we don't allow the length of the address to change. 2807 */ 2808 int 2809 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) 2810 { 2811 struct sockaddr_dl *sdl; 2812 struct ifreq ifr; 2813 2814 sdl = IF_LLSOCKADDR(ifp); 2815 if (sdl == NULL) 2816 return (EINVAL); 2817 if (len != sdl->sdl_alen) /* don't allow length to change */ 2818 return (EINVAL); 2819 switch (ifp->if_type) { 2820 case IFT_ETHER: /* these types use struct arpcom */ 2821 case IFT_XETHER: 2822 case IFT_L2VLAN: 2823 case IFT_IEEE8023ADLAG: 2824 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len); 2825 bcopy(lladdr, LLADDR(sdl), len); 2826 break; 2827 default: 2828 return (ENODEV); 2829 } 2830 /* 2831 * If the interface is already up, we need 2832 * to re-init it in order to reprogram its 2833 * address filter.
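 * This is done below by turning IFF_UP off and back on with two
 * SIOCSIFFLAGS calls into the driver.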
2834 */ 2835 ifnet_serialize_all(ifp); 2836 if ((ifp->if_flags & IFF_UP) != 0) { 2837 #ifdef INET 2838 struct ifaddr_container *ifac; 2839 #endif 2840 2841 ifp->if_flags &= ~IFF_UP; 2842 ifr.ifr_flags = ifp->if_flags; 2843 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2844 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2845 NULL); 2846 ifp->if_flags |= IFF_UP; 2847 ifr.ifr_flags = ifp->if_flags; 2848 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2849 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2850 NULL); 2851 #ifdef INET 2852 /* 2853 * Also send gratuitous ARPs to notify other nodes about 2854 * the address change. 2855 */ 2856 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 2857 struct ifaddr *ifa = ifac->ifa; 2858 2859 if (ifa->ifa_addr != NULL && 2860 ifa->ifa_addr->sa_family == AF_INET) 2861 arp_gratuitous(ifp, ifa); 2862 } 2863 #endif 2864 } 2865 ifnet_deserialize_all(ifp); 2866 return (0); 2867 } 2868 2869 2870 /* 2871 * Locate an interface based on a complete address. 2872 */ 2873 struct ifnet * 2874 if_bylla(const void *lla, unsigned char lla_len) 2875 { 2876 const struct ifnet_array *arr; 2877 struct ifnet *ifp; 2878 struct sockaddr_dl *sdl; 2879 int i; 2880 2881 arr = ifnet_array_get(); 2882 for (i = 0; i < arr->ifnet_count; ++i) { 2883 ifp = arr->ifnet_arr[i]; 2884 if (ifp->if_addrlen != lla_len) 2885 continue; 2886 2887 sdl = IF_LLSOCKADDR(ifp); 2888 if (memcmp(lla, LLADDR(sdl), lla_len) == 0) 2889 return (ifp); 2890 } 2891 return (NULL); 2892 } 2893 2894 struct ifmultiaddr * 2895 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp) 2896 { 2897 struct ifmultiaddr *ifma; 2898 2899 /* TODO: need ifnet_serialize_main */ 2900 ifnet_serialize_all(ifp); 2901 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2902 if (sa_equal(ifma->ifma_addr, sa)) 2903 break; 2904 ifnet_deserialize_all(ifp); 2905 2906 return ifma; 2907 } 2908 2909 /* 2910 * This function locates the first real ethernet MAC from a network 2911 * card and loads it into node, returning 0 on success or ENOENT if 2912 * no suitable interfaces were found. It is used by the uuid code to 2913 * generate a unique 6-byte number. 2914 */ 2915 int 2916 if_getanyethermac(uint16_t *node, int minlen) 2917 { 2918 struct ifnet *ifp; 2919 struct sockaddr_dl *sdl; 2920 2921 ifnet_lock(); 2922 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2923 if (ifp->if_type != IFT_ETHER) 2924 continue; 2925 sdl = IF_LLSOCKADDR(ifp); 2926 if (sdl->sdl_alen < minlen) 2927 continue; 2928 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node, 2929 minlen); 2930 ifnet_unlock(); 2931 return(0); 2932 } 2933 ifnet_unlock(); 2934 return (ENOENT); 2935 } 2936 2937 /* 2938 * The name argument must be a pointer to storage which will last as 2939 * long as the interface does. For physical devices, the result of 2940 * device_get_name(dev) is a good choice and for pseudo-devices a 2941 * static string works well. 2942 */ 2943 void 2944 if_initname(struct ifnet *ifp, const char *name, int unit) 2945 { 2946 ifp->if_dname = name; 2947 ifp->if_dunit = unit; 2948 if (unit != IF_DUNIT_NONE) 2949 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); 2950 else 2951 strlcpy(ifp->if_xname, name, IFNAMSIZ); 2952 } 2953 2954 int 2955 if_printf(struct ifnet *ifp, const char *fmt, ...) 
2956 { 2957 __va_list ap; 2958 int retval; 2959 2960 retval = kprintf("%s: ", ifp->if_xname); 2961 __va_start(ap, fmt); 2962 retval += kvprintf(fmt, ap); 2963 __va_end(ap); 2964 return (retval); 2965 } 2966 2967 struct ifnet * 2968 if_alloc(uint8_t type) 2969 { 2970 struct ifnet *ifp; 2971 size_t size; 2972 2973 /* 2974 * XXX temporary hack until arpcom is setup in if_l2com 2975 */ 2976 if (type == IFT_ETHER) 2977 size = sizeof(struct arpcom); 2978 else 2979 size = sizeof(struct ifnet); 2980 2981 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO); 2982 2983 ifp->if_type = type; 2984 2985 if (if_com_alloc[type] != NULL) { 2986 ifp->if_l2com = if_com_alloc[type](type, ifp); 2987 if (ifp->if_l2com == NULL) { 2988 kfree(ifp, M_IFNET); 2989 return (NULL); 2990 } 2991 } 2992 return (ifp); 2993 } 2994 2995 void 2996 if_free(struct ifnet *ifp) 2997 { 2998 if (ifp->if_description != NULL) 2999 kfree(ifp->if_description, M_IFDESCR); 3000 kfree(ifp, M_IFNET); 3001 } 3002 3003 void 3004 ifq_set_classic(struct ifaltq *ifq) 3005 { 3006 ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq, 3007 ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request); 3008 } 3009 3010 void 3011 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq, 3012 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request) 3013 { 3014 int q; 3015 3016 KASSERT(mapsubq != NULL, ("mapsubq is not specified")); 3017 KASSERT(enqueue != NULL, ("enqueue is not specified")); 3018 KASSERT(dequeue != NULL, ("dequeue is not specified")); 3019 KASSERT(request != NULL, ("request is not specified")); 3020 3021 ifq->altq_mapsubq = mapsubq; 3022 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 3023 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 3024 3025 ifsq->ifsq_enqueue = enqueue; 3026 ifsq->ifsq_dequeue = dequeue; 3027 ifsq->ifsq_request = request; 3028 } 3029 } 3030 3031 static void 3032 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 3033 { 3034 3035 classq_add(&ifsq->ifsq_norm, m); 3036 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 3037 } 3038 3039 static void 3040 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 3041 { 3042 3043 classq_add(&ifsq->ifsq_prio, m); 3044 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 3045 ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len); 3046 } 3047 3048 static struct mbuf * 3049 ifsq_norm_dequeue(struct ifaltq_subque *ifsq) 3050 { 3051 struct mbuf *m; 3052 3053 m = classq_get(&ifsq->ifsq_norm); 3054 if (m != NULL) 3055 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 3056 return (m); 3057 } 3058 3059 static struct mbuf * 3060 ifsq_prio_dequeue(struct ifaltq_subque *ifsq) 3061 { 3062 struct mbuf *m; 3063 3064 m = classq_get(&ifsq->ifsq_prio); 3065 if (m != NULL) { 3066 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 3067 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len); 3068 } 3069 return (m); 3070 } 3071 3072 int 3073 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m, 3074 struct altq_pktattr *pa __unused) 3075 { 3076 3077 M_ASSERTPKTHDR(m); 3078 again: 3079 if (ifsq->ifsq_len >= ifsq->ifsq_maxlen || 3080 ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) { 3081 struct mbuf *m_drop; 3082 3083 if (m->m_flags & M_PRIO) { 3084 m_drop = NULL; 3085 if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) && 3086 ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) { 3087 /* Try dropping some from normal queue. 
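 * A priority packet preferentially displaces a queued normal packet;
 * only when the normal queue has nothing to drop, or the priority queue
 * already holds more than half of the subqueue limits, is another
 * priority packet dropped instead.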
*/ 3088 m_drop = ifsq_norm_dequeue(ifsq); 3089 } 3090 if (m_drop == NULL) 3091 m_drop = ifsq_prio_dequeue(ifsq); 3092 } else { 3093 m_drop = ifsq_norm_dequeue(ifsq); 3094 } 3095 if (m_drop != NULL) { 3096 IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1); 3097 m_freem(m_drop); 3098 goto again; 3099 } 3100 /* 3101 * No old packets could be dropped! 3102 * NOTE: Caller increases oqdrops. 3103 */ 3104 m_freem(m); 3105 return (ENOBUFS); 3106 } else { 3107 if (m->m_flags & M_PRIO) 3108 ifsq_prio_enqueue(ifsq, m); 3109 else 3110 ifsq_norm_enqueue(ifsq, m); 3111 return (0); 3112 } 3113 } 3114 3115 struct mbuf * 3116 ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op) 3117 { 3118 struct mbuf *m; 3119 3120 switch (op) { 3121 case ALTDQ_POLL: 3122 m = classq_head(&ifsq->ifsq_prio); 3123 if (m == NULL) 3124 m = classq_head(&ifsq->ifsq_norm); 3125 break; 3126 3127 case ALTDQ_REMOVE: 3128 m = ifsq_prio_dequeue(ifsq); 3129 if (m == NULL) 3130 m = ifsq_norm_dequeue(ifsq); 3131 break; 3132 3133 default: 3134 panic("unsupported ALTQ dequeue op: %d", op); 3135 } 3136 return m; 3137 } 3138 3139 int 3140 ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg) 3141 { 3142 switch (req) { 3143 case ALTRQ_PURGE: 3144 for (;;) { 3145 struct mbuf *m; 3146 3147 m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE); 3148 if (m == NULL) 3149 break; 3150 m_freem(m); 3151 } 3152 break; 3153 3154 default: 3155 panic("unsupported ALTQ request: %d", req); 3156 } 3157 return 0; 3158 } 3159 3160 static void 3161 ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched) 3162 { 3163 struct ifnet *ifp = ifsq_get_ifp(ifsq); 3164 int running = 0, need_sched; 3165 3166 /* 3167 * Try to do direct ifnet.if_start on the subqueue first, if there is 3168 * contention on the subqueue hardware serializer, ifnet.if_start on 3169 * the subqueue will be scheduled on the subqueue owner CPU. 3170 */ 3171 if (!ifsq_tryserialize_hw(ifsq)) { 3172 /* 3173 * Subqueue hardware serializer contention happened, 3174 * ifnet.if_start on the subqueue is scheduled on 3175 * the subqueue owner CPU, and we keep going. 3176 */ 3177 ifsq_ifstart_schedule(ifsq, 1); 3178 return; 3179 } 3180 3181 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 3182 ifp->if_start(ifp, ifsq); 3183 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 3184 running = 1; 3185 } 3186 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 3187 3188 ifsq_deserialize_hw(ifsq); 3189 3190 if (need_sched) { 3191 /* 3192 * More data need to be transmitted, ifnet.if_start on the 3193 * subqueue is scheduled on the subqueue owner CPU, and we 3194 * keep going. 3195 * NOTE: ifnet.if_start subqueue interlock is not released. 3196 */ 3197 ifsq_ifstart_schedule(ifsq, force_sched); 3198 } 3199 } 3200 3201 /* 3202 * Subqueue packets staging mechanism: 3203 * 3204 * The packets enqueued into the subqueue are staged to a certain amount 3205 * before the ifnet.if_start on the subqueue is called. In this way, the 3206 * driver could avoid writing to hardware registers upon every packet, 3207 * instead, hardware registers could be written when certain amount of 3208 * packets are put onto hardware TX ring. The measurement on several modern 3209 * NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that the hardware 3210 * registers writing aggregation could save ~20% CPU time when 18-byte UDP 3211 * datagrams are transmitted at 1.48Mpps.
The performance improvement by 3212 * hardware registers writing aggregation is also mentioned by Luigi Rizzo's 3213 * netmap paper (http://info.iet.unipi.it/~luigi/netmap/). 3214 * 3215 * Subqueue packets staging is performed for two entry points into drivers' 3216 * transmission function: 3217 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try() 3218 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule() 3219 * 3220 * Subqueue packets staging will be stopped upon any of the following 3221 * conditions: 3222 * - If the count of packets enqueued on the current CPU is greater than or 3223 * equal to ifsq_stage_cntmax. (XXX this should be per-interface) 3224 * - If the total length of packets enqueued on the current CPU is greater 3225 * than or equal to the hardware's MTU - max_protohdr. max_protohdr is 3226 * cut from the hardware's MTU mainly because a full TCP segment's size 3227 * is usually less than hardware's MTU. 3228 * - ifsq_ifstart_schedule() is not pending on the current CPU and 3229 * ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not 3230 * released. 3231 * - The if_start_rollup(), which is registered as low priority netisr 3232 * rollup function, is called; probably because no more work is pending 3233 * for netisr. 3234 * 3235 * NOTE: 3236 * Currently subqueue packet staging is only performed in netisr threads. 3237 */ 3238 int 3239 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa) 3240 { 3241 struct ifaltq *ifq = &ifp->if_snd; 3242 struct ifaltq_subque *ifsq; 3243 int error, start = 0, len, mcast = 0, avoid_start = 0; 3244 struct ifsubq_stage_head *head = NULL; 3245 struct ifsubq_stage *stage = NULL; 3246 struct globaldata *gd = mycpu; 3247 struct thread *td = gd->gd_curthread; 3248 3249 crit_enter_quick(td); 3250 3251 ifsq = ifq_map_subq(ifq, gd->gd_cpuid); 3252 ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq); 3253 3254 len = m->m_pkthdr.len; 3255 if (m->m_flags & M_MCAST) 3256 mcast = 1; 3257 3258 if (td->td_type == TD_TYPE_NETISR) { 3259 head = &ifsubq_stage_heads[mycpuid]; 3260 stage = ifsq_get_stage(ifsq, mycpuid); 3261 3262 stage->stg_cnt++; 3263 stage->stg_len += len; 3264 if (stage->stg_cnt < ifsq_stage_cntmax && 3265 stage->stg_len < (ifp->if_mtu - max_protohdr)) 3266 avoid_start = 1; 3267 } 3268 3269 ALTQ_SQ_LOCK(ifsq); 3270 error = ifsq_enqueue_locked(ifsq, m, pa); 3271 if (error) { 3272 IFNET_STAT_INC(ifp, oqdrops, 1); 3273 if (!ifsq_data_ready(ifsq)) { 3274 ALTQ_SQ_UNLOCK(ifsq); 3275 goto done; 3276 } 3277 avoid_start = 0; 3278 } else { 3279 IFNET_STAT_INC(ifp, obytes, len); 3280 if (mcast) 3281 IFNET_STAT_INC(ifp, omcasts, 1); 3282 } 3283 if (!ifsq_is_started(ifsq)) { 3284 if (avoid_start) { 3285 ALTQ_SQ_UNLOCK(ifsq); 3286 3287 KKASSERT(!error); 3288 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 3289 ifsq_stage_insert(head, stage); 3290 3291 goto done; 3292 } 3293 3294 /* 3295 * Hold the subqueue interlock of ifnet.if_start 3296 */ 3297 ifsq_set_started(ifsq); 3298 start = 1; 3299 } 3300 ALTQ_SQ_UNLOCK(ifsq); 3301 3302 if (stage != NULL) { 3303 if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) { 3304 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 3305 if (!avoid_start) { 3306 ifsq_stage_remove(head, stage); 3307 ifsq_ifstart_schedule(ifsq, 1); 3308 } 3309 goto done; 3310 } 3311 3312 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) { 3313 ifsq_stage_remove(head, stage); 3314 } else { 3315 stage->stg_cnt = 0; 3316 stage->stg_len = 0; 3317 } 3318 } 3319 3320 if (start) 3321
ifsq_ifstart_try(ifsq, 0); 3322 3323 done: 3324 crit_exit_quick(td); 3325 return error; 3326 } 3327 3328 void * 3329 ifa_create(int size) 3330 { 3331 struct ifaddr *ifa; 3332 int i; 3333 3334 KASSERT(size >= sizeof(*ifa), ("ifaddr size too small")); 3335 3336 ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO); 3337 3338 /* 3339 * Make ifa_container available on all CPUs, since they 3340 * could be accessed by any thread. 3341 */ 3342 ifa->ifa_containers = 3343 kmalloc(ncpus * sizeof(struct ifaddr_container), 3344 M_IFADDR, 3345 M_INTWAIT | M_ZERO | M_CACHEALIGN); 3346 3347 ifa->ifa_ncnt = ncpus; 3348 for (i = 0; i < ncpus; ++i) { 3349 struct ifaddr_container *ifac = &ifa->ifa_containers[i]; 3350 3351 ifac->ifa_magic = IFA_CONTAINER_MAGIC; 3352 ifac->ifa = ifa; 3353 ifac->ifa_refcnt = 1; 3354 } 3355 #ifdef IFADDR_DEBUG 3356 kprintf("alloc ifa %p %d\n", ifa, size); 3357 #endif 3358 return ifa; 3359 } 3360 3361 void 3362 ifac_free(struct ifaddr_container *ifac, int cpu_id) 3363 { 3364 struct ifaddr *ifa = ifac->ifa; 3365 3366 KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC); 3367 KKASSERT(ifac->ifa_refcnt == 0); 3368 KASSERT(ifac->ifa_listmask == 0, 3369 ("ifa is still on %#x lists", ifac->ifa_listmask)); 3370 3371 ifac->ifa_magic = IFA_CONTAINER_DEAD; 3372 3373 #ifdef IFADDR_DEBUG_VERBOSE 3374 kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id); 3375 #endif 3376 3377 KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus, 3378 ("invalid # of ifac, %d", ifa->ifa_ncnt)); 3379 if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) { 3380 #ifdef IFADDR_DEBUG 3381 kprintf("free ifa %p\n", ifa); 3382 #endif 3383 kfree(ifa->ifa_containers, M_IFADDR); 3384 kfree(ifa, M_IFADDR); 3385 } 3386 } 3387 3388 static void 3389 ifa_iflink_dispatch(netmsg_t nmsg) 3390 { 3391 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3392 struct ifaddr *ifa = msg->ifa; 3393 struct ifnet *ifp = msg->ifp; 3394 int cpu = mycpuid; 3395 struct ifaddr_container *ifac; 3396 3397 crit_enter(); 3398 3399 ifac = &ifa->ifa_containers[cpu]; 3400 ASSERT_IFAC_VALID(ifac); 3401 KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0, 3402 ("ifaddr is on if_addrheads")); 3403 3404 ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD; 3405 if (msg->tail) 3406 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link); 3407 else 3408 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link); 3409 3410 crit_exit(); 3411 3412 netisr_forwardmsg_all(&nmsg->base, cpu + 1); 3413 } 3414 3415 void 3416 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail) 3417 { 3418 struct netmsg_ifaddr msg; 3419 3420 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3421 0, ifa_iflink_dispatch); 3422 msg.ifa = ifa; 3423 msg.ifp = ifp; 3424 msg.tail = tail; 3425 3426 netisr_domsg(&msg.base, 0); 3427 } 3428 3429 static void 3430 ifa_ifunlink_dispatch(netmsg_t nmsg) 3431 { 3432 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3433 struct ifaddr *ifa = msg->ifa; 3434 struct ifnet *ifp = msg->ifp; 3435 int cpu = mycpuid; 3436 struct ifaddr_container *ifac; 3437 3438 crit_enter(); 3439 3440 ifac = &ifa->ifa_containers[cpu]; 3441 ASSERT_IFAC_VALID(ifac); 3442 KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD, 3443 ("ifaddr is not on if_addrhead")); 3444 3445 TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link); 3446 ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD; 3447 3448 crit_exit(); 3449 3450 netisr_forwardmsg_all(&nmsg->base, cpu + 1); 3451 } 3452 3453 void 3454 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp) 3455 { 3456 struct netmsg_ifaddr msg;
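	/*
	 * Like ifa_iflink(), this runs as a netisr message: the dispatch
	 * routine executes on netisr0 first and is then forwarded to each
	 * remaining netisr, unlinking the per-CPU ifaddr_container from
	 * if_addrheads on every CPU.
	 */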
3457 3458 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3459 0, ifa_ifunlink_dispatch); 3460 msg.ifa = ifa; 3461 msg.ifp = ifp; 3462 3463 netisr_domsg(&msg.base, 0); 3464 } 3465 3466 static void 3467 ifa_destroy_dispatch(netmsg_t nmsg) 3468 { 3469 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3470 3471 IFAFREE(msg->ifa); 3472 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1); 3473 } 3474 3475 void 3476 ifa_destroy(struct ifaddr *ifa) 3477 { 3478 struct netmsg_ifaddr msg; 3479 3480 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3481 0, ifa_destroy_dispatch); 3482 msg.ifa = ifa; 3483 3484 netisr_domsg(&msg.base, 0); 3485 } 3486 3487 static void 3488 if_start_rollup(void) 3489 { 3490 struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid]; 3491 struct ifsubq_stage *stage; 3492 3493 crit_enter(); 3494 3495 while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) { 3496 struct ifaltq_subque *ifsq = stage->stg_subq; 3497 int is_sched = 0; 3498 3499 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED) 3500 is_sched = 1; 3501 ifsq_stage_remove(head, stage); 3502 3503 if (is_sched) { 3504 ifsq_ifstart_schedule(ifsq, 1); 3505 } else { 3506 int start = 0; 3507 3508 ALTQ_SQ_LOCK(ifsq); 3509 if (!ifsq_is_started(ifsq)) { 3510 /* 3511 * Hold the subqueue interlock of 3512 * ifnet.if_start 3513 */ 3514 ifsq_set_started(ifsq); 3515 start = 1; 3516 } 3517 ALTQ_SQ_UNLOCK(ifsq); 3518 3519 if (start) 3520 ifsq_ifstart_try(ifsq, 1); 3521 } 3522 KKASSERT((stage->stg_flags & 3523 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 3524 } 3525 3526 crit_exit(); 3527 } 3528 3529 static void 3530 ifnetinit(void *dummy __unused) 3531 { 3532 int i; 3533 3534 /* XXX netisr_ncpus */ 3535 for (i = 0; i < ncpus; ++i) 3536 TAILQ_INIT(&ifsubq_stage_heads[i].stg_head); 3537 netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART); 3538 } 3539 3540 void 3541 if_register_com_alloc(u_char type, 3542 if_com_alloc_t *a, if_com_free_t *f) 3543 { 3544 3545 KASSERT(if_com_alloc[type] == NULL, 3546 ("if_register_com_alloc: %d already registered", type)); 3547 KASSERT(if_com_free[type] == NULL, 3548 ("if_register_com_alloc: %d free already registered", type)); 3549 3550 if_com_alloc[type] = a; 3551 if_com_free[type] = f; 3552 } 3553 3554 void 3555 if_deregister_com_alloc(u_char type) 3556 { 3557 3558 KASSERT(if_com_alloc[type] != NULL, 3559 ("if_deregister_com_alloc: %d not registered", type)); 3560 KASSERT(if_com_free[type] != NULL, 3561 ("if_deregister_com_alloc: %d free not registered", type)); 3562 if_com_alloc[type] = NULL; 3563 if_com_free[type] = NULL; 3564 } 3565 3566 void 3567 ifq_set_maxlen(struct ifaltq *ifq, int len) 3568 { 3569 ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax); 3570 } 3571 3572 int 3573 ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused) 3574 { 3575 return ALTQ_SUBQ_INDEX_DEFAULT; 3576 } 3577 3578 int 3579 ifq_mapsubq_modulo(struct ifaltq *ifq, int cpuid) 3580 { 3581 3582 return (cpuid % ifq->altq_subq_mappriv); 3583 } 3584 3585 /* 3586 * Watchdog timeout. Process callback as appropriate. If we cannot 3587 * serialize the ifnet just try again on the next timeout. 3588 * 3589 * NOTE: The ifnet can adjust wd_timer while holding the serializer. We 3590 * can only safely adjust it under the same circumstances. 3591 */ 3592 static void 3593 ifsq_watchdog(void *arg) 3594 { 3595 struct ifsubq_watchdog *wd = arg; 3596 struct ifnet *ifp; 3597 int count; 3598 3599 /* 3600 * Fast track. 
Try to avoid acquiring the serializer when not 3601 * near the terminal count, unless asked to. If the atomic op 3602 * to decrement the count fails just retry on the next callout. 3603 */ 3604 count = wd->wd_timer; 3605 cpu_ccfence(); 3606 if (count == 0) 3607 goto done; 3608 if (count > 2 && (wd->wd_flags & IF_WDOG_ALLTICKS) == 0) { 3609 (void)atomic_cmpset_int(&wd->wd_timer, count, count - 1); 3610 goto done; 3611 } 3612 3613 /* 3614 * Obtain the serializer and then re-test all wd_timer conditions 3615 * as it may have changed. NICs do not mess with wd_timer without 3616 * holding the serializer. 3617 * 3618 * If we are unable to obtain the serializer just retry the same 3619 * count on the next callout. 3620 * 3621 * - call watchdog in terminal count (0) 3622 * - call watchdog on last tick (1) if requested 3623 * - call watchdog on all ticks if requested 3624 */ 3625 ifp = ifsq_get_ifp(wd->wd_subq); 3626 if (ifnet_tryserialize_all(ifp) == 0) 3627 goto done; 3628 if (atomic_cmpset_int(&wd->wd_timer, count, count - 1)) { 3629 --count; 3630 if (count == 0 || 3631 (wd->wd_flags & IF_WDOG_ALLTICKS) || 3632 ((wd->wd_flags & IF_WDOG_LASTTICK) && count == 1)) { 3633 wd->wd_watchdog(wd->wd_subq); 3634 } 3635 } 3636 ifnet_deserialize_all(ifp); 3637 done: 3638 ifsq_watchdog_reset(wd); 3639 } 3640 3641 static void 3642 ifsq_watchdog_reset(struct ifsubq_watchdog *wd) 3643 { 3644 callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd, 3645 ifsq_get_cpuid(wd->wd_subq)); 3646 } 3647 3648 void 3649 ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq, 3650 ifsq_watchdog_t watchdog, int flags) 3651 { 3652 callout_init_mp(&wd->wd_callout); 3653 wd->wd_timer = 0; 3654 wd->wd_flags = flags; 3655 wd->wd_subq = ifsq; 3656 wd->wd_watchdog = watchdog; 3657 } 3658 3659 void 3660 ifsq_watchdog_start(struct ifsubq_watchdog *wd) 3661 { 3662 atomic_swap_int(&wd->wd_timer, 0); 3663 ifsq_watchdog_reset(wd); 3664 } 3665 3666 void 3667 ifsq_watchdog_stop(struct ifsubq_watchdog *wd) 3668 { 3669 atomic_swap_int(&wd->wd_timer, 0); 3670 callout_stop(&wd->wd_callout); 3671 } 3672 3673 void 3674 ifsq_watchdog_set_count(struct ifsubq_watchdog *wd, int count) 3675 { 3676 atomic_swap_int(&wd->wd_timer, count); 3677 } 3678 3679 void 3680 ifnet_lock(void) 3681 { 3682 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3683 ("try holding ifnet lock in netisr")); 3684 mtx_lock(&ifnet_mtx); 3685 } 3686 3687 void 3688 ifnet_unlock(void) 3689 { 3690 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3691 ("try holding ifnet lock in netisr")); 3692 mtx_unlock(&ifnet_mtx); 3693 } 3694 3695 static struct ifnet_array * 3696 ifnet_array_alloc(int count) 3697 { 3698 struct ifnet_array *arr; 3699 3700 arr = kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]), 3701 M_IFNET, M_WAITOK); 3702 arr->ifnet_count = count; 3703 3704 return arr; 3705 } 3706 3707 static void 3708 ifnet_array_free(struct ifnet_array *arr) 3709 { 3710 if (arr == &ifnet_array0) 3711 return; 3712 kfree(arr, M_IFNET); 3713 } 3714 3715 static struct ifnet_array * 3716 ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr) 3717 { 3718 struct ifnet_array *arr; 3719 int count, i; 3720 3721 KASSERT(old_arr->ifnet_count >= 0, 3722 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3723 count = old_arr->ifnet_count + 1; 3724 arr = ifnet_array_alloc(count); 3725 3726 /* 3727 * Save the old ifnet array and append this ifp to the end of 3728 * the new ifnet array. 
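 * A new array is built instead of modifying the old one in place, so a
 * netisr thread that already picked up the old array via ifnet_array_get()
 * keeps seeing a consistent snapshot.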
3729 */ 3730 for (i = 0; i < old_arr->ifnet_count; ++i) { 3731 KASSERT(old_arr->ifnet_arr[i] != ifp, 3732 ("%s is already in ifnet array", ifp->if_xname)); 3733 arr->ifnet_arr[i] = old_arr->ifnet_arr[i]; 3734 } 3735 KASSERT(i == count - 1, 3736 ("add %s, ifnet array index mismatch, should be %d, but got %d", 3737 ifp->if_xname, count - 1, i)); 3738 arr->ifnet_arr[i] = ifp; 3739 3740 return arr; 3741 } 3742 3743 static struct ifnet_array * 3744 ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr) 3745 { 3746 struct ifnet_array *arr; 3747 int count, i, idx, found = 0; 3748 3749 KASSERT(old_arr->ifnet_count > 0, 3750 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3751 count = old_arr->ifnet_count - 1; 3752 arr = ifnet_array_alloc(count); 3753 3754 /* 3755 * Save the old ifnet array, but skip this ifp. 3756 */ 3757 idx = 0; 3758 for (i = 0; i < old_arr->ifnet_count; ++i) { 3759 if (old_arr->ifnet_arr[i] == ifp) { 3760 KASSERT(!found, 3761 ("dup %s is in ifnet array", ifp->if_xname)); 3762 found = 1; 3763 continue; 3764 } 3765 KASSERT(idx < count, 3766 ("invalid ifnet array index %d, count %d", idx, count)); 3767 arr->ifnet_arr[idx] = old_arr->ifnet_arr[i]; 3768 ++idx; 3769 } 3770 KASSERT(found, ("%s is not in ifnet array", ifp->if_xname)); 3771 KASSERT(idx == count, 3772 ("del %s, ifnet array count mismatch, should be %d, but got %d ", 3773 ifp->if_xname, count, idx)); 3774 3775 return arr; 3776 } 3777 3778 const struct ifnet_array * 3779 ifnet_array_get(void) 3780 { 3781 const struct ifnet_array *ret; 3782 3783 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3784 ret = ifnet_array; 3785 /* Make sure 'ret' is really used. */ 3786 cpu_ccfence(); 3787 return (ret); 3788 } 3789 3790 int 3791 ifnet_array_isempty(void) 3792 { 3793 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3794 if (ifnet_array->ifnet_count == 0) 3795 return 1; 3796 else 3797 return 0; 3798 } 3799 3800 void 3801 ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp) 3802 { 3803 struct ifaddr *ifa; 3804 3805 memset(mark, 0, sizeof(*mark)); 3806 ifa = &mark->ifa; 3807 3808 mark->ifac.ifa = ifa; 3809 3810 ifa->ifa_addr = &mark->addr; 3811 ifa->ifa_dstaddr = &mark->dstaddr; 3812 ifa->ifa_netmask = &mark->netmask; 3813 ifa->ifa_ifp = ifp; 3814 } 3815 3816 static int 3817 if_ringcnt_fixup(int ring_cnt, int ring_cntmax) 3818 { 3819 3820 KASSERT(ring_cntmax > 0, ("invalid ring count max %d", ring_cntmax)); 3821 3822 if (ring_cnt <= 0 || ring_cnt > ring_cntmax) 3823 ring_cnt = ring_cntmax; 3824 if (ring_cnt > netisr_ncpus) 3825 ring_cnt = netisr_ncpus; 3826 return (ring_cnt); 3827 } 3828 3829 static void 3830 if_ringmap_set_grid(device_t dev, struct if_ringmap *rm, int grid) 3831 { 3832 int i, offset; 3833 3834 KASSERT(grid > 0, ("invalid if_ringmap grid %d", grid)); 3835 KASSERT(grid >= rm->rm_cnt, ("invalid if_ringmap grid %d, count %d", 3836 grid, rm->rm_cnt)); 3837 rm->rm_grid = grid; 3838 3839 offset = (rm->rm_grid * device_get_unit(dev)) % netisr_ncpus; 3840 for (i = 0; i < rm->rm_cnt; ++i) { 3841 rm->rm_cpumap[i] = offset + i; 3842 KASSERT(rm->rm_cpumap[i] < netisr_ncpus, 3843 ("invalid cpumap[%d] = %d, offset %d", i, 3844 rm->rm_cpumap[i], offset)); 3845 } 3846 } 3847 3848 static struct if_ringmap * 3849 if_ringmap_alloc_flags(device_t dev, int ring_cnt, int ring_cntmax, 3850 uint32_t flags) 3851 { 3852 struct if_ringmap *rm; 3853 int i, grid = 0, prev_grid; 3854 3855 ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax); 3856 rm = kmalloc(__offsetof(struct 
if_ringmap, rm_cpumap[ring_cnt]), 3857 M_DEVBUF, M_WAITOK | M_ZERO); 3858 3859 rm->rm_cnt = ring_cnt; 3860 if (flags & RINGMAP_FLAG_POWEROF2) 3861 rm->rm_cnt = 1 << (fls(rm->rm_cnt) - 1); 3862 3863 prev_grid = netisr_ncpus; 3864 for (i = 0; i < netisr_ncpus; ++i) { 3865 if (netisr_ncpus % (i + 1) != 0) 3866 continue; 3867 3868 grid = netisr_ncpus / (i + 1); 3869 if (rm->rm_cnt > grid) { 3870 grid = prev_grid; 3871 break; 3872 } 3873 3874 if (rm->rm_cnt > netisr_ncpus / (i + 2)) 3875 break; 3876 prev_grid = grid; 3877 } 3878 if_ringmap_set_grid(dev, rm, grid); 3879 3880 return (rm); 3881 } 3882 3883 struct if_ringmap * 3884 if_ringmap_alloc(device_t dev, int ring_cnt, int ring_cntmax) 3885 { 3886 3887 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3888 RINGMAP_FLAG_NONE)); 3889 } 3890 3891 struct if_ringmap * 3892 if_ringmap_alloc2(device_t dev, int ring_cnt, int ring_cntmax) 3893 { 3894 3895 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3896 RINGMAP_FLAG_POWEROF2)); 3897 } 3898 3899 void 3900 if_ringmap_free(struct if_ringmap *rm) 3901 { 3902 3903 kfree(rm, M_DEVBUF); 3904 } 3905 3906 /* 3907 * Align the two ringmaps. 3908 * 3909 * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 3910 * 3911 * Before: 3912 * 3913 * CPU      0  1  2  3   4  5  6  7 3914 * NIC_RX               n0 n1 n2 n3 3915 * NIC_TX        N0 N1 3916 * 3917 * After: 3918 * 3919 * CPU      0  1  2  3   4  5  6  7 3920 * NIC_RX               n0 n1 n2 n3 3921 * NIC_TX               N0 N1 3922 */ 3923 void 3924 if_ringmap_align(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3925 { 3926 3927 if (rm0->rm_grid > rm1->rm_grid) 3928 if_ringmap_set_grid(dev, rm1, rm0->rm_grid); 3929 else if (rm0->rm_grid < rm1->rm_grid) 3930 if_ringmap_set_grid(dev, rm0, rm1->rm_grid); 3931 } 3932 3933 void 3934 if_ringmap_match(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3935 { 3936 int subset_grid, cnt, divisor, mod, offset, i; 3937 struct if_ringmap *subset_rm, *rm; 3938 int old_rm0_grid, old_rm1_grid; 3939 3940 if (rm0->rm_grid == rm1->rm_grid) 3941 return; 3942 3943 /* Save grid for later use */ 3944 old_rm0_grid = rm0->rm_grid; 3945 old_rm1_grid = rm1->rm_grid; 3946 3947 if_ringmap_align(dev, rm0, rm1); 3948 3949 /* 3950 * Re-shuffle rings to get more even distribution. 3951 * 3952 * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 3953 * 3954 * CPU       0  1  2  3   4  5  6  7   8  9 10 11 3955 * 3956 * NIC_RX   a0 a1 a2 a3  b0 b1 b2 b3  c0 c1 c2 c3 3957 * NIC_TX   A0 A1        B0 B1        C0 C1 3958 * 3959 * NIC_RX   d0 d1 d2 d3  e0 e1 e2 e3  f0 f1 f2 f3 3960 * NIC_TX         D0 D1        E0 E1        F0 F1 3961 */ 3962 3963 if (rm0->rm_cnt >= (2 * old_rm1_grid)) { 3964 cnt = rm0->rm_cnt; 3965 subset_grid = old_rm1_grid; 3966 subset_rm = rm1; 3967 rm = rm0; 3968 } else if (rm1->rm_cnt > (2 * old_rm0_grid)) { 3969 cnt = rm1->rm_cnt; 3970 subset_grid = old_rm0_grid; 3971 subset_rm = rm0; 3972 rm = rm1; 3973 } else { 3974 /* No space to shuffle.
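 * Neither ringmap holds at least twice the other's original grid of
 * rings, so keep the plain aligned layout from above.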
*/ 3975 return; 3976 } 3977 3978 mod = cnt / subset_grid; 3979 KKASSERT(mod >= 2); 3980 divisor = netisr_ncpus / rm->rm_grid; 3981 offset = ((device_get_unit(dev) / divisor) % mod) * subset_grid; 3982 3983 for (i = 0; i < subset_rm->rm_cnt; ++i) { 3984 subset_rm->rm_cpumap[i] += offset; 3985 KASSERT(subset_rm->rm_cpumap[i] < netisr_ncpus, 3986 ("match: invalid cpumap[%d] = %d, offset %d", 3987 i, subset_rm->rm_cpumap[i], offset)); 3988 } 3989 #ifdef INVARIANTS 3990 for (i = 0; i < subset_rm->rm_cnt; ++i) { 3991 int j; 3992 3993 for (j = 0; j < rm->rm_cnt; ++j) { 3994 if (rm->rm_cpumap[j] == subset_rm->rm_cpumap[i]) 3995 break; 3996 } 3997 KASSERT(j < rm->rm_cnt, 3998 ("subset cpumap[%d] = %d not found in superset", 3999 i, subset_rm->rm_cpumap[i])); 4000 } 4001 #endif 4002 } 4003 4004 int 4005 if_ringmap_count(const struct if_ringmap *rm) 4006 { 4007 4008 return (rm->rm_cnt); 4009 } 4010 4011 int 4012 if_ringmap_cpumap(const struct if_ringmap *rm, int ring) 4013 { 4014 4015 KASSERT(ring >= 0 && ring < rm->rm_cnt, ("invalid ring %d", ring)); 4016 return (rm->rm_cpumap[ring]); 4017 } 4018 4019 void 4020 if_ringmap_rdrtable(const struct if_ringmap *rm, int table[], int table_nent) 4021 { 4022 int i, grid_idx, grid_cnt, patch_off, patch_cnt, ncopy; 4023 4024 KASSERT(table_nent > 0 && (table_nent & NETISR_CPUMASK) == 0, 4025 ("invalid redirect table entries %d", table_nent)); 4026 4027 grid_idx = 0; 4028 for (i = 0; i < NETISR_CPUMAX; ++i) { 4029 table[i] = grid_idx++ % rm->rm_cnt; 4030 4031 if (grid_idx == rm->rm_grid) 4032 grid_idx = 0; 4033 } 4034 4035 /* 4036 * Make the ring distributed more evenly for the remainder 4037 * of each grid. 4038 * 4039 * e.g. 12 netisrs, rm contains 8 rings. 4040 * 4041 * Redirect table before: 4042 * 4043 * 0 1 2 3 4 5 6 7 0 1 2 3 0 1 2 3 4044 * 4 5 6 7 0 1 2 3 0 1 2 3 4 5 6 7 4045 * 0 1 2 3 0 1 2 3 4 5 6 7 0 1 2 3 4046 * .... 4047 * 4048 * Redirect table after being patched (pX, patched entries): 4049 * 4050 * 0 1 2 3 4 5 6 7 p0 p1 p2 p3 0 1 2 3 4051 * 4 5 6 7 p4 p5 p6 p7 0 1 2 3 4 5 6 7 4052 * p0 p1 p2 p3 0 1 2 3 4 5 6 7 p4 p5 p6 p7 4053 * .... 
4054 */ 4055 patch_cnt = rm->rm_grid % rm->rm_cnt; 4056 if (patch_cnt == 0) 4057 goto done; 4058 patch_off = rm->rm_grid - (rm->rm_grid % rm->rm_cnt); 4059 4060 grid_cnt = roundup(NETISR_CPUMAX, rm->rm_grid) / rm->rm_grid; 4061 grid_idx = 0; 4062 for (i = 0; i < grid_cnt; ++i) { 4063 int j; 4064 4065 for (j = 0; j < patch_cnt; ++j) { 4066 int fix_idx; 4067 4068 fix_idx = (i * rm->rm_grid) + patch_off + j; 4069 if (fix_idx >= NETISR_CPUMAX) 4070 goto done; 4071 table[fix_idx] = grid_idx++ % rm->rm_cnt; 4072 } 4073 } 4074 done: 4075 /* 4076 * If the device supports larger redirect table, duplicate 4077 * the first NETISR_CPUMAX entries to the rest of the table, 4078 * so that it matches upper layer's expectation: 4079 * (hash & NETISR_CPUMASK) % netisr_ncpus 4080 */ 4081 ncopy = table_nent / NETISR_CPUMAX; 4082 for (i = 1; i < ncopy; ++i) { 4083 memcpy(&table[i * NETISR_CPUMAX], table, 4084 NETISR_CPUMAX * sizeof(table[0])); 4085 } 4086 if (if_ringmap_dumprdr) { 4087 for (i = 0; i < table_nent; ++i) { 4088 if (i != 0 && i % 16 == 0) 4089 kprintf("\n"); 4090 kprintf("%03d ", table[i]); 4091 } 4092 kprintf("\n"); 4093 } 4094 } 4095 4096 int 4097 if_ringmap_cpumap_sysctl(SYSCTL_HANDLER_ARGS) 4098 { 4099 struct if_ringmap *rm = arg1; 4100 int i, error = 0; 4101 4102 for (i = 0; i < rm->rm_cnt; ++i) { 4103 int cpu = rm->rm_cpumap[i]; 4104 4105 error = SYSCTL_OUT(req, &cpu, sizeof(cpu)); 4106 if (error) 4107 break; 4108 } 4109 return (error); 4110 } 4111
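
/*
 * Illustrative sketch (not part of this file) of how a multi-ring NIC
 * driver might consume the if_ringmap API above during attach; the ring
 * count limits and the 128-entry redirect table size are hypothetical
 * placeholders, and error handling is omitted:
 *
 *	struct if_ringmap *rx_rm, *tx_rm;
 *	int rdr_table[128];
 *	int i, ring_cnt;
 *
 *	rx_rm = if_ringmap_alloc(dev, nrxr_wanted, EXAMPLE_RX_RING_MAX);
 *	tx_rm = if_ringmap_alloc(dev, ntxr_wanted, EXAMPLE_TX_RING_MAX);
 *	if_ringmap_match(dev, rx_rm, tx_rm);
 *
 *	ring_cnt = if_ringmap_count(rx_rm);
 *	for (i = 0; i < ring_cnt; ++i) {
 *		// bind RX ring i's interrupt/polling to
 *		// CPU if_ringmap_cpumap(rx_rm, i)
 *	}
 *	if_ringmap_rdrtable(rx_rm, rdr_table, 128);
 *	// program rdr_table[] into the NIC's RSS redirect table
 *
 *	// ...and on detach:
 *	if_ringmap_free(rx_rm);
 *	if_ringmap_free(tx_rm);
 */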