1 /* 2 * Copyright (c) 1980, 1986, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)if.c 8.3 (Berkeley) 1/4/94 30 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $ 31 */ 32 33 #include "opt_inet6.h" 34 #include "opt_inet.h" 35 #include "opt_ifpoll.h" 36 37 #include <sys/param.h> 38 #include <sys/malloc.h> 39 #include <sys/mbuf.h> 40 #include <sys/systm.h> 41 #include <sys/proc.h> 42 #include <sys/caps.h> 43 #include <sys/protosw.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/socketops.h> 47 #include <sys/kernel.h> 48 #include <sys/ktr.h> 49 #include <sys/mutex.h> 50 #include <sys/lock.h> 51 #include <sys/sockio.h> 52 #include <sys/syslog.h> 53 #include <sys/sysctl.h> 54 #include <sys/domain.h> 55 #include <sys/thread.h> 56 #include <sys/serialize.h> 57 #include <sys/bus.h> 58 #include <sys/jail.h> 59 60 #include <sys/thread2.h> 61 #include <sys/msgport2.h> 62 #include <sys/mutex2.h> 63 64 #include <net/if.h> 65 #include <net/if_arp.h> 66 #include <net/if_dl.h> 67 #include <net/if_types.h> 68 #include <net/if_var.h> 69 #include <net/if_ringmap.h> 70 #include <net/ifq_var.h> 71 #include <net/radix.h> 72 #include <net/route.h> 73 #include <net/if_clone.h> 74 #include <net/netisr2.h> 75 #include <net/netmsg2.h> 76 77 #include <machine/atomic.h> 78 #include <machine/stdarg.h> 79 #include <machine/smp.h> 80 81 #if defined(INET) || defined(INET6) 82 #include <netinet/in.h> 83 #include <netinet/in_var.h> 84 #include <netinet/if_ether.h> 85 #ifdef INET6 86 #include <netinet6/in6_var.h> 87 #include <netinet6/in6_ifattach.h> 88 #endif /* INET6 */ 89 #endif /* INET || INET6 */ 90 91 struct netmsg_ifaddr { 92 struct netmsg_base base; 93 struct ifaddr *ifa; 94 struct ifnet *ifp; 95 int tail; 96 }; 97 98 struct ifsubq_stage_head { 99 TAILQ_HEAD(, ifsubq_stage) stg_head; 100 } __cachealign; 101 102 struct if_ringmap { 103 int rm_cnt; 104 int rm_grid; 105 int rm_cpumap[]; 106 }; 107 108 #define RINGMAP_FLAG_NONE 0x0 109 #define 
RINGMAP_FLAG_POWEROF2 0x1 110 111 /* 112 * System initialization 113 */ 114 static void if_attachdomain(void *); 115 static void if_attachdomain1(struct ifnet *); 116 static int ifconf(u_long, caddr_t, struct ucred *); 117 static void ifinit(void *); 118 static void ifnetinit(void *); 119 static void if_slowtimo(void *); 120 static int if_rtdel(struct radix_node *, void *); 121 static void if_slowtimo_dispatch(netmsg_t); 122 123 /* Helper functions */ 124 static void ifsq_watchdog_reset(struct ifsubq_watchdog *); 125 static int if_delmulti_serialized(struct ifnet *, struct sockaddr *); 126 static struct ifnet_array *ifnet_array_alloc(int); 127 static void ifnet_array_free(struct ifnet_array *); 128 static struct ifnet_array *ifnet_array_add(struct ifnet *, 129 const struct ifnet_array *); 130 static struct ifnet_array *ifnet_array_del(struct ifnet *, 131 const struct ifnet_array *); 132 static struct ifg_group *if_creategroup(const char *); 133 static int if_destroygroup(struct ifg_group *); 134 static int if_delgroup_locked(struct ifnet *, const char *); 135 static int if_getgroups(struct ifgroupreq *, struct ifnet *); 136 static int if_getgroupmembers(struct ifgroupreq *); 137 138 #ifdef INET6 139 /* 140 * XXX: declare here to avoid to include many inet6 related files.. 141 * should be more generalized? 142 */ 143 extern void nd6_setmtu(struct ifnet *); 144 #endif 145 146 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); 147 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); 148 SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap"); 149 150 static int ifsq_stage_cntmax = 16; 151 TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax); 152 SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW, 153 &ifsq_stage_cntmax, 0, "ifq staging packet count max"); 154 155 static int if_stats_compat = 0; 156 SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW, 157 &if_stats_compat, 0, "Compat the old ifnet stats"); 158 159 static int if_ringmap_dumprdr = 0; 160 SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW, 161 &if_ringmap_dumprdr, 0, "dump redirect table"); 162 163 /* Interface description */ 164 static unsigned int ifdescr_maxlen = 1024; 165 SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, 166 &ifdescr_maxlen, 0, 167 "administrative maximum length for interface description"); 168 169 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL); 170 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL); 171 172 static if_com_alloc_t *if_com_alloc[256]; 173 static if_com_free_t *if_com_free[256]; 174 175 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); 176 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); 177 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure"); 178 MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); 179 180 int ifqmaxlen = IFQ_MAXLEN; 181 struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet); 182 struct ifgrouphead ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head); 183 static struct lock ifgroup_lock; 184 185 static struct ifnet_array ifnet_array0; 186 static struct ifnet_array *ifnet_array = &ifnet_array0; 187 188 static struct callout if_slowtimo_timer; 189 static struct netmsg_base if_slowtimo_netmsg; 190 191 int if_index = 0; 192 struct ifnet **ifindex2ifnet = NULL; 193 static struct mtx ifnet_mtx = MTX_INITIALIZER("ifnet"); 194 195 static struct ifsubq_stage_head ifsubq_stage_heads[MAXCPU]; 196 197 #ifdef notyet 198 #define IFQ_KTR_STRING 
"ifq=%p" 199 #define IFQ_KTR_ARGS struct ifaltq *ifq 200 #ifndef KTR_IFQ 201 #define KTR_IFQ KTR_ALL 202 #endif 203 KTR_INFO_MASTER(ifq); 204 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS); 205 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS); 206 #define logifq(name, arg) KTR_LOG(ifq_ ## name, arg) 207 208 #define IF_START_KTR_STRING "ifp=%p" 209 #define IF_START_KTR_ARGS struct ifnet *ifp 210 #ifndef KTR_IF_START 211 #define KTR_IF_START KTR_ALL 212 #endif 213 KTR_INFO_MASTER(if_start); 214 KTR_INFO(KTR_IF_START, if_start, run, 0, 215 IF_START_KTR_STRING, IF_START_KTR_ARGS); 216 KTR_INFO(KTR_IF_START, if_start, sched, 1, 217 IF_START_KTR_STRING, IF_START_KTR_ARGS); 218 KTR_INFO(KTR_IF_START, if_start, avoid, 2, 219 IF_START_KTR_STRING, IF_START_KTR_ARGS); 220 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3, 221 IF_START_KTR_STRING, IF_START_KTR_ARGS); 222 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4, 223 IF_START_KTR_STRING, IF_START_KTR_ARGS); 224 #define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg) 225 #endif /* notyet */ 226 227 /* 228 * Network interface utility routines. 229 * 230 * Routines with ifa_ifwith* names take sockaddr *'s as 231 * parameters. 232 */ 233 /* ARGSUSED */ 234 static void 235 ifinit(void *dummy) 236 { 237 lockinit(&ifgroup_lock, "ifgroup", 0, 0); 238 239 callout_init_mp(&if_slowtimo_timer); 240 netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport, 241 MSGF_PRIORITY, if_slowtimo_dispatch); 242 243 /* Start if_slowtimo */ 244 lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg); 245 } 246 247 static void 248 ifsq_ifstart_ipifunc(void *arg) 249 { 250 struct ifaltq_subque *ifsq = arg; 251 struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid); 252 253 crit_enter(); 254 if (lmsg->ms_flags & MSGF_DONE) 255 lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg); 256 crit_exit(); 257 } 258 259 static __inline void 260 ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 261 { 262 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 263 TAILQ_REMOVE(&head->stg_head, stage, stg_link); 264 stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED); 265 stage->stg_cnt = 0; 266 stage->stg_len = 0; 267 } 268 269 static __inline void 270 ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 271 { 272 KKASSERT((stage->stg_flags & 273 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 274 stage->stg_flags |= IFSQ_STAGE_FLAG_QUED; 275 TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link); 276 } 277 278 /* 279 * Schedule ifnet.if_start on the subqueue owner CPU 280 */ 281 static void 282 ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force) 283 { 284 int cpu; 285 286 if (!force && curthread->td_type == TD_TYPE_NETISR && 287 ifsq_stage_cntmax > 0) { 288 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); 289 290 stage->stg_cnt = 0; 291 stage->stg_len = 0; 292 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 293 ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage); 294 stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED; 295 return; 296 } 297 298 cpu = ifsq_get_cpuid(ifsq); 299 if (cpu != mycpuid) 300 lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq); 301 else 302 ifsq_ifstart_ipifunc(ifsq); 303 } 304 305 /* 306 * NOTE: 307 * This function will release ifnet.if_start subqueue interlock, 308 * if ifnet.if_start for the subqueue does not need to be scheduled 309 */ 310 static __inline int 311 ifsq_ifstart_need_schedule(struct ifaltq_subque 
*ifsq, int running)
{
	if (!running || ifsq_is_empty(ifsq)
#ifdef ALTQ
	    || ifsq->ifsq_altq->altq_tbr != NULL
#endif
	    ) {
		ALTQ_SQ_LOCK(ifsq);
		/*
		 * ifnet.if_start subqueue interlock is released, if:
		 * 1) Hardware can not take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (ifsq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    on the subqueue when hardware queue is ready
		 * 2) There is no packet in the subqueue.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start on the subqueue.
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    TBR callout will call ifnet.if_start on the
		 *    subqueue.
		 */
		if (!running || !ifsq_data_ready(ifsq)) {
			ifsq_clr_started(ifsq);
			ALTQ_SQ_UNLOCK(ifsq);
			return 0;
		}
		ALTQ_SQ_UNLOCK(ifsq);
	}
	return 1;
}

static void
ifsq_ifstart_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct globaldata *gd = mycpu;
	int running = 0, need_sched;

	crit_enter_gd(gd);

	lwkt_replymsg(lmsg, 0);	/* reply ASAP */

	if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) {
		/*
		 * We need to chase the subqueue owner CPU change.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		crit_exit_gd(gd);
		return;
	}

	ifsq_serialize_hw(ifsq);
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data needs to be transmitted, ifnet.if_start is
		 * scheduled on the subqueue owner CPU, and we keep going.
		 * NOTE: ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}

	crit_exit_gd(gd);
}

/* Device driver ifnet.if_start helper function */
void
ifsq_devstart(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0;

	ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);

	ALTQ_SQ_LOCK(ifsq);
	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
		ALTQ_SQ_UNLOCK(ifsq);
		return;
	}
	ifsq_set_started(ifsq);
	ALTQ_SQ_UNLOCK(ifsq);

	ifp->if_start(ifp, ifsq);

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
		running = 1;

	if (ifsq_ifstart_need_schedule(ifsq, running)) {
		/*
		 * More data needs to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
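		 *
		 * The interlock is the subqueue's "started" flag, changed
		 * only under ALTQ_SQ_LOCK: ifsq_set_started() above claims
		 * it before ifp->if_start is called, and
		 * ifsq_ifstart_need_schedule() either clears it (queue
		 * drained, or the hardware cannot make progress) or keeps
		 * it held and tells us to schedule another if_start pass.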
415 */ 416 ifsq_ifstart_schedule(ifsq, 0); 417 } 418 } 419 420 void 421 if_devstart(struct ifnet *ifp) 422 { 423 ifsq_devstart(ifq_get_subq_default(&ifp->if_snd)); 424 } 425 426 /* Device driver ifnet.if_start schedule helper function */ 427 void 428 ifsq_devstart_sched(struct ifaltq_subque *ifsq) 429 { 430 ifsq_ifstart_schedule(ifsq, 1); 431 } 432 433 void 434 if_devstart_sched(struct ifnet *ifp) 435 { 436 ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd)); 437 } 438 439 static void 440 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 441 { 442 lwkt_serialize_enter(ifp->if_serializer); 443 } 444 445 static void 446 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 447 { 448 lwkt_serialize_exit(ifp->if_serializer); 449 } 450 451 static int 452 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 453 { 454 return lwkt_serialize_try(ifp->if_serializer); 455 } 456 457 #ifdef INVARIANTS 458 static void 459 if_default_serialize_assert(struct ifnet *ifp, 460 enum ifnet_serialize slz __unused, 461 boolean_t serialized) 462 { 463 if (serialized) 464 ASSERT_SERIALIZED(ifp->if_serializer); 465 else 466 ASSERT_NOT_SERIALIZED(ifp->if_serializer); 467 } 468 #endif 469 470 /* 471 * Attach an interface to the list of "active" interfaces. 472 * 473 * The serializer is optional. 474 */ 475 void 476 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer) 477 { 478 unsigned socksize; 479 int namelen, masklen; 480 struct sockaddr_dl *sdl, *sdl_addr; 481 struct ifaddr *ifa; 482 struct ifaltq *ifq; 483 struct ifnet **old_ifindex2ifnet = NULL; 484 struct ifnet_array *old_ifnet_array; 485 int i, q, qlen; 486 char qlenname[64]; 487 488 static int if_indexlim = 8; 489 490 if (ifp->if_serialize != NULL) { 491 KASSERT(ifp->if_deserialize != NULL && 492 ifp->if_tryserialize != NULL && 493 ifp->if_serialize_assert != NULL, 494 ("serialize functions are partially setup")); 495 496 /* 497 * If the device supplies serialize functions, 498 * then clear if_serializer to catch any invalid 499 * usage of this field. 500 */ 501 KASSERT(serializer == NULL, 502 ("both serialize functions and default serializer " 503 "are supplied")); 504 ifp->if_serializer = NULL; 505 } else { 506 KASSERT(ifp->if_deserialize == NULL && 507 ifp->if_tryserialize == NULL && 508 ifp->if_serialize_assert == NULL, 509 ("serialize functions are partially setup")); 510 ifp->if_serialize = if_default_serialize; 511 ifp->if_deserialize = if_default_deserialize; 512 ifp->if_tryserialize = if_default_tryserialize; 513 #ifdef INVARIANTS 514 ifp->if_serialize_assert = if_default_serialize_assert; 515 #endif 516 517 /* 518 * The serializer can be passed in from the device, 519 * allowing the same serializer to be used for both 520 * the interrupt interlock and the device queue. 521 * If not specified, the netif structure will use an 522 * embedded serializer. 523 */ 524 if (serializer == NULL) { 525 serializer = &ifp->if_default_serializer; 526 lwkt_serialize_init(serializer); 527 } 528 ifp->if_serializer = serializer; 529 } 530 531 /* 532 * Make if_addrhead available on all CPUs, since they 533 * could be accessed by any threads. 
534 */ 535 ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead), 536 M_IFADDR, M_WAITOK | M_ZERO); 537 for (i = 0; i < ncpus; ++i) 538 TAILQ_INIT(&ifp->if_addrheads[i]); 539 540 TAILQ_INIT(&ifp->if_multiaddrs); 541 TAILQ_INIT(&ifp->if_groups); 542 getmicrotime(&ifp->if_lastchange); 543 if_addgroup(ifp, IFG_ALL); 544 545 /* 546 * create a Link Level name for this device 547 */ 548 namelen = strlen(ifp->if_xname); 549 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; 550 socksize = masklen + ifp->if_addrlen; 551 if (socksize < sizeof(*sdl)) 552 socksize = sizeof(*sdl); 553 socksize = RT_ROUNDUP(socksize); 554 ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize); 555 sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1); 556 sdl->sdl_len = socksize; 557 sdl->sdl_family = AF_LINK; 558 bcopy(ifp->if_xname, sdl->sdl_data, namelen); 559 sdl->sdl_nlen = namelen; 560 sdl->sdl_type = ifp->if_type; 561 ifp->if_lladdr = ifa; 562 ifa->ifa_ifp = ifp; 563 ifa->ifa_addr = (struct sockaddr *)sdl; 564 sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); 565 ifa->ifa_netmask = (struct sockaddr *)sdl; 566 sdl->sdl_len = masklen; 567 while (namelen != 0) 568 sdl->sdl_data[--namelen] = 0xff; 569 ifa_iflink(ifa, ifp, 0 /* Insert head */); 570 571 /* 572 * Make if_data available on all CPUs, since they could 573 * be updated by hardware interrupt routing, which could 574 * be bound to any CPU. 575 */ 576 ifp->if_data_pcpu = kmalloc(ncpus * sizeof(struct ifdata_pcpu), 577 M_DEVBUF, 578 M_WAITOK | M_ZERO | M_CACHEALIGN); 579 580 if (ifp->if_mapsubq == NULL) 581 ifp->if_mapsubq = ifq_mapsubq_default; 582 583 ifq = &ifp->if_snd; 584 ifq->altq_type = 0; 585 ifq->altq_disc = NULL; 586 ifq->altq_flags &= ALTQF_CANTCHANGE; 587 ifq->altq_tbr = NULL; 588 ifq->altq_ifp = ifp; 589 590 if (ifq->altq_subq_cnt <= 0) 591 ifq->altq_subq_cnt = 1; 592 ifq->altq_subq = 593 kmalloc(ifq->altq_subq_cnt * sizeof(struct ifaltq_subque), 594 M_DEVBUF, 595 M_WAITOK | M_ZERO | M_CACHEALIGN); 596 597 if (ifq->altq_maxlen == 0) { 598 if_printf(ifp, "driver didn't set altq_maxlen\n"); 599 ifq_set_maxlen(ifq, ifqmaxlen); 600 } 601 602 /* Allow user to override driver's setting. */ 603 ksnprintf(qlenname, sizeof(qlenname), "net.%s.qlenmax", ifp->if_xname); 604 qlen = -1; 605 TUNABLE_INT_FETCH(qlenname, &qlen); 606 if (qlen > 0) { 607 if_printf(ifp, "qlenmax -> %d\n", qlen); 608 ifq_set_maxlen(ifq, qlen); 609 } 610 611 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 612 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 613 614 ALTQ_SQ_LOCK_INIT(ifsq); 615 ifsq->ifsq_index = q; 616 617 ifsq->ifsq_altq = ifq; 618 ifsq->ifsq_ifp = ifp; 619 620 ifsq->ifsq_maxlen = ifq->altq_maxlen; 621 ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES; 622 ifsq->ifsq_prepended = NULL; 623 ifsq->ifsq_started = 0; 624 ifsq->ifsq_hw_oactive = 0; 625 ifsq_set_cpuid(ifsq, 0); 626 if (ifp->if_serializer != NULL) 627 ifsq_set_hw_serialize(ifsq, ifp->if_serializer); 628 629 /* XXX: netisr_ncpus */ 630 ifsq->ifsq_stage = 631 kmalloc(ncpus * sizeof(struct ifsubq_stage), 632 M_DEVBUF, 633 M_WAITOK | M_ZERO | M_CACHEALIGN); 634 for (i = 0; i < ncpus; ++i) 635 ifsq->ifsq_stage[i].stg_subq = ifsq; 636 637 /* 638 * Allocate one if_start message for each CPU, since 639 * the hardware TX ring could be assigned to any CPU. 640 * 641 * NOTE: 642 * If the hardware TX ring polling CPU and the hardware 643 * TX ring interrupt CPU are same, one if_start message 644 * should be enough. 
645 */ 646 ifsq->ifsq_ifstart_nmsg = 647 kmalloc(ncpus * sizeof(struct netmsg_base), 648 M_LWKTMSG, M_WAITOK); 649 for (i = 0; i < ncpus; ++i) { 650 netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL, 651 &netisr_adone_rport, 0, ifsq_ifstart_dispatch); 652 ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq; 653 } 654 } 655 ifq_set_classic(ifq); 656 657 /* 658 * Increase mbuf cluster/jcluster limits for the mbufs that 659 * could sit on the device queues for quite some time. 660 */ 661 if (ifp->if_nmbclusters > 0) 662 mcl_inclimit(ifp->if_nmbclusters); 663 if (ifp->if_nmbjclusters > 0) 664 mjcl_inclimit(ifp->if_nmbjclusters); 665 666 /* 667 * Install this ifp into ifindex2inet, ifnet queue and ifnet 668 * array after it is setup. 669 * 670 * Protect ifindex2ifnet, ifnet queue and ifnet array changes 671 * by ifnet lock, so that non-netisr threads could get a 672 * consistent view. 673 */ 674 ifnet_lock(); 675 676 /* Don't update if_index until ifindex2ifnet is setup */ 677 ifp->if_index = if_index + 1; 678 sdl_addr->sdl_index = ifp->if_index; 679 680 /* 681 * Install this ifp into ifindex2ifnet 682 */ 683 if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) { 684 unsigned int n; 685 struct ifnet **q; 686 687 /* 688 * Grow ifindex2ifnet 689 */ 690 if_indexlim <<= 1; 691 n = if_indexlim * sizeof(*q); 692 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO); 693 if (ifindex2ifnet != NULL) { 694 bcopy(ifindex2ifnet, q, n/2); 695 /* Free old ifindex2ifnet after sync all netisrs */ 696 old_ifindex2ifnet = ifindex2ifnet; 697 } 698 ifindex2ifnet = q; 699 } 700 ifindex2ifnet[ifp->if_index] = ifp; 701 /* 702 * Update if_index after this ifp is installed into ifindex2ifnet, 703 * so that netisrs could get a consistent view of ifindex2ifnet. 704 */ 705 cpu_sfence(); 706 if_index = ifp->if_index; 707 708 /* 709 * Install this ifp into ifnet array. 710 */ 711 /* Free old ifnet array after sync all netisrs */ 712 old_ifnet_array = ifnet_array; 713 ifnet_array = ifnet_array_add(ifp, old_ifnet_array); 714 715 /* 716 * Install this ifp into ifnet queue. 717 */ 718 TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link); 719 720 ifnet_unlock(); 721 722 /* 723 * Sync all netisrs so that the old ifindex2ifnet and ifnet array 724 * are no longer accessed and we can free them safely later on. 725 */ 726 netmsg_service_sync(); 727 if (old_ifindex2ifnet != NULL) 728 kfree(old_ifindex2ifnet, M_IFADDR); 729 ifnet_array_free(old_ifnet_array); 730 731 if (!SLIST_EMPTY(&domains)) 732 if_attachdomain1(ifp); 733 734 /* Announce the interface. 
*/ 735 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 736 devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); 737 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 738 } 739 740 static void 741 if_attachdomain(void *dummy) 742 { 743 struct ifnet *ifp; 744 745 ifnet_lock(); 746 TAILQ_FOREACH(ifp, &ifnetlist, if_list) 747 if_attachdomain1(ifp); 748 ifnet_unlock(); 749 } 750 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, 751 if_attachdomain, NULL); 752 753 static void 754 if_attachdomain1(struct ifnet *ifp) 755 { 756 struct domain *dp; 757 758 crit_enter(); 759 760 /* address family dependent data region */ 761 bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); 762 SLIST_FOREACH(dp, &domains, dom_next) 763 if (dp->dom_ifattach) 764 ifp->if_afdata[dp->dom_family] = 765 (*dp->dom_ifattach)(ifp); 766 crit_exit(); 767 } 768 769 /* 770 * Purge all addresses whose type is _not_ AF_LINK 771 */ 772 static void 773 if_purgeaddrs_nolink_dispatch(netmsg_t nmsg) 774 { 775 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp; 776 struct ifaddr_container *ifac, *next; 777 778 ASSERT_NETISR0; 779 780 /* 781 * The ifaddr processing in the following loop will block, 782 * however, this function is called in netisr0, in which 783 * ifaddr list changes happen, so we don't care about the 784 * blockness of the ifaddr processing here. 785 */ 786 TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid], 787 ifa_link, next) { 788 struct ifaddr *ifa = ifac->ifa; 789 790 /* Ignore marker */ 791 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 792 continue; 793 794 /* Leave link ifaddr as it is */ 795 if (ifa->ifa_addr->sa_family == AF_LINK) 796 continue; 797 #ifdef INET 798 /* XXX: Ugly!! ad hoc just for INET */ 799 if (ifa->ifa_addr->sa_family == AF_INET) { 800 struct ifaliasreq ifr; 801 struct sockaddr_in saved_addr, saved_dst; 802 #ifdef IFADDR_DEBUG_VERBOSE 803 int i; 804 805 kprintf("purge in4 addr %p: ", ifa); 806 for (i = 0; i < ncpus; ++i) { 807 kprintf("%d ", 808 ifa->ifa_containers[i].ifa_refcnt); 809 } 810 kprintf("\n"); 811 #endif 812 813 /* Save information for panic. 
*/ 814 memcpy(&saved_addr, ifa->ifa_addr, sizeof(saved_addr)); 815 if (ifa->ifa_dstaddr != NULL) { 816 memcpy(&saved_dst, ifa->ifa_dstaddr, 817 sizeof(saved_dst)); 818 } else { 819 memset(&saved_dst, 0, sizeof(saved_dst)); 820 } 821 822 bzero(&ifr, sizeof ifr); 823 ifr.ifra_addr = *ifa->ifa_addr; 824 if (ifa->ifa_dstaddr) 825 ifr.ifra_broadaddr = *ifa->ifa_dstaddr; 826 if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp, 827 NULL) == 0) 828 continue; 829 830 /* MUST NOT HAPPEN */ 831 panic("%s: in_control failed %x, dst %x", ifp->if_xname, 832 ntohl(saved_addr.sin_addr.s_addr), 833 ntohl(saved_dst.sin_addr.s_addr)); 834 } 835 #endif /* INET */ 836 #ifdef INET6 837 if (ifa->ifa_addr->sa_family == AF_INET6) { 838 #ifdef IFADDR_DEBUG_VERBOSE 839 int i; 840 841 kprintf("purge in6 addr %p: ", ifa); 842 for (i = 0; i < ncpus; ++i) { 843 kprintf("%d ", 844 ifa->ifa_containers[i].ifa_refcnt); 845 } 846 kprintf("\n"); 847 #endif 848 849 in6_purgeaddr(ifa); 850 /* ifp_addrhead is already updated */ 851 continue; 852 } 853 #endif /* INET6 */ 854 if_printf(ifp, "destroy ifaddr family %d\n", 855 ifa->ifa_addr->sa_family); 856 ifa_ifunlink(ifa, ifp); 857 ifa_destroy(ifa); 858 } 859 860 netisr_replymsg(&nmsg->base, 0); 861 } 862 863 void 864 if_purgeaddrs_nolink(struct ifnet *ifp) 865 { 866 struct netmsg_base nmsg; 867 868 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 869 if_purgeaddrs_nolink_dispatch); 870 nmsg.lmsg.u.ms_resultp = ifp; 871 netisr_domsg(&nmsg, 0); 872 } 873 874 static void 875 ifq_stage_detach_handler(netmsg_t nmsg) 876 { 877 struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp; 878 int q; 879 880 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 881 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 882 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); 883 884 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) 885 ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage); 886 } 887 lwkt_replymsg(&nmsg->lmsg, 0); 888 } 889 890 static void 891 ifq_stage_detach(struct ifaltq *ifq) 892 { 893 struct netmsg_base base; 894 int cpu; 895 896 netmsg_init(&base, NULL, &curthread->td_msgport, 0, 897 ifq_stage_detach_handler); 898 base.lmsg.u.ms_resultp = ifq; 899 900 /* XXX netisr_ncpus */ 901 for (cpu = 0; cpu < ncpus; ++cpu) 902 lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0); 903 } 904 905 struct netmsg_if_rtdel { 906 struct netmsg_base base; 907 struct ifnet *ifp; 908 }; 909 910 static void 911 if_rtdel_dispatch(netmsg_t msg) 912 { 913 struct netmsg_if_rtdel *rmsg = (void *)msg; 914 int i, cpu; 915 916 cpu = mycpuid; 917 ASSERT_NETISR_NCPUS(cpu); 918 919 for (i = 1; i <= AF_MAX; i++) { 920 struct radix_node_head *rnh; 921 922 if ((rnh = rt_tables[cpu][i]) == NULL) 923 continue; 924 rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp); 925 } 926 netisr_forwardmsg(&msg->base, cpu + 1); 927 } 928 929 /* 930 * Detach an interface, removing it from the 931 * list of "active" interfaces. 932 */ 933 void 934 if_detach(struct ifnet *ifp) 935 { 936 struct ifnet_array *old_ifnet_array; 937 struct ifg_list *ifgl; 938 struct netmsg_if_rtdel msg; 939 struct domain *dp; 940 int q; 941 942 /* Announce that the interface is gone. */ 943 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 944 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 945 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); 946 947 /* 948 * Remove this ifp from ifindex2inet, ifnet queue and ifnet 949 * array before it is whacked. 
950 * 951 * Protect ifindex2ifnet, ifnet queue and ifnet array changes 952 * by ifnet lock, so that non-netisr threads could get a 953 * consistent view. 954 */ 955 ifnet_lock(); 956 957 /* 958 * Remove this ifp from ifindex2ifnet and maybe decrement if_index. 959 */ 960 ifindex2ifnet[ifp->if_index] = NULL; 961 while (if_index > 0 && ifindex2ifnet[if_index] == NULL) 962 if_index--; 963 964 /* 965 * Remove this ifp from ifnet queue. 966 */ 967 TAILQ_REMOVE(&ifnetlist, ifp, if_link); 968 969 /* 970 * Remove this ifp from ifnet array. 971 */ 972 /* Free old ifnet array after sync all netisrs */ 973 old_ifnet_array = ifnet_array; 974 ifnet_array = ifnet_array_del(ifp, old_ifnet_array); 975 976 ifnet_unlock(); 977 978 ifgroup_lockmgr(LK_EXCLUSIVE); 979 while ((ifgl = TAILQ_FIRST(&ifp->if_groups)) != NULL) 980 if_delgroup_locked(ifp, ifgl->ifgl_group->ifg_group); 981 ifgroup_lockmgr(LK_RELEASE); 982 983 /* 984 * Sync all netisrs so that the old ifnet array is no longer 985 * accessed and we can free it safely later on. 986 */ 987 netmsg_service_sync(); 988 ifnet_array_free(old_ifnet_array); 989 990 /* 991 * Remove routes and flush queues. 992 */ 993 crit_enter(); 994 #ifdef IFPOLL_ENABLE 995 if (ifp->if_flags & IFF_NPOLLING) 996 ifpoll_deregister(ifp); 997 #endif 998 if_down(ifp); 999 1000 /* Decrease the mbuf clusters/jclusters limits increased by us */ 1001 if (ifp->if_nmbclusters > 0) 1002 mcl_inclimit(-ifp->if_nmbclusters); 1003 if (ifp->if_nmbjclusters > 0) 1004 mjcl_inclimit(-ifp->if_nmbjclusters); 1005 1006 #ifdef ALTQ 1007 if (ifq_is_enabled(&ifp->if_snd)) 1008 altq_disable(&ifp->if_snd); 1009 if (ifq_is_attached(&ifp->if_snd)) 1010 altq_detach(&ifp->if_snd); 1011 #endif 1012 1013 /* 1014 * Clean up all addresses. 1015 */ 1016 ifp->if_lladdr = NULL; 1017 1018 if_purgeaddrs_nolink(ifp); 1019 if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) { 1020 struct ifaddr *ifa; 1021 1022 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 1023 KASSERT(ifa->ifa_addr->sa_family == AF_LINK, 1024 ("non-link ifaddr is left on if_addrheads")); 1025 1026 ifa_ifunlink(ifa, ifp); 1027 ifa_destroy(ifa); 1028 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]), 1029 ("there are still ifaddrs left on if_addrheads")); 1030 } 1031 1032 #ifdef INET 1033 /* 1034 * Remove all IPv4 kernel structures related to ifp. 1035 */ 1036 in_ifdetach(ifp); 1037 #endif 1038 1039 #ifdef INET6 1040 /* 1041 * Remove all IPv6 kernel structs related to ifp. This should be done 1042 * before removing routing entries below, since IPv6 interface direct 1043 * routes are expected to be removed by the IPv6-specific kernel API. 1044 * Otherwise, the kernel will detect some inconsistency and bark it. 
1045 */ 1046 in6_ifdetach(ifp); 1047 #endif 1048 1049 /* 1050 * Delete all remaining routes using this interface 1051 */ 1052 netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 1053 if_rtdel_dispatch); 1054 msg.ifp = ifp; 1055 netisr_domsg_global(&msg.base); 1056 1057 SLIST_FOREACH(dp, &domains, dom_next) { 1058 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) 1059 (*dp->dom_ifdetach)(ifp, 1060 ifp->if_afdata[dp->dom_family]); 1061 } 1062 1063 kfree(ifp->if_addrheads, M_IFADDR); 1064 1065 lwkt_synchronize_ipiqs("if_detach"); 1066 ifq_stage_detach(&ifp->if_snd); 1067 1068 for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) { 1069 struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q]; 1070 1071 kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG); 1072 kfree(ifsq->ifsq_stage, M_DEVBUF); 1073 } 1074 kfree(ifp->if_snd.altq_subq, M_DEVBUF); 1075 1076 kfree(ifp->if_data_pcpu, M_DEVBUF); 1077 1078 crit_exit(); 1079 } 1080 1081 int 1082 ifgroup_lockmgr(u_int flags) 1083 { 1084 return lockmgr(&ifgroup_lock, flags); 1085 } 1086 1087 /* 1088 * Create an empty interface group. 1089 */ 1090 static struct ifg_group * 1091 if_creategroup(const char *groupname) 1092 { 1093 struct ifg_group *ifg; 1094 1095 ifg = kmalloc(sizeof(*ifg), M_IFNET, M_WAITOK); 1096 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); 1097 ifg->ifg_refcnt = 0; 1098 ifg->ifg_carp_demoted = 0; 1099 TAILQ_INIT(&ifg->ifg_members); 1100 1101 ifgroup_lockmgr(LK_EXCLUSIVE); 1102 TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next); 1103 ifgroup_lockmgr(LK_RELEASE); 1104 1105 EVENTHANDLER_INVOKE(group_attach_event, ifg); 1106 1107 return (ifg); 1108 } 1109 1110 /* 1111 * Destroy an empty interface group. 1112 */ 1113 static int 1114 if_destroygroup(struct ifg_group *ifg) 1115 { 1116 KASSERT(ifg->ifg_refcnt == 0, 1117 ("trying to delete a non-empty interface group")); 1118 1119 ifgroup_lockmgr(LK_EXCLUSIVE); 1120 TAILQ_REMOVE(&ifg_head, ifg, ifg_next); 1121 ifgroup_lockmgr(LK_RELEASE); 1122 1123 EVENTHANDLER_INVOKE(group_detach_event, ifg); 1124 kfree(ifg, M_IFNET); 1125 1126 return (0); 1127 } 1128 1129 /* 1130 * Add the interface to a group. 1131 * The target group will be created if it doesn't exist. 1132 */ 1133 int 1134 if_addgroup(struct ifnet *ifp, const char *groupname) 1135 { 1136 struct ifg_list *ifgl; 1137 struct ifg_group *ifg; 1138 struct ifg_member *ifgm; 1139 1140 if (groupname[0] && 1141 groupname[strlen(groupname) - 1] >= '0' && 1142 groupname[strlen(groupname) - 1] <= '9') 1143 return (EINVAL); 1144 1145 ifgroup_lockmgr(LK_SHARED); 1146 1147 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1148 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) { 1149 ifgroup_lockmgr(LK_RELEASE); 1150 return (EEXIST); 1151 } 1152 } 1153 1154 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1155 if (strcmp(ifg->ifg_group, groupname) == 0) 1156 break; 1157 } 1158 1159 ifgroup_lockmgr(LK_RELEASE); 1160 1161 if (ifg == NULL) 1162 ifg = if_creategroup(groupname); 1163 1164 ifgl = kmalloc(sizeof(*ifgl), M_IFNET, M_WAITOK); 1165 ifgm = kmalloc(sizeof(*ifgm), M_IFNET, M_WAITOK); 1166 ifgl->ifgl_group = ifg; 1167 ifgm->ifgm_ifp = ifp; 1168 ifg->ifg_refcnt++; 1169 1170 ifgroup_lockmgr(LK_EXCLUSIVE); 1171 TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); 1172 TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); 1173 ifgroup_lockmgr(LK_RELEASE); 1174 1175 EVENTHANDLER_INVOKE(group_change_event, groupname); 1176 1177 return (0); 1178 } 1179 1180 /* 1181 * Remove the interface from a group. 
 * The group will be destroyed if it becomes empty.
 *
 * The 'ifgroup_lock' must be held exclusively when calling this.
 */
static int
if_delgroup_locked(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	KKASSERT(lockstatus(&ifgroup_lock, curthread) == LK_EXCLUSIVE);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
			break;
	}
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
		if (ifgm->ifgm_ifp == ifp)
			break;
	}

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);

		ifgroup_lockmgr(LK_RELEASE);
		EVENTHANDLER_INVOKE(group_change_event, groupname);
		ifgroup_lockmgr(LK_EXCLUSIVE);

		kfree(ifgm, M_IFNET);
		ifgl->ifgl_group->ifg_refcnt--;
	}

	if (ifgl->ifgl_group->ifg_refcnt == 0) {
		ifgroup_lockmgr(LK_RELEASE);
		if_destroygroup(ifgl->ifgl_group);
		ifgroup_lockmgr(LK_EXCLUSIVE);
	}

	kfree(ifgl, M_IFNET);

	return (0);
}

int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	int error;

	ifgroup_lockmgr(LK_EXCLUSIVE);
	error = if_delgroup_locked(ifp, groupname);
	ifgroup_lockmgr(LK_RELEASE);

	return (error);
}

/*
 * Store all the groups that the interface belongs to in memory
 * pointed to by data.
 */
static int
if_getgroups(struct ifgroupreq *ifgr, struct ifnet *ifp)
{
	struct ifg_list *ifgl;
	struct ifg_req *ifgrq, *p;
	int len, error;

	len = 0;
	ifgroup_lockmgr(LK_SHARED);
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		len += sizeof(struct ifg_req);
	ifgroup_lockmgr(LK_RELEASE);

	if (ifgr->ifgr_len == 0) {
		/*
		 * Caller is asking how much memory should be allocated in
		 * the next request in order to hold all the groups.
		 */
		ifgr->ifgr_len = len;
		return (0);
	} else if (ifgr->ifgr_len != len) {
		return (EINVAL);
	}

	ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
	if (ifgrq == NULL)
		return (ENOMEM);

	ifgroup_lockmgr(LK_SHARED);
	p = ifgrq;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(struct ifg_req)) {
			ifgroup_lockmgr(LK_RELEASE);
			error = EINVAL;
			goto failed;
		}

		strlcpy(p->ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq->ifgrq_group));
		len -= sizeof(struct ifg_req);
		p++;
	}
	ifgroup_lockmgr(LK_RELEASE);

	error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len);
failed:
	kfree(ifgrq, M_TEMP);
	return error;
}

/*
 * Store all the members of a group in memory pointed to by data.
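 *
 * Like if_getgroups() above, the request uses a two-step sizing
 * handshake: the caller first passes ifgr_len == 0 to learn how many
 * bytes are required, then repeats the call with a buffer of exactly
 * that size; any other length is rejected with EINVAL.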
1298 */ 1299 static int 1300 if_getgroupmembers(struct ifgroupreq *ifgr) 1301 { 1302 struct ifg_group *ifg; 1303 struct ifg_member *ifgm; 1304 struct ifg_req *ifgrq, *p; 1305 int len, error; 1306 1307 ifgroup_lockmgr(LK_SHARED); 1308 1309 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1310 if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0) 1311 break; 1312 } 1313 if (ifg == NULL) { 1314 ifgroup_lockmgr(LK_RELEASE); 1315 return (ENOENT); 1316 } 1317 1318 len = 0; 1319 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) 1320 len += sizeof(struct ifg_req); 1321 1322 ifgroup_lockmgr(LK_RELEASE); 1323 1324 if (ifgr->ifgr_len == 0) { 1325 ifgr->ifgr_len = len; 1326 return (0); 1327 } else if (ifgr->ifgr_len != len) { 1328 return (EINVAL); 1329 } 1330 1331 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO); 1332 if (ifgrq == NULL) 1333 return (ENOMEM); 1334 1335 ifgroup_lockmgr(LK_SHARED); 1336 p = ifgrq; 1337 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { 1338 if (len < sizeof(struct ifg_req)) { 1339 ifgroup_lockmgr(LK_RELEASE); 1340 error = EINVAL; 1341 goto failed; 1342 } 1343 1344 strlcpy(p->ifgrq_member, ifgm->ifgm_ifp->if_xname, 1345 sizeof(p->ifgrq_member)); 1346 len -= sizeof(struct ifg_req); 1347 p++; 1348 } 1349 ifgroup_lockmgr(LK_RELEASE); 1350 1351 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len); 1352 failed: 1353 kfree(ifgrq, M_TEMP); 1354 return error; 1355 } 1356 1357 /* 1358 * Delete Routes for a Network Interface 1359 * 1360 * Called for each routing entry via the rnh->rnh_walktree() call above 1361 * to delete all route entries referencing a detaching network interface. 1362 * 1363 * Arguments: 1364 * rn pointer to node in the routing table 1365 * arg argument passed to rnh->rnh_walktree() - detaching interface 1366 * 1367 * Returns: 1368 * 0 successful 1369 * errno failed - reason indicated 1370 * 1371 */ 1372 static int 1373 if_rtdel(struct radix_node *rn, void *arg) 1374 { 1375 struct rtentry *rt = (struct rtentry *)rn; 1376 struct ifnet *ifp = arg; 1377 int err; 1378 1379 if (rt->rt_ifp == ifp) { 1380 1381 /* 1382 * Protect (sorta) against walktree recursion problems 1383 * with cloned routes 1384 */ 1385 if (!(rt->rt_flags & RTF_UP)) 1386 return (0); 1387 1388 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 1389 rt_mask(rt), rt->rt_flags, 1390 NULL); 1391 if (err) { 1392 log(LOG_WARNING, "if_rtdel: error %d\n", err); 1393 } 1394 } 1395 1396 return (0); 1397 } 1398 1399 static __inline boolean_t 1400 ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa) 1401 { 1402 if (old_ifa == NULL) 1403 return TRUE; 1404 1405 if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 && 1406 (cur_ifa->ifa_ifp->if_flags & IFF_UP)) 1407 return TRUE; 1408 if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 && 1409 (cur_ifa->ifa_flags & IFA_ROUTE)) 1410 return TRUE; 1411 return FALSE; 1412 } 1413 1414 /* 1415 * Locate an interface based on a complete address. 
1416 */ 1417 struct ifaddr * 1418 ifa_ifwithaddr(struct sockaddr *addr) 1419 { 1420 const struct ifnet_array *arr; 1421 int i; 1422 1423 arr = ifnet_array_get(); 1424 for (i = 0; i < arr->ifnet_count; ++i) { 1425 struct ifnet *ifp = arr->ifnet_arr[i]; 1426 struct ifaddr_container *ifac; 1427 1428 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1429 struct ifaddr *ifa = ifac->ifa; 1430 1431 if (ifa->ifa_addr->sa_family != addr->sa_family) 1432 continue; 1433 if (sa_equal(addr, ifa->ifa_addr)) 1434 return (ifa); 1435 if ((ifp->if_flags & IFF_BROADCAST) && 1436 ifa->ifa_broadaddr && 1437 /* IPv6 doesn't have broadcast */ 1438 ifa->ifa_broadaddr->sa_len != 0 && 1439 sa_equal(ifa->ifa_broadaddr, addr)) 1440 return (ifa); 1441 } 1442 } 1443 return (NULL); 1444 } 1445 1446 /* 1447 * Locate the point to point interface with a given destination address. 1448 */ 1449 struct ifaddr * 1450 ifa_ifwithdstaddr(struct sockaddr *addr) 1451 { 1452 const struct ifnet_array *arr; 1453 int i; 1454 1455 arr = ifnet_array_get(); 1456 for (i = 0; i < arr->ifnet_count; ++i) { 1457 struct ifnet *ifp = arr->ifnet_arr[i]; 1458 struct ifaddr_container *ifac; 1459 1460 if (!(ifp->if_flags & IFF_POINTOPOINT)) 1461 continue; 1462 1463 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1464 struct ifaddr *ifa = ifac->ifa; 1465 1466 if (ifa->ifa_addr->sa_family != addr->sa_family) 1467 continue; 1468 if (ifa->ifa_dstaddr && 1469 sa_equal(addr, ifa->ifa_dstaddr)) 1470 return (ifa); 1471 } 1472 } 1473 return (NULL); 1474 } 1475 1476 /* 1477 * Find an interface on a specific network. If many, choice 1478 * is most specific found. 1479 */ 1480 struct ifaddr * 1481 ifa_ifwithnet(struct sockaddr *addr) 1482 { 1483 struct ifaddr *ifa_maybe = NULL; 1484 u_int af = addr->sa_family; 1485 char *addr_data = addr->sa_data, *cplim; 1486 const struct ifnet_array *arr; 1487 int i; 1488 1489 /* 1490 * AF_LINK addresses can be looked up directly by their index number, 1491 * so do that if we can. 1492 */ 1493 if (af == AF_LINK) { 1494 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; 1495 1496 if (sdl->sdl_index && sdl->sdl_index <= if_index) 1497 return (ifindex2ifnet[sdl->sdl_index]->if_lladdr); 1498 } 1499 1500 /* 1501 * Scan though each interface, looking for ones that have 1502 * addresses in this address family. 1503 */ 1504 arr = ifnet_array_get(); 1505 for (i = 0; i < arr->ifnet_count; ++i) { 1506 struct ifnet *ifp = arr->ifnet_arr[i]; 1507 struct ifaddr_container *ifac; 1508 1509 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1510 struct ifaddr *ifa = ifac->ifa; 1511 char *cp, *cp2, *cp3; 1512 1513 if (ifa->ifa_addr->sa_family != af) 1514 next: continue; 1515 if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) { 1516 /* 1517 * This is a bit broken as it doesn't 1518 * take into account that the remote end may 1519 * be a single node in the network we are 1520 * looking for. 1521 * The trouble is that we don't know the 1522 * netmask for the remote end. 1523 */ 1524 if (ifa->ifa_dstaddr != NULL && 1525 sa_equal(addr, ifa->ifa_dstaddr)) 1526 return (ifa); 1527 } else { 1528 /* 1529 * if we have a special address handler, 1530 * then use it instead of the generic one. 1531 */ 1532 if (ifa->ifa_claim_addr) { 1533 if ((*ifa->ifa_claim_addr)(ifa, addr)) { 1534 return (ifa); 1535 } else { 1536 continue; 1537 } 1538 } 1539 1540 /* 1541 * Scan all the bits in the ifa's address. 
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len +
				    (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search for an even
				 * better one.  If the netmasks are equal,
				 * we prefer this ifa based on the result
				 * of ifa_prefer().
				 */
				if (ifa_maybe == NULL ||
				    rn_refines(ifa->ifa_netmask,
					ifa_maybe->ifa_netmask) ||
				    (sa_equal(ifa_maybe->ifa_netmask,
					ifa->ifa_netmask) &&
				     ifa_prefer(ifa, ifa_maybe)))
					ifa_maybe = ifa;
			}
		}
	}
	return (ifa_maybe);
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr_container *ifac;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (0);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == NULL) {
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr != NULL &&
			     sa_equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		} else {
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len +
			    (char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			if (cp3 == cplim)
				return (ifa);
		}
	}
	return (ifa_maybe);
}

struct netmsg_if {
	struct netmsg_base base;
	struct ifnet *ifp;
};

/*
 * Mark an interface down and notify protocols of the transition.
 */
static void
if_down_dispatch(netmsg_t nmsg)
{
	struct netmsg_if *msg = (struct netmsg_if *)nmsg;
	struct ifnet *ifp = msg->ifp;
	struct ifaddr_container *ifac;
	struct domain *dp;

	ASSERT_NETISR0;

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	rt_ifmsg(ifp);

	/*
	 * The ifaddr processing in the following loop may block;
	 * however, this function runs in netisr0, where all ifaddr
	 * list changes happen, so blocking here is not a problem.
	 */
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
	}

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_if_down != NULL)
			dp->dom_if_down(ifp);

	ifq_purge_all(&ifp->if_snd);
	netisr_replymsg(&nmsg->base, 0);
}

/*
 * Mark an interface up and notify protocols of the transition.
 */
static void
if_up_dispatch(netmsg_t nmsg)
{
	struct netmsg_if *msg = (struct netmsg_if *)nmsg;
	struct ifnet *ifp = msg->ifp;
	struct ifaddr_container *ifac;
	struct domain *dp;

	ASSERT_NETISR0;

	ifq_purge_all(&ifp->if_snd);
	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	rt_ifmsg(ifp);

	/*
	 * The ifaddr processing in the following loop may block;
	 * however, this function runs in netisr0, where all ifaddr
	 * list changes happen, so blocking here is not a problem.
	 */
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		kpfctlinput(PRC_IFUP, ifa->ifa_addr);
	}

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_if_up != NULL)
			dp->dom_if_up(ifp);

	netisr_replymsg(&nmsg->base, 0);
}

/*
 * Mark an interface down and notify protocols of the transition.  An
 * interface going down is also considered to be a synchronizing event.
 * We must ensure that all packet processing related to the interface
 * has completed before we return so e.g. the caller can free the ifnet
 * structure that the mbufs may be referencing.
 *
 * NOTE: must be called at splnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{
	struct netmsg_if msg;

	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
	    if_down_dispatch);
	msg.ifp = ifp;
	netisr_domsg(&msg.base, 0);
	netmsg_service_sync();
}

/*
 * Mark an interface up and notify protocols of the transition.
 *
 * NOTE: must be called at splnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{
	struct netmsg_if msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
	    if_up_dispatch);
	msg.ifp = ifp;
	netisr_domsg(&msg.base, 0);
	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}

/*
 * Process a link state change.
 *
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	int link_state = ifp->if_link_state;

	rt_ifmsg(ifp);
	devctl_notify("IFNET", ifp->if_xname,
	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);

	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
}

/*
 * Handle interface watchdog timer routines.  Called
 * from softclock, we decrement timers (if set) and
 * call the appropriate interface routine on expiration.
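 *
 * Drivers arm the timer by setting ifp->if_timer and supplying an
 * if_watchdog method.  The timer is decremented on each slow-timeout
 * tick (hz / IFNET_SLOWHZ, normally once a second); when it reaches
 * zero the watchdog is invoked with the ifnet serialized.  If the
 * serializer cannot be acquired, the timer is bumped by one so the
 * watchdog is retried on the next tick.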
1772 */ 1773 static void 1774 if_slowtimo_dispatch(netmsg_t nmsg) 1775 { 1776 struct globaldata *gd = mycpu; 1777 const struct ifnet_array *arr; 1778 int i; 1779 1780 ASSERT_NETISR0; 1781 1782 crit_enter_gd(gd); 1783 lwkt_replymsg(&nmsg->lmsg, 0); /* reply ASAP */ 1784 crit_exit_gd(gd); 1785 1786 arr = ifnet_array_get(); 1787 for (i = 0; i < arr->ifnet_count; ++i) { 1788 struct ifnet *ifp = arr->ifnet_arr[i]; 1789 1790 crit_enter_gd(gd); 1791 1792 if (if_stats_compat) { 1793 IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets); 1794 IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors); 1795 IFNET_STAT_GET(ifp, opackets, ifp->if_opackets); 1796 IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors); 1797 IFNET_STAT_GET(ifp, collisions, ifp->if_collisions); 1798 IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes); 1799 IFNET_STAT_GET(ifp, obytes, ifp->if_obytes); 1800 IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts); 1801 IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts); 1802 IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops); 1803 IFNET_STAT_GET(ifp, noproto, ifp->if_noproto); 1804 IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops); 1805 } 1806 1807 if (ifp->if_timer == 0 || --ifp->if_timer) { 1808 crit_exit_gd(gd); 1809 continue; 1810 } 1811 if (ifp->if_watchdog) { 1812 if (ifnet_tryserialize_all(ifp)) { 1813 (*ifp->if_watchdog)(ifp); 1814 ifnet_deserialize_all(ifp); 1815 } else { 1816 /* try again next timeout */ 1817 ++ifp->if_timer; 1818 } 1819 } 1820 1821 crit_exit_gd(gd); 1822 } 1823 1824 callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL); 1825 } 1826 1827 static void 1828 if_slowtimo(void *arg __unused) 1829 { 1830 struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg; 1831 1832 KASSERT(mycpuid == 0, ("not on cpu0")); 1833 crit_enter(); 1834 if (lmsg->ms_flags & MSGF_DONE) 1835 lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg); 1836 crit_exit(); 1837 } 1838 1839 /* 1840 * Map interface name to 1841 * interface structure pointer. 1842 */ 1843 struct ifnet * 1844 ifunit(const char *name) 1845 { 1846 struct ifnet *ifp; 1847 1848 /* 1849 * Search all the interfaces for this name/number 1850 */ 1851 KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked")); 1852 1853 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 1854 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1855 break; 1856 } 1857 return (ifp); 1858 } 1859 1860 struct ifnet * 1861 ifunit_netisr(const char *name) 1862 { 1863 const struct ifnet_array *arr; 1864 int i; 1865 1866 /* 1867 * Search all the interfaces for this name/number 1868 */ 1869 1870 arr = ifnet_array_get(); 1871 for (i = 0; i < arr->ifnet_count; ++i) { 1872 struct ifnet *ifp = arr->ifnet_arr[i]; 1873 1874 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1875 return ifp; 1876 } 1877 return NULL; 1878 } 1879 1880 /* 1881 * Interface ioctls. 
1882 */ 1883 int 1884 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred) 1885 { 1886 struct ifnet *ifp; 1887 struct ifgroupreq *ifgr; 1888 struct ifreq *ifr; 1889 struct ifstat *ifs; 1890 int error, do_ifup = 0; 1891 short oif_flags; 1892 int new_flags; 1893 size_t namelen, onamelen; 1894 size_t descrlen; 1895 char *descrbuf, *odescrbuf; 1896 char new_name[IFNAMSIZ]; 1897 struct ifaddr *ifa; 1898 struct sockaddr_dl *sdl; 1899 1900 switch (cmd) { 1901 case SIOCGIFCONF: 1902 return (ifconf(cmd, data, cred)); 1903 default: 1904 break; 1905 } 1906 1907 ifr = (struct ifreq *)data; 1908 1909 switch (cmd) { 1910 case SIOCIFCREATE: 1911 case SIOCIFCREATE2: 1912 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 1913 if (error) 1914 return (error); 1915 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), 1916 (cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL), NULL)); 1917 case SIOCIFDESTROY: 1918 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 1919 if (error) 1920 return (error); 1921 return (if_clone_destroy(ifr->ifr_name)); 1922 case SIOCIFGCLONERS: 1923 return (if_clone_list((struct if_clonereq *)data)); 1924 case SIOCGIFGMEMB: 1925 return (if_getgroupmembers((struct ifgroupreq *)data)); 1926 default: 1927 break; 1928 } 1929 1930 /* 1931 * Nominal ioctl through interface, lookup the ifp and obtain a 1932 * lock to serialize the ifconfig ioctl operation. 1933 */ 1934 ifnet_lock(); 1935 1936 ifp = ifunit(ifr->ifr_name); 1937 if (ifp == NULL) { 1938 ifnet_unlock(); 1939 return (ENXIO); 1940 } 1941 error = 0; 1942 1943 switch (cmd) { 1944 case SIOCGIFINDEX: 1945 ifr->ifr_index = ifp->if_index; 1946 break; 1947 1948 case SIOCGIFFLAGS: 1949 ifr->ifr_flags = ifp->if_flags; 1950 ifr->ifr_flagshigh = ifp->if_flags >> 16; 1951 break; 1952 1953 case SIOCGIFCAP: 1954 ifr->ifr_reqcap = ifp->if_capabilities; 1955 ifr->ifr_curcap = ifp->if_capenable; 1956 break; 1957 1958 case SIOCGIFMETRIC: 1959 ifr->ifr_metric = ifp->if_metric; 1960 break; 1961 1962 case SIOCGIFMTU: 1963 ifr->ifr_mtu = ifp->if_mtu; 1964 break; 1965 1966 case SIOCGIFTSOLEN: 1967 ifr->ifr_tsolen = ifp->if_tsolen; 1968 break; 1969 1970 case SIOCGIFDATA: 1971 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data, 1972 sizeof(ifp->if_data)); 1973 break; 1974 1975 case SIOCGIFPHYS: 1976 ifr->ifr_phys = ifp->if_physical; 1977 break; 1978 1979 case SIOCGIFPOLLCPU: 1980 ifr->ifr_pollcpu = -1; 1981 break; 1982 1983 case SIOCSIFPOLLCPU: 1984 break; 1985 1986 case SIOCGIFDESCR: 1987 error = 0; 1988 ifnet_lock(); 1989 if (ifp->if_description == NULL) { 1990 ifr->ifr_buffer.length = 0; 1991 error = ENOMSG; 1992 } else { 1993 /* space for terminating nul */ 1994 descrlen = strlen(ifp->if_description) + 1; 1995 if (ifr->ifr_buffer.length < descrlen) 1996 error = ENAMETOOLONG; 1997 else 1998 error = copyout(ifp->if_description, 1999 ifr->ifr_buffer.buffer, descrlen); 2000 ifr->ifr_buffer.length = descrlen; 2001 } 2002 ifnet_unlock(); 2003 break; 2004 2005 case SIOCSIFDESCR: 2006 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2007 if (error) 2008 break; 2009 2010 /* 2011 * Copy only (length-1) bytes to make sure that 2012 * if_description is always nul terminated. The 2013 * length parameter is supposed to count the 2014 * terminating nul in. 
2015 */ 2016 if (ifr->ifr_buffer.length > ifdescr_maxlen) 2017 return (ENAMETOOLONG); 2018 else if (ifr->ifr_buffer.length == 0) 2019 descrbuf = NULL; 2020 else { 2021 descrbuf = kmalloc(ifr->ifr_buffer.length, M_IFDESCR, 2022 M_WAITOK | M_ZERO); 2023 error = copyin(ifr->ifr_buffer.buffer, descrbuf, 2024 ifr->ifr_buffer.length - 1); 2025 if (error) { 2026 kfree(descrbuf, M_IFDESCR); 2027 break; 2028 } 2029 } 2030 2031 ifnet_lock(); 2032 odescrbuf = ifp->if_description; 2033 ifp->if_description = descrbuf; 2034 ifnet_unlock(); 2035 2036 if (odescrbuf) 2037 kfree(odescrbuf, M_IFDESCR); 2038 2039 case SIOCSIFFLAGS: 2040 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2041 if (error) 2042 break; 2043 new_flags = (ifr->ifr_flags & 0xffff) | 2044 (ifr->ifr_flagshigh << 16); 2045 if (ifp->if_flags & IFF_SMART) { 2046 /* Smart drivers twiddle their own routes */ 2047 } else if (ifp->if_flags & IFF_UP && 2048 (new_flags & IFF_UP) == 0) { 2049 if_down(ifp); 2050 } else if (new_flags & IFF_UP && 2051 (ifp->if_flags & IFF_UP) == 0) { 2052 do_ifup = 1; 2053 } 2054 2055 #ifdef IFPOLL_ENABLE 2056 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) { 2057 if (new_flags & IFF_NPOLLING) 2058 ifpoll_register(ifp); 2059 else 2060 ifpoll_deregister(ifp); 2061 } 2062 #endif 2063 2064 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | 2065 (new_flags &~ IFF_CANTCHANGE); 2066 if (new_flags & IFF_PPROMISC) { 2067 /* Permanently promiscuous mode requested */ 2068 ifp->if_flags |= IFF_PROMISC; 2069 } else if (ifp->if_pcount == 0) { 2070 ifp->if_flags &= ~IFF_PROMISC; 2071 } 2072 if (ifp->if_ioctl) { 2073 ifnet_serialize_all(ifp); 2074 ifp->if_ioctl(ifp, cmd, data, cred); 2075 ifnet_deserialize_all(ifp); 2076 } 2077 if (do_ifup) 2078 if_up(ifp); 2079 getmicrotime(&ifp->if_lastchange); 2080 break; 2081 2082 case SIOCSIFCAP: 2083 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2084 if (error) 2085 break; 2086 if (ifr->ifr_reqcap & ~ifp->if_capabilities) { 2087 error = EINVAL; 2088 break; 2089 } 2090 ifnet_serialize_all(ifp); 2091 ifp->if_ioctl(ifp, cmd, data, cred); 2092 ifnet_deserialize_all(ifp); 2093 break; 2094 2095 case SIOCSIFNAME: 2096 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2097 if (error) 2098 break; 2099 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); 2100 if (error) 2101 break; 2102 if (new_name[0] == '\0') { 2103 error = EINVAL; 2104 break; 2105 } 2106 if (ifunit(new_name) != NULL) { 2107 error = EEXIST; 2108 break; 2109 } 2110 2111 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 2112 2113 /* Announce the departure of the interface. */ 2114 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 2115 2116 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); 2117 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 2118 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 2119 namelen = strlen(new_name); 2120 onamelen = sdl->sdl_nlen; 2121 /* 2122 * Move the address if needed. This is safe because we 2123 * allocate space for a name of length IFNAMSIZ when we 2124 * create this in if_attach(). 2125 */ 2126 if (namelen != onamelen) { 2127 bcopy(sdl->sdl_data + onamelen, 2128 sdl->sdl_data + namelen, sdl->sdl_alen); 2129 } 2130 bcopy(new_name, sdl->sdl_data, namelen); 2131 sdl->sdl_nlen = namelen; 2132 sdl = (struct sockaddr_dl *)ifa->ifa_netmask; 2133 bzero(sdl->sdl_data, onamelen); 2134 while (namelen != 0) 2135 sdl->sdl_data[--namelen] = 0xff; 2136 2137 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 2138 2139 /* Announce the return of the interface. 
*/ 2140 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 2141 break; 2142 2143 case SIOCSIFMETRIC: 2144 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2145 if (error) 2146 break; 2147 ifp->if_metric = ifr->ifr_metric; 2148 getmicrotime(&ifp->if_lastchange); 2149 break; 2150 2151 case SIOCSIFPHYS: 2152 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2153 if (error) 2154 break; 2155 if (ifp->if_ioctl == NULL) { 2156 error = EOPNOTSUPP; 2157 break; 2158 } 2159 ifnet_serialize_all(ifp); 2160 error = ifp->if_ioctl(ifp, cmd, data, cred); 2161 ifnet_deserialize_all(ifp); 2162 if (error == 0) 2163 getmicrotime(&ifp->if_lastchange); 2164 break; 2165 2166 case SIOCSIFMTU: 2167 { 2168 u_long oldmtu = ifp->if_mtu; 2169 2170 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2171 if (error) 2172 break; 2173 if (ifp->if_ioctl == NULL) { 2174 error = EOPNOTSUPP; 2175 break; 2176 } 2177 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) { 2178 error = EINVAL; 2179 break; 2180 } 2181 ifnet_serialize_all(ifp); 2182 error = ifp->if_ioctl(ifp, cmd, data, cred); 2183 ifnet_deserialize_all(ifp); 2184 if (error == 0) { 2185 getmicrotime(&ifp->if_lastchange); 2186 rt_ifmsg(ifp); 2187 } 2188 /* 2189 * If the link MTU changed, do network layer specific procedure. 2190 */ 2191 if (ifp->if_mtu != oldmtu) { 2192 #ifdef INET6 2193 nd6_setmtu(ifp); 2194 #endif 2195 } 2196 break; 2197 } 2198 2199 case SIOCSIFTSOLEN: 2200 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2201 if (error) 2202 break; 2203 2204 /* XXX need driver supplied upper limit */ 2205 if (ifr->ifr_tsolen <= 0) { 2206 error = EINVAL; 2207 break; 2208 } 2209 ifp->if_tsolen = ifr->ifr_tsolen; 2210 break; 2211 2212 case SIOCADDMULTI: 2213 case SIOCDELMULTI: 2214 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2215 if (error) 2216 break; 2217 2218 /* Don't allow group membership on non-multicast interfaces. */ 2219 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2220 error = EOPNOTSUPP; 2221 break; 2222 } 2223 2224 /* Don't let users screw up protocols' entries. 
*/ 2225 if (ifr->ifr_addr.sa_family != AF_LINK) { 2226 error = EINVAL; 2227 break; 2228 } 2229 2230 if (cmd == SIOCADDMULTI) { 2231 struct ifmultiaddr *ifma; 2232 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); 2233 } else { 2234 error = if_delmulti(ifp, &ifr->ifr_addr); 2235 } 2236 if (error == 0) 2237 getmicrotime(&ifp->if_lastchange); 2238 break; 2239 2240 case SIOCSIFPHYADDR: 2241 case SIOCDIFPHYADDR: 2242 #ifdef INET6 2243 case SIOCSIFPHYADDR_IN6: 2244 #endif 2245 case SIOCSLIFPHYADDR: 2246 case SIOCSIFMEDIA: 2247 case SIOCSIFGENERIC: 2248 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2249 if (error) 2250 break; 2251 if (ifp->if_ioctl == NULL) { 2252 error = EOPNOTSUPP; 2253 break; 2254 } 2255 ifnet_serialize_all(ifp); 2256 error = ifp->if_ioctl(ifp, cmd, data, cred); 2257 ifnet_deserialize_all(ifp); 2258 if (error == 0) 2259 getmicrotime(&ifp->if_lastchange); 2260 break; 2261 2262 case SIOCGIFSTATUS: 2263 ifs = (struct ifstat *)data; 2264 ifs->ascii[0] = '\0'; 2265 /* fall through */ 2266 case SIOCGIFPSRCADDR: 2267 case SIOCGIFPDSTADDR: 2268 case SIOCGLIFPHYADDR: 2269 case SIOCGIFMEDIA: 2270 case SIOCGIFXMEDIA: 2271 case SIOCGIFGENERIC: 2272 if (ifp->if_ioctl == NULL) { 2273 error = EOPNOTSUPP; 2274 break; 2275 } 2276 ifnet_serialize_all(ifp); 2277 error = ifp->if_ioctl(ifp, cmd, data, cred); 2278 ifnet_deserialize_all(ifp); 2279 break; 2280 2281 case SIOCSIFLLADDR: 2282 error = caps_priv_check(cred, SYSCAP_RESTRICTEDROOT); 2283 if (error) 2284 break; 2285 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, 2286 ifr->ifr_addr.sa_len); 2287 EVENTHANDLER_INVOKE(iflladdr_event, ifp); 2288 break; 2289 2290 case SIOCAIFGROUP: 2291 ifgr = (struct ifgroupreq *)ifr; 2292 error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG); 2293 if (error) 2294 return (error); 2295 if ((error = if_addgroup(ifp, ifgr->ifgr_group))) 2296 return (error); 2297 break; 2298 2299 case SIOCDIFGROUP: 2300 ifgr = (struct ifgroupreq *)ifr; 2301 error = caps_priv_check(cred, SYSCAP_NONET_IFCONFIG); 2302 if (error) 2303 return (error); 2304 if ((error = if_delgroup(ifp, ifgr->ifgr_group))) 2305 return (error); 2306 break; 2307 2308 case SIOCGIFGROUP: 2309 ifgr = (struct ifgroupreq *)ifr; 2310 if ((error = if_getgroups(ifgr, ifp))) 2311 return (error); 2312 break; 2313 2314 default: 2315 oif_flags = ifp->if_flags; 2316 if (so->so_proto == 0) { 2317 error = EOPNOTSUPP; 2318 break; 2319 } 2320 error = so_pru_control_direct(so, cmd, data, ifp); 2321 2322 /* 2323 * If the socket control method returns EOPNOTSUPP, pass the 2324 * request directly to the interface. 2325 * 2326 * Exclude the SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} ioctls, 2327 * because drivers may trust these ioctls to come from an 2328 * already privileged layer and thus do not perform credentials 2329 * checks or input validation. 
2330 */ 2331 if (error == EOPNOTSUPP && 2332 ifp->if_ioctl != NULL && 2333 cmd != SIOCSIFADDR && 2334 cmd != SIOCSIFBRDADDR && 2335 cmd != SIOCSIFDSTADDR && 2336 cmd != SIOCSIFNETMASK) { 2337 ifnet_serialize_all(ifp); 2338 error = ifp->if_ioctl(ifp, cmd, data, cred); 2339 ifnet_deserialize_all(ifp); 2340 } 2341 2342 if ((oif_flags ^ ifp->if_flags) & IFF_UP) { 2343 #ifdef INET6 2344 DELAY(100);/* XXX: temporary workaround for fxp issue*/ 2345 if (ifp->if_flags & IFF_UP) { 2346 crit_enter(); 2347 in6_if_up(ifp); 2348 crit_exit(); 2349 } 2350 #endif 2351 } 2352 break; 2353 } 2354 2355 ifnet_unlock(); 2356 return (error); 2357 } 2358 2359 /* 2360 * Set/clear promiscuous mode on interface ifp based on the truth value 2361 * of pswitch. The calls are reference counted so that only the first 2362 * "on" request actually has an effect, as does the final "off" request. 2363 * Results are undefined if the "off" and "on" requests are not matched. 2364 */ 2365 int 2366 ifpromisc(struct ifnet *ifp, int pswitch) 2367 { 2368 struct ifreq ifr; 2369 int error; 2370 int oldflags; 2371 2372 oldflags = ifp->if_flags; 2373 if (ifp->if_flags & IFF_PPROMISC) { 2374 /* Do nothing if device is in permanently promiscuous mode */ 2375 ifp->if_pcount += pswitch ? 1 : -1; 2376 return (0); 2377 } 2378 if (pswitch) { 2379 /* 2380 * If the device is not configured up, we cannot put it in 2381 * promiscuous mode. 2382 */ 2383 if ((ifp->if_flags & IFF_UP) == 0) 2384 return (ENETDOWN); 2385 if (ifp->if_pcount++ != 0) 2386 return (0); 2387 ifp->if_flags |= IFF_PROMISC; 2388 log(LOG_INFO, "%s: promiscuous mode enabled\n", 2389 ifp->if_xname); 2390 } else { 2391 if (--ifp->if_pcount > 0) 2392 return (0); 2393 ifp->if_flags &= ~IFF_PROMISC; 2394 log(LOG_INFO, "%s: promiscuous mode disabled\n", 2395 ifp->if_xname); 2396 } 2397 ifr.ifr_flags = ifp->if_flags; 2398 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2399 ifnet_serialize_all(ifp); 2400 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL); 2401 ifnet_deserialize_all(ifp); 2402 if (error == 0) 2403 rt_ifmsg(ifp); 2404 else 2405 ifp->if_flags = oldflags; 2406 return error; 2407 } 2408 2409 /* 2410 * Return interface configuration 2411 * of system. List may be used 2412 * in later ioctl's (above) to get 2413 * other information. 2414 */ 2415 static int 2416 ifconf(u_long cmd, caddr_t data, struct ucred *cred) 2417 { 2418 struct ifconf *ifc = (struct ifconf *)data; 2419 struct ifnet *ifp; 2420 struct sockaddr *sa; 2421 struct ifreq ifr, *ifrp; 2422 int space = ifc->ifc_len, error = 0; 2423 2424 ifrp = ifc->ifc_req; 2425 2426 ifnet_lock(); 2427 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2428 struct ifaddr_container *ifac, *ifac_mark; 2429 struct ifaddr_marker mark; 2430 struct ifaddrhead *head; 2431 int addrs; 2432 2433 if (space <= sizeof ifr) 2434 break; 2435 2436 /* 2437 * Zero the stack declared structure first to prevent 2438 * memory disclosure. 2439 */ 2440 bzero(&ifr, sizeof(ifr)); 2441 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) 2442 >= sizeof(ifr.ifr_name)) { 2443 error = ENAMETOOLONG; 2444 break; 2445 } 2446 2447 /* 2448 * Add a marker, since copyout() could block and during that 2449 * period the list could be changed. Inserting the marker to 2450 * the header of the list will not cause trouble for the code 2451 * assuming that the first element of the list is AF_LINK; the 2452 * marker will be moved to the next position w/o blocking. 
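 *
 * Reduced to its skeleton, the marker pattern used below looks like this
 * (a hedged sketch; "walk_entry" is a hypothetical per-entry action, not a
 * function from this file):
 */
#if 0
	TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link);
	while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) {
		/* keep the marker right behind the entry being processed */
		TAILQ_REMOVE(head, ifac_mark, ifa_link);
		TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link);
		walk_entry(ifac->ifa);	/* may block; the list may change */
	}
	TAILQ_REMOVE(head, ifac_mark, ifa_link);
#endif
/*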
2453 */ 2454 ifa_marker_init(&mark, ifp); 2455 ifac_mark = &mark.ifac; 2456 head = &ifp->if_addrheads[mycpuid]; 2457 2458 addrs = 0; 2459 TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link); 2460 while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) { 2461 struct ifaddr *ifa = ifac->ifa; 2462 2463 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2464 TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); 2465 2466 /* Ignore marker */ 2467 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 2468 continue; 2469 2470 if (space <= sizeof ifr) 2471 break; 2472 sa = ifa->ifa_addr; 2473 if (cred->cr_prison && prison_if(cred, sa)) 2474 continue; 2475 addrs++; 2476 /* 2477 * Keep a reference on this ifaddr, so that it will 2478 * not be destroyed when its address is copied to 2479 * the userland, which could block. 2480 */ 2481 IFAREF(ifa); 2482 if (sa->sa_len <= sizeof(*sa)) { 2483 ifr.ifr_addr = *sa; 2484 error = copyout(&ifr, ifrp, sizeof ifr); 2485 ifrp++; 2486 } else { 2487 if (space < (sizeof ifr) + sa->sa_len - 2488 sizeof(*sa)) { 2489 IFAFREE(ifa); 2490 break; 2491 } 2492 space -= sa->sa_len - sizeof(*sa); 2493 error = copyout(&ifr, ifrp, 2494 sizeof ifr.ifr_name); 2495 if (error == 0) 2496 error = copyout(sa, &ifrp->ifr_addr, 2497 sa->sa_len); 2498 ifrp = (struct ifreq *) 2499 (sa->sa_len + (caddr_t)&ifrp->ifr_addr); 2500 } 2501 IFAFREE(ifa); 2502 if (error) 2503 break; 2504 space -= sizeof ifr; 2505 } 2506 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2507 if (error) 2508 break; 2509 if (!addrs) { 2510 bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr); 2511 error = copyout(&ifr, ifrp, sizeof ifr); 2512 if (error) 2513 break; 2514 space -= sizeof ifr; 2515 ifrp++; 2516 } 2517 } 2518 ifnet_unlock(); 2519 2520 ifc->ifc_len -= space; 2521 return (error); 2522 } 2523 2524 /* 2525 * Just like if_promisc(), but for all-multicast-reception mode. 2526 */ 2527 int 2528 if_allmulti(struct ifnet *ifp, int onswitch) 2529 { 2530 int error = 0; 2531 struct ifreq ifr; 2532 2533 crit_enter(); 2534 2535 if (onswitch) { 2536 if (ifp->if_amcount++ == 0) { 2537 ifp->if_flags |= IFF_ALLMULTI; 2538 ifr.ifr_flags = ifp->if_flags; 2539 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2540 ifnet_serialize_all(ifp); 2541 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2542 NULL); 2543 ifnet_deserialize_all(ifp); 2544 } 2545 } else { 2546 if (ifp->if_amcount > 1) { 2547 ifp->if_amcount--; 2548 } else { 2549 ifp->if_amcount = 0; 2550 ifp->if_flags &= ~IFF_ALLMULTI; 2551 ifr.ifr_flags = ifp->if_flags; 2552 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2553 ifnet_serialize_all(ifp); 2554 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2555 NULL); 2556 ifnet_deserialize_all(ifp); 2557 } 2558 } 2559 2560 crit_exit(); 2561 2562 if (error == 0) 2563 rt_ifmsg(ifp); 2564 return error; 2565 } 2566 2567 /* 2568 * Add a multicast listenership to the interface in question. 
2569 * The link layer provides a routine which converts 2570 */ 2571 int 2572 if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa, 2573 struct ifmultiaddr **retifma) 2574 { 2575 struct sockaddr *llsa, *dupsa; 2576 int error; 2577 struct ifmultiaddr *ifma; 2578 2579 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2580 2581 /* 2582 * If the matching multicast address already exists 2583 * then don't add a new one, just add a reference 2584 */ 2585 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2586 if (sa_equal(sa, ifma->ifma_addr)) { 2587 ifma->ifma_refcount++; 2588 if (retifma) 2589 *retifma = ifma; 2590 return 0; 2591 } 2592 } 2593 2594 /* 2595 * Give the link layer a chance to accept/reject it, and also 2596 * find out which AF_LINK address this maps to, if it isn't one 2597 * already. 2598 */ 2599 if (ifp->if_resolvemulti) { 2600 error = ifp->if_resolvemulti(ifp, &llsa, sa); 2601 if (error) 2602 return error; 2603 } else { 2604 llsa = NULL; 2605 } 2606 2607 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2608 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT); 2609 bcopy(sa, dupsa, sa->sa_len); 2610 2611 ifma->ifma_addr = dupsa; 2612 ifma->ifma_lladdr = llsa; 2613 ifma->ifma_ifp = ifp; 2614 ifma->ifma_refcount = 1; 2615 ifma->ifma_protospec = NULL; 2616 rt_newmaddrmsg(RTM_NEWMADDR, ifma); 2617 2618 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2619 if (retifma) 2620 *retifma = ifma; 2621 2622 if (llsa != NULL) { 2623 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2624 if (sa_equal(ifma->ifma_addr, llsa)) 2625 break; 2626 } 2627 if (ifma) { 2628 ifma->ifma_refcount++; 2629 } else { 2630 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2631 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT); 2632 bcopy(llsa, dupsa, llsa->sa_len); 2633 ifma->ifma_addr = dupsa; 2634 ifma->ifma_ifp = ifp; 2635 ifma->ifma_refcount = 1; 2636 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2637 } 2638 } 2639 /* 2640 * We are certain we have added something, so call down to the 2641 * interface to let them know about it. 2642 */ 2643 if (ifp->if_ioctl) 2644 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL); 2645 2646 return 0; 2647 } 2648 2649 int 2650 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, 2651 struct ifmultiaddr **retifma) 2652 { 2653 int error; 2654 2655 ifnet_serialize_all(ifp); 2656 error = if_addmulti_serialized(ifp, sa, retifma); 2657 ifnet_deserialize_all(ifp); 2658 2659 return error; 2660 } 2661 2662 /* 2663 * Remove a reference to a multicast address on this interface. Yell 2664 * if the request does not match an existing membership. 2665 */ 2666 static int 2667 if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa) 2668 { 2669 struct ifmultiaddr *ifma; 2670 2671 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2672 2673 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2674 if (sa_equal(sa, ifma->ifma_addr)) 2675 break; 2676 if (ifma == NULL) 2677 return ENOENT; 2678 2679 if (ifma->ifma_refcount > 1) { 2680 ifma->ifma_refcount--; 2681 return 0; 2682 } 2683 2684 rt_newmaddrmsg(RTM_DELMADDR, ifma); 2685 sa = ifma->ifma_lladdr; 2686 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2687 /* 2688 * Make sure the interface driver is notified 2689 * in the case of a link layer mcast group being left. 
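 *
 * A hedged sketch of the paired membership usage as seen from a protocol;
 * "grp" is a hypothetical group address, not a variable from this file.
 */
#if 0
	struct ifmultiaddr *ifma;
	int error;

	/* join, or just add a reference if the group is already joined */
	error = if_addmulti(ifp, grp, &ifma);
	if (error == 0) {
		/* later: drop the reference; the last drop leaves the group */
		error = if_delmulti(ifp, grp);
	}
#endif
/*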
2690 */ 2691 if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) 2692 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2693 kfree(ifma->ifma_addr, M_IFMADDR); 2694 kfree(ifma, M_IFMADDR); 2695 if (sa == NULL) 2696 return 0; 2697 2698 /* 2699 * Now look for the link-layer address which corresponds to 2700 * this network address. It had been squirreled away in 2701 * ifma->ifma_lladdr for this purpose (so we don't have 2702 * to call ifp->if_resolvemulti() again), and we saved that 2703 * value in sa above. If some nasty deleted the 2704 * link-layer address out from underneath us, we can deal because 2705 * the address we stored is not the same as the one which was 2706 * in the record for the link-layer address. (So we don't complain 2707 * in that case.) 2708 */ 2709 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2710 if (sa_equal(sa, ifma->ifma_addr)) 2711 break; 2712 if (ifma == NULL) 2713 return 0; 2714 2715 if (ifma->ifma_refcount > 1) { 2716 ifma->ifma_refcount--; 2717 return 0; 2718 } 2719 2720 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2721 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2722 kfree(ifma->ifma_addr, M_IFMADDR); 2723 kfree(sa, M_IFMADDR); 2724 kfree(ifma, M_IFMADDR); 2725 2726 return 0; 2727 } 2728 2729 int 2730 if_delmulti(struct ifnet *ifp, struct sockaddr *sa) 2731 { 2732 int error; 2733 2734 ifnet_serialize_all(ifp); 2735 error = if_delmulti_serialized(ifp, sa); 2736 ifnet_deserialize_all(ifp); 2737 2738 return error; 2739 } 2740 2741 /* 2742 * Delete all multicast group membership for an interface. 2743 * Should be used to quickly flush all multicast filters. 2744 */ 2745 void 2746 if_delallmulti_serialized(struct ifnet *ifp) 2747 { 2748 struct ifmultiaddr *ifma, mark; 2749 struct sockaddr sa; 2750 2751 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2752 2753 bzero(&sa, sizeof(sa)); 2754 sa.sa_family = AF_UNSPEC; 2755 sa.sa_len = sizeof(sa); 2756 2757 bzero(&mark, sizeof(mark)); 2758 mark.ifma_addr = &sa; 2759 2760 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link); 2761 while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) { 2762 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2763 TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark, 2764 ifma_link); 2765 2766 if (ifma->ifma_addr->sa_family == AF_UNSPEC) 2767 continue; 2768 2769 if_delmulti_serialized(ifp, ifma->ifma_addr); 2770 } 2771 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2772 } 2773 2774 2775 /* 2776 * Set the link layer address on an interface. 2777 * 2778 * At this time we only support certain types of interfaces, 2779 * and we don't allow the length of the address to change. 2780 */ 2781 int 2782 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) 2783 { 2784 struct sockaddr_dl *sdl; 2785 struct ifreq ifr; 2786 2787 sdl = IF_LLSOCKADDR(ifp); 2788 if (sdl == NULL) 2789 return (EINVAL); 2790 if (len != sdl->sdl_alen) /* don't allow length to change */ 2791 return (EINVAL); 2792 switch (ifp->if_type) { 2793 case IFT_ETHER: /* these types use struct arpcom */ 2794 case IFT_XETHER: 2795 case IFT_L2VLAN: 2796 case IFT_IEEE8023ADLAG: 2797 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len); 2798 bcopy(lladdr, LLADDR(sdl), len); 2799 break; 2800 default: 2801 return (ENODEV); 2802 } 2803 /* 2804 * If the interface is already up, we need 2805 * to re-init it in order to reprogram its 2806 * address filter.
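 *
 * Illustrative only: a hedged userland sketch of driving this path through
 * SIOCSIFLLADDR.  The "em0" name and the example MAC address are assumptions
 * made up for the example; the address length must match the current one.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <string.h>

static int
example_set_lladdr(int s)
{
	static const unsigned char mac[6] = { 0x02, 0x00, 0x00, 0x12, 0x34, 0x56 };
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
	ifr.ifr_addr.sa_family = AF_LINK;
	ifr.ifr_addr.sa_len = sizeof(mac);
	memcpy(ifr.ifr_addr.sa_data, mac, sizeof(mac));
	return (ioctl(s, SIOCSIFLLADDR, &ifr));
}
#endif
/*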
2807 */ 2808 ifnet_serialize_all(ifp); 2809 if ((ifp->if_flags & IFF_UP) != 0) { 2810 #ifdef INET 2811 struct ifaddr_container *ifac; 2812 #endif 2813 2814 ifp->if_flags &= ~IFF_UP; 2815 ifr.ifr_flags = ifp->if_flags; 2816 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2817 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2818 NULL); 2819 ifp->if_flags |= IFF_UP; 2820 ifr.ifr_flags = ifp->if_flags; 2821 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2822 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2823 NULL); 2824 #ifdef INET 2825 /* 2826 * Also send gratuitous ARPs to notify other nodes about 2827 * the address change. 2828 */ 2829 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 2830 struct ifaddr *ifa = ifac->ifa; 2831 2832 if (ifa->ifa_addr != NULL && 2833 ifa->ifa_addr->sa_family == AF_INET) 2834 arp_gratuitous(ifp, ifa); 2835 } 2836 #endif 2837 } 2838 ifnet_deserialize_all(ifp); 2839 return (0); 2840 } 2841 2842 2843 /* 2844 * Tunnel interfaces can nest, and when misconfigured they may cause 2845 * infinite recursion. Introduce an upper limit to prevent such runaway 2846 * recursion, as well as to constrain the nesting depth. 2847 * 2848 * Return 0 if the tunnel nesting count is less than or equal to the limit. 2849 */ 2850 int 2851 if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, 2852 int limit) 2853 { 2854 struct m_tag *mtag; 2855 int count; 2856 2857 count = 1; 2858 mtag = m_tag_locate(m, cookie, 0 /* type */, NULL); 2859 if (mtag != NULL) 2860 count += *(int *)(mtag + 1); 2861 if (count > limit) { 2862 log(LOG_NOTICE, 2863 "%s: packet looped too many times (%d), limit %d\n", 2864 ifp->if_xname, count, limit); 2865 return (ELOOP); 2866 } 2867 2868 if (mtag == NULL) { 2869 mtag = m_tag_alloc(cookie, 0, sizeof(int), M_NOWAIT); 2870 if (mtag == NULL) 2871 return (ENOMEM); 2872 m_tag_prepend(m, mtag); 2873 } 2874 2875 *(int *)(mtag + 1) = count; 2876 return (0); 2877 } 2878 2879 2880 /* 2881 * Locate an interface based on a complete address. 2882 */ 2883 struct ifnet * 2884 if_bylla(const void *lla, unsigned char lla_len) 2885 { 2886 const struct ifnet_array *arr; 2887 struct ifnet *ifp; 2888 struct sockaddr_dl *sdl; 2889 int i; 2890 2891 arr = ifnet_array_get(); 2892 for (i = 0; i < arr->ifnet_count; ++i) { 2893 ifp = arr->ifnet_arr[i]; 2894 if (ifp->if_addrlen != lla_len) 2895 continue; 2896 2897 sdl = IF_LLSOCKADDR(ifp); 2898 if (memcmp(lla, LLADDR(sdl), lla_len) == 0) 2899 return (ifp); 2900 } 2901 return (NULL); 2902 } 2903 2904 struct ifmultiaddr * 2905 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp) 2906 { 2907 struct ifmultiaddr *ifma; 2908 2909 /* TODO: need ifnet_serialize_main */ 2910 ifnet_serialize_all(ifp); 2911 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2912 if (sa_equal(ifma->ifma_addr, sa)) 2913 break; 2914 ifnet_deserialize_all(ifp); 2915 2916 return ifma; 2917 } 2918 2919 /* 2920 * This function locates the first real Ethernet MAC from a network 2921 * card and loads it into node, returning 0 on success or ENOENT if 2922 * no suitable interfaces were found. It is used by the uuid code to 2923 * generate a unique 6-byte number.
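 *
 * A hedged sketch of the expected call pattern; "node" is just a local
 * example buffer (6 bytes, as the uuid code uses):
 */
#if 0
	uint16_t node[3];

	if (if_getanyethermac(node, sizeof(node)) != 0) {
		/* no suitable Ethernet interface; fall back to random bytes */
	}
#endif
/*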
2924 */ 2925 int 2926 if_getanyethermac(uint16_t *node, int minlen) 2927 { 2928 struct ifnet *ifp; 2929 struct sockaddr_dl *sdl; 2930 2931 ifnet_lock(); 2932 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2933 if (ifp->if_type != IFT_ETHER) 2934 continue; 2935 sdl = IF_LLSOCKADDR(ifp); 2936 if (sdl->sdl_alen < minlen) 2937 continue; 2938 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node, 2939 minlen); 2940 ifnet_unlock(); 2941 return(0); 2942 } 2943 ifnet_unlock(); 2944 return (ENOENT); 2945 } 2946 2947 /* 2948 * The name argument must be a pointer to storage which will last as 2949 * long as the interface does. For physical devices, the result of 2950 * device_get_name(dev) is a good choice and for pseudo-devices a 2951 * static string works well. 2952 */ 2953 void 2954 if_initname(struct ifnet *ifp, const char *name, int unit) 2955 { 2956 ifp->if_dname = name; 2957 ifp->if_dunit = unit; 2958 if (unit != IF_DUNIT_NONE) 2959 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); 2960 else 2961 strlcpy(ifp->if_xname, name, IFNAMSIZ); 2962 } 2963 2964 int 2965 if_printf(struct ifnet *ifp, const char *fmt, ...) 2966 { 2967 __va_list ap; 2968 int retval; 2969 2970 retval = kprintf("%s: ", ifp->if_xname); 2971 __va_start(ap, fmt); 2972 retval += kvprintf(fmt, ap); 2973 __va_end(ap); 2974 return (retval); 2975 } 2976 2977 struct ifnet * 2978 if_alloc(uint8_t type) 2979 { 2980 struct ifnet *ifp; 2981 size_t size; 2982 2983 /* 2984 * XXX temporary hack until arpcom is setup in if_l2com 2985 */ 2986 if (type == IFT_ETHER) 2987 size = sizeof(struct arpcom); 2988 else 2989 size = sizeof(struct ifnet); 2990 2991 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO); 2992 2993 ifp->if_type = type; 2994 2995 if (if_com_alloc[type] != NULL) { 2996 ifp->if_l2com = if_com_alloc[type](type, ifp); 2997 if (ifp->if_l2com == NULL) { 2998 kfree(ifp, M_IFNET); 2999 return (NULL); 3000 } 3001 } 3002 return (ifp); 3003 } 3004 3005 void 3006 if_free(struct ifnet *ifp) 3007 { 3008 if (ifp->if_description != NULL) 3009 kfree(ifp->if_description, M_IFDESCR); 3010 kfree(ifp, M_IFNET); 3011 } 3012 3013 void 3014 ifq_set_classic(struct ifaltq *ifq) 3015 { 3016 ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq, 3017 ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request); 3018 } 3019 3020 void 3021 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq, 3022 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request) 3023 { 3024 int q; 3025 3026 KASSERT(mapsubq != NULL, ("mapsubq is not specified")); 3027 KASSERT(enqueue != NULL, ("enqueue is not specified")); 3028 KASSERT(dequeue != NULL, ("dequeue is not specified")); 3029 KASSERT(request != NULL, ("request is not specified")); 3030 3031 ifq->altq_mapsubq = mapsubq; 3032 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 3033 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 3034 3035 ifsq->ifsq_enqueue = enqueue; 3036 ifsq->ifsq_dequeue = dequeue; 3037 ifsq->ifsq_request = request; 3038 } 3039 } 3040 3041 static void 3042 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 3043 { 3044 3045 classq_add(&ifsq->ifsq_norm, m); 3046 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 3047 } 3048 3049 static void 3050 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 3051 { 3052 3053 classq_add(&ifsq->ifsq_prio, m); 3054 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 3055 ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len); 3056 } 3057 3058 static struct mbuf * 3059 ifsq_norm_dequeue(struct ifaltq_subque *ifsq) 3060 { 3061 struct mbuf *m; 3062 3063 m = 
classq_get(&ifsq->ifsq_norm); 3064 if (m != NULL) 3065 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 3066 return (m); 3067 } 3068 3069 static struct mbuf * 3070 ifsq_prio_dequeue(struct ifaltq_subque *ifsq) 3071 { 3072 struct mbuf *m; 3073 3074 m = classq_get(&ifsq->ifsq_prio); 3075 if (m != NULL) { 3076 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 3077 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len); 3078 } 3079 return (m); 3080 } 3081 3082 int 3083 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m, 3084 struct altq_pktattr *pa __unused) 3085 { 3086 3087 M_ASSERTPKTHDR(m); 3088 again: 3089 if (ifsq->ifsq_len >= ifsq->ifsq_maxlen || 3090 ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) { 3091 struct mbuf *m_drop; 3092 3093 if (m->m_flags & M_PRIO) { 3094 m_drop = NULL; 3095 if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) && 3096 ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) { 3097 /* Try dropping some from normal queue. */ 3098 m_drop = ifsq_norm_dequeue(ifsq); 3099 } 3100 if (m_drop == NULL) 3101 m_drop = ifsq_prio_dequeue(ifsq); 3102 } else { 3103 m_drop = ifsq_norm_dequeue(ifsq); 3104 } 3105 if (m_drop != NULL) { 3106 IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1); 3107 m_freem(m_drop); 3108 goto again; 3109 } 3110 /* 3111 * No old packets could be dropped! 3112 * NOTE: Caller increases oqdrops. 3113 */ 3114 m_freem(m); 3115 return (ENOBUFS); 3116 } else { 3117 if (m->m_flags & M_PRIO) 3118 ifsq_prio_enqueue(ifsq, m); 3119 else 3120 ifsq_norm_enqueue(ifsq, m); 3121 return (0); 3122 } 3123 } 3124 3125 struct mbuf * 3126 ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op) 3127 { 3128 struct mbuf *m; 3129 3130 switch (op) { 3131 case ALTDQ_POLL: 3132 m = classq_head(&ifsq->ifsq_prio); 3133 if (m == NULL) 3134 m = classq_head(&ifsq->ifsq_norm); 3135 break; 3136 3137 case ALTDQ_REMOVE: 3138 m = ifsq_prio_dequeue(ifsq); 3139 if (m == NULL) 3140 m = ifsq_norm_dequeue(ifsq); 3141 break; 3142 3143 default: 3144 panic("unsupported ALTQ dequeue op: %d", op); 3145 } 3146 return m; 3147 } 3148 3149 int 3150 ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg) 3151 { 3152 switch (req) { 3153 case ALTRQ_PURGE: 3154 for (;;) { 3155 struct mbuf *m; 3156 3157 m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE); 3158 if (m == NULL) 3159 break; 3160 m_freem(m); 3161 } 3162 break; 3163 3164 default: 3165 panic("unsupported ALTQ request: %d", req); 3166 } 3167 return 0; 3168 } 3169 3170 static void 3171 ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched) 3172 { 3173 struct ifnet *ifp = ifsq_get_ifp(ifsq); 3174 int running = 0, need_sched; 3175 3176 /* 3177 * Try to do direct ifnet.if_start on the subqueue first, if there is 3178 * contention on the subqueue hardware serializer, ifnet.if_start on 3179 * the subqueue will be scheduled on the subqueue owner CPU. 3180 */ 3181 if (!ifsq_tryserialize_hw(ifsq)) { 3182 /* 3183 * Subqueue hardware serializer contention happened, 3184 * ifnet.if_start on the subqueue is scheduled on 3185 * the subqueue owner CPU, and we keep going. 
3186 */ 3187 ifsq_ifstart_schedule(ifsq, 1); 3188 return; 3189 } 3190 3191 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 3192 ifp->if_start(ifp, ifsq); 3193 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 3194 running = 1; 3195 } 3196 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 3197 3198 ifsq_deserialize_hw(ifsq); 3199 3200 if (need_sched) { 3201 /* 3202 * More data needs to be transmitted, ifnet.if_start on the 3203 * subqueue is scheduled on the subqueue owner CPU, and we 3204 * keep going. 3205 * NOTE: ifnet.if_start subqueue interlock is not released. 3206 */ 3207 ifsq_ifstart_schedule(ifsq, force_sched); 3208 } 3209 } 3210 3211 /* 3212 * Subqueue packet staging mechanism: 3213 * 3214 * Packets enqueued into the subqueue are staged up to a certain amount 3215 * before ifnet.if_start on the subqueue is called. In this way, the 3216 * driver can avoid writing to the hardware registers for every packet; 3217 * instead, the registers are written once a certain amount of 3218 * packets has been put onto the hardware TX ring. Measurements on several 3219 * modern NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) show that aggregating 3220 * hardware register writes can save ~20% CPU time when 18-byte UDP 3221 * datagrams are transmitted at 1.48 Mpps. The performance improvement from 3222 * aggregating hardware register writes is also mentioned in Luigi Rizzo's 3223 * netmap paper (http://info.iet.unipi.it/~luigi/netmap/). 3224 * 3225 * Subqueue packet staging is performed for two entry points into drivers' 3226 * transmission function: 3227 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try() 3228 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule() 3229 * 3230 * Subqueue packet staging will be stopped upon any of the following 3231 * conditions: 3232 * - If the count of packets enqueued on the current CPU is greater than or 3233 * equal to ifsq_stage_cntmax. (XXX this should be per-interface) 3234 * - If the total length of packets enqueued on the current CPU is greater 3235 * than or equal to the hardware's MTU - max_protohdr. max_protohdr is 3236 * subtracted from the hardware's MTU mainly because a full TCP segment's 3237 * size is usually less than the hardware's MTU. 3238 * - ifsq_ifstart_schedule() is not pending on the current CPU and 3239 * ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not 3240 * released. 3241 * - The if_start_rollup(), which is registered as a low priority netisr 3242 * rollup function, is called; probably because no more work is pending 3243 * for netisr. 3244 * 3245 * NOTE: 3246 * Currently subqueue packet staging is only performed in netisr threads.
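 *
 * A hedged sketch of the two staging thresholds as ifq_dispatch() below
 * applies them (the helper itself is made up for illustration; the names it
 * uses come from the surrounding code):
 */
#if 0
static __inline int
example_keep_staging(const struct ifsubq_stage *stage, const struct ifnet *ifp)
{
	/* keep staging only while both thresholds are still unmet */
	return (stage->stg_cnt < ifsq_stage_cntmax &&
	    stage->stg_len < (ifp->if_mtu - max_protohdr));
}
#endif
/*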
3247 */ 3248 int 3249 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa) 3250 { 3251 struct ifaltq *ifq = &ifp->if_snd; 3252 struct ifaltq_subque *ifsq; 3253 int error, start = 0, len, mcast = 0, avoid_start = 0; 3254 struct ifsubq_stage_head *head = NULL; 3255 struct ifsubq_stage *stage = NULL; 3256 struct globaldata *gd = mycpu; 3257 struct thread *td = gd->gd_curthread; 3258 3259 crit_enter_quick(td); 3260 3261 ifsq = ifq_map_subq(ifq, gd->gd_cpuid); 3262 ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq); 3263 3264 len = m->m_pkthdr.len; 3265 if (m->m_flags & M_MCAST) 3266 mcast = 1; 3267 3268 if (td->td_type == TD_TYPE_NETISR) { 3269 head = &ifsubq_stage_heads[mycpuid]; 3270 stage = ifsq_get_stage(ifsq, mycpuid); 3271 3272 stage->stg_cnt++; 3273 stage->stg_len += len; 3274 if (stage->stg_cnt < ifsq_stage_cntmax && 3275 stage->stg_len < (ifp->if_mtu - max_protohdr)) 3276 avoid_start = 1; 3277 } 3278 3279 ALTQ_SQ_LOCK(ifsq); 3280 error = ifsq_enqueue_locked(ifsq, m, pa); 3281 if (error) { 3282 IFNET_STAT_INC(ifp, oqdrops, 1); 3283 if (!ifsq_data_ready(ifsq)) { 3284 ALTQ_SQ_UNLOCK(ifsq); 3285 goto done; 3286 } 3287 avoid_start = 0; 3288 } else { 3289 IFNET_STAT_INC(ifp, obytes, len); 3290 if (mcast) 3291 IFNET_STAT_INC(ifp, omcasts, 1); 3292 } 3293 if (!ifsq_is_started(ifsq)) { 3294 if (avoid_start) { 3295 ALTQ_SQ_UNLOCK(ifsq); 3296 3297 KKASSERT(!error); 3298 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 3299 ifsq_stage_insert(head, stage); 3300 3301 goto done; 3302 } 3303 3304 /* 3305 * Hold the subqueue interlock of ifnet.if_start 3306 */ 3307 ifsq_set_started(ifsq); 3308 start = 1; 3309 } 3310 ALTQ_SQ_UNLOCK(ifsq); 3311 3312 if (stage != NULL) { 3313 if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) { 3314 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 3315 if (!avoid_start) { 3316 ifsq_stage_remove(head, stage); 3317 ifsq_ifstart_schedule(ifsq, 1); 3318 } 3319 goto done; 3320 } 3321 3322 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) { 3323 ifsq_stage_remove(head, stage); 3324 } else { 3325 stage->stg_cnt = 0; 3326 stage->stg_len = 0; 3327 } 3328 } 3329 3330 if (start) 3331 ifsq_ifstart_try(ifsq, 0); 3332 3333 done: 3334 crit_exit_quick(td); 3335 return error; 3336 } 3337 3338 void * 3339 ifa_create(int size) 3340 { 3341 struct ifaddr *ifa; 3342 int i; 3343 3344 KASSERT(size >= sizeof(*ifa), ("ifaddr size too small")); 3345 3346 ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO); 3347 3348 /* 3349 * Make ifa_container available on all CPUs, since they 3350 * could be accessed by any thread.
3351 */ 3352 ifa->ifa_containers = 3353 kmalloc(ncpus * sizeof(struct ifaddr_container), 3354 M_IFADDR, 3355 M_INTWAIT | M_ZERO | M_CACHEALIGN); 3356 3357 ifa->ifa_ncnt = ncpus; 3358 for (i = 0; i < ncpus; ++i) { 3359 struct ifaddr_container *ifac = &ifa->ifa_containers[i]; 3360 3361 ifac->ifa_magic = IFA_CONTAINER_MAGIC; 3362 ifac->ifa = ifa; 3363 ifac->ifa_refcnt = 1; 3364 } 3365 #ifdef IFADDR_DEBUG 3366 kprintf("alloc ifa %p %d\n", ifa, size); 3367 #endif 3368 return ifa; 3369 } 3370 3371 void 3372 ifac_free(struct ifaddr_container *ifac, int cpu_id) 3373 { 3374 struct ifaddr *ifa = ifac->ifa; 3375 3376 KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC); 3377 KKASSERT(ifac->ifa_refcnt == 0); 3378 KASSERT(ifac->ifa_listmask == 0, 3379 ("ifa is still on %#x lists", ifac->ifa_listmask)); 3380 3381 ifac->ifa_magic = IFA_CONTAINER_DEAD; 3382 3383 #ifdef IFADDR_DEBUG_VERBOSE 3384 kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id); 3385 #endif 3386 3387 KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus, 3388 ("invalid # of ifac, %d", ifa->ifa_ncnt)); 3389 if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) { 3390 #ifdef IFADDR_DEBUG 3391 kprintf("free ifa %p\n", ifa); 3392 #endif 3393 kfree(ifa->ifa_containers, M_IFADDR); 3394 kfree(ifa, M_IFADDR); 3395 } 3396 } 3397 3398 static void 3399 ifa_iflink_dispatch(netmsg_t nmsg) 3400 { 3401 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3402 struct ifaddr *ifa = msg->ifa; 3403 struct ifnet *ifp = msg->ifp; 3404 int cpu = mycpuid; 3405 struct ifaddr_container *ifac; 3406 3407 crit_enter(); 3408 3409 ifac = &ifa->ifa_containers[cpu]; 3410 ASSERT_IFAC_VALID(ifac); 3411 KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0, 3412 ("ifaddr is on if_addrheads")); 3413 3414 ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD; 3415 if (msg->tail) 3416 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link); 3417 else 3418 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link); 3419 3420 crit_exit(); 3421 3422 netisr_forwardmsg_all(&nmsg->base, cpu + 1); 3423 } 3424 3425 void 3426 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail) 3427 { 3428 struct netmsg_ifaddr msg; 3429 3430 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3431 0, ifa_iflink_dispatch); 3432 msg.ifa = ifa; 3433 msg.ifp = ifp; 3434 msg.tail = tail; 3435 3436 netisr_domsg(&msg.base, 0); 3437 } 3438 3439 static void 3440 ifa_ifunlink_dispatch(netmsg_t nmsg) 3441 { 3442 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3443 struct ifaddr *ifa = msg->ifa; 3444 struct ifnet *ifp = msg->ifp; 3445 int cpu = mycpuid; 3446 struct ifaddr_container *ifac; 3447 3448 crit_enter(); 3449 3450 ifac = &ifa->ifa_containers[cpu]; 3451 ASSERT_IFAC_VALID(ifac); 3452 KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD, 3453 ("ifaddr is not on if_addrhead")); 3454 3455 TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link); 3456 ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD; 3457 3458 crit_exit(); 3459 3460 netisr_forwardmsg_all(&nmsg->base, cpu + 1); 3461 } 3462 3463 void 3464 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp) 3465 { 3466 struct netmsg_ifaddr msg; 3467 3468 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3469 0, ifa_ifunlink_dispatch); 3470 msg.ifa = ifa; 3471 msg.ifp = ifp; 3472 3473 netisr_domsg(&msg.base, 0); 3474 } 3475 3476 static void 3477 ifa_destroy_dispatch(netmsg_t nmsg) 3478 { 3479 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3480 3481 IFAFREE(msg->ifa); 3482 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1); 3483 } 3484 3485 void 3486 
ifa_destroy(struct ifaddr *ifa) 3487 { 3488 struct netmsg_ifaddr msg; 3489 3490 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3491 0, ifa_destroy_dispatch); 3492 msg.ifa = ifa; 3493 3494 netisr_domsg(&msg.base, 0); 3495 } 3496 3497 static void 3498 if_start_rollup(void) 3499 { 3500 struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid]; 3501 struct ifsubq_stage *stage; 3502 3503 crit_enter(); 3504 3505 while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) { 3506 struct ifaltq_subque *ifsq = stage->stg_subq; 3507 int is_sched = 0; 3508 3509 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED) 3510 is_sched = 1; 3511 ifsq_stage_remove(head, stage); 3512 3513 if (is_sched) { 3514 ifsq_ifstart_schedule(ifsq, 1); 3515 } else { 3516 int start = 0; 3517 3518 ALTQ_SQ_LOCK(ifsq); 3519 if (!ifsq_is_started(ifsq)) { 3520 /* 3521 * Hold the subqueue interlock of 3522 * ifnet.if_start 3523 */ 3524 ifsq_set_started(ifsq); 3525 start = 1; 3526 } 3527 ALTQ_SQ_UNLOCK(ifsq); 3528 3529 if (start) 3530 ifsq_ifstart_try(ifsq, 1); 3531 } 3532 KKASSERT((stage->stg_flags & 3533 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 3534 } 3535 3536 crit_exit(); 3537 } 3538 3539 static void 3540 ifnetinit(void *dummy __unused) 3541 { 3542 int i; 3543 3544 /* XXX netisr_ncpus */ 3545 for (i = 0; i < ncpus; ++i) 3546 TAILQ_INIT(&ifsubq_stage_heads[i].stg_head); 3547 netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART); 3548 } 3549 3550 void 3551 if_register_com_alloc(u_char type, 3552 if_com_alloc_t *a, if_com_free_t *f) 3553 { 3554 3555 KASSERT(if_com_alloc[type] == NULL, 3556 ("if_register_com_alloc: %d already registered", type)); 3557 KASSERT(if_com_free[type] == NULL, 3558 ("if_register_com_alloc: %d free already registered", type)); 3559 3560 if_com_alloc[type] = a; 3561 if_com_free[type] = f; 3562 } 3563 3564 void 3565 if_deregister_com_alloc(u_char type) 3566 { 3567 3568 KASSERT(if_com_alloc[type] != NULL, 3569 ("if_deregister_com_alloc: %d not registered", type)); 3570 KASSERT(if_com_free[type] != NULL, 3571 ("if_deregister_com_alloc: %d free not registered", type)); 3572 if_com_alloc[type] = NULL; 3573 if_com_free[type] = NULL; 3574 } 3575 3576 void 3577 ifq_set_maxlen(struct ifaltq *ifq, int len) 3578 { 3579 ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax); 3580 } 3581 3582 int 3583 ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused) 3584 { 3585 return ALTQ_SUBQ_INDEX_DEFAULT; 3586 } 3587 3588 int 3589 ifq_mapsubq_modulo(struct ifaltq *ifq, int cpuid) 3590 { 3591 3592 return (cpuid % ifq->altq_subq_mappriv); 3593 } 3594 3595 /* 3596 * Watchdog timeout. Process callback as appropriate. If we cannot 3597 * serialize the ifnet just try again on the next timeout. 3598 * 3599 * NOTE: The ifnet can adjust wd_timer while holding the serializer. We 3600 * can only safely adjust it under the same circumstances. 3601 */ 3602 static void 3603 ifsq_watchdog(void *arg) 3604 { 3605 struct ifsubq_watchdog *wd = arg; 3606 struct ifnet *ifp; 3607 int count; 3608 3609 /* 3610 * Fast track. Try to avoid acquiring the serializer when not 3611 * near the terminal count, unless asked to. If the atomic op 3612 * to decrement the count fails just retry on the next callout. 
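 *
 * A hedged sketch of how a driver typically wires this watchdog up.  The
 * softc layout, the function names and the 5-tick count are assumptions made
 * up for this example; ifq_get_subq_default() is assumed to be the usual
 * subqueue accessor.
 */
#if 0
static void
example_watchdog(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);

	if_printf(ifp, "transmit timeout -- resetting\n");
	/* reset the hardware and restart transmission here */
}

static void
example_attach(struct example_softc *sc)
{
	ifsq_watchdog_init(&sc->wd, ifq_get_subq_default(&sc->ifp->if_snd),
	    example_watchdog, 0);
	ifsq_watchdog_start(&sc->wd);
	/* later, whenever a packet is handed to the hardware: */
	ifsq_watchdog_set_count(&sc->wd, 5);
}
#endif
/*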
3613 */ 3614 count = wd->wd_timer; 3615 cpu_ccfence(); 3616 if (count == 0) 3617 goto done; 3618 if (count > 2 && (wd->wd_flags & IF_WDOG_ALLTICKS) == 0) { 3619 (void)atomic_cmpset_int(&wd->wd_timer, count, count - 1); 3620 goto done; 3621 } 3622 3623 /* 3624 * Obtain the serializer and then re-test all wd_timer conditions 3625 * as it may have changed. NICs do not mess with wd_timer without 3626 * holding the serializer. 3627 * 3628 * If we are unable to obtain the serializer just retry the same 3629 * count on the next callout. 3630 * 3631 * - call watchdog in terminal count (0) 3632 * - call watchdog on last tick (1) if requested 3633 * - call watchdog on all ticks if requested 3634 */ 3635 ifp = ifsq_get_ifp(wd->wd_subq); 3636 if (ifnet_tryserialize_all(ifp) == 0) 3637 goto done; 3638 if (atomic_cmpset_int(&wd->wd_timer, count, count - 1)) { 3639 --count; 3640 if (count == 0 || 3641 (wd->wd_flags & IF_WDOG_ALLTICKS) || 3642 ((wd->wd_flags & IF_WDOG_LASTTICK) && count == 1)) { 3643 wd->wd_watchdog(wd->wd_subq); 3644 } 3645 } 3646 ifnet_deserialize_all(ifp); 3647 done: 3648 ifsq_watchdog_reset(wd); 3649 } 3650 3651 static void 3652 ifsq_watchdog_reset(struct ifsubq_watchdog *wd) 3653 { 3654 callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd, 3655 ifsq_get_cpuid(wd->wd_subq)); 3656 } 3657 3658 void 3659 ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq, 3660 ifsq_watchdog_t watchdog, int flags) 3661 { 3662 callout_init_mp(&wd->wd_callout); 3663 wd->wd_timer = 0; 3664 wd->wd_flags = flags; 3665 wd->wd_subq = ifsq; 3666 wd->wd_watchdog = watchdog; 3667 } 3668 3669 void 3670 ifsq_watchdog_start(struct ifsubq_watchdog *wd) 3671 { 3672 atomic_swap_int(&wd->wd_timer, 0); 3673 ifsq_watchdog_reset(wd); 3674 } 3675 3676 void 3677 ifsq_watchdog_stop(struct ifsubq_watchdog *wd) 3678 { 3679 atomic_swap_int(&wd->wd_timer, 0); 3680 callout_stop(&wd->wd_callout); 3681 } 3682 3683 void 3684 ifsq_watchdog_set_count(struct ifsubq_watchdog *wd, int count) 3685 { 3686 atomic_swap_int(&wd->wd_timer, count); 3687 } 3688 3689 void 3690 ifnet_lock(void) 3691 { 3692 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3693 ("try holding ifnet lock in netisr")); 3694 mtx_lock(&ifnet_mtx); 3695 } 3696 3697 void 3698 ifnet_unlock(void) 3699 { 3700 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3701 ("try holding ifnet lock in netisr")); 3702 mtx_unlock(&ifnet_mtx); 3703 } 3704 3705 static struct ifnet_array * 3706 ifnet_array_alloc(int count) 3707 { 3708 struct ifnet_array *arr; 3709 3710 arr = kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]), 3711 M_IFNET, M_WAITOK); 3712 arr->ifnet_count = count; 3713 3714 return arr; 3715 } 3716 3717 static void 3718 ifnet_array_free(struct ifnet_array *arr) 3719 { 3720 if (arr == &ifnet_array0) 3721 return; 3722 kfree(arr, M_IFNET); 3723 } 3724 3725 static struct ifnet_array * 3726 ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr) 3727 { 3728 struct ifnet_array *arr; 3729 int count, i; 3730 3731 KASSERT(old_arr->ifnet_count >= 0, 3732 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3733 count = old_arr->ifnet_count + 1; 3734 arr = ifnet_array_alloc(count); 3735 3736 /* 3737 * Save the old ifnet array and append this ifp to the end of 3738 * the new ifnet array. 
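 *
 * Readers in netisr context pick the array up lock-free; a hedged sketch of
 * the reader side ("handle_ifnet" is a hypothetical per-interface action,
 * not a function from this file):
 */
#if 0
	const struct ifnet_array *arr;
	int i;

	arr = ifnet_array_get();	/* stable snapshot; netisr context only */
	for (i = 0; i < arr->ifnet_count; ++i)
		handle_ifnet(arr->ifnet_arr[i]);
#endif
/*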
3739 */ 3740 for (i = 0; i < old_arr->ifnet_count; ++i) { 3741 KASSERT(old_arr->ifnet_arr[i] != ifp, 3742 ("%s is already in ifnet array", ifp->if_xname)); 3743 arr->ifnet_arr[i] = old_arr->ifnet_arr[i]; 3744 } 3745 KASSERT(i == count - 1, 3746 ("add %s, ifnet array index mismatch, should be %d, but got %d", 3747 ifp->if_xname, count - 1, i)); 3748 arr->ifnet_arr[i] = ifp; 3749 3750 return arr; 3751 } 3752 3753 static struct ifnet_array * 3754 ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr) 3755 { 3756 struct ifnet_array *arr; 3757 int count, i, idx, found = 0; 3758 3759 KASSERT(old_arr->ifnet_count > 0, 3760 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3761 count = old_arr->ifnet_count - 1; 3762 arr = ifnet_array_alloc(count); 3763 3764 /* 3765 * Save the old ifnet array, but skip this ifp. 3766 */ 3767 idx = 0; 3768 for (i = 0; i < old_arr->ifnet_count; ++i) { 3769 if (old_arr->ifnet_arr[i] == ifp) { 3770 KASSERT(!found, 3771 ("dup %s is in ifnet array", ifp->if_xname)); 3772 found = 1; 3773 continue; 3774 } 3775 KASSERT(idx < count, 3776 ("invalid ifnet array index %d, count %d", idx, count)); 3777 arr->ifnet_arr[idx] = old_arr->ifnet_arr[i]; 3778 ++idx; 3779 } 3780 KASSERT(found, ("%s is not in ifnet array", ifp->if_xname)); 3781 KASSERT(idx == count, 3782 ("del %s, ifnet array count mismatch, should be %d, but got %d ", 3783 ifp->if_xname, count, idx)); 3784 3785 return arr; 3786 } 3787 3788 const struct ifnet_array * 3789 ifnet_array_get(void) 3790 { 3791 const struct ifnet_array *ret; 3792 3793 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3794 ret = ifnet_array; 3795 /* Make sure 'ret' is really used. */ 3796 cpu_ccfence(); 3797 return (ret); 3798 } 3799 3800 int 3801 ifnet_array_isempty(void) 3802 { 3803 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3804 if (ifnet_array->ifnet_count == 0) 3805 return 1; 3806 else 3807 return 0; 3808 } 3809 3810 void 3811 ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp) 3812 { 3813 struct ifaddr *ifa; 3814 3815 memset(mark, 0, sizeof(*mark)); 3816 ifa = &mark->ifa; 3817 3818 mark->ifac.ifa = ifa; 3819 3820 ifa->ifa_addr = &mark->addr; 3821 ifa->ifa_dstaddr = &mark->dstaddr; 3822 ifa->ifa_netmask = &mark->netmask; 3823 ifa->ifa_ifp = ifp; 3824 } 3825 3826 static int 3827 if_ringcnt_fixup(int ring_cnt, int ring_cntmax) 3828 { 3829 3830 KASSERT(ring_cntmax > 0, ("invalid ring count max %d", ring_cntmax)); 3831 3832 if (ring_cnt <= 0 || ring_cnt > ring_cntmax) 3833 ring_cnt = ring_cntmax; 3834 if (ring_cnt > netisr_ncpus) 3835 ring_cnt = netisr_ncpus; 3836 return (ring_cnt); 3837 } 3838 3839 static void 3840 if_ringmap_set_grid(device_t dev, struct if_ringmap *rm, int grid) 3841 { 3842 int i, offset; 3843 3844 KASSERT(grid > 0, ("invalid if_ringmap grid %d", grid)); 3845 KASSERT(grid >= rm->rm_cnt, ("invalid if_ringmap grid %d, count %d", 3846 grid, rm->rm_cnt)); 3847 rm->rm_grid = grid; 3848 3849 offset = (rm->rm_grid * device_get_unit(dev)) % netisr_ncpus; 3850 for (i = 0; i < rm->rm_cnt; ++i) { 3851 rm->rm_cpumap[i] = offset + i; 3852 KASSERT(rm->rm_cpumap[i] < netisr_ncpus, 3853 ("invalid cpumap[%d] = %d, offset %d", i, 3854 rm->rm_cpumap[i], offset)); 3855 } 3856 } 3857 3858 static struct if_ringmap * 3859 if_ringmap_alloc_flags(device_t dev, int ring_cnt, int ring_cntmax, 3860 uint32_t flags) 3861 { 3862 struct if_ringmap *rm; 3863 int i, grid = 0, prev_grid; 3864 3865 ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax); 3866 rm = kmalloc(__offsetof(struct 
if_ringmap, rm_cpumap[ring_cnt]), 3867 M_DEVBUF, M_WAITOK | M_ZERO); 3868 3869 rm->rm_cnt = ring_cnt; 3870 if (flags & RINGMAP_FLAG_POWEROF2) 3871 rm->rm_cnt = 1 << (fls(rm->rm_cnt) - 1); 3872 3873 prev_grid = netisr_ncpus; 3874 for (i = 0; i < netisr_ncpus; ++i) { 3875 if (netisr_ncpus % (i + 1) != 0) 3876 continue; 3877 3878 grid = netisr_ncpus / (i + 1); 3879 if (rm->rm_cnt > grid) { 3880 grid = prev_grid; 3881 break; 3882 } 3883 3884 if (rm->rm_cnt > netisr_ncpus / (i + 2)) 3885 break; 3886 prev_grid = grid; 3887 } 3888 if_ringmap_set_grid(dev, rm, grid); 3889 3890 return (rm); 3891 } 3892 3893 struct if_ringmap * 3894 if_ringmap_alloc(device_t dev, int ring_cnt, int ring_cntmax) 3895 { 3896 3897 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3898 RINGMAP_FLAG_NONE)); 3899 } 3900 3901 struct if_ringmap * 3902 if_ringmap_alloc2(device_t dev, int ring_cnt, int ring_cntmax) 3903 { 3904 3905 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3906 RINGMAP_FLAG_POWEROF2)); 3907 } 3908 3909 void 3910 if_ringmap_free(struct if_ringmap *rm) 3911 { 3912 3913 kfree(rm, M_DEVBUF); 3914 } 3915 3916 /* 3917 * Align the two ringmaps. 3918 * 3919 * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 3920 * 3921 * Before: 3922 * 3923 * CPU 0 1 2 3 4 5 6 7 3924 * NIC_RX n0 n1 n2 n3 3925 * NIC_TX N0 N1 3926 * 3927 * After: 3928 * 3929 * CPU 0 1 2 3 4 5 6 7 3930 * NIC_RX n0 n1 n2 n3 3931 * NIC_TX N0 N1 3932 */ 3933 void 3934 if_ringmap_align(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3935 { 3936 3937 if (rm0->rm_grid > rm1->rm_grid) 3938 if_ringmap_set_grid(dev, rm1, rm0->rm_grid); 3939 else if (rm0->rm_grid < rm1->rm_grid) 3940 if_ringmap_set_grid(dev, rm0, rm1->rm_grid); 3941 } 3942 3943 void 3944 if_ringmap_match(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3945 { 3946 int subset_grid, cnt, divisor, mod, offset, i; 3947 struct if_ringmap *subset_rm, *rm; 3948 int old_rm0_grid, old_rm1_grid; 3949 3950 if (rm0->rm_grid == rm1->rm_grid) 3951 return; 3952 3953 /* Save grid for later use */ 3954 old_rm0_grid = rm0->rm_grid; 3955 old_rm1_grid = rm1->rm_grid; 3956 3957 if_ringmap_align(dev, rm0, rm1); 3958 3959 /* 3960 * Re-shuffle rings to get more even distribution. 3961 * 3962 * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 3963 * 3964 * CPU 0 1 2 3 4 5 6 7 8 9 10 11 3965 * 3966 * NIC_RX a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3 3967 * NIC_TX A0 A1 B0 B1 C0 C1 3968 * 3969 * NIC_RX d0 d1 d2 d3 e0 e1 e2 e3 f0 f1 f2 f3 3970 * NIC_TX D0 D1 E0 E1 F0 F1 3971 */ 3972 3973 if (rm0->rm_cnt >= (2 * old_rm1_grid)) { 3974 cnt = rm0->rm_cnt; 3975 subset_grid = old_rm1_grid; 3976 subset_rm = rm1; 3977 rm = rm0; 3978 } else if (rm1->rm_cnt > (2 * old_rm0_grid)) { 3979 cnt = rm1->rm_cnt; 3980 subset_grid = old_rm0_grid; 3981 subset_rm = rm0; 3982 rm = rm1; 3983 } else { 3984 /* No space to shuffle. 
*/ 3985 return; 3986 } 3987 3988 mod = cnt / subset_grid; 3989 KKASSERT(mod >= 2); 3990 divisor = netisr_ncpus / rm->rm_grid; 3991 offset = ((device_get_unit(dev) / divisor) % mod) * subset_grid; 3992 3993 for (i = 0; i < subset_rm->rm_cnt; ++i) { 3994 subset_rm->rm_cpumap[i] += offset; 3995 KASSERT(subset_rm->rm_cpumap[i] < netisr_ncpus, 3996 ("match: invalid cpumap[%d] = %d, offset %d", 3997 i, subset_rm->rm_cpumap[i], offset)); 3998 } 3999 #ifdef INVARIANTS 4000 for (i = 0; i < subset_rm->rm_cnt; ++i) { 4001 int j; 4002 4003 for (j = 0; j < rm->rm_cnt; ++j) { 4004 if (rm->rm_cpumap[j] == subset_rm->rm_cpumap[i]) 4005 break; 4006 } 4007 KASSERT(j < rm->rm_cnt, 4008 ("subset cpumap[%d] = %d not found in superset", 4009 i, subset_rm->rm_cpumap[i])); 4010 } 4011 #endif 4012 } 4013 4014 int 4015 if_ringmap_count(const struct if_ringmap *rm) 4016 { 4017 4018 return (rm->rm_cnt); 4019 } 4020 4021 int 4022 if_ringmap_cpumap(const struct if_ringmap *rm, int ring) 4023 { 4024 4025 KASSERT(ring >= 0 && ring < rm->rm_cnt, ("invalid ring %d", ring)); 4026 return (rm->rm_cpumap[ring]); 4027 } 4028 4029 void 4030 if_ringmap_rdrtable(const struct if_ringmap *rm, int table[], int table_nent) 4031 { 4032 int i, grid_idx, grid_cnt, patch_off, patch_cnt, ncopy; 4033 4034 KASSERT(table_nent > 0 && (table_nent & NETISR_CPUMASK) == 0, 4035 ("invalid redirect table entries %d", table_nent)); 4036 4037 grid_idx = 0; 4038 for (i = 0; i < NETISR_CPUMAX; ++i) { 4039 table[i] = grid_idx++ % rm->rm_cnt; 4040 4041 if (grid_idx == rm->rm_grid) 4042 grid_idx = 0; 4043 } 4044 4045 /* 4046 * Make the ring distributed more evenly for the remainder 4047 * of each grid. 4048 * 4049 * e.g. 12 netisrs, rm contains 8 rings. 4050 * 4051 * Redirect table before: 4052 * 4053 * 0 1 2 3 4 5 6 7 0 1 2 3 0 1 2 3 4054 * 4 5 6 7 0 1 2 3 0 1 2 3 4 5 6 7 4055 * 0 1 2 3 0 1 2 3 4 5 6 7 0 1 2 3 4056 * .... 4057 * 4058 * Redirect table after being patched (pX, patched entries): 4059 * 4060 * 0 1 2 3 4 5 6 7 p0 p1 p2 p3 0 1 2 3 4061 * 4 5 6 7 p4 p5 p6 p7 0 1 2 3 4 5 6 7 4062 * p0 p1 p2 p3 0 1 2 3 4 5 6 7 p4 p5 p6 p7 4063 * .... 
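 *
 * A hedged sketch of a driver consuming this API: the ring counts, the
 * 128-entry table, the kprintf() reporting and the device_t "dev" (assumed
 * to come from the driver's attach routine) are assumptions for illustration;
 * the table size must be a positive multiple of NETISR_CPUMAX.
 */
#if 0
	struct if_ringmap *rx_rmap;
	int table[128];
	int i, nrx;

	rx_rmap = if_ringmap_alloc(dev, 4 /* wanted */, 8 /* hw maximum */);
	nrx = if_ringmap_count(rx_rmap);
	for (i = 0; i < nrx; ++i)
		kprintf("rx ring %d -> cpu%d\n", i, if_ringmap_cpumap(rx_rmap, i));
	if_ringmap_rdrtable(rx_rmap, table, 128);
	/* program "table" into the NIC's RSS redirect table here */
	if_ringmap_free(rx_rmap);
#endif
/*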
4064 */ 4065 patch_cnt = rm->rm_grid % rm->rm_cnt; 4066 if (patch_cnt == 0) 4067 goto done; 4068 patch_off = rm->rm_grid - (rm->rm_grid % rm->rm_cnt); 4069 4070 grid_cnt = roundup(NETISR_CPUMAX, rm->rm_grid) / rm->rm_grid; 4071 grid_idx = 0; 4072 for (i = 0; i < grid_cnt; ++i) { 4073 int j; 4074 4075 for (j = 0; j < patch_cnt; ++j) { 4076 int fix_idx; 4077 4078 fix_idx = (i * rm->rm_grid) + patch_off + j; 4079 if (fix_idx >= NETISR_CPUMAX) 4080 goto done; 4081 table[fix_idx] = grid_idx++ % rm->rm_cnt; 4082 } 4083 } 4084 done: 4085 /* 4086 * If the device supports larger redirect table, duplicate 4087 * the first NETISR_CPUMAX entries to the rest of the table, 4088 * so that it matches upper layer's expectation: 4089 * (hash & NETISR_CPUMASK) % netisr_ncpus 4090 */ 4091 ncopy = table_nent / NETISR_CPUMAX; 4092 for (i = 1; i < ncopy; ++i) { 4093 memcpy(&table[i * NETISR_CPUMAX], table, 4094 NETISR_CPUMAX * sizeof(table[0])); 4095 } 4096 if (if_ringmap_dumprdr) { 4097 for (i = 0; i < table_nent; ++i) { 4098 if (i != 0 && i % 16 == 0) 4099 kprintf("\n"); 4100 kprintf("%03d ", table[i]); 4101 } 4102 kprintf("\n"); 4103 } 4104 } 4105 4106 int 4107 if_ringmap_cpumap_sysctl(SYSCTL_HANDLER_ARGS) 4108 { 4109 struct if_ringmap *rm = arg1; 4110 int i, error = 0; 4111 4112 for (i = 0; i < rm->rm_cnt; ++i) { 4113 int cpu = rm->rm_cpumap[i]; 4114 4115 error = SYSCTL_OUT(req, &cpu, sizeof(cpu)); 4116 if (error) 4117 break; 4118 } 4119 return (error); 4120 } 4121
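
/*
 * Illustrative only: a hedged sketch of exporting a ringmap's CPU binding
 * through if_ringmap_cpumap_sysctl() above.  The sysctl context/tree fields
 * and the node name are assumptions made up for this example, not part of
 * this file.
 */
#if 0
	SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree),
	    OID_AUTO, "rx_ringmap", CTLTYPE_OPAQUE | CTLFLAG_RD,
	    sc->rx_rmap, 0, if_ringmap_cpumap_sysctl, "I", "RX ring CPU map");
#endif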