1 /* $OpenBSD: if_aggr.c,v 1.47 2024/12/18 01:56:05 dlg Exp $ */ 2 3 /* 4 * Copyright (c) 2019 The University of Queensland 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 /* 20 * This driver implements 802.1AX Link Aggregation (formerly 802.3ad) 21 * 22 * The specification describes systems with multiple ports that that 23 * can dynamically form aggregations. The relationships between ports 24 * and aggregations is such that arbitrary ports connected to ports 25 * on other systems may move between aggregations, and there can be 26 * as many aggregations as ports. An aggregation in this model is 27 * effectively an interface, and becomes the point that Ethernet traffic 28 * enters and leaves the system. The spec also contains a description 29 * of the Link Aggregation Control Protocol (LACP) for use on the wire, 30 * and how to process it and select ports and aggregations based on 31 * it. 32 * 33 * This driver implements a simplified or constrained model where each 34 * aggr(4) interface is effectively an independent system, and will 35 * only support one aggregation. 
This supports the use of the kernel 36 * interface as a static entity that is created and configured once, 37 * and has the link "come up" when that one aggregation is selected 38 * by the LACP protocol. 39 */ 40 41 /* 42 * This code was written by David Gwynne <dlg@uq.edu.au> as part 43 * of the Information Technology Infrastructure Group (ITIG) in the 44 * Faculty of Engineering, Architecture and Information Technology 45 * (EAIT). 46 */ 47 48 /* 49 * TODO: 50 * 51 * - add locking 52 * - figure out the Ready_N and Ready logic 53 */ 54 55 #include "bpfilter.h" 56 #include "kstat.h" 57 58 #include <sys/param.h> 59 #include <sys/kernel.h> 60 #include <sys/malloc.h> 61 #include <sys/mbuf.h> 62 #include <sys/queue.h> 63 #include <sys/socket.h> 64 #include <sys/sockio.h> 65 #include <sys/systm.h> 66 #include <sys/syslog.h> 67 #include <sys/rwlock.h> 68 #include <sys/percpu.h> 69 #include <sys/smr.h> 70 #include <sys/task.h> 71 #include <sys/kstat.h> 72 73 #include <net/if.h> 74 #include <net/if_dl.h> 75 #include <net/if_types.h> 76 77 #include <net/if_media.h> 78 79 #include <netinet/in.h> 80 #include <netinet/if_ether.h> 81 82 #include <crypto/siphash.h> /* if_trunk.h uses siphash bits */ 83 #include <net/if_trunk.h> 84 85 #if NBPFILTER > 0 86 #include <net/bpf.h> 87 #endif 88 89 /* 90 * Link Aggregation Control Protocol (LACP) 91 */ 92 93 struct ether_slowproto_hdr { 94 uint8_t sph_subtype; 95 uint8_t sph_version; 96 } __packed; 97 98 #define SLOWPROTOCOLS_SUBTYPE_LACP 1 99 #define SLOWPROTOCOLS_SUBTYPE_LACP_MARKER \ 100 2 101 102 #define LACP_VERSION 1 103 104 #define LACP_FAST_PERIODIC_TIME 1 105 #define LACP_SLOW_PERIODIC_TIME 30 106 #define LACP_TIMEOUT_FACTOR 3 107 #define LACP_AGGREGATION_WAIT_TIME 2 108 109 #define LACP_TX_MACHINE_RATE 3 /* per LACP_FAST_PERIODIC_TIME */ 110 111 #define LACP_ADDR_C_BRIDGE { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 } 112 #define LACP_ADDR_SLOW { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 } 113 #define LACP_ADDR_SLOW_E64 0x0180c2000002ULL 114 
#define LACP_ADDR_NON_TPMR_BRIDGE { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03 } 115 116 struct lacp_tlv_hdr { 117 uint8_t lacp_tlv_type; 118 uint8_t lacp_tlv_length; 119 } __packed __aligned(2); 120 121 /* LACP TLV types */ 122 123 #define LACP_T_TERMINATOR 0x00 124 #define LACP_T_ACTOR 0x01 125 #define LACP_T_PARTNER 0x02 126 #define LACP_T_COLLECTOR 0x03 127 128 /* LACPv2 TLV types */ 129 130 #define LACP_T_PORT_ALGORITHM 0x04 131 #define LACP_T_PORT_CONVERSATION_ID_DIGEST \ 132 0x05 133 #define LACP_T_PORT_CONVERSATION_MASK 0x06 134 #define LACP_T_PORT_CONVERSATION_SERVICE_MAPPING \ 135 0x0a 136 137 struct lacp_sysid { 138 uint16_t lacp_sysid_priority; 139 uint8_t lacp_sysid_mac[ETHER_ADDR_LEN]; 140 } __packed __aligned(2); 141 142 struct lacp_portid { 143 uint16_t lacp_portid_priority; 144 uint16_t lacp_portid_number; 145 } __packed __aligned(2); 146 147 struct lacp_port_info { 148 struct lacp_sysid lacp_sysid; 149 uint16_t lacp_key; 150 struct lacp_portid lacp_portid; 151 uint8_t lacp_state; 152 uint8_t lacp_reserved[3]; 153 } __packed __aligned(2); 154 155 #define LACP_STATE_ACTIVITY (1 << 0) 156 #define LACP_STATE_TIMEOUT (1 << 1) 157 #define LACP_STATE_AGGREGATION (1 << 2) 158 #define LACP_STATE_SYNC (1 << 3) 159 #define LACP_STATE_COLLECTING (1 << 4) 160 #define LACP_STATE_DISTRIBUTING (1 << 5) 161 #define LACP_STATE_DEFAULTED (1 << 6) 162 #define LACP_STATE_EXPIRED (1 << 7) 163 164 struct lacp_collector_info { 165 uint16_t lacp_maxdelay; 166 uint8_t lacp_reserved[12]; 167 } __packed __aligned(2); 168 169 struct lacp_du { 170 struct ether_slowproto_hdr 171 lacp_du_sph; 172 struct lacp_tlv_hdr lacp_actor_info_tlv; 173 struct lacp_port_info lacp_actor_info; 174 struct lacp_tlv_hdr lacp_partner_info_tlv; 175 struct lacp_port_info lacp_partner_info; 176 struct lacp_tlv_hdr lacp_collector_info_tlv; 177 struct lacp_collector_info 178 lacp_collector_info; 179 /* other TLVs go here */ 180 struct lacp_tlv_hdr lacp_terminator; 181 uint8_t lacp_pad[50]; 182 } __packed 
__aligned(2); 183 184 /* Marker TLV types */ 185 186 #define MARKER_T_INFORMATION 0x01 187 #define MARKER_T_RESPONSE 0x02 188 189 struct marker_info { 190 uint16_t marker_requester_port; 191 uint8_t marker_requester_system[ETHER_ADDR_LEN]; 192 uint8_t marker_requester_txid[4]; 193 uint8_t marker_pad[2]; 194 } __packed __aligned(2); 195 196 struct marker_pdu { 197 struct ether_slowproto_hdr 198 marker_sph; 199 200 struct lacp_tlv_hdr marker_info_tlv; 201 struct marker_info marker_info; 202 struct lacp_tlv_hdr marker_terminator; 203 uint8_t marker_pad[90]; 204 } __packed __aligned(2); 205 206 enum lacp_rxm_state { 207 LACP_RXM_S_BEGIN = 0, 208 LACP_RXM_S_INITIALIZE, 209 LACP_RXM_S_PORT_DISABLED, 210 LACP_RXM_S_EXPIRED, 211 LACP_RXM_S_LACP_DISABLED, 212 LACP_RXM_S_DEFAULTED, 213 LACP_RXM_S_CURRENT, 214 }; 215 216 enum lacp_rxm_event { 217 LACP_RXM_E_BEGIN, 218 LACP_RXM_E_UCT, 219 LACP_RXM_E_PORT_MOVED, 220 LACP_RXM_E_NOT_PORT_MOVED, 221 LACP_RXM_E_PORT_ENABLED, 222 LACP_RXM_E_NOT_PORT_ENABLED, 223 LACP_RXM_E_LACP_ENABLED, 224 LACP_RXM_E_NOT_LACP_ENABLED, 225 LACP_RXM_E_LACPDU, /* CtrlMuxN:M_UNITDATA.indication(LACPDU) */ 226 LACP_RXM_E_TIMER_EXPIRED, /* current_while_timer expired */ 227 }; 228 229 enum lacp_mux_state { 230 LACP_MUX_S_BEGIN = 0, 231 LACP_MUX_S_DETACHED, 232 LACP_MUX_S_WAITING, 233 LACP_MUX_S_ATTACHED, 234 LACP_MUX_S_DISTRIBUTING, 235 LACP_MUX_S_COLLECTING, 236 }; 237 238 enum lacp_mux_event { 239 LACP_MUX_E_BEGIN, 240 LACP_MUX_E_SELECTED, 241 LACP_MUX_E_STANDBY, 242 LACP_MUX_E_UNSELECTED, 243 LACP_MUX_E_READY, 244 LACP_MUX_E_SYNC, 245 LACP_MUX_E_NOT_SYNC, 246 LACP_MUX_E_COLLECTING, 247 LACP_MUX_E_NOT_COLLECTING, 248 }; 249 250 /* 251 * LACP variables 252 */ 253 254 static const uint8_t lacp_address_slow[ETHER_ADDR_LEN] = LACP_ADDR_SLOW; 255 256 static const char *lacp_rxm_state_names[] = { 257 "BEGIN", 258 "INITIALIZE", 259 "PORT_DISABLED", 260 "EXPIRED", 261 "LACP_DISABLED", 262 "DEFAULTED", 263 "CURRENT", 264 }; 265 266 static const char 
*lacp_rxm_event_names[] = { 267 "BEGIN", 268 "UCT", 269 "port_moved", 270 "!port_moved", 271 "port_enabled", 272 "!port_enabled", 273 "LACP_Enabled", 274 "!LACP_Enabled", 275 "LACPDU", 276 "current_while_timer expired", 277 }; 278 279 static const char *lacp_mux_state_names[] = { 280 "BEGIN", 281 "DETACHED", 282 "WAITING", 283 "ATTACHED", 284 "DISTRIBUTING", 285 "COLLECTING", 286 }; 287 288 static const char *lacp_mux_event_names[] = { 289 "BEGIN", 290 "Selected == SELECTED", 291 "Selected == STANDBY", 292 "Selected == UNSELECTED", 293 "Ready", 294 "Partner.Sync", 295 "! Partner.Sync", 296 "Partner.Collecting", 297 "! Partner.Collecting", 298 }; 299 300 /* 301 * aggr interface 302 */ 303 304 #define AGGR_PORT_BITS 5 305 #define AGGR_FLOWID_SHIFT (16 - AGGR_PORT_BITS) 306 307 #define AGGR_MAX_PORTS (1 << AGGR_PORT_BITS) 308 #define AGGR_MAX_SLOW_PKTS 3 309 310 struct aggr_multiaddr { 311 TAILQ_ENTRY(aggr_multiaddr) 312 m_entry; 313 unsigned int m_refs; 314 uint8_t m_addrlo[ETHER_ADDR_LEN]; 315 uint8_t m_addrhi[ETHER_ADDR_LEN]; 316 struct sockaddr_storage m_addr; 317 }; 318 TAILQ_HEAD(aggr_multiaddrs, aggr_multiaddr); 319 320 struct aggr_softc; 321 322 enum aggr_port_selected { 323 AGGR_PORT_UNSELECTED, 324 AGGR_PORT_SELECTED, 325 AGGR_PORT_STANDBY, 326 }; 327 328 static const char *aggr_port_selected_names[] = { 329 "UNSELECTED", 330 "SELECTED", 331 "STANDBY", 332 }; 333 334 struct aggr_proto_count { 335 uint64_t c_pkts; 336 uint64_t c_bytes; 337 }; 338 339 #define AGGR_PROTO_TX_LACP 0 340 #define AGGR_PROTO_TX_MARKER 1 341 #define AGGR_PROTO_RX_LACP 2 342 #define AGGR_PROTO_RX_MARKER 3 343 344 #define AGGR_PROTO_COUNT 4 345 346 struct aggr_port { 347 struct ifnet *p_ifp0; 348 struct kstat *p_kstat; 349 struct mutex p_mtx; 350 351 uint8_t p_lladdr[ETHER_ADDR_LEN]; 352 uint32_t p_mtu; 353 354 int (*p_ioctl)(struct ifnet *, u_long, caddr_t); 355 void (*p_input)(struct ifnet *, struct mbuf *); 356 int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *, 357 
struct rtentry *); 358 359 struct task p_lhook; 360 struct task p_dhook; 361 362 struct aggr_softc *p_aggr; 363 TAILQ_ENTRY(aggr_port) p_entry; 364 365 unsigned int p_collecting; 366 unsigned int p_distributing; 367 TAILQ_ENTRY(aggr_port) p_entry_distributing; 368 TAILQ_ENTRY(aggr_port) p_entry_muxen; 369 370 /* Partner information */ 371 enum aggr_port_selected p_muxed; 372 enum aggr_port_selected p_selected; /* Selected */ 373 struct lacp_port_info p_partner; 374 #define p_partner_state p_partner.lacp_state 375 376 uint8_t p_actor_state; 377 uint8_t p_lacp_timeout; 378 379 struct timeout p_current_while_timer; 380 struct timeout p_wait_while_timer; 381 382 /* Receive machine */ 383 enum lacp_rxm_state p_rxm_state; 384 struct mbuf_list p_rxm_ml; 385 struct task p_rxm_task; 386 387 /* Periodic Transmission machine */ 388 struct timeout p_ptm_tx; 389 390 /* Mux machine */ 391 enum lacp_mux_state p_mux_state; 392 393 /* Transmit machine */ 394 int p_txm_log[LACP_TX_MACHINE_RATE]; 395 unsigned int p_txm_slot; 396 struct timeout p_txm_ntt; 397 398 /* Counters */ 399 struct aggr_proto_count p_proto_counts[AGGR_PROTO_COUNT]; 400 uint64_t p_rx_drops; 401 uint32_t p_nselectch; 402 }; 403 404 TAILQ_HEAD(aggr_port_list, aggr_port); 405 406 struct aggr_map { 407 struct ifnet *m_ifp0s[AGGR_MAX_PORTS]; 408 }; 409 410 struct aggr_softc { 411 struct arpcom sc_ac; 412 #define sc_if sc_ac.ac_if 413 unsigned int sc_dead; 414 unsigned int sc_promisc; 415 struct ifmedia sc_media; 416 417 struct aggr_multiaddrs sc_multiaddrs; 418 419 unsigned int sc_mix; 420 421 struct aggr_map sc_maps[2]; 422 unsigned int sc_map_gen; 423 struct aggr_map *sc_map; 424 425 struct rwlock sc_lock; 426 struct aggr_port_list sc_ports; 427 struct aggr_port_list sc_distributing; 428 struct aggr_port_list sc_muxen; 429 unsigned int sc_nports; 430 unsigned int sc_ndistributing; 431 432 struct timeout sc_tick; 433 434 uint8_t sc_lacp_mode; 435 #define AGGR_LACP_MODE_PASSIVE 0 436 #define AGGR_LACP_MODE_ACTIVE 1 
437 uint8_t sc_lacp_timeout; 438 #define AGGR_LACP_TIMEOUT_SLOW 0 439 #define AGGR_LACP_TIMEOUT_FAST 1 440 uint16_t sc_lacp_prio; 441 uint16_t sc_lacp_port_prio; 442 443 struct lacp_sysid sc_partner_system; 444 uint16_t sc_partner_key; 445 }; 446 447 #define DPRINTF(_sc, fmt...) do { \ 448 if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \ 449 printf(fmt); \ 450 } while (0) 451 452 static const unsigned int aggr_periodic_times[] = { 453 [AGGR_LACP_TIMEOUT_SLOW] = LACP_SLOW_PERIODIC_TIME, 454 [AGGR_LACP_TIMEOUT_FAST] = LACP_FAST_PERIODIC_TIME, 455 }; 456 457 static int aggr_clone_create(struct if_clone *, int); 458 static int aggr_clone_destroy(struct ifnet *); 459 460 static int aggr_ioctl(struct ifnet *, u_long, caddr_t); 461 static void aggr_start(struct ifqueue *); 462 static int aggr_enqueue(struct ifnet *, struct mbuf *); 463 464 static int aggr_media_change(struct ifnet *); 465 static void aggr_media_status(struct ifnet *, struct ifmediareq *); 466 467 static int aggr_up(struct aggr_softc *); 468 static int aggr_down(struct aggr_softc *); 469 static int aggr_iff(struct aggr_softc *); 470 471 static void aggr_p_linkch(void *); 472 static void aggr_p_detach(void *); 473 static int aggr_p_ioctl(struct ifnet *, u_long, caddr_t); 474 static int aggr_p_output(struct ifnet *, struct mbuf *, 475 struct sockaddr *, struct rtentry *); 476 477 static int aggr_get_trunk(struct aggr_softc *, struct trunk_reqall *); 478 static int aggr_set_options(struct aggr_softc *, 479 const struct trunk_opts *); 480 static int aggr_get_options(struct aggr_softc *, struct trunk_opts *); 481 static int aggr_set_lladdr(struct aggr_softc *, const struct ifreq *); 482 static int aggr_set_mtu(struct aggr_softc *, uint32_t); 483 static void aggr_p_dtor(struct aggr_softc *, struct aggr_port *, 484 const char *); 485 static int aggr_p_setlladdr(struct aggr_port *, const uint8_t *); 486 static int aggr_p_set_mtu(struct aggr_port *, uint32_t); 487 static int aggr_add_port(struct aggr_softc *, 488 
const struct trunk_reqport *); 489 static int aggr_get_port(struct aggr_softc *, struct trunk_reqport *); 490 static int aggr_del_port(struct aggr_softc *, 491 const struct trunk_reqport *); 492 static int aggr_group(struct aggr_softc *, struct aggr_port *, u_long); 493 static int aggr_multi(struct aggr_softc *, struct aggr_port *, 494 const struct aggr_multiaddr *, u_long); 495 static void aggr_update_capabilities(struct aggr_softc *); 496 static void aggr_set_lacp_mode(struct aggr_softc *, int); 497 static void aggr_set_lacp_timeout(struct aggr_softc *, int); 498 static int aggr_multi_add(struct aggr_softc *, struct ifreq *); 499 static int aggr_multi_del(struct aggr_softc *, struct ifreq *); 500 501 static void aggr_map(struct aggr_softc *); 502 503 static void aggr_record_default(struct aggr_softc *, struct aggr_port *); 504 static void aggr_current_while_timer(void *); 505 static void aggr_wait_while_timer(void *); 506 static void aggr_rx(void *); 507 static void aggr_rxm_ev(struct aggr_softc *, struct aggr_port *, 508 enum lacp_rxm_event, const struct lacp_du *); 509 #define aggr_rxm(_sc, _p, _ev) \ 510 aggr_rxm_ev((_sc), (_p), (_ev), NULL) 511 #define aggr_rxm_lacpdu(_sc, _p, _lacpdu) \ 512 aggr_rxm_ev((_sc), (_p), LACP_RXM_E_LACPDU, (_lacpdu)) 513 514 static void aggr_mux(struct aggr_softc *, struct aggr_port *, 515 enum lacp_mux_event); 516 static int aggr_mux_ev(struct aggr_softc *, struct aggr_port *, 517 enum lacp_mux_event, int *); 518 519 static void aggr_set_partner_timeout(struct aggr_port *, int); 520 521 static void aggr_ptm_tx(void *); 522 523 static void aggr_transmit_machine(void *); 524 static void aggr_ntt(struct aggr_port *); 525 static void aggr_ntt_transmit(struct aggr_port *); 526 527 static void aggr_set_selected(struct aggr_port *, enum aggr_port_selected, 528 enum lacp_mux_event); 529 static void aggr_unselected(struct aggr_port *); 530 531 static void aggr_selection_logic(struct aggr_softc *, struct aggr_port *); 532 533 #if NKSTAT > 
0 534 static void aggr_port_kstat_attach(struct aggr_port *); 535 static void aggr_port_kstat_detach(struct aggr_port *); 536 #endif 537 538 static struct if_clone aggr_cloner = 539 IF_CLONE_INITIALIZER("aggr", aggr_clone_create, aggr_clone_destroy); 540 541 void 542 aggrattach(int count) 543 { 544 if_clone_attach(&aggr_cloner); 545 } 546 547 static int 548 aggr_clone_create(struct if_clone *ifc, int unit) 549 { 550 struct aggr_softc *sc; 551 struct ifnet *ifp; 552 553 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); 554 if (sc == NULL) 555 return (ENOMEM); 556 557 sc->sc_mix = arc4random(); 558 559 ifp = &sc->sc_if; 560 561 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", 562 ifc->ifc_name, unit); 563 564 TAILQ_INIT(&sc->sc_multiaddrs); 565 rw_init(&sc->sc_lock, "aggrlk"); 566 TAILQ_INIT(&sc->sc_ports); 567 sc->sc_nports = 0; 568 TAILQ_INIT(&sc->sc_distributing); 569 sc->sc_ndistributing = 0; 570 TAILQ_INIT(&sc->sc_muxen); 571 572 sc->sc_map_gen = 0; 573 sc->sc_map = NULL; /* no links yet */ 574 575 sc->sc_lacp_mode = AGGR_LACP_MODE_ACTIVE; 576 sc->sc_lacp_timeout = AGGR_LACP_TIMEOUT_SLOW; 577 sc->sc_lacp_prio = 0x8000; /* medium */ 578 sc->sc_lacp_port_prio = 0x8000; /* medium */ 579 580 ifmedia_init(&sc->sc_media, 0, aggr_media_change, aggr_media_status); 581 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 582 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 583 584 ifp->if_softc = sc; 585 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 586 ifp->if_ioctl = aggr_ioctl; 587 ifp->if_qstart = aggr_start; 588 ifp->if_enqueue = aggr_enqueue; 589 ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX; 590 ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; 591 ifp->if_link_state = LINK_STATE_DOWN; 592 ether_fakeaddr(ifp); 593 594 if_counters_alloc(ifp); 595 if_attach(ifp); 596 ether_ifattach(ifp); 597 598 ifp->if_llprio = IFQ_MAXPRIO; 599 600 return (0); 601 } 602 603 static int 604 aggr_clone_destroy(struct ifnet *ifp) 605 { 606 struct aggr_softc 
*sc = ifp->if_softc; 607 struct aggr_port *p; 608 609 NET_LOCK(); 610 sc->sc_dead = 1; 611 612 if (ISSET(ifp->if_flags, IFF_RUNNING)) 613 aggr_down(sc); 614 NET_UNLOCK(); 615 616 ether_ifdetach(ifp); 617 if_detach(ifp); 618 619 /* last ref, no need to lock. aggr_p_dtor locks anyway */ 620 NET_LOCK(); 621 while ((p = TAILQ_FIRST(&sc->sc_ports)) != NULL) 622 aggr_p_dtor(sc, p, "destroy"); 623 NET_UNLOCK(); 624 625 free(sc, M_DEVBUF, sizeof(*sc)); 626 627 return (0); 628 } 629 630 /* 631 * LACP_Enabled 632 */ 633 static inline int 634 aggr_lacp_enabled(struct aggr_softc *sc) 635 { 636 struct ifnet *ifp = &sc->sc_if; 637 return (ISSET(ifp->if_flags, IFF_RUNNING)); 638 } 639 640 /* 641 * port_enabled 642 */ 643 static int 644 aggr_port_enabled(struct aggr_port *p) 645 { 646 struct ifnet *ifp0 = p->p_ifp0; 647 648 if (!ISSET(ifp0->if_flags, IFF_RUNNING)) 649 return (0); 650 651 if (!LINK_STATE_IS_UP(ifp0->if_link_state)) 652 return (0); 653 654 return (1); 655 } 656 657 /* 658 * port_moved 659 * 660 * This variable is set to TRUE if the Receive machine for an Aggregation 661 * Port is in the PORT_DISABLED state, and the combination of 662 * Partner_Oper_System and Partner_Oper_Port_Number in use by that 663 * Aggregation Port has been received in an incoming LACPDU on a 664 * different Aggregation Port. This variable is set to FALSE once the 665 * INITIALIZE state of the Receive machine has set the Partner information 666 * for the Aggregation Port to administrative default values. 
667 * 668 * Value: Boolean 669 */ 670 static int 671 aggr_port_moved(struct aggr_softc *sc, struct aggr_port *p) 672 { 673 return (0); 674 } 675 676 static void 677 aggr_transmit(struct aggr_softc *sc, const struct aggr_map *map, struct mbuf *m) 678 { 679 struct ifnet *ifp = &sc->sc_if; 680 struct ifnet *ifp0; 681 uint16_t flow = 0; 682 683 #if NBPFILTER > 0 684 { 685 caddr_t if_bpf = ifp->if_bpf; 686 if (if_bpf) 687 bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); 688 } 689 #endif 690 691 if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) 692 flow = m->m_pkthdr.ph_flowid >> AGGR_FLOWID_SHIFT; 693 694 ifp0 = map->m_ifp0s[flow % AGGR_MAX_PORTS]; 695 696 if (if_enqueue(ifp0, m) != 0) 697 counters_inc(ifp->if_counters, ifc_oerrors); 698 } 699 700 static int 701 aggr_enqueue(struct ifnet *ifp, struct mbuf *m) 702 { 703 struct aggr_softc *sc; 704 const struct aggr_map *map; 705 int error = 0; 706 707 if (!ifq_is_priq(&ifp->if_snd)) 708 return (if_enqueue_ifq(ifp, m)); 709 710 sc = ifp->if_softc; 711 712 smr_read_enter(); 713 map = SMR_PTR_GET(&sc->sc_map); 714 if (__predict_false(map == NULL)) { 715 m_freem(m); 716 error = ENETDOWN; 717 } else { 718 counters_pkt(ifp->if_counters, 719 ifc_opackets, ifc_obytes, m->m_pkthdr.len); 720 aggr_transmit(sc, map, m); 721 } 722 smr_read_leave(); 723 724 return (error); 725 } 726 727 static void 728 aggr_start(struct ifqueue *ifq) 729 { 730 struct ifnet *ifp = ifq->ifq_if; 731 struct aggr_softc *sc = ifp->if_softc; 732 const struct aggr_map *map; 733 734 smr_read_enter(); 735 map = SMR_PTR_GET(&sc->sc_map); 736 if (__predict_false(map == NULL)) 737 ifq_purge(ifq); 738 else { 739 struct mbuf *m; 740 741 while ((m = ifq_dequeue(ifq)) != NULL) 742 aggr_transmit(sc, map, m); 743 } 744 smr_read_leave(); 745 } 746 747 static inline struct mbuf * 748 aggr_input_control(struct aggr_port *p, struct mbuf *m) 749 { 750 struct ether_header *eh; 751 int hlen = sizeof(*eh); 752 uint16_t etype; 753 uint64_t dst; 754 755 if (ISSET(m->m_flags, M_VLANTAG)) 
756 return (m); 757 758 eh = mtod(m, struct ether_header *); 759 etype = eh->ether_type; 760 dst = ether_addr_to_e64((struct ether_addr *)eh->ether_dhost); 761 762 if (__predict_false(etype == htons(ETHERTYPE_SLOW) && 763 dst == LACP_ADDR_SLOW_E64)) { 764 unsigned int rx_proto = AGGR_PROTO_RX_LACP; 765 struct ether_slowproto_hdr *sph; 766 int drop = 0; 767 768 hlen += sizeof(*sph); 769 if (m->m_len < hlen) { 770 m = m_pullup(m, hlen); 771 if (m == NULL) { 772 /* short++ */ 773 return (NULL); 774 } 775 eh = mtod(m, struct ether_header *); 776 } 777 778 sph = (struct ether_slowproto_hdr *)(eh + 1); 779 switch (sph->sph_subtype) { 780 case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER: 781 rx_proto = AGGR_PROTO_RX_MARKER; 782 /* FALLTHROUGH */ 783 case SLOWPROTOCOLS_SUBTYPE_LACP: 784 mtx_enter(&p->p_mtx); 785 p->p_proto_counts[rx_proto].c_pkts++; 786 p->p_proto_counts[rx_proto].c_bytes += m->m_pkthdr.len; 787 788 if (ml_len(&p->p_rxm_ml) < AGGR_MAX_SLOW_PKTS) 789 ml_enqueue(&p->p_rxm_ml, m); 790 else { 791 p->p_rx_drops++; 792 drop = 1; 793 } 794 mtx_leave(&p->p_mtx); 795 796 if (drop) 797 goto drop; 798 else 799 task_add(systq, &p->p_rxm_task); 800 return (NULL); 801 default: 802 break; 803 } 804 } else if (__predict_false(etype == htons(ETHERTYPE_LLDP) && 805 ETH64_IS_8021_RSVD(dst))) { 806 /* look at the last nibble of the 802.1 reserved address */ 807 switch (dst & 0xf) { 808 case 0x0: /* Nearest Customer Bridge */ 809 case 0x3: /* Non-TPMR Bridge */ 810 case 0xe: /* Nearest Bridge */ 811 p->p_input(p->p_ifp0, m); 812 return (NULL); 813 default: 814 break; 815 } 816 } 817 818 return (m); 819 820 drop: 821 m_freem(m); 822 return (NULL); 823 } 824 825 static void 826 aggr_input(struct ifnet *ifp0, struct mbuf *m) 827 { 828 struct arpcom *ac0 = (struct arpcom *)ifp0; 829 struct aggr_port *p = ac0->ac_trunkport; 830 struct aggr_softc *sc = p->p_aggr; 831 struct ifnet *ifp = &sc->sc_if; 832 833 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 834 goto drop; 835 836 m = 
aggr_input_control(p, m); 837 if (m == NULL) 838 return; 839 840 if (__predict_false(!p->p_collecting)) 841 goto drop; 842 843 if (!ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) 844 m->m_pkthdr.ph_flowid = ifp0->if_index ^ sc->sc_mix; 845 846 if_vinput(ifp, m); 847 848 return; 849 850 drop: 851 m_freem(m); 852 } 853 854 static int 855 aggr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 856 { 857 struct aggr_softc *sc = ifp->if_softc; 858 struct ifreq *ifr = (struct ifreq *)data; 859 int error = 0; 860 861 if (sc->sc_dead) 862 return (ENXIO); 863 864 switch (cmd) { 865 case SIOCSIFADDR: 866 break; 867 868 case SIOCSIFFLAGS: 869 if (ISSET(ifp->if_flags, IFF_UP)) { 870 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 871 error = aggr_up(sc); 872 else 873 error = ENETRESET; 874 } else { 875 if (ISSET(ifp->if_flags, IFF_RUNNING)) 876 error = aggr_down(sc); 877 } 878 break; 879 880 case SIOCSIFLLADDR: 881 error = aggr_set_lladdr(sc, ifr); 882 break; 883 884 case SIOCSTRUNK: 885 error = suser(curproc); 886 if (error != 0) 887 break; 888 889 if (((struct trunk_reqall *)data)->ra_proto != 890 TRUNK_PROTO_LACP) { 891 error = EPROTONOSUPPORT; 892 break; 893 } 894 895 /* nop */ 896 break; 897 case SIOCGTRUNK: 898 error = aggr_get_trunk(sc, (struct trunk_reqall *)data); 899 break; 900 901 case SIOCSTRUNKOPTS: 902 error = suser(curproc); 903 if (error != 0) 904 break; 905 906 error = aggr_set_options(sc, (struct trunk_opts *)data); 907 break; 908 909 case SIOCGTRUNKOPTS: 910 error = aggr_get_options(sc, (struct trunk_opts *)data); 911 break; 912 913 case SIOCGTRUNKPORT: 914 error = aggr_get_port(sc, (struct trunk_reqport *)data); 915 break; 916 case SIOCSTRUNKPORT: 917 error = suser(curproc); 918 if (error != 0) 919 break; 920 921 error = aggr_add_port(sc, (struct trunk_reqport *)data); 922 break; 923 case SIOCSTRUNKDELPORT: 924 error = suser(curproc); 925 if (error != 0) 926 break; 927 928 error = aggr_del_port(sc, (struct trunk_reqport *)data); 929 break; 930 931 case SIOCSIFMTU: 932 
error = aggr_set_mtu(sc, ifr->ifr_mtu); 933 break; 934 935 case SIOCADDMULTI: 936 error = aggr_multi_add(sc, ifr); 937 break; 938 case SIOCDELMULTI: 939 error = aggr_multi_del(sc, ifr); 940 break; 941 942 case SIOCSIFMEDIA: 943 error = EOPNOTSUPP; 944 break; 945 case SIOCGIFMEDIA: 946 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 947 break; 948 949 default: 950 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 951 break; 952 } 953 954 if (error == ENETRESET) 955 error = aggr_iff(sc); 956 957 return (error); 958 } 959 960 static int 961 aggr_get_trunk(struct aggr_softc *sc, struct trunk_reqall *ra) 962 { 963 struct ifnet *ifp = &sc->sc_if; 964 struct trunk_reqport rp; 965 struct aggr_port *p; 966 size_t size = ra->ra_size; 967 caddr_t ubuf = (caddr_t)ra->ra_port; 968 struct lacp_opreq *req; 969 uint8_t state = 0; 970 int error = 0; 971 972 if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) 973 SET(state, LACP_STATE_ACTIVITY); 974 if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST) 975 SET(state, LACP_STATE_TIMEOUT); 976 977 ra->ra_proto = TRUNK_PROTO_LACP; 978 memset(&ra->ra_psc, 0, sizeof(ra->ra_psc)); 979 980 /* 981 * aggr(4) does not support Individual links so don't bother 982 * with portprio, portno, and state, as per the spec. 
983 */ 984 985 req = &ra->ra_lacpreq; 986 req->actor_prio = sc->sc_lacp_prio; 987 CTASSERT(sizeof(req->actor_mac) == sizeof(sc->sc_ac.ac_enaddr)); 988 memcpy(req->actor_mac, &sc->sc_ac.ac_enaddr, sizeof(req->actor_mac)); 989 req->actor_key = ifp->if_index; 990 req->actor_state = state; 991 992 req->partner_prio = ntohs(sc->sc_partner_system.lacp_sysid_priority); 993 CTASSERT(sizeof(req->partner_mac) == 994 sizeof(sc->sc_partner_system.lacp_sysid_mac)); 995 memcpy(req->partner_mac, sc->sc_partner_system.lacp_sysid_mac, 996 sizeof(req->partner_mac)); 997 req->partner_key = ntohs(sc->sc_partner_key); 998 999 ra->ra_ports = sc->sc_nports; 1000 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 1001 struct ifnet *ifp0; 1002 struct lacp_opreq *opreq; 1003 1004 if (size < sizeof(rp)) 1005 break; 1006 1007 ifp0 = p->p_ifp0; 1008 1009 CTASSERT(sizeof(rp.rp_ifname) == sizeof(ifp->if_xname)); 1010 CTASSERT(sizeof(rp.rp_portname) == sizeof(ifp0->if_xname)); 1011 1012 memset(&rp, 0, sizeof(rp)); 1013 memcpy(rp.rp_ifname, ifp->if_xname, sizeof(rp.rp_ifname)); 1014 memcpy(rp.rp_portname, ifp0->if_xname, sizeof(rp.rp_portname)); 1015 1016 if (p->p_muxed) 1017 SET(rp.rp_flags, TRUNK_PORT_ACTIVE); 1018 if (p->p_collecting) 1019 SET(rp.rp_flags, TRUNK_PORT_COLLECTING); 1020 if (p->p_distributing) 1021 SET(rp.rp_flags, TRUNK_PORT_DISTRIBUTING); 1022 if (!aggr_port_enabled(p)) 1023 SET(rp.rp_flags, TRUNK_PORT_DISABLED); 1024 1025 opreq = &rp.rp_lacpreq; 1026 1027 opreq->actor_prio = sc->sc_lacp_prio; 1028 memcpy(opreq->actor_mac, &sc->sc_ac.ac_enaddr, 1029 sizeof(req->actor_mac)); 1030 opreq->actor_key = ifp->if_index; 1031 opreq->actor_portprio = sc->sc_lacp_port_prio; 1032 opreq->actor_portno = ifp0->if_index; 1033 opreq->actor_state = state | p->p_actor_state; 1034 1035 opreq->partner_prio = 1036 ntohs(p->p_partner.lacp_sysid.lacp_sysid_priority); 1037 CTASSERT(sizeof(opreq->partner_mac) == 1038 sizeof(p->p_partner.lacp_sysid.lacp_sysid_mac)); 1039 memcpy(opreq->partner_mac, 1040 
p->p_partner.lacp_sysid.lacp_sysid_mac, 1041 sizeof(opreq->partner_mac)); 1042 opreq->partner_key = ntohs(p->p_partner.lacp_key); 1043 opreq->partner_portprio = 1044 ntohs(p->p_partner.lacp_portid.lacp_portid_priority); 1045 opreq->partner_portno = 1046 ntohs(p->p_partner.lacp_portid.lacp_portid_number); 1047 opreq->partner_state = p->p_partner_state; 1048 1049 error = copyout(&rp, ubuf, sizeof(rp)); 1050 if (error != 0) 1051 break; 1052 1053 ubuf += sizeof(rp); 1054 size -= sizeof(rp); 1055 } 1056 1057 return (error); 1058 } 1059 1060 static int 1061 aggr_get_options(struct aggr_softc *sc, struct trunk_opts *tro) 1062 { 1063 struct lacp_adminopts *opt = &tro->to_lacpopts; 1064 1065 if (tro->to_proto != TRUNK_PROTO_LACP) 1066 return (EPROTONOSUPPORT); 1067 1068 opt->lacp_mode = sc->sc_lacp_mode; 1069 opt->lacp_timeout = sc->sc_lacp_timeout; 1070 opt->lacp_prio = sc->sc_lacp_prio; 1071 opt->lacp_portprio = sc->sc_lacp_port_prio; 1072 opt->lacp_ifqprio = sc->sc_if.if_llprio; 1073 1074 return (0); 1075 } 1076 1077 static int 1078 aggr_set_options(struct aggr_softc *sc, const struct trunk_opts *tro) 1079 { 1080 const struct lacp_adminopts *opt = &tro->to_lacpopts; 1081 1082 if (tro->to_proto != TRUNK_PROTO_LACP) 1083 return (EPROTONOSUPPORT); 1084 1085 switch (tro->to_opts) { 1086 case TRUNK_OPT_LACP_MODE: 1087 switch (opt->lacp_mode) { 1088 case AGGR_LACP_MODE_PASSIVE: 1089 case AGGR_LACP_MODE_ACTIVE: 1090 break; 1091 default: 1092 return (EINVAL); 1093 } 1094 1095 aggr_set_lacp_mode(sc, opt->lacp_mode); 1096 break; 1097 1098 case TRUNK_OPT_LACP_TIMEOUT: 1099 if (opt->lacp_timeout >= nitems(aggr_periodic_times)) 1100 return (EINVAL); 1101 1102 aggr_set_lacp_timeout(sc, opt->lacp_timeout); 1103 break; 1104 1105 case TRUNK_OPT_LACP_SYS_PRIO: 1106 if (opt->lacp_prio == 0) 1107 return (EINVAL); 1108 1109 sc->sc_lacp_prio = opt->lacp_prio; 1110 break; 1111 1112 case TRUNK_OPT_LACP_PORT_PRIO: 1113 if (opt->lacp_portprio == 0) 1114 return (EINVAL); 1115 1116 
sc->sc_lacp_port_prio = opt->lacp_portprio; 1117 break; 1118 1119 default: 1120 return (ENODEV); 1121 } 1122 1123 return (0); 1124 } 1125 1126 static int 1127 aggr_add_port(struct aggr_softc *sc, const struct trunk_reqport *rp) 1128 { 1129 struct ifnet *ifp = &sc->sc_if; 1130 struct ifnet *ifp0; 1131 struct arpcom *ac0; 1132 struct aggr_port *p; 1133 struct aggr_multiaddr *ma; 1134 int past = ticks - (hz * LACP_TIMEOUT_FACTOR); 1135 int i; 1136 int error; 1137 1138 NET_ASSERT_LOCKED(); 1139 if (sc->sc_nports > AGGR_MAX_PORTS) 1140 return (ENOSPC); 1141 1142 ifp0 = if_unit(rp->rp_portname); 1143 if (ifp0 == NULL) 1144 return (EINVAL); 1145 1146 if (ifp0->if_index == ifp->if_index) { 1147 error = EINVAL; 1148 goto put; 1149 } 1150 1151 if (ifp0->if_type != IFT_ETHER) { 1152 error = EPROTONOSUPPORT; 1153 goto put; 1154 } 1155 1156 error = ether_brport_isset(ifp0); 1157 if (error != 0) 1158 goto put; 1159 1160 if (ifp0->if_hardmtu < ifp->if_mtu) { 1161 error = ENOBUFS; 1162 goto put; 1163 } 1164 1165 ac0 = (struct arpcom *)ifp0; 1166 if (ac0->ac_trunkport != NULL) { 1167 error = EBUSY; 1168 goto put; 1169 } 1170 1171 /* let's try */ 1172 1173 p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); 1174 if (p == NULL) { 1175 error = ENOMEM; 1176 goto put; 1177 } 1178 1179 for (i = 0; i < nitems(p->p_txm_log); i++) 1180 p->p_txm_log[i] = past; 1181 1182 p->p_ifp0 = ifp0; 1183 p->p_aggr = sc; 1184 p->p_mtu = ifp0->if_mtu; 1185 mtx_init(&p->p_mtx, IPL_SOFTNET); 1186 1187 CTASSERT(sizeof(p->p_lladdr) == sizeof(ac0->ac_enaddr)); 1188 memcpy(p->p_lladdr, ac0->ac_enaddr, sizeof(p->p_lladdr)); 1189 p->p_ioctl = ifp0->if_ioctl; 1190 p->p_input = ifp0->if_input; 1191 p->p_output = ifp0->if_output; 1192 1193 error = aggr_group(sc, p, SIOCADDMULTI); 1194 if (error != 0) 1195 goto free; 1196 1197 error = aggr_p_setlladdr(p, sc->sc_ac.ac_enaddr); 1198 if (error != 0) 1199 goto ungroup; 1200 1201 error = aggr_p_set_mtu(p, ifp->if_mtu); 1202 if (error != 0) 1203 goto resetlladdr; 
1204 1205 if (sc->sc_promisc) { 1206 error = ifpromisc(ifp0, 1); 1207 if (error != 0) 1208 goto unmtu; 1209 } 1210 1211 TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) { 1212 if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) { 1213 log(LOG_WARNING, "%s %s: " 1214 "unable to add multicast address\n", 1215 ifp->if_xname, ifp0->if_xname); 1216 } 1217 } 1218 1219 task_set(&p->p_lhook, aggr_p_linkch, p); 1220 if_linkstatehook_add(ifp0, &p->p_lhook); 1221 1222 task_set(&p->p_dhook, aggr_p_detach, p); 1223 if_detachhook_add(ifp0, &p->p_dhook); 1224 1225 task_set(&p->p_rxm_task, aggr_rx, p); 1226 ml_init(&p->p_rxm_ml); 1227 1228 timeout_set_proc(&p->p_ptm_tx, aggr_ptm_tx, p); 1229 timeout_set_proc(&p->p_txm_ntt, aggr_transmit_machine, p); 1230 timeout_set_proc(&p->p_current_while_timer, 1231 aggr_current_while_timer, p); 1232 timeout_set_proc(&p->p_wait_while_timer, aggr_wait_while_timer, p); 1233 1234 p->p_muxed = 0; 1235 p->p_collecting = 0; 1236 p->p_distributing = 0; 1237 p->p_selected = AGGR_PORT_UNSELECTED; 1238 p->p_actor_state = LACP_STATE_AGGREGATION; 1239 1240 /* commit */ 1241 DPRINTF(sc, "%s %s trunkport: creating port\n", 1242 ifp->if_xname, ifp0->if_xname); 1243 1244 #if NKSTAT > 0 1245 aggr_port_kstat_attach(p); /* this prints warnings itself */ 1246 #endif 1247 1248 TAILQ_INSERT_TAIL(&sc->sc_ports, p, p_entry); 1249 sc->sc_nports++; 1250 1251 aggr_update_capabilities(sc); 1252 1253 /* 1254 * use (and modification) of ifp->if_input and ac->ac_trunkport 1255 * is protected by NET_LOCK. 
1256 */ 1257 1258 ac0->ac_trunkport = p; 1259 1260 /* make sure p is visible before handlers can run */ 1261 membar_producer(); 1262 ifp0->if_ioctl = aggr_p_ioctl; 1263 ifp0->if_input = aggr_input; 1264 ifp0->if_output = aggr_p_output; 1265 1266 aggr_mux(sc, p, LACP_MUX_E_BEGIN); 1267 aggr_rxm(sc, p, LACP_RXM_E_BEGIN); 1268 aggr_p_linkch(p); 1269 1270 return (0); 1271 1272 unmtu: 1273 if (aggr_p_set_mtu(p, p->p_mtu) != 0) { 1274 log(LOG_WARNING, "%s add %s: unable to reset mtu %u\n", 1275 ifp->if_xname, ifp0->if_xname, p->p_mtu); 1276 } 1277 resetlladdr: 1278 if (aggr_p_setlladdr(p, p->p_lladdr) != 0) { 1279 log(LOG_WARNING, "%s add %s: unable to reset lladdr\n", 1280 ifp->if_xname, ifp0->if_xname); 1281 } 1282 ungroup: 1283 if (aggr_group(sc, p, SIOCDELMULTI) != 0) { 1284 log(LOG_WARNING, "%s add %s: " 1285 "unable to remove LACP group address\n", 1286 ifp->if_xname, ifp0->if_xname); 1287 } 1288 free: 1289 free(p, M_DEVBUF, sizeof(*p)); 1290 put: 1291 if_put(ifp0); 1292 return (error); 1293 } 1294 1295 static struct aggr_port * 1296 aggr_trunkport(struct aggr_softc *sc, const char *name) 1297 { 1298 struct aggr_port *p; 1299 1300 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 1301 if (strcmp(p->p_ifp0->if_xname, name) == 0) 1302 return (p); 1303 } 1304 1305 return (NULL); 1306 } 1307 1308 static int 1309 aggr_get_port(struct aggr_softc *sc, struct trunk_reqport *rp) 1310 { 1311 struct aggr_port *p; 1312 1313 NET_ASSERT_LOCKED(); 1314 p = aggr_trunkport(sc, rp->rp_portname); 1315 if (p == NULL) 1316 return (EINVAL); 1317 1318 /* XXX */ 1319 1320 return (0); 1321 } 1322 1323 static int 1324 aggr_del_port(struct aggr_softc *sc, const struct trunk_reqport *rp) 1325 { 1326 struct aggr_port *p; 1327 1328 NET_ASSERT_LOCKED(); 1329 p = aggr_trunkport(sc, rp->rp_portname); 1330 if (p == NULL) 1331 return (EINVAL); 1332 1333 aggr_p_dtor(sc, p, "del"); 1334 1335 return (0); 1336 } 1337 1338 static int 1339 aggr_p_setlladdr(struct aggr_port *p, const uint8_t *addr) 1340 { 1341 
struct ifnet *ifp0 = p->p_ifp0; 1342 struct ifreq ifr; 1343 struct sockaddr *sa; 1344 int error; 1345 1346 memset(&ifr, 0, sizeof(ifr)); 1347 1348 CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname)); 1349 memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name)); 1350 1351 sa = &ifr.ifr_addr; 1352 1353 /* wtf is this? */ 1354 sa->sa_len = ETHER_ADDR_LEN; 1355 sa->sa_family = AF_LINK; 1356 CTASSERT(sizeof(sa->sa_data) >= ETHER_ADDR_LEN); 1357 memcpy(sa->sa_data, addr, ETHER_ADDR_LEN); 1358 1359 error = (*p->p_ioctl)(ifp0, SIOCSIFLLADDR, (caddr_t)&ifr); 1360 switch (error) { 1361 case ENOTTY: 1362 case 0: 1363 break; 1364 default: 1365 return (error); 1366 } 1367 1368 error = if_setlladdr(ifp0, addr); 1369 if (error != 0) 1370 return (error); 1371 1372 ifnewlladdr(ifp0); 1373 1374 return (0); 1375 } 1376 1377 static int 1378 aggr_p_set_mtu(struct aggr_port *p, uint32_t mtu) 1379 { 1380 struct ifnet *ifp0 = p->p_ifp0; 1381 struct ifreq ifr; 1382 1383 memset(&ifr, 0, sizeof(ifr)); 1384 1385 CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname)); 1386 memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name)); 1387 1388 ifr.ifr_mtu = mtu; 1389 1390 return ((*p->p_ioctl)(ifp0, SIOCSIFMTU, (caddr_t)&ifr)); 1391 } 1392 1393 static int 1394 aggr_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data) 1395 { 1396 struct arpcom *ac0 = (struct arpcom *)ifp0; 1397 struct aggr_port *p = ac0->ac_trunkport; 1398 struct ifreq *ifr = (struct ifreq *)data; 1399 int error = 0; 1400 1401 switch (cmd) { 1402 case SIOCGTRUNKPORT: { 1403 struct trunk_reqport *rp = (struct trunk_reqport *)data; 1404 struct aggr_softc *sc = p->p_aggr; 1405 struct ifnet *ifp = &sc->sc_if; 1406 1407 if (strncmp(rp->rp_ifname, rp->rp_portname, 1408 sizeof(rp->rp_ifname)) != 0) 1409 return (EINVAL); 1410 1411 CTASSERT(sizeof(rp->rp_ifname) == sizeof(ifp->if_xname)); 1412 memcpy(rp->rp_ifname, ifp->if_xname, sizeof(rp->rp_ifname)); 1413 break; 1414 } 1415 1416 case SIOCSIFMTU: 1417 if (ifr->ifr_mtu == 
ifp0->if_mtu) 1418 break; /* nop */ 1419 1420 /* FALLTHROUGH */ 1421 case SIOCSIFLLADDR: 1422 error = EBUSY; 1423 break; 1424 1425 case SIOCSIFFLAGS: 1426 if (!ISSET(ifp0->if_flags, IFF_UP) && 1427 ISSET(ifp0->if_flags, IFF_RUNNING)) { 1428 /* port is going down */ 1429 if (p->p_selected == AGGR_PORT_SELECTED) { 1430 aggr_unselected(p); 1431 aggr_ntt_transmit(p); /* XXX */ 1432 } 1433 } 1434 /* FALLTHROUGH */ 1435 default: 1436 error = (*p->p_ioctl)(ifp0, cmd, data); 1437 break; 1438 } 1439 1440 return (error); 1441 } 1442 1443 static int 1444 aggr_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst, 1445 struct rtentry *rt) 1446 { 1447 struct arpcom *ac0 = (struct arpcom *)ifp0; 1448 struct aggr_port *p = ac0->ac_trunkport; 1449 1450 /* restrict transmission to bpf only */ 1451 if (m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL) { 1452 m_freem(m); 1453 return (EBUSY); 1454 } 1455 1456 return ((*p->p_output)(ifp0, m, dst, rt)); 1457 } 1458 1459 static void 1460 aggr_p_dtor(struct aggr_softc *sc, struct aggr_port *p, const char *op) 1461 { 1462 struct ifnet *ifp = &sc->sc_if; 1463 struct ifnet *ifp0 = p->p_ifp0; 1464 struct arpcom *ac0 = (struct arpcom *)ifp0; 1465 struct aggr_multiaddr *ma; 1466 enum aggr_port_selected selected; 1467 int error; 1468 1469 DPRINTF(sc, "%s %s %s: destroying port\n", 1470 ifp->if_xname, ifp0->if_xname, op); 1471 1472 selected = p->p_selected; 1473 aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED); 1474 aggr_unselected(p); 1475 if (aggr_port_enabled(p) && selected == AGGR_PORT_SELECTED) 1476 aggr_ntt_transmit(p); 1477 1478 timeout_del(&p->p_ptm_tx); 1479 timeout_del_barrier(&p->p_txm_ntt); /* XXX */ 1480 timeout_del(&p->p_current_while_timer); 1481 timeout_del(&p->p_wait_while_timer); 1482 1483 /* 1484 * use (and modification) of ifp->if_input and ac->ac_trunkport 1485 * is protected by NET_LOCK. 
1486 */ 1487 1488 ac0->ac_trunkport = NULL; 1489 ifp0->if_input = p->p_input; 1490 ifp0->if_ioctl = p->p_ioctl; 1491 ifp0->if_output = p->p_output; 1492 1493 #if NKSTAT > 0 1494 aggr_port_kstat_detach(p); 1495 #endif 1496 1497 TAILQ_REMOVE(&sc->sc_ports, p, p_entry); 1498 sc->sc_nports--; 1499 1500 TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) { 1501 error = aggr_multi(sc, p, ma, SIOCDELMULTI); 1502 if (error != 0) { 1503 log(LOG_WARNING, "%s %s %s: " 1504 "unable to remove multicast address (%d)\n", 1505 ifp->if_xname, op, ifp0->if_xname, error); 1506 } 1507 } 1508 1509 if (sc->sc_promisc) { 1510 error = ifpromisc(ifp0, 0); 1511 if (error != 0) { 1512 log(LOG_WARNING, "%s %s %s: " 1513 "unable to disable promisc (%d)\n", 1514 ifp->if_xname, op, ifp0->if_xname, error); 1515 } 1516 } 1517 1518 error = aggr_p_set_mtu(p, p->p_mtu); 1519 if (error != 0) { 1520 log(LOG_WARNING, "%s %s %s: unable to restore mtu %u (%d)\n", 1521 ifp->if_xname, op, ifp0->if_xname, p->p_mtu, error); 1522 } 1523 1524 error = aggr_p_setlladdr(p, p->p_lladdr); 1525 if (error != 0) { 1526 log(LOG_WARNING, "%s %s %s: unable to restore lladdr (%d)\n", 1527 ifp->if_xname, op, ifp0->if_xname, error); 1528 } 1529 1530 error = aggr_group(sc, p, SIOCDELMULTI); 1531 if (error != 0) { 1532 log(LOG_WARNING, "%s %s %s: " 1533 "unable to remove LACP group address (%d)\n", 1534 ifp->if_xname, op, ifp0->if_xname, error); 1535 } 1536 1537 if_detachhook_del(ifp0, &p->p_dhook); 1538 if_linkstatehook_del(ifp0, &p->p_lhook); 1539 if_put(ifp0); 1540 free(p, M_DEVBUF, sizeof(*p)); 1541 1542 /* XXX this is a pretty ugly place to update this */ 1543 aggr_update_capabilities(sc); 1544 } 1545 1546 static void 1547 aggr_p_detach(void *arg) 1548 { 1549 struct aggr_port *p = arg; 1550 struct aggr_softc *sc = p->p_aggr; 1551 1552 aggr_p_dtor(sc, p, "detach"); 1553 1554 NET_ASSERT_LOCKED(); 1555 } 1556 1557 static void 1558 aggr_p_linkch(void *arg) 1559 { 1560 struct aggr_port *p = arg; 1561 struct aggr_softc *sc = 
p->p_aggr; 1562 1563 NET_ASSERT_LOCKED(); 1564 1565 if (aggr_port_enabled(p)) { 1566 aggr_rxm(sc, p, LACP_RXM_E_PORT_ENABLED); 1567 1568 if (aggr_lacp_enabled(sc)) { 1569 timeout_add_sec(&p->p_ptm_tx, 1570 aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]); 1571 } 1572 } else { 1573 aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED); 1574 aggr_unselected(p); 1575 aggr_record_default(sc, p); 1576 timeout_del(&p->p_ptm_tx); 1577 } 1578 } 1579 1580 static void 1581 aggr_map(struct aggr_softc *sc) 1582 { 1583 struct ifnet *ifp = &sc->sc_if; 1584 struct aggr_map *map = NULL; 1585 struct aggr_port *p; 1586 unsigned int gen; 1587 unsigned int i; 1588 int link_state = LINK_STATE_DOWN; 1589 1590 p = TAILQ_FIRST(&sc->sc_distributing); 1591 if (p != NULL) { 1592 gen = sc->sc_map_gen++; 1593 map = &sc->sc_maps[gen % nitems(sc->sc_maps)]; 1594 1595 for (i = 0; i < nitems(map->m_ifp0s); i++) { 1596 map->m_ifp0s[i] = p->p_ifp0; 1597 1598 p = TAILQ_NEXT(p, p_entry_distributing); 1599 if (p == NULL) 1600 p = TAILQ_FIRST(&sc->sc_distributing); 1601 } 1602 1603 link_state = LINK_STATE_FULL_DUPLEX; 1604 } 1605 1606 SMR_PTR_SET_LOCKED(&sc->sc_map, map); 1607 smr_barrier(); 1608 1609 if (ifp->if_link_state != link_state) { 1610 ifp->if_link_state = link_state; 1611 if_link_state_change(ifp); 1612 } 1613 } 1614 1615 static void 1616 aggr_current_while_timer(void *arg) 1617 { 1618 struct aggr_port *p = arg; 1619 struct aggr_softc *sc = p->p_aggr; 1620 1621 aggr_rxm(sc, p, LACP_RXM_E_TIMER_EXPIRED); 1622 } 1623 1624 static void 1625 aggr_wait_while_timer(void *arg) 1626 { 1627 struct aggr_port *p = arg; 1628 struct aggr_softc *sc = p->p_aggr; 1629 1630 aggr_selection_logic(sc, p); 1631 } 1632 1633 static void 1634 aggr_start_current_while_timer(struct aggr_port *p, unsigned int t) 1635 { 1636 timeout_add_sec(&p->p_current_while_timer, 1637 aggr_periodic_times[t] * LACP_TIMEOUT_FACTOR); 1638 } 1639 1640 static void 1641 aggr_input_lacpdu(struct aggr_port *p, struct mbuf *m) 1642 { 1643 struct 
aggr_softc *sc = p->p_aggr; 1644 struct lacp_du *lacpdu; 1645 1646 if (m->m_len < sizeof(*lacpdu)) { 1647 m = m_pullup(m, sizeof(*lacpdu)); 1648 if (m == NULL) 1649 return; 1650 } 1651 1652 /* 1653 * In the process of executing the recordPDU function, a Receive 1654 * machine compliant to this standard shall not validate the 1655 * Version Number, TLV_type, or Reserved fields in received 1656 * LACPDUs. The same actions are taken regardless of the values 1657 * received in these fields. A Receive machine may validate 1658 * the Actor_Information_Length, Partner_Information_Length, 1659 * Collector_Information_Length, or Terminator_Length fields. 1660 */ 1661 1662 lacpdu = mtod(m, struct lacp_du *); 1663 aggr_rxm_lacpdu(sc, p, lacpdu); 1664 1665 m_freem(m); 1666 } 1667 1668 static void 1669 aggr_update_selected(struct aggr_softc *sc, struct aggr_port *p, 1670 const struct lacp_du *lacpdu) 1671 { 1672 const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info; 1673 const struct lacp_port_info *lpi = &p->p_partner; 1674 1675 if ((rpi->lacp_portid.lacp_portid_number == 1676 lpi->lacp_portid.lacp_portid_number) && 1677 (rpi->lacp_portid.lacp_portid_priority == 1678 lpi->lacp_portid.lacp_portid_priority) && 1679 ETHER_IS_EQ(rpi->lacp_sysid.lacp_sysid_mac, 1680 lpi->lacp_sysid.lacp_sysid_mac) && 1681 (rpi->lacp_sysid.lacp_sysid_priority == 1682 lpi->lacp_sysid.lacp_sysid_priority) && 1683 (rpi->lacp_key == lpi->lacp_key) && 1684 (ISSET(rpi->lacp_state, LACP_STATE_AGGREGATION) == 1685 ISSET(lpi->lacp_state, LACP_STATE_AGGREGATION))) 1686 return; 1687 1688 aggr_unselected(p); 1689 } 1690 1691 static void 1692 aggr_record_default(struct aggr_softc *sc, struct aggr_port *p) 1693 { 1694 struct lacp_port_info *pi = &p->p_partner; 1695 1696 pi->lacp_sysid.lacp_sysid_priority = htons(0); 1697 memset(pi->lacp_sysid.lacp_sysid_mac, 0, 1698 sizeof(pi->lacp_sysid.lacp_sysid_mac)); 1699 1700 pi->lacp_key = htons(0); 1701 1702 pi->lacp_portid.lacp_portid_priority = htons(0); 1703 
pi->lacp_portid.lacp_portid_number = htons(0); 1704 1705 SET(p->p_actor_state, LACP_STATE_DEFAULTED); 1706 1707 pi->lacp_state = LACP_STATE_AGGREGATION | LACP_STATE_SYNC; 1708 if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST) 1709 SET(pi->lacp_state, LACP_STATE_TIMEOUT); 1710 if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) 1711 SET(pi->lacp_state, LACP_STATE_ACTIVITY); 1712 1713 /* notify Mux */ 1714 aggr_mux(sc, p, LACP_MUX_E_NOT_COLLECTING); 1715 aggr_mux(sc, p, LACP_MUX_E_SYNC); 1716 } 1717 1718 static void 1719 aggr_update_default_selected(struct aggr_softc *sc, struct aggr_port *p) 1720 { 1721 const struct lacp_port_info *pi = &p->p_partner; 1722 1723 if ((pi->lacp_portid.lacp_portid_number == htons(0)) && 1724 (pi->lacp_portid.lacp_portid_priority == htons(0)) && 1725 ETHER_IS_ANYADDR(pi->lacp_sysid.lacp_sysid_mac) && 1726 (pi->lacp_sysid.lacp_sysid_priority == htons(0)) && 1727 (pi->lacp_key == htons(0)) && 1728 ISSET(pi->lacp_state, LACP_STATE_AGGREGATION)) 1729 return; 1730 1731 aggr_unselected(p); 1732 aggr_selection_logic(sc, p); /* restart */ 1733 } 1734 1735 static int 1736 aggr_update_ntt(struct aggr_port *p, const struct lacp_du *lacpdu) 1737 { 1738 struct aggr_softc *sc = p->p_aggr; 1739 struct arpcom *ac = &sc->sc_ac; 1740 struct ifnet *ifp = &ac->ac_if; 1741 struct ifnet *ifp0 = p->p_ifp0; 1742 const struct lacp_port_info *pi = &lacpdu->lacp_partner_info; 1743 uint8_t bits = LACP_STATE_ACTIVITY | LACP_STATE_TIMEOUT | 1744 LACP_STATE_SYNC | LACP_STATE_AGGREGATION; 1745 uint8_t state = p->p_actor_state; 1746 int sync = 0; 1747 1748 if (pi->lacp_portid.lacp_portid_number != htons(ifp0->if_index)) 1749 goto ntt; 1750 if (pi->lacp_portid.lacp_portid_priority != 1751 htons(sc->sc_lacp_port_prio)) 1752 goto ntt; 1753 if (!ETHER_IS_EQ(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr)) 1754 goto ntt; 1755 if (pi->lacp_sysid.lacp_sysid_priority != 1756 htons(sc->sc_lacp_prio)) 1757 goto ntt; 1758 if (pi->lacp_key != htons(ifp->if_index)) 1759 goto ntt; 1760 if 
(ISSET(pi->lacp_state, LACP_STATE_SYNC) != 1761 ISSET(state, LACP_STATE_SYNC)) 1762 goto ntt; 1763 sync = 1; 1764 1765 if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST) 1766 SET(state, LACP_STATE_TIMEOUT); 1767 if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) 1768 SET(state, LACP_STATE_ACTIVITY); 1769 1770 if (ISSET(pi->lacp_state, bits) != ISSET(state, bits)) 1771 goto ntt; 1772 1773 return (1); 1774 1775 ntt: 1776 aggr_ntt(p); 1777 1778 return (sync); 1779 } 1780 1781 static void 1782 aggr_recordpdu(struct aggr_port *p, const struct lacp_du *lacpdu, int sync) 1783 { 1784 struct aggr_softc *sc = p->p_aggr; 1785 const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info; 1786 struct lacp_port_info *lpi = &p->p_partner; 1787 int active = ISSET(rpi->lacp_state, LACP_STATE_ACTIVITY) || 1788 (ISSET(p->p_actor_state, LACP_STATE_ACTIVITY) && 1789 ISSET(lacpdu->lacp_partner_info.lacp_state, LACP_STATE_ACTIVITY)); 1790 1791 lpi->lacp_portid.lacp_portid_number = 1792 rpi->lacp_portid.lacp_portid_number; 1793 lpi->lacp_portid.lacp_portid_priority = 1794 rpi->lacp_portid.lacp_portid_priority; 1795 memcpy(lpi->lacp_sysid.lacp_sysid_mac, 1796 rpi->lacp_sysid.lacp_sysid_mac, 1797 sizeof(lpi->lacp_sysid.lacp_sysid_mac)); 1798 lpi->lacp_sysid.lacp_sysid_priority = 1799 rpi->lacp_sysid.lacp_sysid_priority; 1800 lpi->lacp_key = rpi->lacp_key; 1801 lpi->lacp_state = rpi->lacp_state & ~LACP_STATE_SYNC; 1802 1803 CLR(p->p_actor_state, LACP_STATE_DEFAULTED); 1804 1805 if (active && ISSET(rpi->lacp_state, LACP_STATE_SYNC) && sync) { 1806 SET(p->p_partner_state, LACP_STATE_SYNC); 1807 aggr_mux(sc, p, LACP_MUX_E_SYNC); 1808 } else { 1809 CLR(p->p_partner_state, LACP_STATE_SYNC); 1810 aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC); 1811 } 1812 } 1813 1814 static void 1815 aggr_marker_response(struct aggr_port *p, struct mbuf *m) 1816 { 1817 struct aggr_softc *sc = p->p_aggr; 1818 struct arpcom *ac = &sc->sc_ac; 1819 struct ifnet *ifp0 = p->p_ifp0; 1820 struct marker_pdu *mpdu; 1821 struct 
ether_header *eh; 1822 1823 mpdu = mtod(m, struct marker_pdu *); 1824 mpdu->marker_info_tlv.lacp_tlv_type = MARKER_T_RESPONSE; 1825 1826 m = m_prepend(m, sizeof(*eh), M_DONTWAIT); 1827 if (m == NULL) 1828 return; 1829 1830 eh = mtod(m, struct ether_header *); 1831 memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost)); 1832 memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost)); 1833 eh->ether_type = htons(ETHERTYPE_SLOW); 1834 1835 mtx_enter(&p->p_mtx); 1836 p->p_proto_counts[AGGR_PROTO_TX_MARKER].c_pkts++; 1837 p->p_proto_counts[AGGR_PROTO_TX_MARKER].c_bytes += m->m_pkthdr.len; 1838 mtx_leave(&p->p_mtx); 1839 1840 (void)if_enqueue(ifp0, m); 1841 } 1842 1843 static void 1844 aggr_input_marker(struct aggr_port *p, struct mbuf *m) 1845 { 1846 struct marker_pdu *mpdu; 1847 1848 if (m->m_len < sizeof(*mpdu)) { 1849 m = m_pullup(m, sizeof(*mpdu)); 1850 if (m == NULL) 1851 return; 1852 } 1853 1854 mpdu = mtod(m, struct marker_pdu *); 1855 switch (mpdu->marker_info_tlv.lacp_tlv_type) { 1856 case MARKER_T_INFORMATION: 1857 aggr_marker_response(p, m); 1858 break; 1859 default: 1860 m_freem(m); 1861 break; 1862 } 1863 } 1864 1865 static void 1866 aggr_rx(void *arg) 1867 { 1868 struct aggr_port *p = arg; 1869 struct mbuf_list ml; 1870 struct mbuf *m; 1871 1872 mtx_enter(&p->p_mtx); 1873 ml = p->p_rxm_ml; 1874 ml_init(&p->p_rxm_ml); 1875 mtx_leave(&p->p_mtx); 1876 1877 while ((m = ml_dequeue(&ml)) != NULL) { 1878 struct ether_slowproto_hdr *sph; 1879 1880 /* aggr_input has checked eh already */ 1881 m_adj(m, sizeof(struct ether_header)); 1882 1883 sph = mtod(m, struct ether_slowproto_hdr *); 1884 switch (sph->sph_subtype) { 1885 case SLOWPROTOCOLS_SUBTYPE_LACP: 1886 aggr_input_lacpdu(p, m); 1887 break; 1888 case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER: 1889 aggr_input_marker(p, m); 1890 break; 1891 default: 1892 panic("unexpected slow protocol subtype"); 1893 /* NOTREACHED */ 1894 } 1895 } 1896 } 1897 1898 static void 1899 aggr_set_selected(struct aggr_port 
    *p, enum aggr_port_selected s,
    enum lacp_mux_event ev)
{
	struct aggr_softc *sc = p->p_aggr;

	/* only record (and count) an actual change of the Selected value */
	if (p->p_selected != s) {
		DPRINTF(sc, "%s %s: Selected %s -> %s\n",
		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
		    aggr_port_selected_names[p->p_selected],
		    aggr_port_selected_names[s]);

		/*
		 * setting p_selected doesn't need the mtx except to
		 * coordinate with a kstat read.
		 */

		mtx_enter(&p->p_mtx);
		p->p_selected = s;
		p->p_nselectch++;
		mtx_leave(&p->p_mtx);
	}

	/* the Mux machine is told about the event even if nothing changed */
	aggr_mux(sc, p, ev);
}

/* Mark the port UNSELECTED and pass the UNSELECTED event to the Mux. */
static void
aggr_unselected(struct aggr_port *p)
{
	aggr_set_selected(p, AGGR_PORT_UNSELECTED, LACP_MUX_E_UNSELECTED);
}

/* Mark the port SELECTED and pass the SELECTED event to the Mux. */
static inline void
aggr_selected(struct aggr_port *p)
{
	aggr_set_selected(p, AGGR_PORT_SELECTED, LACP_MUX_E_SELECTED);
}

#ifdef notyet
/* Mark the port STANDBY and pass the STANDBY event to the Mux. */
static inline void
aggr_standby(struct aggr_port *p)
{
	aggr_set_selected(p, AGGR_PORT_STANDBY, LACP_MUX_E_STANDBY);
}
#endif

/*
 * Decide whether the port may join the aggregation. Each check below
 * that fails leaves the port UNSELECTED.
 */
static void
aggr_selection_logic(struct aggr_softc *sc, struct aggr_port *p)
{
	const struct lacp_port_info *pi;
	struct arpcom *ac = &sc->sc_ac;
	struct ifnet *ifp = &ac->ac_if;
	const uint8_t *mac;

	/* the Receive machine must be in CURRENT */
	if (p->p_rxm_state != LACP_RXM_S_CURRENT) {
		DPRINTF(sc, "%s %s: selection logic: unselected (rxm !%s)\n",
		    ifp->if_xname, p->p_ifp0->if_xname,
		    lacp_rxm_state_names[LACP_RXM_S_CURRENT]);
		goto unselected;
	}

	/* a partner key of zero is not accepted */
	pi = &p->p_partner;
	if (pi->lacp_key == htons(0)) {
		DPRINTF(sc, "%s %s: selection logic: unselected "
		    "(partner key == 0)\n",
		    ifp->if_xname, p->p_ifp0->if_xname);
		goto unselected;
	}

	/*
	 * aggr(4) does not support individual interfaces
	 */
	if (!ISSET(pi->lacp_state, LACP_STATE_AGGREGATION)) {
		DPRINTF(sc, "%s %s: selection logic: unselected "
		    "(partner state is Individual)\n",
		    ifp->if_xname, p->p_ifp0->if_xname);
		goto unselected;
	}

	/*
	 * Any pair of Aggregation Ports that are members of the same
	 * LAG, but are connected together by the same link, shall not
	 * select the same Aggregator
	 */

	/* i.e. the partner reports our own address and our own key */
	mac = pi->lacp_sysid.lacp_sysid_mac;
	if (ETHER_IS_EQ(mac, ac->ac_enaddr) &&
	    pi->lacp_key == htons(ifp->if_index)) {
		DPRINTF(sc, "%s %s: selection logic: unselected "
		    "(partner sysid !eq)\n",
		    ifp->if_xname, p->p_ifp0->if_xname);
		goto unselected;
	}

	if (!TAILQ_EMPTY(&sc->sc_muxen)) {
		/* an aggregation has already been selected */
		/* so this partner must match its system address and key */
		if (!ETHER_IS_EQ(mac, sc->sc_partner_system.lacp_sysid_mac) ||
		    sc->sc_partner_key != pi->lacp_key) {
			DPRINTF(sc, "%s %s: selection logic: unselected "
			    "(partner sysid != selection)\n",
			    ifp->if_xname, p->p_ifp0->if_xname);
			goto unselected;
		}
	}

	aggr_selected(p);
	return;

unselected:
	aggr_unselected(p);
}

/*
 * Feed an event to the port's Mux machine and, if any transition
 * asked for it, signal NTT once at the end.
 */
static void
aggr_mux(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev)
{
	int ntt = 0;

	/*
	 * the mux can move through multiple states based on a
	 * single event, so loop until the event is completely consumed.
	 * debounce NTT = TRUE through the multiple state transitions.
2018 */ 2019 2020 while (aggr_mux_ev(sc, p, ev, &ntt) != 0) 2021 ; 2022 2023 if (ntt) 2024 aggr_ntt(p); 2025 } 2026 2027 #ifdef notyet 2028 static int 2029 aggr_ready_n(struct aggr_port *p) 2030 { 2031 return (p->p_mux_state == LACP_MUX_S_WAITING && 2032 !timeout_pending(&p->p_wait_while_timer)); 2033 } 2034 #endif 2035 2036 static inline int 2037 aggr_ready(struct aggr_softc *sc) 2038 { 2039 return (1); 2040 } 2041 2042 static void 2043 aggr_disable_distributing(struct aggr_softc *sc, struct aggr_port *p) 2044 { 2045 if (!p->p_distributing) 2046 return; 2047 2048 sc->sc_ndistributing--; 2049 TAILQ_REMOVE(&sc->sc_distributing, p, p_entry_distributing); 2050 p->p_distributing = 0; 2051 2052 aggr_map(sc); 2053 2054 DPRINTF(sc, "%s %s: distributing disabled\n", 2055 sc->sc_if.if_xname, p->p_ifp0->if_xname); 2056 } 2057 2058 static void 2059 aggr_enable_distributing(struct aggr_softc *sc, struct aggr_port *p) 2060 { 2061 if (p->p_distributing) 2062 return; 2063 2064 /* check the LAG ID? */ 2065 2066 p->p_distributing = 1; 2067 TAILQ_INSERT_TAIL(&sc->sc_distributing, p, p_entry_distributing); 2068 sc->sc_ndistributing++; 2069 2070 aggr_map(sc); 2071 2072 DPRINTF(sc, "%s %s: distributing enabled\n", 2073 sc->sc_if.if_xname, p->p_ifp0->if_xname); 2074 } 2075 2076 static void 2077 aggr_disable_collecting(struct aggr_softc *sc, struct aggr_port *p) 2078 { 2079 if (!p->p_collecting) 2080 return; 2081 2082 p->p_collecting = 0; 2083 2084 DPRINTF(sc, "%s %s: collecting disabled\n", 2085 sc->sc_if.if_xname, p->p_ifp0->if_xname); 2086 } 2087 2088 static void 2089 aggr_enable_collecting(struct aggr_softc *sc, struct aggr_port *p) 2090 { 2091 if (p->p_collecting) 2092 return; 2093 2094 p->p_collecting = 1; 2095 2096 DPRINTF(sc, "%s %s: collecting enabled\n", 2097 sc->sc_if.if_xname, p->p_ifp0->if_xname); 2098 } 2099 2100 static void 2101 aggr_attach_mux(struct aggr_softc *sc, struct aggr_port *p) 2102 { 2103 const struct lacp_port_info *pi = &p->p_partner; 2104 2105 if (p->p_muxed) 
2106 return; 2107 2108 p->p_muxed = 1; 2109 if (TAILQ_EMPTY(&sc->sc_muxen)) { 2110 KASSERT(sc->sc_partner_key == htons(0)); 2111 sc->sc_partner_system = pi->lacp_sysid; 2112 sc->sc_partner_key = pi->lacp_key; 2113 } 2114 2115 TAILQ_INSERT_TAIL(&sc->sc_muxen, p, p_entry_muxen); 2116 2117 DPRINTF(sc, "%s %s: mux attached\n", 2118 sc->sc_if.if_xname, p->p_ifp0->if_xname); 2119 } 2120 2121 static void 2122 aggr_detach_mux(struct aggr_softc *sc, struct aggr_port *p) 2123 { 2124 if (!p->p_muxed) 2125 return; 2126 2127 p->p_muxed = 0; 2128 2129 TAILQ_REMOVE(&sc->sc_muxen, p, p_entry_muxen); 2130 if (TAILQ_EMPTY(&sc->sc_muxen)) { 2131 memset(&sc->sc_partner_system.lacp_sysid_mac, 0, 2132 sizeof(sc->sc_partner_system.lacp_sysid_mac)); 2133 sc->sc_partner_system.lacp_sysid_priority = htons(0); 2134 sc->sc_partner_key = htons(0); 2135 } 2136 2137 DPRINTF(sc, "%s %s: mux detached\n", 2138 sc->sc_if.if_xname, p->p_ifp0->if_xname); 2139 } 2140 2141 static int 2142 aggr_mux_ev(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev, 2143 int *ntt) 2144 { 2145 enum lacp_mux_state nstate = LACP_MUX_S_DETACHED; 2146 2147 switch (p->p_mux_state) { 2148 case LACP_MUX_S_BEGIN: 2149 KASSERT(ev == LACP_MUX_E_BEGIN); 2150 nstate = LACP_MUX_S_DETACHED; 2151 break; 2152 case LACP_MUX_S_DETACHED: 2153 switch (ev) { 2154 case LACP_MUX_E_SELECTED: 2155 case LACP_MUX_E_STANDBY: 2156 nstate = LACP_MUX_S_WAITING; 2157 break; 2158 default: 2159 return (0); 2160 } 2161 break; 2162 case LACP_MUX_S_WAITING: 2163 switch (ev) { 2164 case LACP_MUX_E_UNSELECTED: 2165 nstate = LACP_MUX_S_DETACHED; 2166 break; 2167 case LACP_MUX_E_SELECTED: 2168 case LACP_MUX_E_READY: 2169 if (aggr_ready(sc) && 2170 p->p_selected == AGGR_PORT_SELECTED) { 2171 nstate = LACP_MUX_S_ATTACHED; 2172 break; 2173 } 2174 /* FALLTHROUGH */ 2175 default: 2176 return (0); 2177 } 2178 break; 2179 case LACP_MUX_S_ATTACHED: 2180 switch (ev) { 2181 case LACP_MUX_E_UNSELECTED: 2182 case LACP_MUX_E_STANDBY: 2183 nstate = 
			    LACP_MUX_S_DETACHED;
			break;
		case LACP_MUX_E_SELECTED:
		case LACP_MUX_E_SYNC:
			/* move on only while selected and partner in sync */
			if (p->p_selected == AGGR_PORT_SELECTED &&
			    ISSET(p->p_partner_state, LACP_STATE_SYNC)) {
				nstate = LACP_MUX_S_COLLECTING;
				break;
			}
			/* FALLTHROUGH */
		default:
			return (0);
		}
		break;
	case LACP_MUX_S_COLLECTING:
		switch (ev) {
		case LACP_MUX_E_UNSELECTED:
		case LACP_MUX_E_STANDBY:
		case LACP_MUX_E_NOT_SYNC:
			nstate = LACP_MUX_S_ATTACHED;
			break;
		case LACP_MUX_E_SELECTED:
		case LACP_MUX_E_SYNC:
		case LACP_MUX_E_COLLECTING:
			/* the partner must also be collecting */
			if (p->p_selected == AGGR_PORT_SELECTED &&
			    ISSET(p->p_partner_state, LACP_STATE_SYNC) &&
			    ISSET(p->p_partner_state, LACP_STATE_COLLECTING)) {
				nstate = LACP_MUX_S_DISTRIBUTING;
				break;
			}
			/* FALLTHROUGH */
		default:
			return (0);
		}
		break;
	case LACP_MUX_S_DISTRIBUTING:
		/* any loss of selection or sync steps back to COLLECTING */
		switch (ev) {
		case LACP_MUX_E_UNSELECTED:
		case LACP_MUX_E_STANDBY:
		case LACP_MUX_E_NOT_SYNC:
		case LACP_MUX_E_NOT_COLLECTING:
			nstate = LACP_MUX_S_COLLECTING;
			break;
		default:
			return (0);
		}
		break;
	}

	DPRINTF(sc, "%s %s mux: %s (%s) -> %s\n",
	    sc->sc_if.if_xname, p->p_ifp0->if_xname,
	    lacp_mux_state_names[p->p_mux_state], lacp_mux_event_names[ev],
	    lacp_mux_state_names[nstate]);

	/* act on the new state */
	switch (nstate) {
	case LACP_MUX_S_BEGIN:
		panic("unexpected mux nstate BEGIN");
		/* NOTREACHED */
	case LACP_MUX_S_DETACHED:
		/*
		 * Detach_Mux_From_Aggregator();
		 * Actor.Sync = FALSE;
		 * Disable_Distributing();
		 * Actor.Distributing = FALSE;
		 * Actor.Collecting = FALSE;
		 * Disable_Collecting();
		 * NTT = TRUE;
		 */
		aggr_detach_mux(sc, p);
		CLR(p->p_actor_state, LACP_STATE_SYNC);
		aggr_disable_distributing(sc, p);
		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
		CLR(p->p_actor_state,
		    LACP_STATE_COLLECTING);
		aggr_disable_collecting(sc, p);
		*ntt = 1;
		break;
	case LACP_MUX_S_WAITING:
		/*
		 * Start wait_while_timer
		 */
		timeout_add_sec(&p->p_wait_while_timer,
		    LACP_AGGREGATION_WAIT_TIME);
		break;
	case LACP_MUX_S_ATTACHED:
		/*
		 * Attach_Mux_To_Aggregator();
		 * Actor.Sync = TRUE;
		 * Actor.Collecting = FALSE;
		 * Disable_Collecting();
		 * NTT = TRUE;
		 */
		aggr_attach_mux(sc, p);
		SET(p->p_actor_state, LACP_STATE_SYNC);
		CLR(p->p_actor_state, LACP_STATE_COLLECTING);
		aggr_disable_collecting(sc, p);
		*ntt = 1;
		break;

	case LACP_MUX_S_COLLECTING:
		/*
		 * Enable_Collecting();
		 * Actor.Collecting = TRUE;
		 * Disable_Distributing();
		 * Actor.Distributing = FALSE;
		 * NTT = TRUE;
		 */
		aggr_enable_collecting(sc, p);
		SET(p->p_actor_state, LACP_STATE_COLLECTING);
		aggr_disable_distributing(sc, p);
		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
		*ntt = 1;
		break;
	case LACP_MUX_S_DISTRIBUTING:
		/*
		 * Actor.Distributing = TRUE;
		 * Enable_Distributing();
		 */
		SET(p->p_actor_state, LACP_STATE_DISTRIBUTING);
		aggr_enable_distributing(sc, p);
		break;
	}

	p->p_mux_state = nstate;

	/* a transition happened; the caller loops to re-run the event */
	return (1);
}

/*
 * Receive machine: apply one event to the port. lacpdu must be
 * non-NULL exactly when ev is LACP_RXM_E_LACPDU (asserted below).
 */
static void
aggr_rxm_ev(struct aggr_softc *sc, struct aggr_port *p,
    enum lacp_rxm_event ev, const struct lacp_du *lacpdu)
{
	unsigned int port_disabled = 0;
	enum lacp_rxm_state nstate = LACP_RXM_S_BEGIN;

	KASSERT((ev == LACP_RXM_E_LACPDU) == (lacpdu != NULL));

	/* global transitions */

	switch (ev) {
	case LACP_RXM_E_NOT_PORT_ENABLED:
		port_disabled = !aggr_port_moved(sc, p);
		break;
	case LACP_RXM_E_NOT_PORT_MOVED:
		port_disabled = !aggr_port_enabled(p);
		break;
	default:
		break;
	}

	/* a disabled port overrides the per-state transitions below */
	if (port_disabled)
		nstate = LACP_RXM_S_PORT_DISABLED;
	else switch
	    (p->p_rxm_state) { /* local state transitions */
	case LACP_RXM_S_BEGIN:
		KASSERT(ev == LACP_RXM_E_BEGIN);
		nstate = LACP_RXM_S_INITIALIZE;
		break;
	case LACP_RXM_S_INITIALIZE:
		/* this should only be handled via UCT in nstate handling */
		panic("unexpected rxm state INITIALIZE");

	case LACP_RXM_S_PORT_DISABLED:
		switch (ev) {
		case LACP_RXM_E_PORT_MOVED:
			nstate = LACP_RXM_S_INITIALIZE;
			break;
		case LACP_RXM_E_PORT_ENABLED:
			nstate = aggr_lacp_enabled(sc) ?
			    LACP_RXM_S_EXPIRED : LACP_RXM_S_LACP_DISABLED;
			break;
		case LACP_RXM_E_LACP_ENABLED:
			/* LACP state changes are ignored while the port is down */
			if (!aggr_port_enabled(p))
				return;
			nstate = LACP_RXM_S_EXPIRED;
			break;
		case LACP_RXM_E_NOT_LACP_ENABLED:
			if (!aggr_port_enabled(p))
				return;
			nstate = LACP_RXM_S_LACP_DISABLED;
			break;
		default:
			return;
		}
		break;
	case LACP_RXM_S_EXPIRED:
		switch (ev) {
		case LACP_RXM_E_LACPDU:
			nstate = LACP_RXM_S_CURRENT;
			break;
		case LACP_RXM_E_TIMER_EXPIRED:
			nstate = LACP_RXM_S_DEFAULTED;
			break;
		default:
			return;
		}
		break;
	case LACP_RXM_S_LACP_DISABLED:
		switch (ev) {
		case LACP_RXM_E_LACP_ENABLED:
			nstate = LACP_RXM_S_PORT_DISABLED;
			break;
		default:
			return;
		}
		break;
	case LACP_RXM_S_DEFAULTED:
		switch (ev) {
		case LACP_RXM_E_LACPDU:
			nstate = LACP_RXM_S_CURRENT;
			break;
		default:
			return;
		}
		break;
	case LACP_RXM_S_CURRENT:
		switch (ev) {
		case LACP_RXM_E_TIMER_EXPIRED:
			nstate = LACP_RXM_S_EXPIRED;
			break;
		case LACP_RXM_E_LACPDU:
			/* re-entering CURRENT reruns its actions for this PDU */
			nstate = LACP_RXM_S_CURRENT;
			break;
		default:
			return;
		}
		break;
	}

	/* re-entered by the INITIALIZE case below for its unconditional step */
uct:
	if (p->p_rxm_state != nstate) {
		DPRINTF(sc, "%s %s rxm: %s (%s) -> %s\n",
		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
		    lacp_rxm_state_names[p->p_rxm_state],
		    lacp_rxm_event_names[ev],
		    lacp_rxm_state_names[nstate]);
	}

	/* record the new state */
	p->p_rxm_state = nstate;

	/* act on the new state */
	switch (nstate) {
	case LACP_RXM_S_BEGIN:
		panic("unexpected rxm nstate BEGIN");
		/* NOTREACHED */
	case LACP_RXM_S_INITIALIZE:
		/*
		 * Selected = UNSELECTED;
		 * recordDefault();
		 * Actor_Oper_Port_State.Expired = FALSE;
		 * port_moved = FALSE;
		 */
		aggr_unselected(p);
		aggr_record_default(sc, p);
		CLR(p->p_actor_state, LACP_STATE_EXPIRED);

		/* INITIALIZE always continues straight on to PORT_DISABLED */
		ev = LACP_RXM_E_UCT;
		nstate = LACP_RXM_S_PORT_DISABLED;
		goto uct;
		/* NOTREACHED */
	case LACP_RXM_S_PORT_DISABLED:
		/*
		 * Partner_Oper_Port_State.Synchronization = FALSE;
		 */
		CLR(p->p_partner_state, LACP_STATE_SYNC);
		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
		break;
	case LACP_RXM_S_EXPIRED:
		/*
		 * Partner_Oper_Port_State.Synchronization = FALSE;
		 * Partner_Oper_Port_State.LACP_Timeout = Short Timeout;
		 * start current_while_timer(Short Timeout);
		 * Actor_Oper_Port_State.Expired = TRUE;
		 */

		CLR(p->p_partner_state, LACP_STATE_SYNC);
		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
		aggr_set_partner_timeout(p, AGGR_LACP_TIMEOUT_FAST);
		aggr_start_current_while_timer(p, AGGR_LACP_TIMEOUT_FAST);
		SET(p->p_actor_state, LACP_STATE_EXPIRED);

		break;
	case LACP_RXM_S_LACP_DISABLED:
		/*
		 * Selected = UNSELECTED;
		 * recordDefault();
		 * Partner_Oper_Port_State.Aggregation = FALSE;
		 * Actor_Oper_Port_State.Expired = FALSE;
		 */
		aggr_unselected(p);
		aggr_record_default(sc, p);
		CLR(p->p_partner_state, LACP_STATE_AGGREGATION);
		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
		break;
	case LACP_RXM_S_DEFAULTED:
		/*
		 * update_Default_Selected();
		 * recordDefault();
		 * Actor_Oper_Port_State.Expired = FALSE;
		 */
		aggr_update_default_selected(sc, p);
		aggr_record_default(sc, p);
2485 CLR(p->p_actor_state, LACP_STATE_EXPIRED); 2486 break; 2487 case LACP_RXM_S_CURRENT: { 2488 /* 2489 * update_Selected(); 2490 * update_NTT(); 2491 * if (Actor_System_LACP_Version >=2 ) recordVersionNumber(); 2492 * recordPDU(); 2493 * start current_while_timer( 2494 * Actor_Oper_Port_State.LACP_Timeout); 2495 * Actor_Oper_Port_State.Expired = FALSE; 2496 */ 2497 int sync; 2498 2499 aggr_update_selected(sc, p, lacpdu); 2500 sync = aggr_update_ntt(p, lacpdu); 2501 /* don't support v2 yet */ 2502 aggr_recordpdu(p, lacpdu, sync); 2503 aggr_start_current_while_timer(p, sc->sc_lacp_timeout); 2504 CLR(p->p_actor_state, LACP_STATE_EXPIRED); 2505 2506 if (p->p_selected == AGGR_PORT_UNSELECTED) 2507 aggr_selection_logic(sc, p); /* restart */ 2508 2509 } 2510 break; 2511 } 2512 } 2513 2514 static int 2515 aggr_up(struct aggr_softc *sc) 2516 { 2517 struct ifnet *ifp = &sc->sc_if; 2518 struct aggr_port *p; 2519 2520 NET_ASSERT_LOCKED(); 2521 KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); 2522 2523 SET(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = TRUE */ 2524 2525 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2526 aggr_rxm(sc, p, LACP_RXM_E_LACP_ENABLED); 2527 aggr_p_linkch(p); 2528 } 2529 2530 /* start the Periodic Transmission machine */ 2531 if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) { 2532 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2533 if (!aggr_port_enabled(p)) 2534 continue; 2535 2536 timeout_add_sec(&p->p_ptm_tx, 2537 aggr_periodic_times[sc->sc_lacp_timeout]); 2538 } 2539 } 2540 2541 return (ENETRESET); 2542 } 2543 2544 static int 2545 aggr_iff(struct aggr_softc *sc) 2546 { 2547 struct ifnet *ifp = &sc->sc_if; 2548 unsigned int promisc = ISSET(ifp->if_flags, IFF_PROMISC); 2549 2550 NET_ASSERT_LOCKED(); 2551 2552 if (promisc != sc->sc_promisc) { 2553 struct aggr_port *p; 2554 2555 rw_enter_read(&sc->sc_lock); 2556 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2557 struct ifnet *ifp0 = p->p_ifp0; 2558 if (ifpromisc(ifp0, promisc) != 0) { 2559 log(LOG_WARNING, "%s iff 
%s: " 2560 "unable to turn promisc %s\n", 2561 ifp->if_xname, ifp0->if_xname, 2562 promisc ? "on" : "off"); 2563 } 2564 } 2565 rw_exit_read(&sc->sc_lock); 2566 2567 sc->sc_promisc = promisc; 2568 } 2569 2570 return (0); 2571 } 2572 2573 static int 2574 aggr_down(struct aggr_softc *sc) 2575 { 2576 struct ifnet *ifp = &sc->sc_if; 2577 struct aggr_port *p; 2578 2579 NET_ASSERT_LOCKED(); 2580 CLR(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = FALSE */ 2581 2582 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2583 aggr_rxm(sc, p, LACP_RXM_E_NOT_LACP_ENABLED); 2584 2585 /* stop the Periodic Transmission machine */ 2586 timeout_del(&p->p_ptm_tx); 2587 2588 /* stop the Mux machine */ 2589 aggr_mux(sc, p, LACP_MUX_E_UNSELECTED); 2590 2591 /* stop the Transmit machine */ 2592 timeout_del(&p->p_txm_ntt); 2593 } 2594 2595 KASSERT(TAILQ_EMPTY(&sc->sc_distributing)); 2596 KASSERT(sc->sc_ndistributing == 0); 2597 KASSERT(SMR_PTR_GET_LOCKED(&sc->sc_map) == NULL); 2598 2599 return (ENETRESET); 2600 } 2601 2602 static int 2603 aggr_set_lladdr(struct aggr_softc *sc, const struct ifreq *ifr) 2604 { 2605 struct ifnet *ifp = &sc->sc_if; 2606 struct aggr_port *p; 2607 const uint8_t *lladdr = ifr->ifr_addr.sa_data; 2608 2609 rw_enter_read(&sc->sc_lock); 2610 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2611 if (aggr_p_setlladdr(p, lladdr) != 0) { 2612 struct ifnet *ifp0 = p->p_ifp0; 2613 log(LOG_WARNING, "%s setlladdr %s: " 2614 "unable to set lladdr\n", 2615 ifp->if_xname, ifp0->if_xname); 2616 } 2617 } 2618 rw_exit_read(&sc->sc_lock); 2619 2620 return (0); 2621 } 2622 2623 static int 2624 aggr_set_mtu(struct aggr_softc *sc, uint32_t mtu) 2625 { 2626 struct ifnet *ifp = &sc->sc_if; 2627 struct aggr_port *p; 2628 2629 if (mtu < ETHERMIN || mtu > ifp->if_hardmtu) 2630 return (EINVAL); 2631 2632 ifp->if_mtu = mtu; 2633 2634 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2635 if (aggr_p_set_mtu(p, mtu) != 0) { 2636 struct ifnet *ifp0 = p->p_ifp0; 2637 log(LOG_WARNING, "%s %s: unable to set mtu %u\n", 
2638 ifp->if_xname, ifp0->if_xname, mtu); 2639 } 2640 } 2641 2642 return (0); 2643 } 2644 2645 static int 2646 aggr_group(struct aggr_softc *sc, struct aggr_port *p, u_long cmd) 2647 { 2648 struct ifnet *ifp0 = p->p_ifp0; 2649 struct ifreq ifr; 2650 struct sockaddr *sa; 2651 2652 memset(&ifr, 0, sizeof(ifr)); 2653 2654 /* make it convincing */ 2655 CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname)); 2656 memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name)); 2657 2658 sa = &ifr.ifr_addr; 2659 CTASSERT(sizeof(sa->sa_data) >= sizeof(lacp_address_slow)); 2660 2661 sa->sa_family = AF_UNSPEC; 2662 memcpy(sa->sa_data, lacp_address_slow, sizeof(lacp_address_slow)); 2663 2664 return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr)); 2665 } 2666 2667 static int 2668 aggr_multi(struct aggr_softc *sc, struct aggr_port *p, 2669 const struct aggr_multiaddr *ma, u_long cmd) 2670 { 2671 struct ifnet *ifp0 = p->p_ifp0; 2672 struct { 2673 char if_name[IFNAMSIZ]; 2674 struct sockaddr_storage if_addr; 2675 } ifr; 2676 2677 memset(&ifr, 0, sizeof(ifr)); 2678 2679 /* make it convincing */ 2680 CTASSERT(sizeof(ifr.if_name) == sizeof(ifp0->if_xname)); 2681 memcpy(ifr.if_name, ifp0->if_xname, sizeof(ifr.if_name)); 2682 2683 ifr.if_addr = ma->m_addr; 2684 2685 return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr)); 2686 } 2687 2688 static void 2689 aggr_media_status(struct ifnet *ifp, struct ifmediareq *imr) 2690 { 2691 struct aggr_softc *sc = ifp->if_softc; 2692 2693 imr->ifm_status = IFM_AVALID; 2694 imr->ifm_active = IFM_ETHER | IFM_AUTO; 2695 2696 smr_read_enter(); /* there's no reason to block... 
*/ 2697 if (SMR_PTR_GET(&sc->sc_map) != NULL) 2698 imr->ifm_status |= IFM_ACTIVE; 2699 smr_read_leave(); 2700 } 2701 2702 static int 2703 aggr_media_change(struct ifnet *ifp) 2704 { 2705 return (EOPNOTSUPP); 2706 } 2707 2708 static void 2709 aggr_update_capabilities(struct aggr_softc *sc) 2710 { 2711 struct aggr_port *p; 2712 uint32_t hardmtu = ETHER_MAX_HARDMTU_LEN; 2713 uint32_t capabilities = ~0; 2714 int set = 0; 2715 2716 /* Do not inherit LRO capabilities. */ 2717 CLR(capabilities, IFCAP_LRO); 2718 2719 rw_enter_read(&sc->sc_lock); 2720 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2721 struct ifnet *ifp0 = p->p_ifp0; 2722 2723 set = 1; 2724 capabilities &= ifp0->if_capabilities; 2725 if (ifp0->if_hardmtu < hardmtu) 2726 hardmtu = ifp0->if_hardmtu; 2727 } 2728 rw_exit_read(&sc->sc_lock); 2729 2730 sc->sc_if.if_hardmtu = hardmtu; 2731 sc->sc_if.if_capabilities = (set ? capabilities : 0); 2732 } 2733 2734 static void 2735 aggr_ptm_tx(void *arg) 2736 { 2737 struct aggr_port *p = arg; 2738 unsigned int timeout; 2739 2740 aggr_ntt(p); 2741 2742 timeout = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT) ? 
2743 AGGR_LACP_TIMEOUT_FAST : AGGR_LACP_TIMEOUT_SLOW; 2744 timeout_add_sec(&p->p_ptm_tx, aggr_periodic_times[timeout]); 2745 } 2746 2747 static inline void 2748 aggr_lacp_tlv_set(struct lacp_tlv_hdr *tlv, uint8_t type, uint8_t len) 2749 { 2750 tlv->lacp_tlv_type = type; 2751 tlv->lacp_tlv_length = sizeof(*tlv) + len; 2752 } 2753 2754 static void 2755 aggr_ntt_transmit(struct aggr_port *p) 2756 { 2757 struct aggr_softc *sc = p->p_aggr; 2758 struct arpcom *ac = &sc->sc_ac; 2759 struct ifnet *ifp = &sc->sc_if; 2760 struct ifnet *ifp0 = p->p_ifp0; 2761 struct mbuf *m; 2762 struct lacp_du *lacpdu; 2763 struct lacp_port_info *pi; 2764 struct lacp_collector_info *ci; 2765 struct ether_header *eh; 2766 int linkhdr = max_linkhdr + ETHER_ALIGN; 2767 int len = linkhdr + sizeof(*eh) + sizeof(*lacpdu); 2768 2769 m = m_gethdr(M_DONTWAIT, MT_DATA); 2770 if (m == NULL) 2771 return; 2772 2773 if (len > MHLEN) { 2774 MCLGETL(m, M_DONTWAIT, len); 2775 if (!ISSET(m->m_flags, M_EXT)) { 2776 m_freem(m); 2777 return; 2778 } 2779 } 2780 2781 m->m_pkthdr.pf.prio = sc->sc_if.if_llprio; 2782 m->m_pkthdr.len = m->m_len = len; 2783 memset(m->m_data, 0, m->m_len); 2784 m_adj(m, linkhdr); 2785 2786 eh = mtod(m, struct ether_header *); 2787 2788 CTASSERT(sizeof(eh->ether_dhost) == sizeof(lacp_address_slow)); 2789 CTASSERT(sizeof(eh->ether_shost) == sizeof(ac->ac_enaddr)); 2790 2791 memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost)); 2792 memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost)); 2793 eh->ether_type = htons(ETHERTYPE_SLOW); 2794 2795 lacpdu = (struct lacp_du *)(eh + 1); 2796 lacpdu->lacp_du_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP; 2797 lacpdu->lacp_du_sph.sph_version = LACP_VERSION; 2798 2799 pi = &lacpdu->lacp_actor_info; 2800 aggr_lacp_tlv_set(&lacpdu->lacp_actor_info_tlv, 2801 LACP_T_ACTOR, sizeof(*pi)); 2802 2803 pi->lacp_sysid.lacp_sysid_priority = htons(sc->sc_lacp_prio); 2804 CTASSERT(sizeof(pi->lacp_sysid.lacp_sysid_mac) == 2805 
sizeof(ac->ac_enaddr)); 2806 memcpy(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr, 2807 sizeof(pi->lacp_sysid.lacp_sysid_mac)); 2808 2809 pi->lacp_key = htons(ifp->if_index); 2810 2811 pi->lacp_portid.lacp_portid_priority = htons(sc->sc_lacp_port_prio); 2812 pi->lacp_portid.lacp_portid_number = htons(ifp0->if_index); 2813 2814 pi->lacp_state = p->p_actor_state; 2815 if (sc->sc_lacp_mode) 2816 SET(pi->lacp_state, LACP_STATE_ACTIVITY); 2817 if (sc->sc_lacp_timeout) 2818 SET(pi->lacp_state, LACP_STATE_TIMEOUT); 2819 2820 pi = &lacpdu->lacp_partner_info; 2821 aggr_lacp_tlv_set(&lacpdu->lacp_partner_info_tlv, 2822 LACP_T_PARTNER, sizeof(*pi)); 2823 2824 *pi = p->p_partner; 2825 2826 ci = &lacpdu->lacp_collector_info; 2827 aggr_lacp_tlv_set(&lacpdu->lacp_collector_info_tlv, 2828 LACP_T_COLLECTOR, sizeof(*ci)); 2829 ci->lacp_maxdelay = htons(0); 2830 2831 lacpdu->lacp_terminator.lacp_tlv_type = LACP_T_TERMINATOR; 2832 lacpdu->lacp_terminator.lacp_tlv_length = 0; 2833 2834 mtx_enter(&p->p_mtx); 2835 p->p_proto_counts[AGGR_PROTO_TX_LACP].c_pkts++; 2836 p->p_proto_counts[AGGR_PROTO_TX_LACP].c_bytes += m->m_pkthdr.len; 2837 mtx_leave(&p->p_mtx); 2838 2839 (void)if_enqueue(ifp0, m); 2840 } 2841 2842 static void 2843 aggr_ntt(struct aggr_port *p) 2844 { 2845 if (!timeout_pending(&p->p_txm_ntt)) 2846 timeout_add(&p->p_txm_ntt, 0); 2847 } 2848 2849 static void 2850 aggr_transmit_machine(void *arg) 2851 { 2852 struct aggr_port *p = arg; 2853 struct aggr_softc *sc = p->p_aggr; 2854 unsigned int slot; 2855 int *log; 2856 int period = hz * LACP_FAST_PERIODIC_TIME; 2857 int diff; 2858 2859 if (!aggr_lacp_enabled(sc) || !aggr_port_enabled(p)) 2860 return; 2861 2862 slot = p->p_txm_slot; 2863 log = &p->p_txm_log[slot % nitems(p->p_txm_log)]; 2864 2865 diff = ticks - *log; 2866 if (diff < period) { 2867 timeout_add(&p->p_txm_ntt, period - diff); 2868 return; 2869 } 2870 2871 *log = ticks; 2872 p->p_txm_slot = ++slot; 2873 2874 #if 0 2875 DPRINTF(sc, "%s %s ntt\n", sc->sc_if.if_xname, 
p->p_ifp0->if_xname); 2876 #endif 2877 2878 aggr_ntt_transmit(p); 2879 } 2880 2881 static void 2882 aggr_set_lacp_mode(struct aggr_softc *sc, int mode) 2883 { 2884 sc->sc_lacp_mode = mode; 2885 2886 if (mode == AGGR_LACP_MODE_PASSIVE) { 2887 struct aggr_port *p; 2888 2889 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2890 if (!ISSET(p->p_partner_state, LACP_STATE_ACTIVITY)) 2891 timeout_del(&p->p_ptm_tx); 2892 } 2893 } 2894 } 2895 2896 static void 2897 aggr_set_partner_timeout(struct aggr_port *p, int timeout) 2898 { 2899 uint8_t ostate = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT); 2900 uint8_t nstate = (timeout == AGGR_LACP_TIMEOUT_FAST) ? 2901 LACP_STATE_TIMEOUT : 0; 2902 2903 if (ostate == nstate) 2904 return; 2905 2906 if (timeout == AGGR_LACP_TIMEOUT_FAST) { 2907 SET(p->p_partner_state, LACP_STATE_TIMEOUT); 2908 timeout_add_sec(&p->p_ptm_tx, 2909 aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]); 2910 } else 2911 CLR(p->p_partner_state, LACP_STATE_TIMEOUT); 2912 } 2913 2914 static void 2915 aggr_set_lacp_timeout(struct aggr_softc *sc, int timeout) 2916 { 2917 struct aggr_port *p; 2918 2919 sc->sc_lacp_timeout = timeout; 2920 2921 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2922 if (!ISSET(p->p_actor_state, LACP_STATE_DEFAULTED)) 2923 continue; 2924 2925 aggr_set_partner_timeout(p, timeout); 2926 } 2927 } 2928 2929 static int 2930 aggr_multi_eq(const struct aggr_multiaddr *ma, 2931 const uint8_t *addrlo, const uint8_t *addrhi) 2932 { 2933 return (ETHER_IS_EQ(ma->m_addrlo, addrlo) && 2934 ETHER_IS_EQ(ma->m_addrhi, addrhi)); 2935 } 2936 2937 static int 2938 aggr_multi_add(struct aggr_softc *sc, struct ifreq *ifr) 2939 { 2940 struct ifnet *ifp = &sc->sc_if; 2941 struct aggr_port *p; 2942 struct aggr_multiaddr *ma; 2943 uint8_t addrlo[ETHER_ADDR_LEN]; 2944 uint8_t addrhi[ETHER_ADDR_LEN]; 2945 int error; 2946 2947 error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2948 if (error != 0) 2949 return (error); 2950 2951 TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) { 
2952 if (aggr_multi_eq(ma, addrlo, addrhi)) { 2953 ma->m_refs++; 2954 return (0); 2955 } 2956 } 2957 2958 ma = malloc(sizeof(*ma), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); 2959 if (ma == NULL) 2960 return (ENOMEM); 2961 2962 ma->m_refs = 1; 2963 memcpy(&ma->m_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2964 memcpy(ma->m_addrlo, addrlo, sizeof(ma->m_addrlo)); 2965 memcpy(ma->m_addrhi, addrhi, sizeof(ma->m_addrhi)); 2966 TAILQ_INSERT_TAIL(&sc->sc_multiaddrs, ma, m_entry); 2967 2968 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 2969 struct ifnet *ifp0 = p->p_ifp0; 2970 2971 if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) { 2972 log(LOG_WARNING, "%s %s: " 2973 "unable to add multicast address\n", 2974 ifp->if_xname, ifp0->if_xname); 2975 } 2976 } 2977 2978 return (0); 2979 } 2980 2981 int 2982 aggr_multi_del(struct aggr_softc *sc, struct ifreq *ifr) 2983 { 2984 struct ifnet *ifp = &sc->sc_if; 2985 struct aggr_port *p; 2986 struct aggr_multiaddr *ma; 2987 uint8_t addrlo[ETHER_ADDR_LEN]; 2988 uint8_t addrhi[ETHER_ADDR_LEN]; 2989 int error; 2990 2991 error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2992 if (error != 0) 2993 return (error); 2994 2995 TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) { 2996 if (aggr_multi_eq(ma, addrlo, addrhi)) 2997 break; 2998 } 2999 3000 if (ma == NULL) 3001 return (EINVAL); 3002 3003 if (--ma->m_refs > 0) 3004 return (0); 3005 3006 TAILQ_REMOVE(&sc->sc_multiaddrs, ma, m_entry); 3007 3008 TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { 3009 struct ifnet *ifp0 = p->p_ifp0; 3010 3011 if (aggr_multi(sc, p, ma, SIOCDELMULTI) != 0) { 3012 log(LOG_WARNING, "%s %s: " 3013 "unable to delete multicast address\n", 3014 ifp->if_xname, ifp0->if_xname); 3015 } 3016 } 3017 3018 free(ma, M_DEVBUF, sizeof(*ma)); 3019 3020 return (0); 3021 } 3022 3023 #if NKSTAT > 0 3024 static const char *aggr_proto_names[AGGR_PROTO_COUNT] = { 3025 [AGGR_PROTO_TX_LACP] = "tx-lacp", 3026 [AGGR_PROTO_TX_MARKER] = "tx-marker", 3027 [AGGR_PROTO_RX_LACP] = "rx-lacp", 3028 
[AGGR_PROTO_RX_MARKER] = "rx-marker", 3029 }; 3030 3031 struct aggr_port_kstat { 3032 struct kstat_kv interface; 3033 3034 struct { 3035 struct kstat_kv pkts; 3036 struct kstat_kv bytes; 3037 } protos[AGGR_PROTO_COUNT]; 3038 3039 struct kstat_kv rx_drops; 3040 3041 struct kstat_kv selected; 3042 struct kstat_kv nselectch; 3043 }; 3044 3045 static int 3046 aggr_port_kstat_read(struct kstat *ks) 3047 { 3048 struct aggr_port *p = ks->ks_softc; 3049 struct aggr_port_kstat *pk = ks->ks_data; 3050 unsigned int proto; 3051 3052 mtx_enter(&p->p_mtx); 3053 for (proto = 0; proto < AGGR_PROTO_COUNT; proto++) { 3054 kstat_kv_u64(&pk->protos[proto].pkts) = 3055 p->p_proto_counts[proto].c_pkts; 3056 kstat_kv_u64(&pk->protos[proto].bytes) = 3057 p->p_proto_counts[proto].c_bytes; 3058 } 3059 kstat_kv_u64(&pk->rx_drops) = p->p_rx_drops; 3060 3061 kstat_kv_bool(&pk->selected) = p->p_selected == AGGR_PORT_SELECTED; 3062 kstat_kv_u32(&pk->nselectch) = p->p_nselectch; 3063 mtx_leave(&p->p_mtx); 3064 3065 nanouptime(&ks->ks_updated); 3066 3067 return (0); 3068 } 3069 3070 static void 3071 aggr_port_kstat_attach(struct aggr_port *p) 3072 { 3073 struct aggr_softc *sc = p->p_aggr; 3074 struct ifnet *ifp = &sc->sc_if; 3075 struct ifnet *ifp0 = p->p_ifp0; 3076 struct kstat *ks; 3077 struct aggr_port_kstat *pk; 3078 unsigned int proto; 3079 3080 pk = malloc(sizeof(*pk), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); 3081 if (pk == NULL) { 3082 log(LOG_WARNING, "%s %s: unable to allocate aggr-port kstat\n", 3083 ifp->if_xname, ifp0->if_xname); 3084 return; 3085 } 3086 3087 ks = kstat_create(ifp->if_xname, 0, "aggr-port", ifp0->if_index, 3088 KSTAT_T_KV, 0); 3089 if (ks == NULL) { 3090 log(LOG_WARNING, "%s %s: unable to create aggr-port kstat\n", 3091 ifp->if_xname, ifp0->if_xname); 3092 free(pk, M_DEVBUF, sizeof(*pk)); 3093 return; 3094 } 3095 3096 kstat_kv_init(&pk->interface, "interface", KSTAT_KV_T_ISTR); 3097 strlcpy(kstat_kv_istr(&pk->interface), ifp0->if_xname, 3098 
sizeof(kstat_kv_istr(&pk->interface))); 3099 3100 for (proto = 0; proto < AGGR_PROTO_COUNT; proto++) { 3101 char kvname[KSTAT_KV_NAMELEN]; 3102 3103 snprintf(kvname, sizeof(kvname), 3104 "%s-pkts", aggr_proto_names[proto]); 3105 kstat_kv_unit_init(&pk->protos[proto].pkts, 3106 kvname, KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); 3107 3108 snprintf(kvname, sizeof(kvname), 3109 "%s-bytes", aggr_proto_names[proto]); 3110 kstat_kv_unit_init(&pk->protos[proto].bytes, 3111 kvname, KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES); 3112 } 3113 3114 kstat_kv_unit_init(&pk->rx_drops, "rx-drops", 3115 KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS); 3116 3117 kstat_kv_init(&pk->selected, "selected", KSTAT_KV_T_BOOL); 3118 kstat_kv_init(&pk->nselectch, "select-changes", KSTAT_KV_T_COUNTER32); 3119 3120 ks->ks_softc = p; 3121 ks->ks_data = pk; 3122 ks->ks_datalen = sizeof(*pk); 3123 ks->ks_read = aggr_port_kstat_read; 3124 3125 kstat_install(ks); 3126 3127 p->p_kstat = ks; 3128 } 3129 3130 static void 3131 aggr_port_kstat_detach(struct aggr_port *p) 3132 { 3133 struct kstat *ks = p->p_kstat; 3134 struct aggr_port_kstat *pk; 3135 3136 if (ks == NULL) 3137 return; 3138 3139 p->p_kstat = NULL; 3140 3141 kstat_remove(ks); 3142 pk = ks->ks_data; 3143 kstat_destroy(ks); 3144 3145 free(pk, M_DEVBUF, sizeof(*pk)); 3146 } 3147 #endif 3148