xref: /openbsd-src/sys/net/if_aggr.c (revision 505ee9ea3b177e2387d907a91ca7da069f3f14d8)
1 /*	$OpenBSD: if_aggr.c,v 1.32 2020/07/10 13:26:41 patrick Exp $ */
2 
3 /*
4  * Copyright (c) 2019 The University of Queensland
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /*
20  * This driver implements 802.1AX Link Aggregation (formerly 802.3ad)
21  *
 * The specification describes systems with multiple ports that
 * can dynamically form aggregations. The relationship between ports
 * and aggregations is such that arbitrary ports connected to ports
 * on other systems may move between aggregations, and there can be
26  * as many aggregations as ports. An aggregation in this model is
27  * effectively an interface, and becomes the point that Ethernet traffic
28  * enters and leaves the system. The spec also contains a description
29  * of the Link Aggregation Control Protocol (LACP) for use on the wire,
30  * and how to process it and select ports and aggregations based on
31  * it.
32  *
33  * This driver implements a simplified or constrained model where each
34  * aggr(4) interface is effectively an independent system, and will
35  * only support one aggregation. This supports the use of the kernel
36  * interface as a static entity that is created and configured once,
37  * and has the link "come up" when that one aggregation is selected
38  * by the LACP protocol.
39  */
40 
41 /*
42  * This code was written by David Gwynne <dlg@uq.edu.au> as part
43  * of the Information Technology Infrastructure Group (ITIG) in the
44  * Faculty of Engineering, Architecture and Information Technology
45  * (EAIT).
46  */
47 
48 /*
49  * TODO:
50  *
51  * - add locking
52  * - figure out the Ready_N and Ready logic
53  */
54 
55 #include "bpfilter.h"
56 
57 #include <sys/param.h>
58 #include <sys/kernel.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/queue.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/systm.h>
65 #include <sys/syslog.h>
66 #include <sys/rwlock.h>
67 #include <sys/percpu.h>
68 #include <sys/smr.h>
69 #include <sys/task.h>
70 
71 #include <net/if.h>
72 #include <net/if_dl.h>
73 #include <net/if_types.h>
74 
75 #include <net/if_media.h>
76 
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 
80 #include <crypto/siphash.h> /* if_trunk.h uses siphash bits */
81 #include <net/if_trunk.h>
82 
83 #if NBPFILTER > 0
84 #include <net/bpf.h>
85 #endif
86 
87 /*
88  * Link Aggregation Control Protocol (LACP)
89  */
90 
/*
 * Header found directly after the Ethernet header of a Slow Protocols
 * frame. aggr_input() switches on sph_subtype to pick out LACP and
 * Marker PDUs.
 */
struct ether_slowproto_hdr {
	uint8_t		sph_subtype;	/* SLOWPROTOCOLS_SUBTYPE_* */
	uint8_t		sph_version;
} __packed;
95 
/* values of ether_slowproto_hdr.sph_subtype handled by aggr_input() */
#define SLOWPROTOCOLS_SUBTYPE_LACP	1
#define SLOWPROTOCOLS_SUBTYPE_LACP_MARKER \
					2

#define LACP_VERSION			1

/* protocol timing constants; presumably seconds as in 802.1AX -- confirm */
#define LACP_FAST_PERIODIC_TIME		1
#define LACP_SLOW_PERIODIC_TIME		30
#define LACP_TIMEOUT_FACTOR		3
#define LACP_AGGREGATION_WAIT_TIME	2

#define LACP_TX_MACHINE_RATE		3 /* per LACP_FAST_PERIODIC_TIME */

/* destination MAC addresses; LACP_ADDR_SLOW initialises lacp_address_slow */
#define LACP_ADDR_C_BRIDGE		{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }
#define LACP_ADDR_SLOW			{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }
#define LACP_ADDR_NON_TPMR_BRIDGE	{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03 }
112 
/*
 * Type/length header that precedes every TLV in struct lacp_du and
 * struct marker_pdu.
 */
struct lacp_tlv_hdr {
	uint8_t			lacp_tlv_type;
	uint8_t			lacp_tlv_length;
} __packed __aligned(2);
117 
118 /* LACP TLV types */
119 
/* values for lacp_tlv_hdr.lacp_tlv_type in an LACPDU */
#define LACP_T_TERMINATOR		0x00
#define LACP_T_ACTOR			0x01
#define LACP_T_PARTNER			0x02
#define LACP_T_COLLECTOR		0x03

/* LACPv2 TLV types */

#define LACP_T_PORT_ALGORITHM		0x04
#define LACP_T_PORT_CONVERSATION_ID_DIGEST \
					0x05
#define LACP_T_PORT_CONVERSATION_MASK	0x06
#define LACP_T_PORT_CONVERSATION_SERVICE_MAPPING \
					0x0a
133 
/*
 * System identifier as carried in an LACPDU: a priority and a MAC
 * address. aggr_get_trunk() converts the priority with ntohs(), so it
 * is held in network byte order.
 */
struct lacp_sysid {
	uint16_t		lacp_sysid_priority;
	uint8_t			lacp_sysid_mac[ETHER_ADDR_LEN];
} __packed __aligned(2);
138 
/*
 * Port identifier as carried in an LACPDU. Both fields are converted
 * with ntohs() by aggr_get_trunk() before being reported.
 */
struct lacp_portid {
	uint16_t		lacp_portid_priority;
	uint16_t		lacp_portid_number;
} __packed __aligned(2);
143 
/*
 * Body of the Actor and Partner information TLVs in struct lacp_du.
 * Also used by struct aggr_port to remember the partner's parameters.
 */
struct lacp_port_info {
	struct lacp_sysid	lacp_sysid;
	uint16_t		lacp_key;
	struct lacp_portid	lacp_portid;
	uint8_t			lacp_state;	/* LACP_STATE_* bits */
	uint8_t			lacp_reserved[3];
} __packed __aligned(2);
151 
/* bits in lacp_port_info.lacp_state and aggr_port.p_actor_state */
#define LACP_STATE_ACTIVITY		(1 << 0)
#define LACP_STATE_TIMEOUT		(1 << 1)
#define LACP_STATE_AGGREGATION		(1 << 2)
#define LACP_STATE_SYNC			(1 << 3)
#define LACP_STATE_COLLECTING		(1 << 4)
#define LACP_STATE_DISTRIBUTING		(1 << 5)
#define LACP_STATE_DEFAULTED		(1 << 6)
#define LACP_STATE_EXPIRED		(1 << 7)
160 
/* Body of the Collector information TLV in struct lacp_du */
struct lacp_collector_info {
	uint16_t		lacp_maxdelay;
	uint8_t			lacp_reserved[12];
} __packed __aligned(2);
165 
/*
 * Layout of an LACPDU as it follows the Ethernet header: the slow
 * protocols header, then the actor, partner, and collector TLVs, a
 * terminator TLV, and padding.
 */
struct lacp_du {
	struct ether_slowproto_hdr
				lacp_du_sph;
	struct lacp_tlv_hdr	lacp_actor_info_tlv;
	struct lacp_port_info	lacp_actor_info;
	struct lacp_tlv_hdr	lacp_partner_info_tlv;
	struct lacp_port_info	lacp_partner_info;
	struct lacp_tlv_hdr	lacp_collector_info_tlv;
	struct lacp_collector_info
				lacp_collector_info;
	/* other TLVs go here */
	struct lacp_tlv_hdr	lacp_terminator;
	uint8_t			lacp_pad[50];
} __packed __aligned(2);
180 
181 /* Marker TLV types */
182 
/* values for lacp_tlv_hdr.lacp_tlv_type in a Marker PDU */
#define MARKER_T_INFORMATION		0x01
#define MARKER_T_RESPONSE		0x02
185 
/* Body of the information/response TLV in struct marker_pdu */
struct marker_info {
	uint16_t		marker_requester_port;
	uint8_t			marker_requester_system[ETHER_ADDR_LEN];
	uint8_t			marker_requester_txid[4];
	uint8_t			marker_pad[2];
} __packed __aligned(2);
192 
/*
 * Layout of a Marker PDU as it follows the Ethernet header: the slow
 * protocols header, one marker TLV, a terminator TLV, and padding.
 */
struct marker_pdu {
	struct ether_slowproto_hdr
				marker_sph;

	struct lacp_tlv_hdr	marker_info_tlv;
	struct marker_info	marker_info;
	struct lacp_tlv_hdr	marker_terminator;
	uint8_t			marker_pad[90];
} __packed __aligned(2);
202 
/*
 * Receive machine states. The numeric values index
 * lacp_rxm_state_names[], so the two must be kept in the same order.
 */
enum lacp_rxm_state {
	LACP_RXM_S_BEGIN		= 0,
	LACP_RXM_S_INITIALIZE		= 1,
	LACP_RXM_S_PORT_DISABLED	= 2,
	LACP_RXM_S_EXPIRED		= 3,
	LACP_RXM_S_LACP_DISABLED	= 4,
	LACP_RXM_S_DEFAULTED		= 5,
	LACP_RXM_S_CURRENT		= 6,
};
212 
/*
 * Receive machine events. The numeric values index
 * lacp_rxm_event_names[], so the two must be kept in the same order.
 */
enum lacp_rxm_event {
	LACP_RXM_E_BEGIN		= 0,
	LACP_RXM_E_UCT			= 1,
	LACP_RXM_E_PORT_MOVED		= 2,
	LACP_RXM_E_NOT_PORT_MOVED	= 3,
	LACP_RXM_E_PORT_ENABLED		= 4,
	LACP_RXM_E_NOT_PORT_ENABLED	= 5,
	LACP_RXM_E_LACP_ENABLED		= 6,
	LACP_RXM_E_NOT_LACP_ENABLED	= 7,
	/* CtrlMuxN:M_UNITDATA.indication(LACPDU) */
	LACP_RXM_E_LACPDU		= 8,
	/* current_while_timer expired */
	LACP_RXM_E_TIMER_EXPIRED	= 9,
};
225 
/*
 * Mux machine states. The numeric values index
 * lacp_mux_state_names[], so the two must be kept in the same order.
 */
enum lacp_mux_state {
	LACP_MUX_S_BEGIN		= 0,
	LACP_MUX_S_DETACHED		= 1,
	LACP_MUX_S_WAITING		= 2,
	LACP_MUX_S_ATTACHED		= 3,
	LACP_MUX_S_DISTRIBUTING		= 4,
	LACP_MUX_S_COLLECTING		= 5,
};
234 
/*
 * Mux machine events. The numeric values index
 * lacp_mux_event_names[], so the two must be kept in the same order.
 */
enum lacp_mux_event {
	LACP_MUX_E_BEGIN		= 0,
	LACP_MUX_E_SELECTED		= 1,
	LACP_MUX_E_STANDBY		= 2,
	LACP_MUX_E_UNSELECTED		= 3,
	LACP_MUX_E_READY		= 4,
	LACP_MUX_E_SYNC			= 5,
	LACP_MUX_E_NOT_SYNC		= 6,
	LACP_MUX_E_COLLECTING		= 7,
	LACP_MUX_E_NOT_COLLECTING	= 8,
};
246 
247 /*
248  * LACP variables
249  */
250 
/* destination address matched by ETHER_IS_SLOWADDR() */
static const uint8_t lacp_address_slow[ETHER_ADDR_LEN] = LACP_ADDR_SLOW;
252 
/* printable names, in the same order as enum lacp_rxm_state */
static const char *lacp_rxm_state_names[] = {
	"BEGIN",
	"INITIALIZE",
	"PORT_DISABLED",
	"EXPIRED",
	"LACP_DISABLED",
	"DEFAULTED",
	"CURRENT",
};
262 
/* printable names, in the same order as enum lacp_rxm_event */
static const char *lacp_rxm_event_names[] = {
	"BEGIN",
	"UCT",
	"port_moved",
	"!port_moved",
	"port_enabled",
	"!port_enabled",
	"LACP_Enabled",
	"!LACP_Enabled",
	"LACPDU",
	"current_while_timer expired",
};
275 
/* printable names, in the same order as enum lacp_mux_state */
static const char *lacp_mux_state_names[] = {
	"BEGIN",
	"DETACHED",
	"WAITING",
	"ATTACHED",
	"DISTRIBUTING",
	"COLLECTING",
};
284 
/* printable names, in the same order as enum lacp_mux_event */
static const char *lacp_mux_event_names[] = {
	"BEGIN",
	"Selected == SELECTED",
	"Selected == STANDBY",
	"Selected == UNSELECTED",
	"Ready",
	"Partner.Sync",
	"! Partner.Sync",
	"Partner.Collecting",
	"! Partner.Collecting",
};
296 
297 /*
298  * aggr interface
299  */
300 
/* AGGR_MAX_PORTS also sizes the per-flow port table in struct aggr_map */
#define AGGR_MAX_PORTS		32
#define AGGR_MAX_SLOW_PKTS	(AGGR_MAX_PORTS * 3)
303 
/*
 * A multicast address (range) configured on the aggr interface, kept on
 * aggr_softc.sc_multiaddrs so it can be applied to ports as they are
 * added (see the aggr_multi() loop in aggr_add_port()).
 */
struct aggr_multiaddr {
	TAILQ_ENTRY(aggr_multiaddr)
				m_entry;
	unsigned int		m_refs;
	uint8_t			m_addrlo[ETHER_ADDR_LEN];
	uint8_t			m_addrhi[ETHER_ADDR_LEN];
	struct sockaddr_storage m_addr;
};
TAILQ_HEAD(aggr_multiaddrs, aggr_multiaddr);
313 
struct aggr_softc;

/*
 * Value of the Selected variable for a port. The numeric values index
 * aggr_port_selected_names[], so the two must be kept in the same order.
 */
enum aggr_port_selected {
	AGGR_PORT_UNSELECTED	= 0,
	AGGR_PORT_SELECTED	= 1,
	AGGR_PORT_STANDBY	= 2,
};
321 
/* printable names, in the same order as enum aggr_port_selected */
static const char *aggr_port_selected_names[] = {
	"UNSELECTED",
	"SELECTED",
	"STANDBY",
};
327 
/*
 * Per-port state for an Ethernet interface that has been added to an
 * aggr(4) interface. Allocated and initialised by aggr_add_port().
 */
struct aggr_port {
	struct ifnet		*p_ifp0;	/* the member interface */

	/* values saved from p_ifp0 when the port was added */
	uint8_t			 p_lladdr[ETHER_ADDR_LEN];
	uint32_t		 p_mtu;

	/* p_ifp0's original handlers, replaced by aggr_p_ioctl/_output */
	int (*p_ioctl)(struct ifnet *, u_long, caddr_t);
	int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);

	struct task		 p_lhook;	/* link state hook */
	struct task		 p_dhook;	/* detach hook */

	struct aggr_softc	*p_aggr;	/* owning aggr interface */
	TAILQ_ENTRY(aggr_port)	 p_entry;	/* on sc_ports */

	unsigned int		 p_collecting;	/* aggr_input() drops if 0 */
	unsigned int		 p_distributing;
	TAILQ_ENTRY(aggr_port)	 p_entry_distributing;
	TAILQ_ENTRY(aggr_port)	 p_entry_muxen;

	/* Partner information */
	enum aggr_port_selected	 p_muxed;
	enum aggr_port_selected	 p_selected;		/* Selected */
	struct lacp_port_info	 p_partner;
#define p_partner_state		 p_partner.lacp_state

	uint8_t			 p_actor_state;	/* LACP_STATE_* bits */
	uint8_t			 p_lacp_timeout;

	struct timeout		 p_current_while_timer;
	struct timeout		 p_wait_while_timer;

	/* Receive machine */
	enum lacp_rxm_state	 p_rxm_state;
	struct mbuf_queue	 p_rxm_mq;	/* filled by aggr_input() */
	struct task		 p_rxm_task;	/* runs aggr_rx() */

	/* Periodic Transmission machine */
	struct timeout		 p_ptm_tx;

	/* Mux machine */
	enum lacp_mux_state	 p_mux_state;

	/* Transmit machine */
	int			 p_txm_log[LACP_TX_MACHINE_RATE];
	unsigned int		 p_txm_slot;
	struct timeout		 p_txm_ntt;
};

TAILQ_HEAD(aggr_port_list, aggr_port);
379 
/*
 * Table used by aggr_transmit() to pick the output port for a packet:
 * the slot is the packet's flow id modulo AGGR_MAX_PORTS.
 */
struct aggr_map {
	struct ifnet		*m_ifp0s[AGGR_MAX_PORTS];
};
383 
/* Per-interface state for an aggr(4) interface */
struct aggr_softc {
	struct arpcom		 sc_ac;
#define sc_if			 sc_ac.ac_if
	unsigned int		 sc_dead;	/* set on destroy; ioctls fail */
	unsigned int		 sc_promisc;
	struct ifmedia		 sc_media;

	struct aggr_multiaddrs	 sc_multiaddrs;

	unsigned int		 sc_mix;	/* random, mixed into flow ids */

	struct aggr_map		 sc_maps[2];
	unsigned int		 sc_map_gen;
	/* read with SMR_PTR_GET() in the output path; NULL means no links */
	struct aggr_map		*sc_map;

	struct rwlock		 sc_lock;
	struct aggr_port_list	 sc_ports;
	struct aggr_port_list	 sc_distributing;
	struct aggr_port_list	 sc_muxen;
	unsigned int		 sc_nports;
	unsigned int		 sc_ndistributing;

	struct timeout		 sc_tick;

	uint8_t			 sc_lacp_mode;
#define AGGR_LACP_MODE_PASSIVE		0
#define AGGR_LACP_MODE_ACTIVE		1
	uint8_t			 sc_lacp_timeout;
#define AGGR_LACP_TIMEOUT_SLOW		0
#define AGGR_LACP_TIMEOUT_FAST		1
	uint16_t		 sc_lacp_prio;
	uint16_t		 sc_lacp_port_prio;

	/* reported by aggr_get_trunk() after ntohs() conversion */
	struct lacp_sysid	 sc_partner_system;
	uint16_t		 sc_partner_key;
};
420 
/* printf(9) that only fires while IFF_DEBUG is set on the aggr interface */
#define DPRINTF(_sc, fmt...)	do { \
	if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \
		printf(fmt); \
} while (0)
425 
/*
 * Periodic time for each AGGR_LACP_TIMEOUT_* setting. The array size
 * also bounds the timeout values accepted by aggr_set_options().
 */
static const unsigned int aggr_periodic_times[] = {
	[AGGR_LACP_TIMEOUT_SLOW] = LACP_SLOW_PERIODIC_TIME,
	[AGGR_LACP_TIMEOUT_FAST] = LACP_FAST_PERIODIC_TIME,
};
430 
/* interface cloner callbacks, see aggr_cloner */
static int	aggr_clone_create(struct if_clone *, int);
static int	aggr_clone_destroy(struct ifnet *);

/* ifnet handlers installed by aggr_clone_create() */
static int	aggr_ioctl(struct ifnet *, u_long, caddr_t);
static void	aggr_start(struct ifqueue *);
static int	aggr_enqueue(struct ifnet *, struct mbuf *);

/* ifmedia callbacks */
static int	aggr_media_change(struct ifnet *);
static void	aggr_media_status(struct ifnet *, struct ifmediareq *);

static int	aggr_up(struct aggr_softc *);
static int	aggr_down(struct aggr_softc *);
static int	aggr_iff(struct aggr_softc *);

/* hooks and handlers installed on member ports by aggr_add_port() */
static void	aggr_p_linkch(void *);
static void	aggr_p_detach(void *);
static int	aggr_p_ioctl(struct ifnet *, u_long, caddr_t);
static int	aggr_p_output(struct ifnet *, struct mbuf *,
		    struct sockaddr *, struct rtentry *);

/* configuration helpers, mostly called from aggr_ioctl() */
static int	aggr_get_trunk(struct aggr_softc *, struct trunk_reqall *);
static int	aggr_set_options(struct aggr_softc *,
		    const struct trunk_opts *);
static int	aggr_get_options(struct aggr_softc *, struct trunk_opts *);
static int	aggr_set_lladdr(struct aggr_softc *, const struct ifreq *);
static int	aggr_set_mtu(struct aggr_softc *, uint32_t);
static void	aggr_p_dtor(struct aggr_softc *, struct aggr_port *,
		    const char *);
static int	aggr_p_setlladdr(struct aggr_port *, const uint8_t *);
static int	aggr_p_set_mtu(struct aggr_port *, uint32_t);
static int	aggr_add_port(struct aggr_softc *,
		    const struct trunk_reqport *);
static int	aggr_get_port(struct aggr_softc *, struct trunk_reqport *);
static int	aggr_del_port(struct aggr_softc *,
		    const struct trunk_reqport *);
static int	aggr_group(struct aggr_softc *, struct aggr_port *, u_long);
static int	aggr_multi(struct aggr_softc *, struct aggr_port *,
		    const struct aggr_multiaddr *, u_long);
static void	aggr_update_capabilities(struct aggr_softc *);
static void	aggr_set_lacp_mode(struct aggr_softc *, int);
static void	aggr_set_lacp_timeout(struct aggr_softc *, int);
static int	aggr_multi_add(struct aggr_softc *, struct ifreq *);
static int	aggr_multi_del(struct aggr_softc *, struct ifreq *);

static void	aggr_map(struct aggr_softc *);

/* Receive machine */
static void	aggr_record_default(struct aggr_softc *, struct aggr_port *);
static void	aggr_current_while_timer(void *);
static void	aggr_wait_while_timer(void *);
static void	aggr_rx(void *);
static void	aggr_rxm_ev(struct aggr_softc *, struct aggr_port *,
		    enum lacp_rxm_event, const struct lacp_du *);
/* feed the receive machine an event that carries no LACPDU */
#define aggr_rxm(_sc, _p, _ev) \
		aggr_rxm_ev((_sc), (_p), (_ev), NULL)
/* feed the receive machine a received LACPDU */
#define aggr_rxm_lacpdu(_sc, _p, _lacpdu) \
		aggr_rxm_ev((_sc), (_p), LACP_RXM_E_LACPDU, (_lacpdu))

/* Mux machine */
static void	aggr_mux(struct aggr_softc *, struct aggr_port *,
		    enum lacp_mux_event);
static int	aggr_mux_ev(struct aggr_softc *, struct aggr_port *,
		    enum lacp_mux_event, int *);

static void	aggr_set_partner_timeout(struct aggr_port *, int);

/* Periodic Transmission machine */
static void	aggr_ptm_tx(void *);

/* Transmit machine */
static void	aggr_transmit_machine(void *);
static void	aggr_ntt(struct aggr_port *);
static void	aggr_ntt_transmit(struct aggr_port *);

static void	aggr_set_selected(struct aggr_port *, enum aggr_port_selected,
		    enum lacp_mux_event);
static void	aggr_unselected(struct aggr_port *);

static void	aggr_selection_logic(struct aggr_softc *, struct aggr_port *);
506 
/* true if the Ethernet address _a is the Slow Protocols multicast address */
#define ETHER_IS_SLOWADDR(_a)	ETHER_IS_EQ((_a), lacp_address_slow)

/* cloner for "aggr" interfaces; registered by aggrattach() */
static struct if_clone aggr_cloner =
    IF_CLONE_INITIALIZER("aggr", aggr_clone_create, aggr_clone_destroy);
511 
512 void
513 aggrattach(int count)
514 {
515 	if_clone_attach(&aggr_cloner);
516 }
517 
518 static int
519 aggr_clone_create(struct if_clone *ifc, int unit)
520 {
521 	struct aggr_softc *sc;
522 	struct ifnet *ifp;
523 
524 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
525 	if (sc == NULL)
526 		return (ENOMEM);
527 
528 	sc->sc_mix = arc4random();
529 
530 	ifp = &sc->sc_if;
531 
532 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
533 	    ifc->ifc_name, unit);
534 
535 	TAILQ_INIT(&sc->sc_multiaddrs);
536 	rw_init(&sc->sc_lock, "aggrlk");
537 	TAILQ_INIT(&sc->sc_ports);
538 	sc->sc_nports = 0;
539 	TAILQ_INIT(&sc->sc_distributing);
540 	sc->sc_ndistributing = 0;
541 	TAILQ_INIT(&sc->sc_muxen);
542 
543 	sc->sc_map_gen = 0;
544 	sc->sc_map = NULL; /* no links yet */
545 
546 	sc->sc_lacp_mode = AGGR_LACP_MODE_ACTIVE;
547 	sc->sc_lacp_timeout = AGGR_LACP_TIMEOUT_SLOW;
548 	sc->sc_lacp_prio = 0x8000; /* medium */
549 	sc->sc_lacp_port_prio = 0x8000; /* medium */
550 
551 	ifmedia_init(&sc->sc_media, 0, aggr_media_change, aggr_media_status);
552 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
553 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
554 
555 	ifp->if_softc = sc;
556 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
557 	ifp->if_ioctl = aggr_ioctl;
558 	ifp->if_qstart = aggr_start;
559 	ifp->if_enqueue = aggr_enqueue;
560 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
561 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
562 	ifp->if_link_state = LINK_STATE_DOWN;
563 	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
564 	ether_fakeaddr(ifp);
565 
566 	if_counters_alloc(ifp);
567 	if_attach(ifp);
568 	ether_ifattach(ifp);
569 
570 	ifp->if_llprio = IFQ_MAXPRIO;
571 
572 	return (0);
573 }
574 
575 static int
576 aggr_clone_destroy(struct ifnet *ifp)
577 {
578 	struct aggr_softc *sc = ifp->if_softc;
579 	struct aggr_port *p;
580 
581 	NET_LOCK();
582 	sc->sc_dead = 1;
583 
584 	if (ISSET(ifp->if_flags, IFF_RUNNING))
585 		aggr_down(sc);
586 	NET_UNLOCK();
587 
588 	ether_ifdetach(ifp);
589 	if_detach(ifp);
590 
591 	/* last ref, no need to lock. aggr_p_dtor locks anyway */
592 	NET_LOCK();
593 	while ((p = TAILQ_FIRST(&sc->sc_ports)) != NULL)
594 		aggr_p_dtor(sc, p, "destroy");
595 	NET_UNLOCK();
596 
597 	free(sc, M_DEVBUF, sizeof(*sc));
598 
599 	return (0);
600 }
601 
602 /*
603  * LACP_Enabled
604  */
605 static inline int
606 aggr_lacp_enabled(struct aggr_softc *sc)
607 {
608 	struct ifnet *ifp = &sc->sc_if;
609 	return (ISSET(ifp->if_flags, IFF_RUNNING));
610 }
611 
612 /*
613  * port_enabled
614  */
615 static int
616 aggr_port_enabled(struct aggr_port *p)
617 {
618 	struct ifnet *ifp0 = p->p_ifp0;
619 
620 	if (!ISSET(ifp0->if_flags, IFF_RUNNING))
621 		return (0);
622 
623 	if (!LINK_STATE_IS_UP(ifp0->if_link_state))
624 		return (0);
625 
626 	return (1);
627 }
628 
629 /*
630  * port_moved
631  *
632  * This variable is set to TRUE if the Receive machine for an Aggregation
633  * Port is in the PORT_DISABLED state, and the combination of
634  * Partner_Oper_System and Partner_Oper_Port_Number in use by that
635  * Aggregation Port has been received in an incoming LACPDU on a
636  * different Aggregation Port. This variable is set to FALSE once the
637  * INITIALIZE state of the Receive machine has set the Partner information
638  * for the Aggregation Port to administrative default values.
639  *
640  * Value: Boolean
641 */
static int
aggr_port_moved(struct aggr_softc *sc, struct aggr_port *p)
{
	/* stub: neither argument is examined, a port is never reported moved */
	return (0);
}
647 
648 static void
649 aggr_transmit(struct aggr_softc *sc, const struct aggr_map *map, struct mbuf *m)
650 {
651 	struct ifnet *ifp = &sc->sc_if;
652 	struct ifnet *ifp0;
653 	uint16_t flow = 0;
654 
655 #if NBPFILTER > 0
656 	{
657 		caddr_t if_bpf = ifp->if_bpf;
658 		if (if_bpf)
659 			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
660 	}
661 #endif
662 
663 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
664 		flow = m->m_pkthdr.ph_flowid;
665 
666 	ifp0 = map->m_ifp0s[flow % AGGR_MAX_PORTS];
667 
668 	if (if_enqueue(ifp0, m) != 0)
669 		counters_inc(ifp->if_counters, ifc_oerrors);
670 }
671 
672 static int
673 aggr_enqueue(struct ifnet *ifp, struct mbuf *m)
674 {
675 	struct aggr_softc *sc;
676 	const struct aggr_map *map;
677 	int error = 0;
678 
679 	if (!ifq_is_priq(&ifp->if_snd))
680 		return (if_enqueue_ifq(ifp, m));
681 
682 	sc = ifp->if_softc;
683 
684 	smr_read_enter();
685 	map = SMR_PTR_GET(&sc->sc_map);
686 	if (__predict_false(map == NULL)) {
687 		m_freem(m);
688 		error = ENETDOWN;
689 	} else {
690 		counters_pkt(ifp->if_counters,
691 		    ifc_opackets, ifc_obytes, m->m_pkthdr.len);
692 		aggr_transmit(sc, map, m);
693 	}
694 	smr_read_leave();
695 
696 	return (error);
697 }
698 
699 static void
700 aggr_start(struct ifqueue *ifq)
701 {
702 	struct ifnet *ifp = ifq->ifq_if;
703 	struct aggr_softc *sc = ifp->if_softc;
704 	const struct aggr_map *map;
705 
706 	smr_read_enter();
707 	map = SMR_PTR_GET(&sc->sc_map);
708 	if (__predict_false(map == NULL))
709 		ifq_purge(ifq);
710 	else {
711 		struct mbuf *m;
712 
713 		while ((m = ifq_dequeue(ifq)) != NULL)
714 			aggr_transmit(sc, map, m);
715 	}
716 	smr_read_leave();
717 }
718 
719 static inline int
720 aggr_eh_is_slow(const struct ether_header *eh)
721 {
722 	return (ETHER_IS_SLOWADDR(eh->ether_dhost) &&
723 	    eh->ether_type == htons(ETHERTYPE_SLOW));
724 }
725 
726 static int
727 aggr_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
728 {
729 	struct ether_header *eh;
730 	struct aggr_port *p = cookie;
731 	struct aggr_softc *sc = p->p_aggr;
732 	struct ifnet *ifp = &sc->sc_if;
733 	int hlen = sizeof(*eh);
734 
735 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
736 		goto drop;
737 
738 	eh = mtod(m, struct ether_header *);
739 	if (!ISSET(m->m_flags, M_VLANTAG) &&
740 	    __predict_false(aggr_eh_is_slow(eh))) {
741 		struct ether_slowproto_hdr *sph;
742 
743 		hlen += sizeof(*sph);
744 		if (m->m_len < hlen) {
745 			m = m_pullup(m, hlen);
746 			if (m == NULL) {
747 				/* short++ */
748 				return (1);
749 			}
750 			eh = mtod(m, struct ether_header *);
751 		}
752 
753 		sph = (struct ether_slowproto_hdr *)(eh + 1);
754 		switch (sph->sph_subtype) {
755 		case SLOWPROTOCOLS_SUBTYPE_LACP:
756 		case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER:
757 			if (mq_enqueue(&p->p_rxm_mq, m) == 0)
758 				task_add(systq, &p->p_rxm_task);
759 			return (1);
760 		default:
761 			break;
762 		}
763 	}
764 
765 	if (__predict_false(!p->p_collecting))
766 		goto drop;
767 
768 	if (!ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
769 		m->m_pkthdr.ph_flowid = ifp0->if_index ^ sc->sc_mix;
770 
771 	if_vinput(ifp, m);
772 
773 	return (1);
774 
775 drop:
776 	m_freem(m);
777 	return (1);
778 }
779 
780 static int
781 aggr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
782 {
783 	struct aggr_softc *sc = ifp->if_softc;
784 	struct ifreq *ifr = (struct ifreq *)data;
785 	int error = 0;
786 
787 	if (sc->sc_dead)
788 		return (ENXIO);
789 
790 	switch (cmd) {
791 	case SIOCSIFADDR:
792 		break;
793 
794 	case SIOCSIFFLAGS:
795 		if (ISSET(ifp->if_flags, IFF_UP)) {
796 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
797 				error = aggr_up(sc);
798 			else
799 				error = ENETRESET;
800 		} else {
801 			if (ISSET(ifp->if_flags, IFF_RUNNING))
802 				error = aggr_down(sc);
803 		}
804 		break;
805 
806 	case SIOCSIFLLADDR:
807 		error = aggr_set_lladdr(sc, ifr);
808 		break;
809 
810 	case SIOCSTRUNK:
811 		error = suser(curproc);
812 		if (error != 0)
813 			break;
814 
815 		if (((struct trunk_reqall *)data)->ra_proto !=
816 		    TRUNK_PROTO_LACP) {
817 			error = EPROTONOSUPPORT;
818 			break;
819 		}
820 
821 		/* nop */
822 		break;
823 	case SIOCGTRUNK:
824 		error = aggr_get_trunk(sc, (struct trunk_reqall *)data);
825 		break;
826 
827 	case SIOCSTRUNKOPTS:
828 		error = suser(curproc);
829 		if (error != 0)
830 			break;
831 
832 		error = aggr_set_options(sc, (struct trunk_opts *)data);
833 		break;
834 
835 	case SIOCGTRUNKOPTS:
836 		error = aggr_get_options(sc, (struct trunk_opts *)data);
837 		break;
838 
839 	case SIOCGTRUNKPORT:
840 		error = aggr_get_port(sc, (struct trunk_reqport *)data);
841 		break;
842 	case SIOCSTRUNKPORT:
843 		error = suser(curproc);
844 		if (error != 0)
845 			break;
846 
847 		error = aggr_add_port(sc, (struct trunk_reqport *)data);
848 		break;
849 	case SIOCSTRUNKDELPORT:
850 		error = suser(curproc);
851 		if (error != 0)
852 			break;
853 
854 		error = aggr_del_port(sc, (struct trunk_reqport *)data);
855 		break;
856 
857 	case SIOCSIFMTU:
858 		error = aggr_set_mtu(sc, ifr->ifr_mtu);
859 		break;
860 
861 	case SIOCADDMULTI:
862 		error = aggr_multi_add(sc, ifr);
863 		break;
864 	case SIOCDELMULTI:
865 		error = aggr_multi_del(sc, ifr);
866 		break;
867 
868 	case SIOCSIFMEDIA:
869 		error = EOPNOTSUPP;
870 		break;
871 	case SIOCGIFMEDIA:
872 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
873 		break;
874 
875 	default:
876 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
877 		break;
878 	}
879 
880 	if (error == ENETRESET)
881 		error = aggr_iff(sc);
882 
883 	return (error);
884 }
885 
886 static int
887 aggr_get_trunk(struct aggr_softc *sc, struct trunk_reqall *ra)
888 {
889 	struct ifnet *ifp = &sc->sc_if;
890 	struct trunk_reqport rp;
891 	struct aggr_port *p;
892 	size_t size = ra->ra_size;
893 	caddr_t ubuf = (caddr_t)ra->ra_port;
894 	struct lacp_opreq *req;
895 	uint8_t state = 0;
896 	int error = 0;
897 
898 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
899 		SET(state, LACP_STATE_ACTIVITY);
900 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
901 		SET(state, LACP_STATE_TIMEOUT);
902 
903 	ra->ra_proto = TRUNK_PROTO_LACP;
904 	memset(&ra->ra_psc, 0, sizeof(ra->ra_psc));
905 
906 	/*
907 	 * aggr(4) does not support Individual links so don't bother
908 	 * with portprio, portno, and state, as per the spec.
909 	 */
910 
911 	req = &ra->ra_lacpreq;
912 	req->actor_prio = sc->sc_lacp_prio;
913 	CTASSERT(sizeof(req->actor_mac) == sizeof(sc->sc_ac.ac_enaddr));
914 	memcpy(req->actor_mac, &sc->sc_ac.ac_enaddr, sizeof(req->actor_mac));
915 	req->actor_key = ifp->if_index;
916 	req->actor_state = state;
917 
918 	req->partner_prio = ntohs(sc->sc_partner_system.lacp_sysid_priority);
919 	CTASSERT(sizeof(req->partner_mac) ==
920 	    sizeof(sc->sc_partner_system.lacp_sysid_mac));
921 	memcpy(req->partner_mac, sc->sc_partner_system.lacp_sysid_mac,
922 	    sizeof(req->partner_mac));
923 	req->partner_key = ntohs(sc->sc_partner_key);
924 
925 	ra->ra_ports = sc->sc_nports;
926 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
927 		struct ifnet *ifp0;
928 		struct lacp_opreq *opreq;
929 
930 		if (size < sizeof(rp))
931 			break;
932 
933 		ifp0 = p->p_ifp0;
934 
935 		CTASSERT(sizeof(rp.rp_ifname) == sizeof(ifp->if_xname));
936 		CTASSERT(sizeof(rp.rp_portname) == sizeof(ifp0->if_xname));
937 
938 		memset(&rp, 0, sizeof(rp));
939 		memcpy(rp.rp_ifname, ifp->if_xname, sizeof(rp.rp_ifname));
940 		memcpy(rp.rp_portname, ifp0->if_xname, sizeof(rp.rp_portname));
941 
942 		if (p->p_muxed)
943 			SET(rp.rp_flags, TRUNK_PORT_ACTIVE);
944 		if (p->p_collecting)
945 			SET(rp.rp_flags, TRUNK_PORT_COLLECTING);
946 		if (p->p_distributing)
947 			SET(rp.rp_flags, TRUNK_PORT_DISTRIBUTING);
948 		if (!aggr_port_enabled(p))
949 			SET(rp.rp_flags, TRUNK_PORT_DISABLED);
950 
951 		opreq = &rp.rp_lacpreq;
952 
953 		opreq->actor_prio = sc->sc_lacp_prio;
954 		memcpy(opreq->actor_mac, &sc->sc_ac.ac_enaddr,
955 		    sizeof(req->actor_mac));
956 		opreq->actor_key = ifp->if_index;
957 		opreq->actor_portprio = sc->sc_lacp_port_prio;
958 		opreq->actor_portno = ifp0->if_index;
959 		opreq->actor_state = state | p->p_actor_state;
960 
961 		opreq->partner_prio =
962 		    ntohs(p->p_partner.lacp_sysid.lacp_sysid_priority);
963 		CTASSERT(sizeof(opreq->partner_mac) ==
964 		    sizeof(p->p_partner.lacp_sysid.lacp_sysid_mac));
965 		memcpy(opreq->partner_mac,
966 		    p->p_partner.lacp_sysid.lacp_sysid_mac,
967 		    sizeof(opreq->partner_mac));
968 		opreq->partner_key = ntohs(p->p_partner.lacp_key);
969 		opreq->partner_portprio =
970 		    ntohs(p->p_partner.lacp_portid.lacp_portid_priority);
971 		opreq->partner_portno =
972 		    ntohs(p->p_partner.lacp_portid.lacp_portid_number);
973 		opreq->partner_state = p->p_partner_state;
974 
975 		error = copyout(&rp, ubuf, sizeof(rp));
976 		if (error != 0)
977 			break;
978 
979 		ubuf += sizeof(rp);
980 		size -= sizeof(rp);
981 	}
982 
983 	return (error);
984 }
985 
986 static int
987 aggr_get_options(struct aggr_softc *sc, struct trunk_opts *tro)
988 {
989 	struct lacp_adminopts *opt = &tro->to_lacpopts;
990 
991 	if (tro->to_proto != TRUNK_PROTO_LACP)
992 		return (EPROTONOSUPPORT);
993 
994 	opt->lacp_mode = sc->sc_lacp_mode;
995 	opt->lacp_timeout = sc->sc_lacp_timeout;
996 	opt->lacp_prio = sc->sc_lacp_prio;
997 	opt->lacp_portprio = sc->sc_lacp_port_prio;
998 	opt->lacp_ifqprio = sc->sc_if.if_llprio;
999 
1000 	return (0);
1001 }
1002 
1003 static int
1004 aggr_set_options(struct aggr_softc *sc, const struct trunk_opts *tro)
1005 {
1006 	const struct lacp_adminopts *opt = &tro->to_lacpopts;
1007 
1008 	if (tro->to_proto != TRUNK_PROTO_LACP)
1009 		return (EPROTONOSUPPORT);
1010 
1011 	switch (tro->to_opts) {
1012 	case TRUNK_OPT_LACP_MODE:
1013 		switch (opt->lacp_mode) {
1014 		case AGGR_LACP_MODE_PASSIVE:
1015 		case AGGR_LACP_MODE_ACTIVE:
1016 			break;
1017 		default:
1018 			return (EINVAL);
1019 		}
1020 
1021 		aggr_set_lacp_mode(sc, opt->lacp_mode);
1022 		break;
1023 
1024 	case TRUNK_OPT_LACP_TIMEOUT:
1025 		if (opt->lacp_timeout >= nitems(aggr_periodic_times))
1026 			return (EINVAL);
1027 
1028 		aggr_set_lacp_timeout(sc, opt->lacp_timeout);
1029 		break;
1030 
1031 	case TRUNK_OPT_LACP_SYS_PRIO:
1032 		if (opt->lacp_prio == 0)
1033 			return (EINVAL);
1034 
1035 		sc->sc_lacp_prio = opt->lacp_prio;
1036 		break;
1037 
1038 	case TRUNK_OPT_LACP_PORT_PRIO:
1039 		if (opt->lacp_portprio == 0)
1040 			return (EINVAL);
1041 
1042 		sc->sc_lacp_port_prio = opt->lacp_portprio;
1043 		break;
1044 
1045 	default:
1046 		return (ENODEV);
1047 	}
1048 
1049 	return (0);
1050 }
1051 
1052 static int
1053 aggr_add_port(struct aggr_softc *sc, const struct trunk_reqport *rp)
1054 {
1055 	struct ifnet *ifp = &sc->sc_if;
1056 	struct ifnet *ifp0;
1057 	struct arpcom *ac0;
1058 	struct aggr_port *p;
1059 	struct aggr_multiaddr *ma;
1060 	int past = ticks - (hz * LACP_TIMEOUT_FACTOR);
1061 	int i;
1062 	int error;
1063 
1064 	NET_ASSERT_LOCKED();
1065 	if (sc->sc_nports > AGGR_MAX_PORTS)
1066 		return (ENOSPC);
1067 
1068 	ifp0 = ifunit(rp->rp_portname);
1069 	if (ifp0 == NULL || ifp0->if_index == ifp->if_index)
1070 		return (EINVAL);
1071 
1072 	if (ifp0->if_type != IFT_ETHER)
1073 		return (EPROTONOSUPPORT);
1074 
1075 	if (ifp0->if_hardmtu < ifp->if_mtu)
1076 		return (ENOBUFS);
1077 
1078 	ac0 = (struct arpcom *)ifp0;
1079 	if (ac0->ac_trunkport != NULL)
1080 		return (EBUSY);
1081 
1082 	/* let's try */
1083 
1084 	ifp0 = if_get(ifp0->if_index); /* get an actual reference */
1085 	if (ifp0 == NULL) {
1086 		/* XXX this should never happen */
1087 		return (EINVAL);
1088 	}
1089 
1090 	p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
1091 	if (p == NULL) {
1092 		error = ENOMEM;
1093 		goto put;
1094 	}
1095 
1096 	for (i = 0; i < nitems(p->p_txm_log); i++)
1097 		p->p_txm_log[i] = past;
1098 
1099 	p->p_ifp0 = ifp0;
1100 	p->p_aggr = sc;
1101 	p->p_mtu = ifp0->if_mtu;
1102 
1103 	CTASSERT(sizeof(p->p_lladdr) == sizeof(ac0->ac_enaddr));
1104 	memcpy(p->p_lladdr, ac0->ac_enaddr, sizeof(p->p_lladdr));
1105 	p->p_ioctl = ifp0->if_ioctl;
1106 	p->p_output = ifp0->if_output;
1107 
1108 	error = aggr_group(sc, p, SIOCADDMULTI);
1109 	if (error != 0)
1110 		goto free;
1111 
1112 	error = aggr_p_setlladdr(p, sc->sc_ac.ac_enaddr);
1113 	if (error != 0)
1114 		goto ungroup;
1115 
1116 	error = aggr_p_set_mtu(p, ifp->if_mtu);
1117 	if (error != 0)
1118 		goto resetlladdr;
1119 
1120 	if (sc->sc_promisc) {
1121 		error = ifpromisc(ifp0, 1);
1122 		if (error != 0)
1123 			goto unmtu;
1124 	}
1125 
1126 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
1127 		if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) {
1128 			log(LOG_WARNING, "%s %s: "
1129 			    "unable to add multicast address\n",
1130 			    ifp->if_xname, ifp0->if_xname);
1131 		}
1132 	}
1133 
1134 	task_set(&p->p_lhook, aggr_p_linkch, p);
1135 	if_linkstatehook_add(ifp0, &p->p_lhook);
1136 
1137 	task_set(&p->p_dhook, aggr_p_detach, p);
1138 	if_detachhook_add(ifp0, &p->p_dhook);
1139 
1140 	task_set(&p->p_rxm_task, aggr_rx, p);
1141 	mq_init(&p->p_rxm_mq, 3, IPL_NET);
1142 
1143 	timeout_set_proc(&p->p_ptm_tx, aggr_ptm_tx, p);
1144 	timeout_set_proc(&p->p_txm_ntt, aggr_transmit_machine, p);
1145 	timeout_set_proc(&p->p_current_while_timer,
1146 	    aggr_current_while_timer, p);
1147 	timeout_set_proc(&p->p_wait_while_timer, aggr_wait_while_timer, p);
1148 
1149 	p->p_muxed = 0;
1150 	p->p_collecting = 0;
1151 	p->p_distributing = 0;
1152 	p->p_selected = AGGR_PORT_UNSELECTED;
1153 	p->p_actor_state = LACP_STATE_AGGREGATION;
1154 
1155 	/* commit */
1156 	DPRINTF(sc, "%s %s trunkport: creating port\n",
1157 	    ifp->if_xname, ifp0->if_xname);
1158 
1159 	TAILQ_INSERT_TAIL(&sc->sc_ports, p, p_entry);
1160 	sc->sc_nports++;
1161 
1162 	aggr_update_capabilities(sc);
1163 
1164 	ac0->ac_trunkport = p;
1165 	/* make sure p is visible before handlers can run */
1166 	membar_producer();
1167 	ifp0->if_ioctl = aggr_p_ioctl;
1168 	ifp0->if_output = aggr_p_output;
1169 	if_ih_insert(ifp0, aggr_input, p);
1170 
1171 	aggr_mux(sc, p, LACP_MUX_E_BEGIN);
1172 	aggr_rxm(sc, p, LACP_RXM_E_BEGIN);
1173 	aggr_p_linkch(p);
1174 
1175 	return (0);
1176 
1177 unmtu:
1178 	if (aggr_p_set_mtu(p, p->p_mtu) != 0) {
1179 		log(LOG_WARNING, "%s add %s: unable to reset mtu %u\n",
1180 		    ifp->if_xname, ifp0->if_xname, p->p_mtu);
1181 	}
1182 resetlladdr:
1183 	if (aggr_p_setlladdr(p, p->p_lladdr) != 0) {
1184 		log(LOG_WARNING, "%s add %s: unable to reset lladdr\n",
1185 		    ifp->if_xname, ifp0->if_xname);
1186 	}
1187 ungroup:
1188 	if (aggr_group(sc, p, SIOCDELMULTI) != 0) {
1189 		log(LOG_WARNING, "%s add %s: "
1190 		    "unable to remove LACP group address\n",
1191 		    ifp->if_xname, ifp0->if_xname);
1192 	}
1193 free:
1194 	free(p, M_DEVBUF, sizeof(*p));
1195 put:
1196 	if_put(ifp0);
1197 	return (error);
1198 }
1199 
1200 static struct aggr_port *
1201 aggr_trunkport(struct aggr_softc *sc, const char *name)
1202 {
1203 	struct aggr_port *p;
1204 
1205 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
1206 		if (strcmp(p->p_ifp0->if_xname, name) == 0)
1207 			return (p);
1208 	}
1209 
1210 	return (NULL);
1211 }
1212 
1213 static int
1214 aggr_get_port(struct aggr_softc *sc, struct trunk_reqport *rp)
1215 {
1216 	struct aggr_port *p;
1217 
1218 	NET_ASSERT_LOCKED();
1219 	p = aggr_trunkport(sc, rp->rp_portname);
1220 	if (p == NULL)
1221 		return (EINVAL);
1222 
1223 	/* XXX */
1224 
1225 	return (0);
1226 }
1227 
1228 static int
1229 aggr_del_port(struct aggr_softc *sc, const struct trunk_reqport *rp)
1230 {
1231 	struct aggr_port *p;
1232 
1233 	NET_ASSERT_LOCKED();
1234 	p = aggr_trunkport(sc, rp->rp_portname);
1235 	if (p == NULL)
1236 		return (EINVAL);
1237 
1238 	aggr_p_dtor(sc, p, "del");
1239 
1240 	return (0);
1241 }
1242 
1243 static int
1244 aggr_p_setlladdr(struct aggr_port *p, const uint8_t *addr)
1245 {
1246 	struct ifnet *ifp0 = p->p_ifp0;
1247 	struct ifreq ifr;
1248 	struct sockaddr *sa;
1249 	int error;
1250 
1251 	memset(&ifr, 0, sizeof(ifr));
1252 
1253 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
1254 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
1255 
1256 	sa = &ifr.ifr_addr;
1257 
1258 	/* wtf is this? */
1259 	sa->sa_len = ETHER_ADDR_LEN;
1260 	sa->sa_family = AF_LINK;
1261 	CTASSERT(sizeof(sa->sa_data) >= ETHER_ADDR_LEN);
1262 	memcpy(sa->sa_data, addr, ETHER_ADDR_LEN);
1263 
1264 	error = (*p->p_ioctl)(ifp0, SIOCSIFLLADDR, (caddr_t)&ifr);
1265 	switch (error) {
1266 	case ENOTTY:
1267 	case 0:
1268 		break;
1269 	default:
1270 		return (error);
1271 	}
1272 
1273 	error = if_setlladdr(ifp0, addr);
1274 	if (error != 0)
1275 		return (error);
1276 
1277 	ifnewlladdr(ifp0);
1278 
1279 	return (0);
1280 }
1281 
1282 static int
1283 aggr_p_set_mtu(struct aggr_port *p, uint32_t mtu)
1284 {
1285 	struct ifnet *ifp0 = p->p_ifp0;
1286 	struct ifreq ifr;
1287 
1288 	memset(&ifr, 0, sizeof(ifr));
1289 
1290 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
1291 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
1292 
1293 	ifr.ifr_mtu = mtu;
1294 
1295 	return ((*p->p_ioctl)(ifp0, SIOCSIFMTU, (caddr_t)&ifr));
1296 }
1297 
/*
 * ioctl handler installed on an interface while it is an aggr port.
 *
 * - SIOCGTRUNKPORT: if rp_ifname equals rp_portname, rp_ifname is
 *   overwritten with the name of the owning aggr; otherwise EINVAL.
 * - SIOCSIFMTU: a request for the MTU already in place is a no-op,
 *   anything else gets EBUSY.
 * - SIOCSIFLLADDR: always EBUSY.
 * - SIOCSIFFLAGS: if the port is going down it is unselected first,
 *   then the request is passed on.
 * - everything else is passed to the interface's saved handler (p_ioctl).
 */
static int
aggr_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data)
{
	struct arpcom *ac0 = (struct arpcom *)ifp0;
	struct aggr_port *p = ac0->ac_trunkport;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch (cmd) {
	case SIOCGTRUNKPORT: {
		struct trunk_reqport *rp = (struct trunk_reqport *)data;
		struct aggr_softc *sc = p->p_aggr;
		struct ifnet *ifp = &sc->sc_if;

		/* only answer when the request names the same interface twice */
		if (strncmp(rp->rp_ifname, rp->rp_portname,
		    sizeof(rp->rp_ifname)) != 0)
			return (EINVAL);

		/* report the parent aggr's name back in rp_ifname */
		CTASSERT(sizeof(rp->rp_ifname) == sizeof(ifp->if_xname));
		memcpy(rp->rp_ifname, ifp->if_xname, sizeof(rp->rp_ifname));
		break;
	}

	case SIOCSIFMTU:
		if (ifr->ifr_mtu == ifp0->if_mtu)
			break; /* nop */

		/* an actual MTU change is refused like an lladdr change */
		/* FALLTHROUGH */
	case SIOCSIFLLADDR:
		error = EBUSY;
		break;

	case SIOCSIFFLAGS:
		/* IFF_UP already cleared while IFF_RUNNING is still set */
		if (!ISSET(ifp0->if_flags, IFF_UP) &&
		    ISSET(ifp0->if_flags, IFF_RUNNING)) {
			/* port is going down */
			if (p->p_selected == AGGR_PORT_SELECTED) {
				aggr_unselected(p);
				aggr_ntt_transmit(p); /* XXX */
			}
		}
		/* FALLTHROUGH */
	default:
		/* hand the request to the handler saved in p_ioctl */
		error = (*p->p_ioctl)(ifp0, cmd, data);
		break;
	}

	return (error);
}
1347 
1348 static int
1349 aggr_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst,
1350     struct rtentry *rt)
1351 {
1352 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1353 	struct aggr_port *p = ac0->ac_trunkport;
1354 
1355 	/* restrict transmission to bpf only */
1356 	if ((m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL)) {
1357 		m_freem(m);
1358 		return (EBUSY);
1359 	}
1360 
1361 	return ((*p->p_output)(ifp0, m, dst, rt));
1362 }
1363 
/*
 * Tear down port p and release it.
 *
 * op is a short label ("del", "detach") used only in log and debug
 * messages.  The port is unselected, its timeouts are removed, the
 * input handler and the ioctl/output overrides are undone, and the
 * port is taken off sc_ports.  The multicast, promisc, MTU, lladdr and
 * LACP group settings applied to the interface are then reverted;
 * failures there are logged and otherwise ignored.  Finally the hooks
 * are removed, the interface reference is dropped and p is freed.
 */
static void
aggr_p_dtor(struct aggr_softc *sc, struct aggr_port *p, const char *op)
{
	struct ifnet *ifp = &sc->sc_if;
	struct ifnet *ifp0 = p->p_ifp0;
	struct arpcom *ac0 = (struct arpcom *)ifp0;
	struct aggr_multiaddr *ma;
	enum aggr_port_selected selected;
	int error;

	DPRINTF(sc, "%s %s %s: destroying port\n",
	    ifp->if_xname, ifp0->if_xname, op);

	/* sample p_selected first; aggr_unselected() below overwrites it */
	selected = p->p_selected;
	aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED);
	aggr_unselected(p);
	if (aggr_port_enabled(p) && selected == AGGR_PORT_SELECTED)
		aggr_ntt_transmit(p);

	timeout_del(&p->p_ptm_tx);
	timeout_del_barrier(&p->p_txm_ntt); /* XXX */
	timeout_del(&p->p_current_while_timer);
	timeout_del(&p->p_wait_while_timer);

	if_ih_remove(ifp0, aggr_input, p);

	ac0->ac_trunkport = NULL;

	/* put back the handlers the interface had before it became a port */
	ifp0->if_ioctl = p->p_ioctl;
	ifp0->if_output = p->p_output;

	TAILQ_REMOVE(&sc->sc_ports, p, p_entry);
	sc->sc_nports--;

	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
		error = aggr_multi(sc, p, ma, SIOCDELMULTI);
		if (error != 0) {
			log(LOG_WARNING, "%s %s %s: "
			    "unable to remove multicast address (%d)\n",
			    ifp->if_xname, op, ifp0->if_xname, error);
		}
	}

	if (sc->sc_promisc) {
		error = ifpromisc(ifp0, 0);
		if (error != 0) {
			log(LOG_WARNING, "%s %s %s: "
			    "unable to disable promisc (%d)\n",
			    ifp->if_xname, op, ifp0->if_xname, error);
		}
	}

	/* p_mtu and p_lladdr hold the values saved in aggr_add_port() */
	error = aggr_p_set_mtu(p, p->p_mtu);
	if (error != 0) {
		log(LOG_WARNING, "%s %s %s: unable to restore mtu %u (%d)\n",
		    ifp->if_xname, op, ifp0->if_xname, p->p_mtu, error);
	}

	error = aggr_p_setlladdr(p, p->p_lladdr);
	if (error != 0) {
		log(LOG_WARNING, "%s %s %s: unable to restore lladdr (%d)\n",
		    ifp->if_xname, op, ifp0->if_xname, error);
	}

	error = aggr_group(sc, p, SIOCDELMULTI);
	if (error != 0) {
		log(LOG_WARNING, "%s %s %s: "
		    "unable to remove LACP group address (%d)\n",
		    ifp->if_xname, op, ifp0->if_xname, error);
	}

	if_detachhook_del(ifp0, &p->p_dhook);
	if_linkstatehook_del(ifp0, &p->p_lhook);

	/* drop the reference taken with if_get() in aggr_add_port() */
	if_put(ifp0);
	free(p, M_DEVBUF, sizeof(*p));

	/* XXX this is a pretty ugly place to update this */
	aggr_update_capabilities(sc);
}
1444 
1445 static void
1446 aggr_p_detach(void *arg)
1447 {
1448 	struct aggr_port *p = arg;
1449 	struct aggr_softc *sc = p->p_aggr;
1450 
1451 	aggr_p_dtor(sc, p, "detach");
1452 
1453 	NET_ASSERT_LOCKED();
1454 }
1455 
1456 static void
1457 aggr_p_linkch(void *arg)
1458 {
1459 	struct aggr_port *p = arg;
1460 	struct aggr_softc *sc = p->p_aggr;
1461 
1462 	NET_ASSERT_LOCKED();
1463 
1464 	if (aggr_port_enabled(p)) {
1465 		aggr_rxm(sc, p, LACP_RXM_E_PORT_ENABLED);
1466 
1467 		if (aggr_lacp_enabled(sc)) {
1468 			timeout_add_sec(&p->p_ptm_tx,
1469 			    aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]);
1470 		}
1471 	} else {
1472 		aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED);
1473 		aggr_unselected(p);
1474 		aggr_record_default(sc, p);
1475 		timeout_del(&p->p_ptm_tx);
1476 	}
1477 }
1478 
/*
 * Rebuild the port map from the list of distributing ports and update
 * the aggr's link state to match.
 *
 * With at least one distributing port, the next slot in sc_maps is
 * picked by generation number and every m_ifp0s entry is filled by
 * cycling through sc_distributing, wrapping to the head of the list as
 * often as needed.  With no distributing ports the published map is
 * NULL and the link state is LINK_STATE_DOWN.
 */
static void
aggr_map(struct aggr_softc *sc)
{
	struct ifnet *ifp = &sc->sc_if;
	struct aggr_map *map = NULL;
	struct aggr_port *p;
	unsigned int gen;
	unsigned int i;
	int link_state = LINK_STATE_DOWN;

	p = TAILQ_FIRST(&sc->sc_distributing);
	if (p != NULL) {
		/* post-increment: this build uses the old generation's slot */
		gen = sc->sc_map_gen++;
		map = &sc->sc_maps[gen % nitems(sc->sc_maps)];

		for (i = 0; i < nitems(map->m_ifp0s); i++) {
			map->m_ifp0s[i] = p->p_ifp0;

			/* wrap around so every slot gets a port */
			p = TAILQ_NEXT(p, p_entry_distributing);
			if (p == NULL)
				p = TAILQ_FIRST(&sc->sc_distributing);
		}

		link_state = LINK_STATE_FULL_DUPLEX;
	}

	/*
	 * Publish the new map (or NULL).
	 * NOTE(review): the barrier presumably waits for readers that may
	 * still be using the previous map -- confirm against smr(9).
	 */
	SMR_PTR_SET_LOCKED(&sc->sc_map, map);
	smr_barrier();

	/* only announce a link state change when it really changed */
	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		if_link_state_change(ifp);
	}
}
1513 
1514 static void
1515 aggr_current_while_timer(void *arg)
1516 {
1517 	struct aggr_port *p = arg;
1518 	struct aggr_softc *sc = p->p_aggr;
1519 
1520 	aggr_rxm(sc, p, LACP_RXM_E_TIMER_EXPIRED);
1521 }
1522 
1523 static void
1524 aggr_wait_while_timer(void *arg)
1525 {
1526 	struct aggr_port *p = arg;
1527 	struct aggr_softc *sc = p->p_aggr;
1528 
1529 	aggr_selection_logic(sc, p);
1530 }
1531 
1532 static void
1533 aggr_start_current_while_timer(struct aggr_port *p, unsigned int t)
1534 {
1535 	timeout_add_sec(&p->p_current_while_timer,
1536 		aggr_periodic_times[t] * LACP_TIMEOUT_FACTOR);
1537 }
1538 
1539 static void
1540 aggr_input_lacpdu(struct aggr_port *p, struct mbuf *m)
1541 {
1542 	struct aggr_softc *sc = p->p_aggr;
1543 	struct lacp_du *lacpdu;
1544 
1545 	if (m->m_len < sizeof(*lacpdu)) {
1546 		m = m_pullup(m, sizeof(*lacpdu));
1547 		if (m == NULL)
1548 			return;
1549 	}
1550 
1551 	/*
1552 	 * In the process of executing the recordPDU function, a Receive
1553 	 * machine compliant to this standard shall not validate the
1554 	 * Version Number, TLV_type, or Reserved fields in received
1555 	 * LACPDUs. The same actions are taken regardless of the values
1556 	 * received in these fields. A Receive machine may validate
1557 	 * the Actor_Information_Length, Partner_Information_Length,
1558 	 * Collector_Information_Length, or Terminator_Length fields.
1559 	 */
1560 
1561 	lacpdu = mtod(m, struct lacp_du *);
1562 	aggr_rxm_lacpdu(sc, p, lacpdu);
1563 
1564 	m_freem(m);
1565 }
1566 
1567 static void
1568 aggr_update_selected(struct aggr_softc *sc, struct aggr_port *p,
1569     const struct lacp_du *lacpdu)
1570 {
1571 	const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info;
1572 	const struct lacp_port_info *lpi = &p->p_partner;
1573 
1574 	if ((rpi->lacp_portid.lacp_portid_number ==
1575 	     lpi->lacp_portid.lacp_portid_number) &&
1576 	    (rpi->lacp_portid.lacp_portid_priority ==
1577 	     lpi->lacp_portid.lacp_portid_priority) &&
1578 	    ETHER_IS_EQ(rpi->lacp_sysid.lacp_sysid_mac,
1579 	     lpi->lacp_sysid.lacp_sysid_mac) &&
1580 	    (rpi->lacp_sysid.lacp_sysid_priority ==
1581 	     lpi->lacp_sysid.lacp_sysid_priority) &&
1582 	    (rpi->lacp_key == lpi->lacp_key) &&
1583 	    (ISSET(rpi->lacp_state, LACP_STATE_AGGREGATION) ==
1584 	     ISSET(lpi->lacp_state, LACP_STATE_AGGREGATION)))
1585 		return;
1586 
1587 	aggr_unselected(p);
1588 }
1589 
1590 static void
1591 aggr_record_default(struct aggr_softc *sc, struct aggr_port *p)
1592 {
1593 	struct lacp_port_info *pi = &p->p_partner;
1594 
1595 	pi->lacp_sysid.lacp_sysid_priority = htons(0);
1596 	memset(pi->lacp_sysid.lacp_sysid_mac, 0,
1597 	    sizeof(pi->lacp_sysid.lacp_sysid_mac));
1598 
1599 	pi->lacp_key = htons(0);
1600 
1601 	pi->lacp_portid.lacp_portid_priority = htons(0);
1602 	pi->lacp_portid.lacp_portid_number = htons(0);
1603 
1604 	SET(p->p_actor_state, LACP_STATE_DEFAULTED);
1605 
1606 	pi->lacp_state = LACP_STATE_AGGREGATION | LACP_STATE_SYNC;
1607 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
1608 		SET(pi->lacp_state, LACP_STATE_TIMEOUT);
1609 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
1610 		SET(pi->lacp_state, LACP_STATE_ACTIVITY);
1611 
1612 	/* notify Mux */
1613 	aggr_mux(sc, p, LACP_MUX_E_NOT_COLLECTING);
1614 	aggr_mux(sc, p, LACP_MUX_E_SYNC);
1615 }
1616 
1617 static void
1618 aggr_update_default_selected(struct aggr_softc *sc, struct aggr_port *p)
1619 {
1620 	const struct lacp_port_info *pi = &p->p_partner;
1621 
1622 	if ((pi->lacp_portid.lacp_portid_number == htons(0)) &&
1623 	    (pi->lacp_portid.lacp_portid_priority == htons(0)) &&
1624 	    ETHER_IS_ANYADDR(pi->lacp_sysid.lacp_sysid_mac) &&
1625 	    (pi->lacp_sysid.lacp_sysid_priority == htons(0)) &&
1626 	    (pi->lacp_key == htons(0)) &&
1627 	    ISSET(pi->lacp_state, LACP_STATE_AGGREGATION))
1628 		return;
1629 
1630 	aggr_unselected(p);
1631 	aggr_selection_logic(sc, p); /* restart */
1632 }
1633 
/*
 * Compare what the sender of lacpdu believes about us (its
 * lacp_partner_info) with our own actor parameters, and request a
 * transmission through aggr_ntt() on any mismatch.
 *
 * Returns 1 if the port id, system id, key and Sync bit all match.
 * Returns 0 if any of those differ.  A mismatch confined to the
 * remaining state bits (Activity, Timeout, Aggregation) still requests
 * a transmission but returns 1.
 */
static int
aggr_update_ntt(struct aggr_port *p, const struct lacp_du *lacpdu)
{
	struct aggr_softc *sc = p->p_aggr;
	struct arpcom *ac = &sc->sc_ac;
	struct ifnet *ifp = &ac->ac_if;
	struct ifnet *ifp0 = p->p_ifp0;
	const struct lacp_port_info *pi = &lacpdu->lacp_partner_info;
	uint8_t bits = LACP_STATE_ACTIVITY | LACP_STATE_TIMEOUT |
	    LACP_STATE_SYNC | LACP_STATE_AGGREGATION;
	/* local copy; the SET()s below do not modify p->p_actor_state */
	uint8_t state = p->p_actor_state;
	int sync = 0;

	/* our port number on the wire is the port interface's if_index */
	if (pi->lacp_portid.lacp_portid_number != htons(ifp0->if_index))
		goto ntt;
	if (pi->lacp_portid.lacp_portid_priority !=
	    htons(sc->sc_lacp_port_prio))
		goto ntt;
	if (!ETHER_IS_EQ(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr))
		goto ntt;
	if (pi->lacp_sysid.lacp_sysid_priority !=
	    htons(sc->sc_lacp_prio))
		goto ntt;
	/* our key on the wire is the aggr interface's if_index */
	if (pi->lacp_key != htons(ifp->if_index))
		goto ntt;
	if (ISSET(pi->lacp_state, LACP_STATE_SYNC) !=
	    ISSET(state, LACP_STATE_SYNC))
		goto ntt;
	/* identity and Sync agree; later mismatches still return 1 */
	sync = 1;

	/* fold the configured timeout and mode into the local copy */
	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
		SET(state, LACP_STATE_TIMEOUT);
	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
		SET(state, LACP_STATE_ACTIVITY);

	if (ISSET(pi->lacp_state, bits) != ISSET(state, bits))
		goto ntt;

	return (1);

ntt:
	aggr_ntt(p);

	return (sync);
}
1679 
/*
 * Record the actor information from a received LACPDU as this port's
 * partner information, clear the actor's Defaulted bit, and tell the
 * mux whether the partner is in sync.
 *
 * sync is the caller's verdict on whether the LACPDU's view of us
 * matches our own parameters.
 */
static void
aggr_recordpdu(struct aggr_port *p, const struct lacp_du *lacpdu, int sync)
{
	struct aggr_softc *sc = p->p_aggr;
	const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info;
	struct lacp_port_info *lpi = &p->p_partner;
	/*
	 * The link is active if the sender is active, or if we are active
	 * and the sender reports the Activity bit in its view of us.
	 */
	int active = ISSET(rpi->lacp_state, LACP_STATE_ACTIVITY) ||
	    (ISSET(p->p_actor_state, LACP_STATE_ACTIVITY) &&
	     ISSET(lacpdu->lacp_partner_info.lacp_state, LACP_STATE_ACTIVITY));

	lpi->lacp_portid.lacp_portid_number =
	    rpi->lacp_portid.lacp_portid_number;
	lpi->lacp_portid.lacp_portid_priority =
	    rpi->lacp_portid.lacp_portid_priority;
	memcpy(lpi->lacp_sysid.lacp_sysid_mac,
	    rpi->lacp_sysid.lacp_sysid_mac,
	    sizeof(lpi->lacp_sysid.lacp_sysid_mac));
	lpi->lacp_sysid.lacp_sysid_priority =
	    rpi->lacp_sysid.lacp_sysid_priority;
	lpi->lacp_key = rpi->lacp_key;
	/* the received Sync bit is not copied; it is decided below */
	lpi->lacp_state = rpi->lacp_state & ~LACP_STATE_SYNC;

	CLR(p->p_actor_state, LACP_STATE_DEFAULTED);

	/*
	 * NOTE(review): p_partner_state is defined outside this chunk;
	 * presumably it aliases p_partner.lacp_state -- confirm.
	 */
	if (active && ISSET(rpi->lacp_state, LACP_STATE_SYNC) && sync) {
		SET(p->p_partner_state, LACP_STATE_SYNC);
		aggr_mux(sc, p, LACP_MUX_E_SYNC);
	} else {
		CLR(p->p_partner_state, LACP_STATE_SYNC);
		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
	}
}
1712 
1713 static void
1714 aggr_marker_response(struct aggr_port *p, struct mbuf *m)
1715 {
1716 	struct aggr_softc *sc = p->p_aggr;
1717 	struct arpcom *ac = &sc->sc_ac;
1718 	struct ifnet *ifp0 = p->p_ifp0;
1719 	struct marker_pdu *mpdu;
1720 	struct ether_header *eh;
1721 
1722 	mpdu = mtod(m, struct marker_pdu *);
1723 	mpdu->marker_info_tlv.lacp_tlv_type = MARKER_T_RESPONSE;
1724 
1725 	m = m_prepend(m, sizeof(*eh), M_DONTWAIT);
1726 	if (m == NULL)
1727 		return;
1728 
1729 	eh = mtod(m, struct ether_header *);
1730 	memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost));
1731 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
1732 	eh->ether_type = htons(ETHERTYPE_SLOW);
1733 
1734 	(void)if_enqueue(ifp0, m);
1735 }
1736 
1737 static void
1738 aggr_input_marker(struct aggr_port *p, struct mbuf *m)
1739 {
1740 	struct marker_pdu *mpdu;
1741 
1742 	if (m->m_len < sizeof(*mpdu)) {
1743 		m = m_pullup(m, sizeof(*mpdu));
1744 		if (m == NULL)
1745 			return;
1746 	}
1747 
1748 	mpdu = mtod(m, struct marker_pdu *);
1749 	switch (mpdu->marker_info_tlv.lacp_tlv_type) {
1750 	case MARKER_T_INFORMATION:
1751 		aggr_marker_response(p, m);
1752 		break;
1753 	default:
1754 		m_freem(m);
1755 		break;
1756 	}
1757 }
1758 
1759 static void
1760 aggr_rx(void *arg)
1761 {
1762 	struct aggr_port *p = arg;
1763 	struct mbuf_list ml;
1764 	struct mbuf *m;
1765 
1766 	mq_delist(&p->p_rxm_mq, &ml);
1767 
1768 	while ((m = ml_dequeue(&ml)) != NULL) {
1769 		struct ether_slowproto_hdr *sph;
1770 
1771 		/* aggr_input has checked eh already */
1772 		m_adj(m, sizeof(struct ether_header));
1773 
1774 		sph = mtod(m, struct ether_slowproto_hdr *);
1775 		switch (sph->sph_subtype) {
1776 		case SLOWPROTOCOLS_SUBTYPE_LACP:
1777 			aggr_input_lacpdu(p, m);
1778 			break;
1779 		case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER:
1780 			aggr_input_marker(p, m);
1781 			break;
1782 		default:
1783 			panic("unexpected slow protocol subtype");
1784 			/* NOTREACHED */
1785 		}
1786 	}
1787 }
1788 
1789 static void
1790 aggr_set_selected(struct aggr_port *p, enum aggr_port_selected s,
1791     enum lacp_mux_event ev)
1792 {
1793 	struct aggr_softc *sc = p->p_aggr;
1794 
1795 	if (p->p_selected != s) {
1796 		DPRINTF(sc, "%s %s: Selected %s -> %s\n",
1797 		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
1798 		    aggr_port_selected_names[p->p_selected],
1799 		    aggr_port_selected_names[s]);
1800 		p->p_selected = s;
1801 	}
1802 	aggr_mux(sc, p, ev);
1803 }
1804 
1805 static void
1806 aggr_unselected(struct aggr_port *p)
1807 {
1808 	aggr_set_selected(p, AGGR_PORT_UNSELECTED, LACP_MUX_E_UNSELECTED);
1809 }
1810 
1811 static inline void
1812 aggr_selected(struct aggr_port *p)
1813 {
1814 	aggr_set_selected(p, AGGR_PORT_SELECTED, LACP_MUX_E_SELECTED);
1815 }
1816 
#ifdef notyet
/* Mark the port STANDBY and send the mux the matching event. */
static inline void
aggr_standby(struct aggr_port *p)
{
	const enum aggr_port_selected s = AGGR_PORT_STANDBY;

	aggr_set_selected(p, s, LACP_MUX_E_STANDBY);
}
#endif
1824 
/*
 * Decide whether port p may be part of this aggr's aggregation and set
 * its Selected variable accordingly.
 *
 * The port is unselected if its receive machine is not in the CURRENT
 * state, if the partner's key is zero, if the partner is not
 * aggregatable, if the partner appears to be this same aggr, or if
 * other ports are already attached to the mux with a different partner
 * system MAC or key.  Otherwise it is selected.
 */
static void
aggr_selection_logic(struct aggr_softc *sc, struct aggr_port *p)
{
	const struct lacp_port_info *pi;
	struct arpcom *ac = &sc->sc_ac;
	struct ifnet *ifp = &ac->ac_if;
	const uint8_t *mac;

	if (p->p_rxm_state != LACP_RXM_S_CURRENT) {
		DPRINTF(sc, "%s %s: selection logic: unselected (rxm !%s)\n",
		    ifp->if_xname, p->p_ifp0->if_xname,
		    lacp_rxm_state_names[LACP_RXM_S_CURRENT]);
		goto unselected;
	}

	pi = &p->p_partner;
	if (pi->lacp_key == htons(0)) {
		DPRINTF(sc, "%s %s: selection logic: unselected "
		    "(partner key == 0)\n",
		    ifp->if_xname, p->p_ifp0->if_xname);
		goto unselected;
	}

	/*
	 * aggr(4) does not support individual interfaces
	 */
	if (!ISSET(pi->lacp_state, LACP_STATE_AGGREGATION)) {
		DPRINTF(sc, "%s %s: selection logic: unselected "
		    "(partner state is Individual)\n",
		    ifp->if_xname, p->p_ifp0->if_xname);
		goto unselected;
	}

	/*
	 * Any pair of Aggregation Ports that are members of the same
	 * LAG, but are connected together by the same link, shall not
	 * select the same Aggregator
	 */

	/*
	 * The partner carries our own lladdr and our own key (the aggr's
	 * if_index), i.e. the link leads back to this same aggr.
	 */
	mac = pi->lacp_sysid.lacp_sysid_mac;
	if (ETHER_IS_EQ(mac, ac->ac_enaddr) &&
	    pi->lacp_key == htons(ifp->if_index)) {
		DPRINTF(sc, "%s %s: selection logic: unselected "
		    "(partner sysid !eq)\n",
		    ifp->if_xname, p->p_ifp0->if_xname);
		goto unselected;
	}

	if (!TAILQ_EMPTY(&sc->sc_muxen)) {
		/* an aggregation has already been selected */
		/* the new port must talk to the same partner system and key */
		if (!ETHER_IS_EQ(mac, sc->sc_partner_system.lacp_sysid_mac) ||
		    sc->sc_partner_key != pi->lacp_key) {
			DPRINTF(sc, "%s %s: selection logic: unselected "
			    "(partner sysid != selection)\n",
			    ifp->if_xname, p->p_ifp0->if_xname);
			goto unselected;
		}
	}

	aggr_selected(p);
	return;

unselected:
	aggr_unselected(p);
}
1890 
1891 static void
1892 aggr_mux(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev)
1893 {
1894 	int ntt = 0;
1895 
1896 	/*
1897 	 * the mux can move through multiple states based on a
1898 	 * single event, so loop until the event is completely consumed.
1899 	 * debounce NTT = TRUE through the multiple state transitions.
1900 	 */
1901 
1902 	while (aggr_mux_ev(sc, p, ev, &ntt) != 0)
1903 		;
1904 
1905 	if (ntt)
1906 		aggr_ntt(p);
1907 }
1908 
#ifdef notyet
/*
 * A port is ready when its mux is in the WAITING state and its
 * wait_while timer is no longer pending.  Returns 1 if so, else 0.
 */
static int
aggr_ready_n(struct aggr_port *p)
{
	if (p->p_mux_state != LACP_MUX_S_WAITING)
		return (0);

	return (!timeout_pending(&p->p_wait_while_timer));
}
#endif
1917 
/*
 * Report whether the aggregation is ready.  This always returns 1;
 * sc is not consulted.
 */
static inline int
aggr_ready(struct aggr_softc *sc)
{
	const int ready = 1;

	return (ready);
}
1923 
1924 static void
1925 aggr_disable_distributing(struct aggr_softc *sc, struct aggr_port *p)
1926 {
1927 	if (!p->p_distributing)
1928 		return;
1929 
1930 	sc->sc_ndistributing--;
1931 	TAILQ_REMOVE(&sc->sc_distributing, p, p_entry_distributing);
1932 	p->p_distributing = 0;
1933 
1934 	aggr_map(sc);
1935 
1936 	DPRINTF(sc, "%s %s: distributing disabled\n",
1937 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1938 }
1939 
1940 static void
1941 aggr_enable_distributing(struct aggr_softc *sc, struct aggr_port *p)
1942 {
1943 	if (p->p_distributing)
1944 		return;
1945 
1946 	/* check the LAG ID? */
1947 
1948 	p->p_distributing = 1;
1949 	TAILQ_INSERT_TAIL(&sc->sc_distributing, p, p_entry_distributing);
1950 	sc->sc_ndistributing++;
1951 
1952 	aggr_map(sc);
1953 
1954 	DPRINTF(sc, "%s %s: distributing enabled\n",
1955 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1956 }
1957 
1958 static void
1959 aggr_disable_collecting(struct aggr_softc *sc, struct aggr_port *p)
1960 {
1961 	if (!p->p_collecting)
1962 		return;
1963 
1964 	p->p_collecting = 0;
1965 
1966 	DPRINTF(sc, "%s %s: collecting disabled\n",
1967 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1968 }
1969 
1970 static void
1971 aggr_enable_collecting(struct aggr_softc *sc, struct aggr_port *p)
1972 {
1973 	if (p->p_collecting)
1974 		return;
1975 
1976 	p->p_collecting = 1;
1977 
1978 	DPRINTF(sc, "%s %s: collecting enabled\n",
1979 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1980 }
1981 
1982 static void
1983 aggr_attach_mux(struct aggr_softc *sc, struct aggr_port *p)
1984 {
1985 	const struct lacp_port_info *pi = &p->p_partner;
1986 
1987 	if (p->p_muxed)
1988 		return;
1989 
1990 	p->p_muxed = 1;
1991 	if (TAILQ_EMPTY(&sc->sc_muxen)) {
1992 		KASSERT(sc->sc_partner_key == htons(0));
1993 		sc->sc_partner_system = pi->lacp_sysid;
1994 		sc->sc_partner_key = pi->lacp_key;
1995 	}
1996 
1997 	TAILQ_INSERT_TAIL(&sc->sc_muxen, p, p_entry_muxen);
1998 
1999 	DPRINTF(sc, "%s %s: mux attached\n",
2000 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
2001 }
2002 
2003 static void
2004 aggr_detach_mux(struct aggr_softc *sc, struct aggr_port *p)
2005 {
2006 	if (!p->p_muxed)
2007 		return;
2008 
2009 	p->p_muxed = 0;
2010 
2011 	TAILQ_REMOVE(&sc->sc_muxen, p, p_entry_muxen);
2012 	if (TAILQ_EMPTY(&sc->sc_muxen)) {
2013 		memset(&sc->sc_partner_system.lacp_sysid_mac, 0,
2014 		    sizeof(sc->sc_partner_system.lacp_sysid_mac));
2015 		sc->sc_partner_system.lacp_sysid_priority = htons(0);
2016 		sc->sc_partner_key = htons(0);
2017 	}
2018 
2019 	DPRINTF(sc, "%s %s: mux detached\n",
2020 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
2021 }
2022 
/*
 * Run one step of the mux state machine for port p.
 *
 * The first switch picks the next state from the current state and the
 * event; the second switch performs the entry actions of that state.
 *
 * Returns 0 if ev causes no transition in the current state, in which
 * case nothing is changed.  Returns 1 after a transition; aggr_mux()
 * then presents the same event again to the new state.  *ntt is set to
 * 1 when the entered state asks for a transmission; it is never
 * cleared here.
 */
static int
aggr_mux_ev(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev,
    int *ntt)
{
	enum lacp_mux_state nstate = LACP_MUX_S_DETACHED;

	/* choose the next state; "return (0)" means no transition */
	switch (p->p_mux_state) {
	case LACP_MUX_S_BEGIN:
		KASSERT(ev == LACP_MUX_E_BEGIN);
		nstate = LACP_MUX_S_DETACHED;
		break;
	case LACP_MUX_S_DETACHED:
		switch (ev) {
		case LACP_MUX_E_SELECTED:
		case LACP_MUX_E_STANDBY:
			nstate = LACP_MUX_S_WAITING;
			break;
		default:
			return (0);
		}
		break;
	case LACP_MUX_S_WAITING:
		switch (ev) {
		case LACP_MUX_E_UNSELECTED:
			nstate = LACP_MUX_S_DETACHED;
			break;
		case LACP_MUX_E_SELECTED:
		case LACP_MUX_E_READY:
			if (aggr_ready(sc) &&
			    p->p_selected == AGGR_PORT_SELECTED) {
				nstate = LACP_MUX_S_ATTACHED;
				break;
			}
			/* FALLTHROUGH */
		default:
			return (0);
		}
		break;
	case LACP_MUX_S_ATTACHED:
		switch (ev) {
		case LACP_MUX_E_UNSELECTED:
		case LACP_MUX_E_STANDBY:
			nstate = LACP_MUX_S_DETACHED;
			break;
		case LACP_MUX_E_SELECTED:
		case LACP_MUX_E_SYNC:
			if (p->p_selected == AGGR_PORT_SELECTED &&
			    ISSET(p->p_partner_state, LACP_STATE_SYNC)) {
				nstate = LACP_MUX_S_COLLECTING;
				break;
			}
			/* FALLTHROUGH */
		default:
			return (0);
		}
		break;
	case LACP_MUX_S_COLLECTING:
		switch (ev) {
		case LACP_MUX_E_UNSELECTED:
		case LACP_MUX_E_STANDBY:
		case LACP_MUX_E_NOT_SYNC:
			nstate = LACP_MUX_S_ATTACHED;
			break;
		case LACP_MUX_E_SELECTED:
		case LACP_MUX_E_SYNC:
		case LACP_MUX_E_COLLECTING:
			if (p->p_selected == AGGR_PORT_SELECTED &&
			    ISSET(p->p_partner_state, LACP_STATE_SYNC) &&
			    ISSET(p->p_partner_state, LACP_STATE_COLLECTING)) {
				nstate = LACP_MUX_S_DISTRIBUTING;
				break;
			}
			/* FALLTHROUGH */
		default:
			return (0);
		}
		break;
	case LACP_MUX_S_DISTRIBUTING:
		switch (ev) {
		case LACP_MUX_E_UNSELECTED:
		case LACP_MUX_E_STANDBY:
		case LACP_MUX_E_NOT_SYNC:
		case LACP_MUX_E_NOT_COLLECTING:
			nstate = LACP_MUX_S_COLLECTING;
			break;
		default:
			return (0);
		}
		break;
	}

	DPRINTF(sc, "%s %s mux: %s (%s) -> %s\n",
	    sc->sc_if.if_xname, p->p_ifp0->if_xname,
	    lacp_mux_state_names[p->p_mux_state], lacp_mux_event_names[ev],
	    lacp_mux_state_names[nstate]);

	/* act on the new state */
	switch (nstate) {
	case LACP_MUX_S_BEGIN:
		panic("unexpected mux nstate BEGIN");
		/* NOTREACHED */
	case LACP_MUX_S_DETACHED:
		/*
		 * Detach_Mux_From_Aggregator();
		 * Actor.Sync = FALSE;
		 * Disable_Distributing();
		 * Actor.Distributing = FALSE;
		 * Actor.Collecting = FALSE;
		 * Disable_Collecting();
		 * NTT = TRUE;
		 */
		aggr_detach_mux(sc, p);
		CLR(p->p_actor_state, LACP_STATE_SYNC);
		aggr_disable_distributing(sc, p);
		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
		CLR(p->p_actor_state, LACP_STATE_COLLECTING);
		aggr_disable_collecting(sc, p);
		*ntt = 1;
		break;
	case LACP_MUX_S_WAITING:
		/*
		 * Start wait_while_timer
		 */
		timeout_add_sec(&p->p_wait_while_timer,
		    LACP_AGGREGATION_WAIT_TIME);
		break;
	case LACP_MUX_S_ATTACHED:
		/*
		 * Attach_Mux_To_Aggregator();
		 * Actor.Sync = TRUE;
		 * Actor.Collecting = FALSE;
		 * Disable_Collecting();
		 * NTT = TRUE;
		 */
		aggr_attach_mux(sc, p);
		SET(p->p_actor_state, LACP_STATE_SYNC);
		CLR(p->p_actor_state, LACP_STATE_COLLECTING);
		aggr_disable_collecting(sc, p);
		*ntt = 1;
		break;

	case LACP_MUX_S_COLLECTING:
		/*
		 * Enable_Collecting();
		 * Actor.Collecting = TRUE;
		 * Disable_Distributing();
		 * Actor.Distributing = FALSE;
		 * NTT = TRUE;
		 */
		aggr_enable_collecting(sc, p);
		SET(p->p_actor_state, LACP_STATE_COLLECTING);
		aggr_disable_distributing(sc, p);
		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
		*ntt = 1;
		break;
	case LACP_MUX_S_DISTRIBUTING:
		/*
		 * Actor.Distributing = TRUE;
		 * Enable_Distributing();
		 */
		SET(p->p_actor_state, LACP_STATE_DISTRIBUTING);
		aggr_enable_distributing(sc, p);
		break;
	}

	/* record the new state only after its entry actions have run */
	p->p_mux_state = nstate;

	return (1);
}
2192 
/*
 * Run one event through the LACP Receive machine of port p.
 *
 * The function works in two phases. First the next state is chosen:
 * the NOT_PORT_ENABLED and NOT_PORT_MOVED events may force
 * PORT_DISABLED regardless of the current state, otherwise the
 * transition is looked up from p->p_rxm_state and ev. An event that
 * has no transition in the current state returns immediately and
 * leaves the port untouched.
 *
 * Second, the new state is stored in p->p_rxm_state and the actions
 * for that state (spelled out as pseudo-code in the comments of each
 * case) are executed. INITIALIZE has no event of its own: after its
 * actions it jumps back to the uct label to enter PORT_DISABLED.
 *
 * lacpdu must be non-NULL if and only if ev is LACP_RXM_E_LACPDU; it
 * is only dereferenced by the CURRENT state actions.
 */
2193 static void
2194 aggr_rxm_ev(struct aggr_softc *sc, struct aggr_port *p,
2195     enum lacp_rxm_event ev, const struct lacp_du *lacpdu)
2196 {
2197 	unsigned int port_disabled = 0;
2198 	enum lacp_rxm_state nstate = LACP_RXM_S_BEGIN;
2199 
2200 	KASSERT((ev == LACP_RXM_E_LACPDU) == (lacpdu != NULL));
2201 
2202 	/* global transitions */
2203 
	/*
	 * Each of these two events supplies one half of the condition
	 * for entering PORT_DISABLED; the helper for the other half is
	 * consulted here, and both must be false to force the state.
	 */
2204 	switch (ev) {
2205 	case LACP_RXM_E_NOT_PORT_ENABLED:
2206 		port_disabled = !aggr_port_moved(sc, p);
2207 		break;
2208 	case LACP_RXM_E_NOT_PORT_MOVED:
2209 		port_disabled = !aggr_port_enabled(p);
2210 		break;
2211 	default:
2212 		break;
2213 	}
2214 
2215 	if (port_disabled)
2216 		nstate = LACP_RXM_S_PORT_DISABLED;
2217 	else switch (p->p_rxm_state) { /* local state transitions */
2218 	case LACP_RXM_S_BEGIN:
2219 		KASSERT(ev == LACP_RXM_E_BEGIN);
2220 		nstate = LACP_RXM_S_INITIALIZE;
2221 		break;
2222 	case LACP_RXM_S_INITIALIZE:
2223 		/* this should only be handled via UCT in nstate handling */
2224 		panic("unexpected rxm state INITIALIZE");
2225 
2226 	case LACP_RXM_S_PORT_DISABLED:
2227 		switch (ev) {
2228 		case LACP_RXM_E_PORT_MOVED:
2229 			nstate = LACP_RXM_S_INITIALIZE;
2230 			break;
2231 		case LACP_RXM_E_PORT_ENABLED:
2232 			nstate = aggr_lacp_enabled(sc) ?
2233 			    LACP_RXM_S_EXPIRED : LACP_RXM_S_LACP_DISABLED;
2234 			break;
2235 		case LACP_RXM_E_LACP_ENABLED:
			/* LACP state changes only matter once the port is enabled */
2236 			if (!aggr_port_enabled(p))
2237 				return;
2238 			nstate = LACP_RXM_S_EXPIRED;
2239 			break;
2240 		case LACP_RXM_E_NOT_LACP_ENABLED:
2241 			if (!aggr_port_enabled(p))
2242 				return;
2243 			nstate = LACP_RXM_S_LACP_DISABLED;
2244 			break;
2245 		default:
2246 			return;
2247 		}
2248 		break;
2249 	case LACP_RXM_S_EXPIRED:
2250 		switch (ev) {
2251 		case LACP_RXM_E_LACPDU:
2252 			nstate = LACP_RXM_S_CURRENT;
2253 			break;
2254 		case LACP_RXM_E_TIMER_EXPIRED:
2255 			nstate = LACP_RXM_S_DEFAULTED;
2256 			break;
2257 		default:
2258 			return;
2259 		}
2260 		break;
2261 	case LACP_RXM_S_LACP_DISABLED:
2262 		switch (ev) {
2263 		case LACP_RXM_E_LACP_ENABLED:
2264 			nstate = LACP_RXM_S_PORT_DISABLED;
2265 			break;
2266 		default:
2267 			return;
2268 		}
2269 		break;
2270 	case LACP_RXM_S_DEFAULTED:
2271 		switch (ev) {
2272 		case LACP_RXM_E_LACPDU:
2273 			nstate = LACP_RXM_S_CURRENT;
2274 			break;
2275 		default:
2276 			return;
2277 		}
2278 		break;
2279 	case LACP_RXM_S_CURRENT:
2280 		switch (ev) {
2281 		case LACP_RXM_E_TIMER_EXPIRED:
2282 			nstate = LACP_RXM_S_EXPIRED;
2283 			break;
2284 		case LACP_RXM_E_LACPDU:
			/* self-transition: the CURRENT actions run again */
2285 			nstate = LACP_RXM_S_CURRENT;
2286 			break;
2287 		default:
2288 			return;
2289 		}
2290 		break;
2291 	}
2292 
	/* also re-entered from INITIALIZE with ev set to LACP_RXM_E_UCT */
2293 uct:
	/* only real state changes are reported; self-transitions are silent */
2294 	if (p->p_rxm_state != nstate) {
2295 		DPRINTF(sc, "%s %s rxm: %s (%s) -> %s\n",
2296 		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
2297 		    lacp_rxm_state_names[p->p_rxm_state],
2298 		    lacp_rxm_event_names[ev],
2299 		    lacp_rxm_state_names[nstate]);
2300 	}
2301 
2302 	/* record the new state */
2303 	p->p_rxm_state = nstate;
2304 
2305 	/* act on the new state */
2306 	switch (nstate) {
2307 	case LACP_RXM_S_BEGIN:
2308 		panic("unexpected rxm nstate BEGIN");
2309 		/* NOTREACHED */
2310 	case LACP_RXM_S_INITIALIZE:
2311 		/*
2312 		 * Selected = UNSELECTED;
2313 		 * recordDefault();
2314 		 * Actor_Oper_Port_State.Expired = FALSE;
2315 		 * port_moved = FALSE;
2316 		 */
2317 		aggr_unselected(p);
2318 		aggr_record_default(sc, p);
2319 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2320 
2321 		ev = LACP_RXM_E_UCT;
2322 		nstate = LACP_RXM_S_PORT_DISABLED;
2323 		goto uct;
2324 		/* NOTREACHED */
2325 	case LACP_RXM_S_PORT_DISABLED:
2326 		/*
2327 		 * Partner_Oper_Port_State.Synchronization = FALSE;
2328 		 */
2329 		CLR(p->p_partner_state, LACP_STATE_SYNC);
2330 		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
2331 		break;
2332 	case LACP_RXM_S_EXPIRED:
2333 		/*
2334 		 * Partner_Oper_Port_State.Synchronization = FALSE;
2335 		 * Partner_Oper_Port_State.LACP_Timeout = Short Timeout;
2336 		 * start current_while_timer(Short Timeout);
2337 		 * Actor_Oper_Port_State.Expired = TRUE;
2338 		 */
2339 
2340 		CLR(p->p_partner_state, LACP_STATE_SYNC);
2341 		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
2342 		aggr_set_partner_timeout(p, AGGR_LACP_TIMEOUT_FAST);
2343 		aggr_start_current_while_timer(p, AGGR_LACP_TIMEOUT_FAST);
2344 		SET(p->p_actor_state, LACP_STATE_EXPIRED);
2345 
2346 		break;
2347 	case LACP_RXM_S_LACP_DISABLED:
2348 		/*
2349 		 * Selected = UNSELECTED;
2350 		 * recordDefault();
2351 		 * Partner_Oper_Port_State.Aggregation = FALSE;
2352 		 * Actor_Oper_Port_State.Expired = FALSE;
2353 		 */
2354 		aggr_unselected(p);
2355 		aggr_record_default(sc, p);
2356 		CLR(p->p_partner_state, LACP_STATE_AGGREGATION);
2357 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2358 		break;
2359 	case LACP_RXM_S_DEFAULTED:
2360 		/*
2361 		 * update_Default_Selected();
2362 		 * recordDefault();
2363 		 * Actor_Oper_Port_State.Expired = FALSE;
2364 		 */
2365 		aggr_update_default_selected(sc, p);
2366 		aggr_record_default(sc, p);
2367 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2368 		break;
2369 	case LACP_RXM_S_CURRENT: {
2370 		/*
2371 		 * update_Selected();
2372 		 * update_NTT();
2373 		 * if (Actor_System_LACP_Version >=2 ) recordVersionNumber();
2374 		 * recordPDU();
2375 		 * start current_while_timer(
2376 		 *     Actor_Oper_Port_State.LACP_Timeout);
2377 		 * Actor_Oper_Port_State.Expired = FALSE;
2378 		 */
2379 		int sync;
2380 
2381 		aggr_update_selected(sc, p, lacpdu);
		/* the result of update_NTT is handed on to recordPDU below */
2382 		sync = aggr_update_ntt(p, lacpdu);
2383 		/* don't support v2 yet */
2384 		aggr_recordpdu(p, lacpdu, sync);
2385 		aggr_start_current_while_timer(p, sc->sc_lacp_timeout);
2386 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2387 
2388 		if (p->p_selected == AGGR_PORT_UNSELECTED)
2389 			aggr_selection_logic(sc, p); /* restart */
2390 
2391 		}
2392 		break;
2393 	}
2394 }
2395 
2396 static int
2397 aggr_up(struct aggr_softc *sc)
2398 {
2399 	struct ifnet *ifp = &sc->sc_if;
2400 	struct aggr_port *p;
2401 
2402 	NET_ASSERT_LOCKED();
2403 	KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
2404 
2405 	SET(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = TRUE */
2406 
2407 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2408 		aggr_rxm(sc, p, LACP_RXM_E_LACP_ENABLED);
2409 		aggr_p_linkch(p);
2410 	}
2411 
2412 	/* start the Periodic Transmission machine */
2413 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) {
2414 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2415 			if (!aggr_port_enabled(p))
2416 				continue;
2417 
2418 			timeout_add_sec(&p->p_ptm_tx,
2419 			    aggr_periodic_times[sc->sc_lacp_timeout]);
2420 		}
2421 	}
2422 
2423 	return (ENETRESET);
2424 }
2425 
2426 static int
2427 aggr_iff(struct aggr_softc *sc)
2428 {
2429 	struct ifnet *ifp = &sc->sc_if;
2430 	unsigned int promisc = ISSET(ifp->if_flags, IFF_PROMISC);
2431 
2432 	NET_ASSERT_LOCKED();
2433 
2434 	if (promisc != sc->sc_promisc) {
2435 		struct aggr_port *p;
2436 
2437 		rw_enter_read(&sc->sc_lock);
2438 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2439 			struct ifnet *ifp0 = p->p_ifp0;
2440 			if (ifpromisc(ifp0, promisc) != 0) {
2441 				log(LOG_WARNING, "%s iff %s: "
2442 				    "unable to turn promisc %s\n",
2443 				    ifp->if_xname, ifp0->if_xname,
2444 				    promisc ? "on" : "off");
2445 			}
2446 		}
2447 		rw_exit_read(&sc->sc_lock);
2448 
2449 		sc->sc_promisc = promisc;
2450 	}
2451 
2452 	return (0);
2453 }
2454 
2455 static int
2456 aggr_down(struct aggr_softc *sc)
2457 {
2458 	struct ifnet *ifp = &sc->sc_if;
2459 	struct aggr_port *p;
2460 
2461 	NET_ASSERT_LOCKED();
2462 	CLR(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = FALSE */
2463 
2464 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2465 		aggr_rxm(sc, p, LACP_RXM_E_NOT_LACP_ENABLED);
2466 
2467 		/* stop the Periodic Transmission machine */
2468 		timeout_del(&p->p_ptm_tx);
2469 
2470 		/* stop the Mux machine */
2471 		aggr_mux(sc, p, LACP_MUX_E_UNSELECTED);
2472 
2473 		/* stop the Transmit machine */
2474 		timeout_del(&p->p_txm_ntt);
2475 	}
2476 
2477 	KASSERT(TAILQ_EMPTY(&sc->sc_distributing));
2478 	KASSERT(sc->sc_ndistributing == 0);
2479 	KASSERT(SMR_PTR_GET_LOCKED(&sc->sc_map) == NULL);
2480 
2481 	return (ENETRESET);
2482 }
2483 
2484 static int
2485 aggr_set_lladdr(struct aggr_softc *sc, const struct ifreq *ifr)
2486 {
2487 	struct ifnet *ifp = &sc->sc_if;
2488 	struct aggr_port *p;
2489 	const uint8_t *lladdr = ifr->ifr_addr.sa_data;
2490 
2491 	rw_enter_read(&sc->sc_lock);
2492 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2493 		if (aggr_p_setlladdr(p, lladdr) != 0) {
2494 			struct ifnet *ifp0 = p->p_ifp0;
2495 			log(LOG_WARNING, "%s setlladdr %s: "
2496 			    "unable to set lladdr\n",
2497 			    ifp->if_xname, ifp0->if_xname);
2498 		}
2499 	}
2500 	rw_exit_read(&sc->sc_lock);
2501 
2502 	return (0);
2503 }
2504 
2505 static int
2506 aggr_set_mtu(struct aggr_softc *sc, uint32_t mtu)
2507 {
2508 	struct ifnet *ifp = &sc->sc_if;
2509 	struct aggr_port *p;
2510 
2511 	if (mtu < ETHERMIN || mtu > ifp->if_hardmtu)
2512 		return (EINVAL);
2513 
2514 	ifp->if_mtu = mtu;
2515 
2516 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2517 		if (aggr_p_set_mtu(p, mtu) != 0) {
2518 			struct ifnet *ifp0 = p->p_ifp0;
2519 			log(LOG_WARNING, "%s %s: unable to set mtu %u\n",
2520 			    ifp->if_xname, ifp0->if_xname, mtu);
2521 		}
2522 	}
2523 
2524 	return (0);
2525 }
2526 
2527 static int
2528 aggr_group(struct aggr_softc *sc, struct aggr_port *p, u_long cmd)
2529 {
2530 	struct ifnet *ifp0 = p->p_ifp0;
2531 	struct ifreq ifr;
2532 	struct sockaddr *sa;
2533 
2534 	memset(&ifr, 0, sizeof(ifr));
2535 
2536 	/* make it convincing */
2537 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
2538 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
2539 
2540 	sa = &ifr.ifr_addr;
2541 	CTASSERT(sizeof(sa->sa_data) >= sizeof(lacp_address_slow));
2542 
2543 	sa->sa_family = AF_UNSPEC;
2544 	memcpy(sa->sa_data, lacp_address_slow, sizeof(lacp_address_slow));
2545 
2546 	return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr));
2547 }
2548 
2549 static int
2550 aggr_multi(struct aggr_softc *sc, struct aggr_port *p,
2551     const struct aggr_multiaddr *ma, u_long cmd)
2552 {
2553 	struct ifnet *ifp0 = p->p_ifp0;
2554 	struct {
2555 		char			if_name[IFNAMSIZ];
2556 		struct sockaddr_storage if_addr;
2557 	} ifr;
2558 
2559 	memset(&ifr, 0, sizeof(ifr));
2560 
2561 	/* make it convincing */
2562 	CTASSERT(sizeof(ifr.if_name) == sizeof(ifp0->if_xname));
2563 	memcpy(ifr.if_name, ifp0->if_xname, sizeof(ifr.if_name));
2564 
2565 	ifr.if_addr = ma->m_addr;
2566 
2567 	return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr));
2568 }
2569 
2570 static void
2571 aggr_media_status(struct ifnet *ifp, struct ifmediareq *imr)
2572 {
2573 	struct aggr_softc *sc = ifp->if_softc;
2574 
2575 	imr->ifm_status = IFM_AVALID;
2576 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
2577 
2578 	smr_read_enter(); /* there's no reason to block... */
2579 	if (SMR_PTR_GET(&sc->sc_map) != NULL)
2580 		imr->ifm_status |= IFM_ACTIVE;
2581 	smr_read_leave();
2582 }
2583 
/*
 * Media change handler for the aggr interface. Every request is
 * refused: ifp is not examined and EOPNOTSUPP is always returned.
 */
static int
aggr_media_change(struct ifnet *ifp)
{
	return (EOPNOTSUPP);
}
2589 
2590 static void
2591 aggr_update_capabilities(struct aggr_softc *sc)
2592 {
2593 	struct aggr_port *p;
2594 	uint32_t hardmtu = ETHER_MAX_HARDMTU_LEN;
2595 	uint32_t capabilities = ~0;
2596 	int set = 0;
2597 
2598 	rw_enter_read(&sc->sc_lock);
2599 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2600 		struct ifnet *ifp0 = p->p_ifp0;
2601 
2602 		set = 1;
2603 		capabilities &= ifp0->if_capabilities;
2604 		if (ifp0->if_hardmtu < hardmtu)
2605 			hardmtu = ifp0->if_hardmtu;
2606 	}
2607 	rw_exit_read(&sc->sc_lock);
2608 
2609 	sc->sc_if.if_hardmtu = hardmtu;
2610 	sc->sc_if.if_capabilities = (set ? capabilities : 0);
2611 }
2612 
2613 static void
2614 aggr_ptm_tx(void *arg)
2615 {
2616 	struct aggr_port *p = arg;
2617 	unsigned int timeout;
2618 
2619 	aggr_ntt(p);
2620 
2621 	timeout = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT) ?
2622 	    AGGR_LACP_TIMEOUT_FAST : AGGR_LACP_TIMEOUT_SLOW;
2623 	timeout_add_sec(&p->p_ptm_tx, aggr_periodic_times[timeout]);
2624 }
2625 
2626 static inline void
2627 aggr_lacp_tlv_set(struct lacp_tlv_hdr *tlv, uint8_t type, uint8_t len)
2628 {
2629 	tlv->lacp_tlv_type = type;
2630 	tlv->lacp_tlv_length = sizeof(*tlv) + len;
2631 }
2632 
2633 static void
2634 aggr_ntt_transmit(struct aggr_port *p)
2635 {
2636 	struct aggr_softc *sc = p->p_aggr;
2637 	struct arpcom *ac = &sc->sc_ac;
2638 	struct ifnet *ifp = &sc->sc_if;
2639 	struct ifnet *ifp0 = p->p_ifp0;
2640 	struct mbuf *m;
2641 	struct lacp_du *lacpdu;
2642 	struct lacp_port_info *pi;
2643 	struct lacp_collector_info *ci;
2644 	struct ether_header *eh;
2645 	int linkhdr = max_linkhdr + ETHER_ALIGN;
2646 	int len = linkhdr + sizeof(*eh) + sizeof(*lacpdu);
2647 
2648 	m = m_gethdr(M_DONTWAIT, MT_DATA);
2649 	if (m == NULL)
2650 		return;
2651 
2652 	if (len > MHLEN) {
2653 		MCLGETI(m, M_DONTWAIT, NULL, len);
2654 		if (!ISSET(m->m_flags, M_EXT)) {
2655 			m_freem(m);
2656 			return;
2657 		}
2658 	}
2659 
2660 	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
2661 	m->m_pkthdr.len = m->m_len = len;
2662 	memset(m->m_data, 0, m->m_len);
2663 	m_adj(m, linkhdr);
2664 
2665 	eh = mtod(m, struct ether_header *);
2666 
2667 	CTASSERT(sizeof(eh->ether_dhost) == sizeof(lacp_address_slow));
2668 	CTASSERT(sizeof(eh->ether_shost) == sizeof(ac->ac_enaddr));
2669 
2670 	memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost));
2671 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
2672 	eh->ether_type = htons(ETHERTYPE_SLOW);
2673 
2674 	lacpdu = (struct lacp_du *)(eh + 1);
2675 	lacpdu->lacp_du_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
2676 	lacpdu->lacp_du_sph.sph_version = LACP_VERSION;
2677 
2678 	pi = &lacpdu->lacp_actor_info;
2679 	aggr_lacp_tlv_set(&lacpdu->lacp_actor_info_tlv,
2680 	    LACP_T_ACTOR, sizeof(*pi));
2681 
2682 	pi->lacp_sysid.lacp_sysid_priority = htons(sc->sc_lacp_prio);
2683 	CTASSERT(sizeof(pi->lacp_sysid.lacp_sysid_mac) ==
2684 	    sizeof(ac->ac_enaddr));
2685 	memcpy(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr,
2686 	    sizeof(pi->lacp_sysid.lacp_sysid_mac));
2687 
2688 	pi->lacp_key = htons(ifp->if_index);
2689 
2690 	pi->lacp_portid.lacp_portid_priority = htons(sc->sc_lacp_port_prio);
2691 	pi->lacp_portid.lacp_portid_number = htons(ifp0->if_index);
2692 
2693 	pi->lacp_state = p->p_actor_state;
2694 	if (sc->sc_lacp_mode)
2695 		SET(pi->lacp_state, LACP_STATE_ACTIVITY);
2696 	if (sc->sc_lacp_timeout)
2697 		SET(pi->lacp_state, LACP_STATE_TIMEOUT);
2698 
2699 	pi = &lacpdu->lacp_partner_info;
2700 	aggr_lacp_tlv_set(&lacpdu->lacp_partner_info_tlv,
2701 	    LACP_T_PARTNER, sizeof(*pi));
2702 
2703 	*pi = p->p_partner;
2704 
2705 	ci = &lacpdu->lacp_collector_info;
2706 	aggr_lacp_tlv_set(&lacpdu->lacp_collector_info_tlv,
2707 	    LACP_T_COLLECTOR, sizeof(*ci));
2708 	ci->lacp_maxdelay = htons(0);
2709 
2710 	lacpdu->lacp_terminator.lacp_tlv_type = LACP_T_TERMINATOR;
2711 	lacpdu->lacp_terminator.lacp_tlv_length = 0;
2712 
2713 	(void)if_enqueue(ifp0, m);
2714 }
2715 
2716 static void
2717 aggr_ntt(struct aggr_port *p)
2718 {
2719 	if (!timeout_pending(&p->p_txm_ntt))
2720 		timeout_add(&p->p_txm_ntt, 0);
2721 }
2722 
2723 static void
2724 aggr_transmit_machine(void *arg)
2725 {
2726 	struct aggr_port *p = arg;
2727 	struct aggr_softc *sc = p->p_aggr;
2728 	unsigned int slot;
2729 	int *log;
2730 	int period = hz * LACP_FAST_PERIODIC_TIME;
2731 	int diff;
2732 
2733 	if (!aggr_lacp_enabled(sc) || !aggr_port_enabled(p))
2734 		return;
2735 
2736 	slot = p->p_txm_slot;
2737 	log = &p->p_txm_log[slot % nitems(p->p_txm_log)];
2738 
2739 	diff = ticks - *log;
2740 	if (diff < period) {
2741 		timeout_add(&p->p_txm_ntt, period - diff);
2742 		return;
2743 	}
2744 
2745 	*log = ticks;
2746 	p->p_txm_slot = ++slot;
2747 
2748 #if 0
2749 	DPRINTF(sc, "%s %s ntt\n", sc->sc_if.if_xname, p->p_ifp0->if_xname);
2750 #endif
2751 
2752 	aggr_ntt_transmit(p);
2753 }
2754 
2755 static void
2756 aggr_set_lacp_mode(struct aggr_softc *sc, int mode)
2757 {
2758 	sc->sc_lacp_mode = mode;
2759 
2760 	if (mode == AGGR_LACP_MODE_PASSIVE) {
2761 		struct aggr_port *p;
2762 
2763 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2764 			if (!ISSET(p->p_partner_state, LACP_STATE_ACTIVITY))
2765 				timeout_del(&p->p_ptm_tx);
2766 		}
2767 	}
2768 }
2769 
2770 static void
2771 aggr_set_partner_timeout(struct aggr_port *p, int timeout)
2772 {
2773 	uint8_t ostate = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT);
2774 	uint8_t nstate = (timeout == AGGR_LACP_TIMEOUT_FAST) ?
2775 	    LACP_STATE_TIMEOUT : 0;
2776 
2777 	if (ostate == nstate)
2778 		return;
2779 
2780 	if (timeout == AGGR_LACP_TIMEOUT_FAST) {
2781 		SET(p->p_partner_state, LACP_STATE_TIMEOUT);
2782 		timeout_add_sec(&p->p_ptm_tx,
2783 		    aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]);
2784 	} else
2785 		CLR(p->p_partner_state, LACP_STATE_TIMEOUT);
2786 }
2787 
2788 static void
2789 aggr_set_lacp_timeout(struct aggr_softc *sc, int timeout)
2790 {
2791 	struct aggr_port *p;
2792 
2793 	sc->sc_lacp_timeout = timeout;
2794 
2795 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2796 		if (!ISSET(p->p_actor_state, LACP_STATE_DEFAULTED))
2797 			continue;
2798 
2799 		aggr_set_partner_timeout(p, timeout);
2800 	}
2801 }
2802 
2803 static int
2804 aggr_multi_eq(const struct aggr_multiaddr *ma,
2805     const uint8_t *addrlo, const uint8_t *addrhi)
2806 {
2807 	return (ETHER_IS_EQ(ma->m_addrlo, addrlo) &&
2808 	    ETHER_IS_EQ(ma->m_addrhi, addrhi));
2809 }
2810 
2811 static int
2812 aggr_multi_add(struct aggr_softc *sc, struct ifreq *ifr)
2813 {
2814 	struct ifnet *ifp = &sc->sc_if;
2815 	struct aggr_port *p;
2816 	struct aggr_multiaddr *ma;
2817 	uint8_t addrlo[ETHER_ADDR_LEN];
2818 	uint8_t addrhi[ETHER_ADDR_LEN];
2819 	int error;
2820 
2821 	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2822 	if (error != 0)
2823 		return (error);
2824 
2825 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
2826 		if (aggr_multi_eq(ma, addrlo, addrhi)) {
2827 			ma->m_refs++;
2828 			return (0);
2829 		}
2830 	}
2831 
2832 	ma = malloc(sizeof(*ma), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
2833 	if (ma == NULL)
2834 		return (ENOMEM);
2835 
2836 	ma->m_refs = 1;
2837 	memcpy(&ma->m_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2838 	memcpy(ma->m_addrlo, addrlo, sizeof(ma->m_addrlo));
2839 	memcpy(ma->m_addrhi, addrhi, sizeof(ma->m_addrhi));
2840 	TAILQ_INSERT_TAIL(&sc->sc_multiaddrs, ma, m_entry);
2841 
2842 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2843 		struct ifnet *ifp0 = p->p_ifp0;
2844 
2845 		if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) {
2846 			log(LOG_WARNING, "%s %s: "
2847 			    "unable to add multicast address\n",
2848 			    ifp->if_xname, ifp0->if_xname);
2849 		}
2850 	}
2851 
2852 	return (0);
2853 }
2854 
2855 int
2856 aggr_multi_del(struct aggr_softc *sc, struct ifreq *ifr)
2857 {
2858 	struct ifnet *ifp = &sc->sc_if;
2859 	struct aggr_port *p;
2860 	struct aggr_multiaddr *ma;
2861 	uint8_t addrlo[ETHER_ADDR_LEN];
2862 	uint8_t addrhi[ETHER_ADDR_LEN];
2863 	int error;
2864 
2865 	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2866 	if (error != 0)
2867 		return (error);
2868 
2869 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
2870 		if (aggr_multi_eq(ma, addrlo, addrhi))
2871 			break;
2872 	}
2873 
2874 	if (ma == NULL)
2875 		return (EINVAL);
2876 
2877 	if (--ma->m_refs > 0)
2878 		return (0);
2879 
2880 	TAILQ_REMOVE(&sc->sc_multiaddrs, ma, m_entry);
2881 
2882 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2883 		struct ifnet *ifp0 = p->p_ifp0;
2884 
2885 		if (aggr_multi(sc, p, ma, SIOCDELMULTI) != 0) {
2886 			log(LOG_WARNING, "%s %s: "
2887 			    "unable to delete multicast address\n",
2888 			    ifp->if_xname, ifp0->if_xname);
2889 		}
2890 	}
2891 
2892 	free(ma, M_DEVBUF, sizeof(*ma));
2893 
2894 	return (0);
2895 }
2896