xref: /openbsd-src/sys/net/if_aggr.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: if_aggr.c,v 1.34 2020/08/21 22:59:27 kn Exp $ */
2 
3 /*
4  * Copyright (c) 2019 The University of Queensland
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /*
20  * This driver implements 802.1AX Link Aggregation (formerly 802.3ad)
21  *
22  * The specification describes systems with multiple ports that
23  * can dynamically form aggregations. The relationship between ports
24  * and aggregations is such that arbitrary ports connected to ports
25  * on other systems may move between aggregations, and there can be
26  * as many aggregations as ports. An aggregation in this model is
27  * effectively an interface, and becomes the point that Ethernet traffic
28  * enters and leaves the system. The spec also contains a description
29  * of the Link Aggregation Control Protocol (LACP) for use on the wire,
30  * and how to process it and select ports and aggregations based on
31  * it.
32  *
33  * This driver implements a simplified or constrained model where each
34  * aggr(4) interface is effectively an independent system, and will
35  * only support one aggregation. This supports the use of the kernel
36  * interface as a static entity that is created and configured once,
37  * and has the link "come up" when that one aggregation is selected
38  * by the LACP protocol.
39  */
40 
41 /*
42  * This code was written by David Gwynne <dlg@uq.edu.au> as part
43  * of the Information Technology Infrastructure Group (ITIG) in the
44  * Faculty of Engineering, Architecture and Information Technology
45  * (EAIT).
46  */
47 
48 /*
49  * TODO:
50  *
51  * - add locking
52  * - figure out the Ready_N and Ready logic
53  */
54 
55 #include "bpfilter.h"
56 
57 #include <sys/param.h>
58 #include <sys/kernel.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/queue.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/systm.h>
65 #include <sys/syslog.h>
66 #include <sys/rwlock.h>
67 #include <sys/percpu.h>
68 #include <sys/smr.h>
69 #include <sys/task.h>
70 
71 #include <net/if.h>
72 #include <net/if_dl.h>
73 #include <net/if_types.h>
74 
75 #include <net/if_media.h>
76 
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 
80 #include <crypto/siphash.h> /* if_trunk.h uses siphash bits */
81 #include <net/if_trunk.h>
82 
83 #if NBPFILTER > 0
84 #include <net/bpf.h>
85 #endif
86 
87 /*
88  * Link Aggregation Control Protocol (LACP)
89  */
90 
/*
 * Header that aggr_input() expects directly after the Ethernet header
 * of a Slow Protocols frame. The subtype decides whether the frame is
 * queued for the receive machine.
 */
struct ether_slowproto_hdr {
	uint8_t		sph_subtype;	/* SLOWPROTOCOLS_SUBTYPE_* */
	uint8_t		sph_version;
} __packed;
95 
/* values of sph_subtype that aggr_input() hands to the receive task */
#define SLOWPROTOCOLS_SUBTYPE_LACP	1
#define SLOWPROTOCOLS_SUBTYPE_LACP_MARKER \
					2

#define LACP_VERSION			1

/* protocol timer constants; NOTE(review): presumably seconds -- confirm */
#define LACP_FAST_PERIODIC_TIME		1
#define LACP_SLOW_PERIODIC_TIME		30
#define LACP_TIMEOUT_FACTOR		3
#define LACP_AGGREGATION_WAIT_TIME	2

/* also sizes the per-port p_txm_log[] array */
#define LACP_TX_MACHINE_RATE		3 /* per LACP_FAST_PERIODIC_TIME */

/* Ethernet address initialisers; lacp_address_slow uses LACP_ADDR_SLOW */
#define LACP_ADDR_C_BRIDGE		{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }
#define LACP_ADDR_SLOW			{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }
#define LACP_ADDR_NON_TPMR_BRIDGE	{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03 }
112 
/*
 * Type/length pair that precedes every information block in
 * struct lacp_du and struct marker_pdu.
 */
struct lacp_tlv_hdr {
	uint8_t			lacp_tlv_type;
	uint8_t			lacp_tlv_length;
} __packed __aligned(2);
117 
118 /* LACP TLV types */
119 
/* NOTE(review): presumably the values carried in lacp_tlv_type -- confirm */
#define LACP_T_TERMINATOR		0x00
#define LACP_T_ACTOR			0x01
#define LACP_T_PARTNER			0x02
#define LACP_T_COLLECTOR		0x03

/* LACPv2 TLV types */

#define LACP_T_PORT_ALGORITHM		0x04
#define LACP_T_PORT_CONVERSATION_ID_DIGEST \
					0x05
#define LACP_T_PORT_CONVERSATION_MASK	0x06
#define LACP_T_PORT_CONVERSATION_SERVICE_MAPPING \
					0x0a
133 
/*
 * System identifier: a priority plus a MAC address. aggr_get_trunk()
 * converts the priority with ntohs() before reporting it.
 */
struct lacp_sysid {
	uint16_t		lacp_sysid_priority;
	uint8_t			lacp_sysid_mac[ETHER_ADDR_LEN];
} __packed __aligned(2);
138 
/*
 * Port identifier: a priority plus a port number. aggr_get_trunk()
 * converts both fields with ntohs() before reporting them.
 */
struct lacp_portid {
	uint16_t		lacp_portid_priority;
	uint16_t		lacp_portid_number;
} __packed __aligned(2);
143 
/*
 * Actor or partner information block of a LACPDU (see struct lacp_du).
 * A copy of the partner's block is kept per port in p_partner.
 */
struct lacp_port_info {
	struct lacp_sysid	lacp_sysid;
	uint16_t		lacp_key;	/* ntohs()ed when reported */
	struct lacp_portid	lacp_portid;
	uint8_t			lacp_state;	/* reported as partner_state */
	uint8_t			lacp_reserved[3];
} __packed __aligned(2);
151 
/*
 * Bit flags of the 8-bit LACP state value, e.g. p_actor_state and the
 * actor_state reported by aggr_get_trunk().
 */
#define LACP_STATE_ACTIVITY		(1 << 0)
#define LACP_STATE_TIMEOUT		(1 << 1)
#define LACP_STATE_AGGREGATION		(1 << 2)
#define LACP_STATE_SYNC			(1 << 3)
#define LACP_STATE_COLLECTING		(1 << 4)
#define LACP_STATE_DISTRIBUTING		(1 << 5)
#define LACP_STATE_DEFAULTED		(1 << 6)
#define LACP_STATE_EXPIRED		(1 << 7)
160 
/* Collector information block of a LACPDU (see struct lacp_du). */
struct lacp_collector_info {
	uint16_t		lacp_maxdelay;
	uint8_t			lacp_reserved[12];
} __packed __aligned(2);
165 
/*
 * Layout of a LACPDU: the slow protocols header, then the actor,
 * partner and collector information blocks (each preceded by a TLV
 * header), then a terminator TLV header and padding.
 */
struct lacp_du {
	struct ether_slowproto_hdr
				lacp_du_sph;
	struct lacp_tlv_hdr	lacp_actor_info_tlv;
	struct lacp_port_info	lacp_actor_info;
	struct lacp_tlv_hdr	lacp_partner_info_tlv;
	struct lacp_port_info	lacp_partner_info;
	struct lacp_tlv_hdr	lacp_collector_info_tlv;
	struct lacp_collector_info
				lacp_collector_info;
	/* other TLVs go here */
	struct lacp_tlv_hdr	lacp_terminator;
	uint8_t			lacp_pad[50];
} __packed __aligned(2);
180 
181 /* Marker TLV types */
182 
/* NOTE(review): presumably TLV types for struct marker_pdu -- confirm */
#define MARKER_T_INFORMATION		0x01
#define MARKER_T_RESPONSE		0x02
185 
/* Information block of a marker PDU (see struct marker_pdu). */
struct marker_info {
	uint16_t		marker_requester_port;
	uint8_t			marker_requester_system[ETHER_ADDR_LEN];
	uint8_t			marker_requester_txid[4];
	uint8_t			marker_pad[2];
} __packed __aligned(2);
192 
/*
 * Layout of a marker PDU: the slow protocols header, one information
 * block preceded by its TLV header, a terminator TLV header and padding.
 */
struct marker_pdu {
	struct ether_slowproto_hdr
				marker_sph;

	struct lacp_tlv_hdr	marker_info_tlv;
	struct marker_info	marker_info;
	struct lacp_tlv_hdr	marker_terminator;
	uint8_t			marker_pad[90];
} __packed __aligned(2);
202 
/*
 * States of the per-port Receive machine (p_rxm_state). The values
 * index lacp_rxm_state_names[], so keep both in the same order.
 */
enum lacp_rxm_state {
	LACP_RXM_S_BEGIN = 0,
	LACP_RXM_S_INITIALIZE,
	LACP_RXM_S_PORT_DISABLED,
	LACP_RXM_S_EXPIRED,
	LACP_RXM_S_LACP_DISABLED,
	LACP_RXM_S_DEFAULTED,
	LACP_RXM_S_CURRENT,
};
212 
/*
 * Events delivered to the Receive machine through aggr_rxm_ev(). The
 * values index lacp_rxm_event_names[], so keep both in the same order.
 */
enum lacp_rxm_event {
	LACP_RXM_E_BEGIN,
	LACP_RXM_E_UCT,
	LACP_RXM_E_PORT_MOVED,
	LACP_RXM_E_NOT_PORT_MOVED,
	LACP_RXM_E_PORT_ENABLED,
	LACP_RXM_E_NOT_PORT_ENABLED,
	LACP_RXM_E_LACP_ENABLED,
	LACP_RXM_E_NOT_LACP_ENABLED,
	LACP_RXM_E_LACPDU, /* CtrlMuxN:M_UNITDATA.indication(LACPDU) */
	LACP_RXM_E_TIMER_EXPIRED, /* current_while_timer expired */
};
225 
/*
 * States of the per-port Mux machine (p_mux_state). The values index
 * lacp_mux_state_names[], so keep both in the same order.
 */
enum lacp_mux_state {
	LACP_MUX_S_BEGIN = 0,
	LACP_MUX_S_DETACHED,
	LACP_MUX_S_WAITING,
	LACP_MUX_S_ATTACHED,
	LACP_MUX_S_DISTRIBUTING,
	LACP_MUX_S_COLLECTING,
};
234 
/*
 * Events delivered to the Mux machine through aggr_mux(). The values
 * index lacp_mux_event_names[], so keep both in the same order.
 */
enum lacp_mux_event {
	LACP_MUX_E_BEGIN,
	LACP_MUX_E_SELECTED,
	LACP_MUX_E_STANDBY,
	LACP_MUX_E_UNSELECTED,
	LACP_MUX_E_READY,
	LACP_MUX_E_SYNC,
	LACP_MUX_E_NOT_SYNC,
	LACP_MUX_E_COLLECTING,
	LACP_MUX_E_NOT_COLLECTING,
};
246 
247 /*
248  * LACP variables
249  */
250 
/* destination address that ETHER_IS_SLOWADDR() compares against */
static const uint8_t lacp_address_slow[ETHER_ADDR_LEN] = LACP_ADDR_SLOW;
252 
/* printable names, listed in the order of enum lacp_rxm_state */
static const char *lacp_rxm_state_names[] = {
	"BEGIN",
	"INITIALIZE",
	"PORT_DISABLED",
	"EXPIRED",
	"LACP_DISABLED",
	"DEFAULTED",
	"CURRENT",
};
262 
/* printable names, listed in the order of enum lacp_rxm_event */
static const char *lacp_rxm_event_names[] = {
	"BEGIN",
	"UCT",
	"port_moved",
	"!port_moved",
	"port_enabled",
	"!port_enabled",
	"LACP_Enabled",
	"!LACP_Enabled",
	"LACPDU",
	"current_while_timer expired",
};
275 
/* printable names, listed in the order of enum lacp_mux_state */
static const char *lacp_mux_state_names[] = {
	"BEGIN",
	"DETACHED",
	"WAITING",
	"ATTACHED",
	"DISTRIBUTING",
	"COLLECTING",
};
284 
/* printable names, listed in the order of enum lacp_mux_event */
static const char *lacp_mux_event_names[] = {
	"BEGIN",
	"Selected == SELECTED",
	"Selected == STANDBY",
	"Selected == UNSELECTED",
	"Ready",
	"Partner.Sync",
	"! Partner.Sync",
	"Partner.Collecting",
	"! Partner.Collecting",
};
296 
297 /*
298  * aggr interface
299  */
300 
/* sizes aggr_map.m_ifp0s[]; aggr_add_port() checks sc_nports against it */
#define AGGR_MAX_PORTS		32
/* NOTE(review): not used in the part of the file reviewed -- confirm use */
#define AGGR_MAX_SLOW_PKTS	(AGGR_MAX_PORTS * 3)
303 
/*
 * One entry on the sc_multiaddrs list. aggr_add_port() passes every
 * entry to aggr_multi() with SIOCADDMULTI for a newly added port.
 */
struct aggr_multiaddr {
	TAILQ_ENTRY(aggr_multiaddr)
				m_entry;	/* sc_multiaddrs linkage */
	unsigned int		m_refs;
	/* NOTE(review): presumably the low/high ends of a range -- confirm */
	uint8_t			m_addrlo[ETHER_ADDR_LEN];
	uint8_t			m_addrhi[ETHER_ADDR_LEN];
	struct sockaddr_storage m_addr;
};
TAILQ_HEAD(aggr_multiaddrs, aggr_multiaddr);
313 
314 struct aggr_softc;
315 
/*
 * Value of a port's Selected variable (p_selected, p_muxed). The values
 * index aggr_port_selected_names[]; UNSELECTED is 0, which
 * aggr_get_trunk() relies on when it tests p_muxed for truth.
 */
enum aggr_port_selected {
	AGGR_PORT_UNSELECTED,
	AGGR_PORT_SELECTED,
	AGGR_PORT_STANDBY,
};
321 
/* printable names, listed in the order of enum aggr_port_selected */
static const char *aggr_port_selected_names[] = {
	"UNSELECTED",
	"SELECTED",
	"STANDBY",
};
327 
/*
 * State for one member interface of an aggr(4). aggr_add_port()
 * allocates one per added interface and links it on sc_ports.
 */
struct aggr_port {
	struct ifnet		*p_ifp0;	/* the member interface */

	/* copies of ifp0's address and MTU taken by aggr_add_port() */
	uint8_t			 p_lladdr[ETHER_ADDR_LEN];
	uint32_t		 p_mtu;

	/* ifp0's own handlers, saved before aggr installs its own */
	int (*p_ioctl)(struct ifnet *, u_long, caddr_t);
	void (*p_input)(struct ifnet *, struct mbuf *);
	int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);

	struct task		 p_lhook;	/* link state hook, aggr_p_linkch */
	struct task		 p_dhook;	/* detach hook, aggr_p_detach */

	struct aggr_softc	*p_aggr;	/* aggr this port belongs to */
	TAILQ_ENTRY(aggr_port)	 p_entry;	/* sc_ports linkage */

	/* aggr_input() drops non-LACP frames while p_collecting is 0 */
	unsigned int		 p_collecting;
	unsigned int		 p_distributing;
	TAILQ_ENTRY(aggr_port)	 p_entry_distributing;
	TAILQ_ENTRY(aggr_port)	 p_entry_muxen;

	/* Partner information */
	enum aggr_port_selected	 p_muxed;
	enum aggr_port_selected	 p_selected;		/* Selected */
	struct lacp_port_info	 p_partner;
#define p_partner_state		 p_partner.lacp_state

	uint8_t			 p_actor_state;	/* LACP_STATE_* bits */
	uint8_t			 p_lacp_timeout;

	struct timeout		 p_current_while_timer;
	struct timeout		 p_wait_while_timer;

	/* Receive machine */
	enum lacp_rxm_state	 p_rxm_state;
	/* slow protocol frames queued by aggr_input() for p_rxm_task */
	struct mbuf_queue	 p_rxm_mq;
	struct task		 p_rxm_task;	/* runs aggr_rx */

	/* Periodic Transmission machine */
	struct timeout		 p_ptm_tx;	/* runs aggr_ptm_tx */

	/* Mux machine */
	enum lacp_mux_state	 p_mux_state;

	/* Transmit machine */
	/* tick values; aggr_add_port() seeds every slot with a past time */
	int			 p_txm_log[LACP_TX_MACHINE_RATE];
	unsigned int		 p_txm_slot;
	struct timeout		 p_txm_ntt;	/* runs aggr_transmit_machine */
};

TAILQ_HEAD(aggr_port_list, aggr_port);
380 
/*
 * Transmit lookup table: aggr_transmit() sends a packet out of
 * m_ifp0s[flow % AGGR_MAX_PORTS].
 */
struct aggr_map {
	struct ifnet		*m_ifp0s[AGGR_MAX_PORTS];
};
384 
/*
 * Per-interface state of an aggr(4), allocated by aggr_clone_create().
 */
struct aggr_softc {
	struct arpcom		 sc_ac;
#define sc_if			 sc_ac.ac_if
	/* set by aggr_clone_destroy(); aggr_ioctl() then returns ENXIO */
	unsigned int		 sc_dead;
	unsigned int		 sc_promisc;
	struct ifmedia		 sc_media;

	struct aggr_multiaddrs	 sc_multiaddrs;

	/* random value mixed into the flow id made up by aggr_input() */
	unsigned int		 sc_mix;

	struct aggr_map		 sc_maps[2];
	unsigned int		 sc_map_gen;
	/* map used by the transmit path inside an SMR read section */
	struct aggr_map		*sc_map;

	struct rwlock		 sc_lock;
	struct aggr_port_list	 sc_ports;	/* every port, via p_entry */
	struct aggr_port_list	 sc_distributing;
	struct aggr_port_list	 sc_muxen;
	unsigned int		 sc_nports;	/* entries on sc_ports */
	unsigned int		 sc_ndistributing;

	struct timeout		 sc_tick;

	uint8_t			 sc_lacp_mode;
#define AGGR_LACP_MODE_PASSIVE		0
#define AGGR_LACP_MODE_ACTIVE		1
	/* also an index into aggr_periodic_times[] */
	uint8_t			 sc_lacp_timeout;
#define AGGR_LACP_TIMEOUT_SLOW		0
#define AGGR_LACP_TIMEOUT_FAST		1
	uint16_t		 sc_lacp_prio;
	uint16_t		 sc_lacp_port_prio;

	/* reported through ntohs() by aggr_get_trunk() */
	struct lacp_sysid	 sc_partner_system;
	uint16_t		 sc_partner_key;
};
421 
/* printf() the message only while IFF_DEBUG is set on the aggr interface */
#define DPRINTF(_sc, fmt...)	do { \
	if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \
		printf(fmt); \
} while (0)
426 
/*
 * Periodic time for each AGGR_LACP_TIMEOUT_* setting. aggr_set_options()
 * uses the size of this table to validate a requested timeout.
 */
static const unsigned int aggr_periodic_times[] = {
	[AGGR_LACP_TIMEOUT_SLOW] = LACP_SLOW_PERIODIC_TIME,
	[AGGR_LACP_TIMEOUT_FAST] = LACP_FAST_PERIODIC_TIME,
};
431 
432 static int	aggr_clone_create(struct if_clone *, int);
433 static int	aggr_clone_destroy(struct ifnet *);
434 
435 static int	aggr_ioctl(struct ifnet *, u_long, caddr_t);
436 static void	aggr_start(struct ifqueue *);
437 static int	aggr_enqueue(struct ifnet *, struct mbuf *);
438 
439 static int	aggr_media_change(struct ifnet *);
440 static void	aggr_media_status(struct ifnet *, struct ifmediareq *);
441 
442 static int	aggr_up(struct aggr_softc *);
443 static int	aggr_down(struct aggr_softc *);
444 static int	aggr_iff(struct aggr_softc *);
445 
446 static void	aggr_p_linkch(void *);
447 static void	aggr_p_detach(void *);
448 static int	aggr_p_ioctl(struct ifnet *, u_long, caddr_t);
449 static int	aggr_p_output(struct ifnet *, struct mbuf *,
450 		    struct sockaddr *, struct rtentry *);
451 
452 static int	aggr_get_trunk(struct aggr_softc *, struct trunk_reqall *);
453 static int	aggr_set_options(struct aggr_softc *,
454 		    const struct trunk_opts *);
455 static int	aggr_get_options(struct aggr_softc *, struct trunk_opts *);
456 static int	aggr_set_lladdr(struct aggr_softc *, const struct ifreq *);
457 static int	aggr_set_mtu(struct aggr_softc *, uint32_t);
458 static void	aggr_p_dtor(struct aggr_softc *, struct aggr_port *,
459 		    const char *);
460 static int	aggr_p_setlladdr(struct aggr_port *, const uint8_t *);
461 static int	aggr_p_set_mtu(struct aggr_port *, uint32_t);
462 static int	aggr_add_port(struct aggr_softc *,
463 		    const struct trunk_reqport *);
464 static int	aggr_get_port(struct aggr_softc *, struct trunk_reqport *);
465 static int	aggr_del_port(struct aggr_softc *,
466 		    const struct trunk_reqport *);
467 static int	aggr_group(struct aggr_softc *, struct aggr_port *, u_long);
468 static int	aggr_multi(struct aggr_softc *, struct aggr_port *,
469 		    const struct aggr_multiaddr *, u_long);
470 static void	aggr_update_capabilities(struct aggr_softc *);
471 static void	aggr_set_lacp_mode(struct aggr_softc *, int);
472 static void	aggr_set_lacp_timeout(struct aggr_softc *, int);
473 static int	aggr_multi_add(struct aggr_softc *, struct ifreq *);
474 static int	aggr_multi_del(struct aggr_softc *, struct ifreq *);
475 
476 static void	aggr_map(struct aggr_softc *);
477 
478 static void	aggr_record_default(struct aggr_softc *, struct aggr_port *);
479 static void	aggr_current_while_timer(void *);
480 static void	aggr_wait_while_timer(void *);
481 static void	aggr_rx(void *);
482 static void	aggr_rxm_ev(struct aggr_softc *, struct aggr_port *,
483 		    enum lacp_rxm_event, const struct lacp_du *);
/*
 * Wrappers around aggr_rxm_ev(): aggr_rxm() delivers an event that has
 * no LACPDU, aggr_rxm_lacpdu() delivers LACP_RXM_E_LACPDU with one.
 */
#define aggr_rxm(_sc, _p, _ev) \
		aggr_rxm_ev((_sc), (_p), (_ev), NULL)
#define aggr_rxm_lacpdu(_sc, _p, _lacpdu) \
		aggr_rxm_ev((_sc), (_p), LACP_RXM_E_LACPDU, (_lacpdu))
488 
489 static void	aggr_mux(struct aggr_softc *, struct aggr_port *,
490 		    enum lacp_mux_event);
491 static int	aggr_mux_ev(struct aggr_softc *, struct aggr_port *,
492 		    enum lacp_mux_event, int *);
493 
494 static void	aggr_set_partner_timeout(struct aggr_port *, int);
495 
496 static void	aggr_ptm_tx(void *);
497 
498 static void	aggr_transmit_machine(void *);
499 static void	aggr_ntt(struct aggr_port *);
500 static void	aggr_ntt_transmit(struct aggr_port *);
501 
502 static void	aggr_set_selected(struct aggr_port *, enum aggr_port_selected,
503 		    enum lacp_mux_event);
504 static void	aggr_unselected(struct aggr_port *);
505 
506 static void	aggr_selection_logic(struct aggr_softc *, struct aggr_port *);
507 
/* compares an Ethernet address with lacp_address_slow via ETHER_IS_EQ() */
#define ETHER_IS_SLOWADDR(_a)	ETHER_IS_EQ((_a), lacp_address_slow)
509 
/* cloner for "aggr" interfaces; registered by aggrattach() */
static struct if_clone aggr_cloner =
    IF_CLONE_INITIALIZER("aggr", aggr_clone_create, aggr_clone_destroy);
512 
513 void
514 aggrattach(int count)
515 {
516 	if_clone_attach(&aggr_cloner);
517 }
518 
519 static int
520 aggr_clone_create(struct if_clone *ifc, int unit)
521 {
522 	struct aggr_softc *sc;
523 	struct ifnet *ifp;
524 
525 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
526 	if (sc == NULL)
527 		return (ENOMEM);
528 
529 	sc->sc_mix = arc4random();
530 
531 	ifp = &sc->sc_if;
532 
533 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
534 	    ifc->ifc_name, unit);
535 
536 	TAILQ_INIT(&sc->sc_multiaddrs);
537 	rw_init(&sc->sc_lock, "aggrlk");
538 	TAILQ_INIT(&sc->sc_ports);
539 	sc->sc_nports = 0;
540 	TAILQ_INIT(&sc->sc_distributing);
541 	sc->sc_ndistributing = 0;
542 	TAILQ_INIT(&sc->sc_muxen);
543 
544 	sc->sc_map_gen = 0;
545 	sc->sc_map = NULL; /* no links yet */
546 
547 	sc->sc_lacp_mode = AGGR_LACP_MODE_ACTIVE;
548 	sc->sc_lacp_timeout = AGGR_LACP_TIMEOUT_SLOW;
549 	sc->sc_lacp_prio = 0x8000; /* medium */
550 	sc->sc_lacp_port_prio = 0x8000; /* medium */
551 
552 	ifmedia_init(&sc->sc_media, 0, aggr_media_change, aggr_media_status);
553 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
554 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
555 
556 	ifp->if_softc = sc;
557 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
558 	ifp->if_ioctl = aggr_ioctl;
559 	ifp->if_qstart = aggr_start;
560 	ifp->if_enqueue = aggr_enqueue;
561 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
562 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
563 	ifp->if_link_state = LINK_STATE_DOWN;
564 	ether_fakeaddr(ifp);
565 
566 	if_counters_alloc(ifp);
567 	if_attach(ifp);
568 	ether_ifattach(ifp);
569 
570 	ifp->if_llprio = IFQ_MAXPRIO;
571 
572 	return (0);
573 }
574 
575 static int
576 aggr_clone_destroy(struct ifnet *ifp)
577 {
578 	struct aggr_softc *sc = ifp->if_softc;
579 	struct aggr_port *p;
580 
581 	NET_LOCK();
582 	sc->sc_dead = 1;
583 
584 	if (ISSET(ifp->if_flags, IFF_RUNNING))
585 		aggr_down(sc);
586 	NET_UNLOCK();
587 
588 	ether_ifdetach(ifp);
589 	if_detach(ifp);
590 
591 	/* last ref, no need to lock. aggr_p_dtor locks anyway */
592 	NET_LOCK();
593 	while ((p = TAILQ_FIRST(&sc->sc_ports)) != NULL)
594 		aggr_p_dtor(sc, p, "destroy");
595 	NET_UNLOCK();
596 
597 	free(sc, M_DEVBUF, sizeof(*sc));
598 
599 	return (0);
600 }
601 
602 /*
603  * LACP_Enabled
604  */
605 static inline int
606 aggr_lacp_enabled(struct aggr_softc *sc)
607 {
608 	struct ifnet *ifp = &sc->sc_if;
609 	return (ISSET(ifp->if_flags, IFF_RUNNING));
610 }
611 
612 /*
613  * port_enabled
614  */
615 static int
616 aggr_port_enabled(struct aggr_port *p)
617 {
618 	struct ifnet *ifp0 = p->p_ifp0;
619 
620 	if (!ISSET(ifp0->if_flags, IFF_RUNNING))
621 		return (0);
622 
623 	if (!LINK_STATE_IS_UP(ifp0->if_link_state))
624 		return (0);
625 
626 	return (1);
627 }
628 
629 /*
630  * port_moved
631  *
632  * This variable is set to TRUE if the Receive machine for an Aggregation
633  * Port is in the PORT_DISABLED state, and the combination of
634  * Partner_Oper_System and Partner_Oper_Port_Number in use by that
635  * Aggregation Port has been received in an incoming LACPDU on a
636  * different Aggregation Port. This variable is set to FALSE once the
637  * INITIALIZE state of the Receive machine has set the Partner information
638  * for the Aggregation Port to administrative default values.
639  *
640  * Value: Boolean
641 */
/* placeholder: both arguments are ignored and "not moved" is reported */
static int
aggr_port_moved(struct aggr_softc *sc, struct aggr_port *p)
{
	const int moved = 0;

	return (moved);
}
647 
648 static void
649 aggr_transmit(struct aggr_softc *sc, const struct aggr_map *map, struct mbuf *m)
650 {
651 	struct ifnet *ifp = &sc->sc_if;
652 	struct ifnet *ifp0;
653 	uint16_t flow = 0;
654 
655 #if NBPFILTER > 0
656 	{
657 		caddr_t if_bpf = ifp->if_bpf;
658 		if (if_bpf)
659 			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
660 	}
661 #endif
662 
663 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
664 		flow = m->m_pkthdr.ph_flowid;
665 
666 	ifp0 = map->m_ifp0s[flow % AGGR_MAX_PORTS];
667 
668 	if (if_enqueue(ifp0, m) != 0)
669 		counters_inc(ifp->if_counters, ifc_oerrors);
670 }
671 
672 static int
673 aggr_enqueue(struct ifnet *ifp, struct mbuf *m)
674 {
675 	struct aggr_softc *sc;
676 	const struct aggr_map *map;
677 	int error = 0;
678 
679 	if (!ifq_is_priq(&ifp->if_snd))
680 		return (if_enqueue_ifq(ifp, m));
681 
682 	sc = ifp->if_softc;
683 
684 	smr_read_enter();
685 	map = SMR_PTR_GET(&sc->sc_map);
686 	if (__predict_false(map == NULL)) {
687 		m_freem(m);
688 		error = ENETDOWN;
689 	} else {
690 		counters_pkt(ifp->if_counters,
691 		    ifc_opackets, ifc_obytes, m->m_pkthdr.len);
692 		aggr_transmit(sc, map, m);
693 	}
694 	smr_read_leave();
695 
696 	return (error);
697 }
698 
699 static void
700 aggr_start(struct ifqueue *ifq)
701 {
702 	struct ifnet *ifp = ifq->ifq_if;
703 	struct aggr_softc *sc = ifp->if_softc;
704 	const struct aggr_map *map;
705 
706 	smr_read_enter();
707 	map = SMR_PTR_GET(&sc->sc_map);
708 	if (__predict_false(map == NULL))
709 		ifq_purge(ifq);
710 	else {
711 		struct mbuf *m;
712 
713 		while ((m = ifq_dequeue(ifq)) != NULL)
714 			aggr_transmit(sc, map, m);
715 	}
716 	smr_read_leave();
717 }
718 
719 static inline int
720 aggr_eh_is_slow(const struct ether_header *eh)
721 {
722 	return (ETHER_IS_SLOWADDR(eh->ether_dhost) &&
723 	    eh->ether_type == htons(ETHERTYPE_SLOW));
724 }
725 
726 static void
727 aggr_input(struct ifnet *ifp0, struct mbuf *m)
728 {
729 	struct arpcom *ac0 = (struct arpcom *)ifp0;
730 	struct aggr_port *p = ac0->ac_trunkport;
731 	struct aggr_softc *sc = p->p_aggr;
732 	struct ifnet *ifp = &sc->sc_if;
733 	struct ether_header *eh;
734 	int hlen = sizeof(*eh);
735 
736 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
737 		goto drop;
738 
739 	eh = mtod(m, struct ether_header *);
740 	if (!ISSET(m->m_flags, M_VLANTAG) &&
741 	    __predict_false(aggr_eh_is_slow(eh))) {
742 		struct ether_slowproto_hdr *sph;
743 
744 		hlen += sizeof(*sph);
745 		if (m->m_len < hlen) {
746 			m = m_pullup(m, hlen);
747 			if (m == NULL) {
748 				/* short++ */
749 				return;
750 			}
751 			eh = mtod(m, struct ether_header *);
752 		}
753 
754 		sph = (struct ether_slowproto_hdr *)(eh + 1);
755 		switch (sph->sph_subtype) {
756 		case SLOWPROTOCOLS_SUBTYPE_LACP:
757 		case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER:
758 			if (mq_enqueue(&p->p_rxm_mq, m) == 0)
759 				task_add(systq, &p->p_rxm_task);
760 			return;
761 		default:
762 			break;
763 		}
764 	}
765 
766 	if (__predict_false(!p->p_collecting))
767 		goto drop;
768 
769 	if (!ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
770 		m->m_pkthdr.ph_flowid = ifp0->if_index ^ sc->sc_mix;
771 
772 	if_vinput(ifp, m);
773 
774 	return;
775 
776 drop:
777 	m_freem(m);
778 }
779 
780 static int
781 aggr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
782 {
783 	struct aggr_softc *sc = ifp->if_softc;
784 	struct ifreq *ifr = (struct ifreq *)data;
785 	int error = 0;
786 
787 	if (sc->sc_dead)
788 		return (ENXIO);
789 
790 	switch (cmd) {
791 	case SIOCSIFADDR:
792 		break;
793 
794 	case SIOCSIFFLAGS:
795 		if (ISSET(ifp->if_flags, IFF_UP)) {
796 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
797 				error = aggr_up(sc);
798 			else
799 				error = ENETRESET;
800 		} else {
801 			if (ISSET(ifp->if_flags, IFF_RUNNING))
802 				error = aggr_down(sc);
803 		}
804 		break;
805 
806 	case SIOCSIFLLADDR:
807 		error = aggr_set_lladdr(sc, ifr);
808 		break;
809 
810 	case SIOCSTRUNK:
811 		error = suser(curproc);
812 		if (error != 0)
813 			break;
814 
815 		if (((struct trunk_reqall *)data)->ra_proto !=
816 		    TRUNK_PROTO_LACP) {
817 			error = EPROTONOSUPPORT;
818 			break;
819 		}
820 
821 		/* nop */
822 		break;
823 	case SIOCGTRUNK:
824 		error = aggr_get_trunk(sc, (struct trunk_reqall *)data);
825 		break;
826 
827 	case SIOCSTRUNKOPTS:
828 		error = suser(curproc);
829 		if (error != 0)
830 			break;
831 
832 		error = aggr_set_options(sc, (struct trunk_opts *)data);
833 		break;
834 
835 	case SIOCGTRUNKOPTS:
836 		error = aggr_get_options(sc, (struct trunk_opts *)data);
837 		break;
838 
839 	case SIOCGTRUNKPORT:
840 		error = aggr_get_port(sc, (struct trunk_reqport *)data);
841 		break;
842 	case SIOCSTRUNKPORT:
843 		error = suser(curproc);
844 		if (error != 0)
845 			break;
846 
847 		error = aggr_add_port(sc, (struct trunk_reqport *)data);
848 		break;
849 	case SIOCSTRUNKDELPORT:
850 		error = suser(curproc);
851 		if (error != 0)
852 			break;
853 
854 		error = aggr_del_port(sc, (struct trunk_reqport *)data);
855 		break;
856 
857 	case SIOCSIFMTU:
858 		error = aggr_set_mtu(sc, ifr->ifr_mtu);
859 		break;
860 
861 	case SIOCADDMULTI:
862 		error = aggr_multi_add(sc, ifr);
863 		break;
864 	case SIOCDELMULTI:
865 		error = aggr_multi_del(sc, ifr);
866 		break;
867 
868 	case SIOCSIFMEDIA:
869 		error = EOPNOTSUPP;
870 		break;
871 	case SIOCGIFMEDIA:
872 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
873 		break;
874 
875 	default:
876 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
877 		break;
878 	}
879 
880 	if (error == ENETRESET)
881 		error = aggr_iff(sc);
882 
883 	return (error);
884 }
885 
886 static int
887 aggr_get_trunk(struct aggr_softc *sc, struct trunk_reqall *ra)
888 {
889 	struct ifnet *ifp = &sc->sc_if;
890 	struct trunk_reqport rp;
891 	struct aggr_port *p;
892 	size_t size = ra->ra_size;
893 	caddr_t ubuf = (caddr_t)ra->ra_port;
894 	struct lacp_opreq *req;
895 	uint8_t state = 0;
896 	int error = 0;
897 
898 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
899 		SET(state, LACP_STATE_ACTIVITY);
900 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
901 		SET(state, LACP_STATE_TIMEOUT);
902 
903 	ra->ra_proto = TRUNK_PROTO_LACP;
904 	memset(&ra->ra_psc, 0, sizeof(ra->ra_psc));
905 
906 	/*
907 	 * aggr(4) does not support Individual links so don't bother
908 	 * with portprio, portno, and state, as per the spec.
909 	 */
910 
911 	req = &ra->ra_lacpreq;
912 	req->actor_prio = sc->sc_lacp_prio;
913 	CTASSERT(sizeof(req->actor_mac) == sizeof(sc->sc_ac.ac_enaddr));
914 	memcpy(req->actor_mac, &sc->sc_ac.ac_enaddr, sizeof(req->actor_mac));
915 	req->actor_key = ifp->if_index;
916 	req->actor_state = state;
917 
918 	req->partner_prio = ntohs(sc->sc_partner_system.lacp_sysid_priority);
919 	CTASSERT(sizeof(req->partner_mac) ==
920 	    sizeof(sc->sc_partner_system.lacp_sysid_mac));
921 	memcpy(req->partner_mac, sc->sc_partner_system.lacp_sysid_mac,
922 	    sizeof(req->partner_mac));
923 	req->partner_key = ntohs(sc->sc_partner_key);
924 
925 	ra->ra_ports = sc->sc_nports;
926 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
927 		struct ifnet *ifp0;
928 		struct lacp_opreq *opreq;
929 
930 		if (size < sizeof(rp))
931 			break;
932 
933 		ifp0 = p->p_ifp0;
934 
935 		CTASSERT(sizeof(rp.rp_ifname) == sizeof(ifp->if_xname));
936 		CTASSERT(sizeof(rp.rp_portname) == sizeof(ifp0->if_xname));
937 
938 		memset(&rp, 0, sizeof(rp));
939 		memcpy(rp.rp_ifname, ifp->if_xname, sizeof(rp.rp_ifname));
940 		memcpy(rp.rp_portname, ifp0->if_xname, sizeof(rp.rp_portname));
941 
942 		if (p->p_muxed)
943 			SET(rp.rp_flags, TRUNK_PORT_ACTIVE);
944 		if (p->p_collecting)
945 			SET(rp.rp_flags, TRUNK_PORT_COLLECTING);
946 		if (p->p_distributing)
947 			SET(rp.rp_flags, TRUNK_PORT_DISTRIBUTING);
948 		if (!aggr_port_enabled(p))
949 			SET(rp.rp_flags, TRUNK_PORT_DISABLED);
950 
951 		opreq = &rp.rp_lacpreq;
952 
953 		opreq->actor_prio = sc->sc_lacp_prio;
954 		memcpy(opreq->actor_mac, &sc->sc_ac.ac_enaddr,
955 		    sizeof(req->actor_mac));
956 		opreq->actor_key = ifp->if_index;
957 		opreq->actor_portprio = sc->sc_lacp_port_prio;
958 		opreq->actor_portno = ifp0->if_index;
959 		opreq->actor_state = state | p->p_actor_state;
960 
961 		opreq->partner_prio =
962 		    ntohs(p->p_partner.lacp_sysid.lacp_sysid_priority);
963 		CTASSERT(sizeof(opreq->partner_mac) ==
964 		    sizeof(p->p_partner.lacp_sysid.lacp_sysid_mac));
965 		memcpy(opreq->partner_mac,
966 		    p->p_partner.lacp_sysid.lacp_sysid_mac,
967 		    sizeof(opreq->partner_mac));
968 		opreq->partner_key = ntohs(p->p_partner.lacp_key);
969 		opreq->partner_portprio =
970 		    ntohs(p->p_partner.lacp_portid.lacp_portid_priority);
971 		opreq->partner_portno =
972 		    ntohs(p->p_partner.lacp_portid.lacp_portid_number);
973 		opreq->partner_state = p->p_partner_state;
974 
975 		error = copyout(&rp, ubuf, sizeof(rp));
976 		if (error != 0)
977 			break;
978 
979 		ubuf += sizeof(rp);
980 		size -= sizeof(rp);
981 	}
982 
983 	return (error);
984 }
985 
986 static int
987 aggr_get_options(struct aggr_softc *sc, struct trunk_opts *tro)
988 {
989 	struct lacp_adminopts *opt = &tro->to_lacpopts;
990 
991 	if (tro->to_proto != TRUNK_PROTO_LACP)
992 		return (EPROTONOSUPPORT);
993 
994 	opt->lacp_mode = sc->sc_lacp_mode;
995 	opt->lacp_timeout = sc->sc_lacp_timeout;
996 	opt->lacp_prio = sc->sc_lacp_prio;
997 	opt->lacp_portprio = sc->sc_lacp_port_prio;
998 	opt->lacp_ifqprio = sc->sc_if.if_llprio;
999 
1000 	return (0);
1001 }
1002 
1003 static int
1004 aggr_set_options(struct aggr_softc *sc, const struct trunk_opts *tro)
1005 {
1006 	const struct lacp_adminopts *opt = &tro->to_lacpopts;
1007 
1008 	if (tro->to_proto != TRUNK_PROTO_LACP)
1009 		return (EPROTONOSUPPORT);
1010 
1011 	switch (tro->to_opts) {
1012 	case TRUNK_OPT_LACP_MODE:
1013 		switch (opt->lacp_mode) {
1014 		case AGGR_LACP_MODE_PASSIVE:
1015 		case AGGR_LACP_MODE_ACTIVE:
1016 			break;
1017 		default:
1018 			return (EINVAL);
1019 		}
1020 
1021 		aggr_set_lacp_mode(sc, opt->lacp_mode);
1022 		break;
1023 
1024 	case TRUNK_OPT_LACP_TIMEOUT:
1025 		if (opt->lacp_timeout >= nitems(aggr_periodic_times))
1026 			return (EINVAL);
1027 
1028 		aggr_set_lacp_timeout(sc, opt->lacp_timeout);
1029 		break;
1030 
1031 	case TRUNK_OPT_LACP_SYS_PRIO:
1032 		if (opt->lacp_prio == 0)
1033 			return (EINVAL);
1034 
1035 		sc->sc_lacp_prio = opt->lacp_prio;
1036 		break;
1037 
1038 	case TRUNK_OPT_LACP_PORT_PRIO:
1039 		if (opt->lacp_portprio == 0)
1040 			return (EINVAL);
1041 
1042 		sc->sc_lacp_port_prio = opt->lacp_portprio;
1043 		break;
1044 
1045 	default:
1046 		return (ENODEV);
1047 	}
1048 
1049 	return (0);
1050 }
1051 
1052 static int
1053 aggr_add_port(struct aggr_softc *sc, const struct trunk_reqport *rp)
1054 {
1055 	struct ifnet *ifp = &sc->sc_if;
1056 	struct ifnet *ifp0;
1057 	struct arpcom *ac0;
1058 	struct aggr_port *p;
1059 	struct aggr_multiaddr *ma;
1060 	int past = ticks - (hz * LACP_TIMEOUT_FACTOR);
1061 	int i;
1062 	int error;
1063 
1064 	NET_ASSERT_LOCKED();
1065 	if (sc->sc_nports > AGGR_MAX_PORTS)
1066 		return (ENOSPC);
1067 
1068 	ifp0 = ifunit(rp->rp_portname);
1069 	if (ifp0 == NULL || ifp0->if_index == ifp->if_index)
1070 		return (EINVAL);
1071 
1072 	if (ifp0->if_type != IFT_ETHER)
1073 		return (EPROTONOSUPPORT);
1074 
1075 	error = ether_brport_isset(ifp0);
1076 	if (error != 0)
1077 		return (error);
1078 
1079 	if (ifp0->if_hardmtu < ifp->if_mtu)
1080 		return (ENOBUFS);
1081 
1082 	ac0 = (struct arpcom *)ifp0;
1083 	if (ac0->ac_trunkport != NULL)
1084 		return (EBUSY);
1085 
1086 	/* let's try */
1087 
1088 	ifp0 = if_get(ifp0->if_index); /* get an actual reference */
1089 	if (ifp0 == NULL) {
1090 		/* XXX this should never happen */
1091 		return (EINVAL);
1092 	}
1093 
1094 	p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
1095 	if (p == NULL) {
1096 		error = ENOMEM;
1097 		goto put;
1098 	}
1099 
1100 	for (i = 0; i < nitems(p->p_txm_log); i++)
1101 		p->p_txm_log[i] = past;
1102 
1103 	p->p_ifp0 = ifp0;
1104 	p->p_aggr = sc;
1105 	p->p_mtu = ifp0->if_mtu;
1106 
1107 	CTASSERT(sizeof(p->p_lladdr) == sizeof(ac0->ac_enaddr));
1108 	memcpy(p->p_lladdr, ac0->ac_enaddr, sizeof(p->p_lladdr));
1109 	p->p_ioctl = ifp0->if_ioctl;
1110 	p->p_input = ifp0->if_input;
1111 	p->p_output = ifp0->if_output;
1112 
1113 	error = aggr_group(sc, p, SIOCADDMULTI);
1114 	if (error != 0)
1115 		goto free;
1116 
1117 	error = aggr_p_setlladdr(p, sc->sc_ac.ac_enaddr);
1118 	if (error != 0)
1119 		goto ungroup;
1120 
1121 	error = aggr_p_set_mtu(p, ifp->if_mtu);
1122 	if (error != 0)
1123 		goto resetlladdr;
1124 
1125 	if (sc->sc_promisc) {
1126 		error = ifpromisc(ifp0, 1);
1127 		if (error != 0)
1128 			goto unmtu;
1129 	}
1130 
1131 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
1132 		if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) {
1133 			log(LOG_WARNING, "%s %s: "
1134 			    "unable to add multicast address\n",
1135 			    ifp->if_xname, ifp0->if_xname);
1136 		}
1137 	}
1138 
1139 	task_set(&p->p_lhook, aggr_p_linkch, p);
1140 	if_linkstatehook_add(ifp0, &p->p_lhook);
1141 
1142 	task_set(&p->p_dhook, aggr_p_detach, p);
1143 	if_detachhook_add(ifp0, &p->p_dhook);
1144 
1145 	task_set(&p->p_rxm_task, aggr_rx, p);
1146 	mq_init(&p->p_rxm_mq, 3, IPL_NET);
1147 
1148 	timeout_set_proc(&p->p_ptm_tx, aggr_ptm_tx, p);
1149 	timeout_set_proc(&p->p_txm_ntt, aggr_transmit_machine, p);
1150 	timeout_set_proc(&p->p_current_while_timer,
1151 	    aggr_current_while_timer, p);
1152 	timeout_set_proc(&p->p_wait_while_timer, aggr_wait_while_timer, p);
1153 
1154 	p->p_muxed = 0;
1155 	p->p_collecting = 0;
1156 	p->p_distributing = 0;
1157 	p->p_selected = AGGR_PORT_UNSELECTED;
1158 	p->p_actor_state = LACP_STATE_AGGREGATION;
1159 
1160 	/* commit */
1161 	DPRINTF(sc, "%s %s trunkport: creating port\n",
1162 	    ifp->if_xname, ifp0->if_xname);
1163 
1164 	TAILQ_INSERT_TAIL(&sc->sc_ports, p, p_entry);
1165 	sc->sc_nports++;
1166 
1167 	aggr_update_capabilities(sc);
1168 
1169 	/*
1170          * use (and modification) of ifp->if_input and ac->ac_trunkport
1171          * is protected by NET_LOCK.
1172 	 */
1173 
1174 	ac0->ac_trunkport = p;
1175 
1176 	/* make sure p is visible before handlers can run */
1177 	membar_producer();
1178 	ifp0->if_ioctl = aggr_p_ioctl;
1179 	ifp0->if_input = aggr_input;
1180 	ifp0->if_output = aggr_p_output;
1181 
1182 	aggr_mux(sc, p, LACP_MUX_E_BEGIN);
1183 	aggr_rxm(sc, p, LACP_RXM_E_BEGIN);
1184 	aggr_p_linkch(p);
1185 
1186 	return (0);
1187 
1188 unmtu:
1189 	if (aggr_p_set_mtu(p, p->p_mtu) != 0) {
1190 		log(LOG_WARNING, "%s add %s: unable to reset mtu %u\n",
1191 		    ifp->if_xname, ifp0->if_xname, p->p_mtu);
1192 	}
1193 resetlladdr:
1194 	if (aggr_p_setlladdr(p, p->p_lladdr) != 0) {
1195 		log(LOG_WARNING, "%s add %s: unable to reset lladdr\n",
1196 		    ifp->if_xname, ifp0->if_xname);
1197 	}
1198 ungroup:
1199 	if (aggr_group(sc, p, SIOCDELMULTI) != 0) {
1200 		log(LOG_WARNING, "%s add %s: "
1201 		    "unable to remove LACP group address\n",
1202 		    ifp->if_xname, ifp0->if_xname);
1203 	}
1204 free:
1205 	free(p, M_DEVBUF, sizeof(*p));
1206 put:
1207 	if_put(ifp0);
1208 	return (error);
1209 }
1210 
1211 static struct aggr_port *
1212 aggr_trunkport(struct aggr_softc *sc, const char *name)
1213 {
1214 	struct aggr_port *p;
1215 
1216 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
1217 		if (strcmp(p->p_ifp0->if_xname, name) == 0)
1218 			return (p);
1219 	}
1220 
1221 	return (NULL);
1222 }
1223 
1224 static int
1225 aggr_get_port(struct aggr_softc *sc, struct trunk_reqport *rp)
1226 {
1227 	struct aggr_port *p;
1228 
1229 	NET_ASSERT_LOCKED();
1230 	p = aggr_trunkport(sc, rp->rp_portname);
1231 	if (p == NULL)
1232 		return (EINVAL);
1233 
1234 	/* XXX */
1235 
1236 	return (0);
1237 }
1238 
1239 static int
1240 aggr_del_port(struct aggr_softc *sc, const struct trunk_reqport *rp)
1241 {
1242 	struct aggr_port *p;
1243 
1244 	NET_ASSERT_LOCKED();
1245 	p = aggr_trunkport(sc, rp->rp_portname);
1246 	if (p == NULL)
1247 		return (EINVAL);
1248 
1249 	aggr_p_dtor(sc, p, "del");
1250 
1251 	return (0);
1252 }
1253 
1254 static int
1255 aggr_p_setlladdr(struct aggr_port *p, const uint8_t *addr)
1256 {
1257 	struct ifnet *ifp0 = p->p_ifp0;
1258 	struct ifreq ifr;
1259 	struct sockaddr *sa;
1260 	int error;
1261 
1262 	memset(&ifr, 0, sizeof(ifr));
1263 
1264 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
1265 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
1266 
1267 	sa = &ifr.ifr_addr;
1268 
1269 	/* wtf is this? */
1270 	sa->sa_len = ETHER_ADDR_LEN;
1271 	sa->sa_family = AF_LINK;
1272 	CTASSERT(sizeof(sa->sa_data) >= ETHER_ADDR_LEN);
1273 	memcpy(sa->sa_data, addr, ETHER_ADDR_LEN);
1274 
1275 	error = (*p->p_ioctl)(ifp0, SIOCSIFLLADDR, (caddr_t)&ifr);
1276 	switch (error) {
1277 	case ENOTTY:
1278 	case 0:
1279 		break;
1280 	default:
1281 		return (error);
1282 	}
1283 
1284 	error = if_setlladdr(ifp0, addr);
1285 	if (error != 0)
1286 		return (error);
1287 
1288 	ifnewlladdr(ifp0);
1289 
1290 	return (0);
1291 }
1292 
1293 static int
1294 aggr_p_set_mtu(struct aggr_port *p, uint32_t mtu)
1295 {
1296 	struct ifnet *ifp0 = p->p_ifp0;
1297 	struct ifreq ifr;
1298 
1299 	memset(&ifr, 0, sizeof(ifr));
1300 
1301 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
1302 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
1303 
1304 	ifr.ifr_mtu = mtu;
1305 
1306 	return ((*p->p_ioctl)(ifp0, SIOCSIFMTU, (caddr_t)&ifr));
1307 }
1308 
1309 static int
1310 aggr_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data)
1311 {
1312 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1313 	struct aggr_port *p = ac0->ac_trunkport;
1314 	struct ifreq *ifr = (struct ifreq *)data;
1315 	int error = 0;
1316 
1317 	switch (cmd) {
1318 	case SIOCGTRUNKPORT: {
1319 		struct trunk_reqport *rp = (struct trunk_reqport *)data;
1320 		struct aggr_softc *sc = p->p_aggr;
1321 		struct ifnet *ifp = &sc->sc_if;
1322 
1323 		if (strncmp(rp->rp_ifname, rp->rp_portname,
1324 		    sizeof(rp->rp_ifname)) != 0)
1325 			return (EINVAL);
1326 
1327 		CTASSERT(sizeof(rp->rp_ifname) == sizeof(ifp->if_xname));
1328 		memcpy(rp->rp_ifname, ifp->if_xname, sizeof(rp->rp_ifname));
1329 		break;
1330 	}
1331 
1332 	case SIOCSIFMTU:
1333 		if (ifr->ifr_mtu == ifp0->if_mtu)
1334 			break; /* nop */
1335 
1336 		/* FALLTHROUGH */
1337 	case SIOCSIFLLADDR:
1338 		error = EBUSY;
1339 		break;
1340 
1341 	case SIOCSIFFLAGS:
1342 		if (!ISSET(ifp0->if_flags, IFF_UP) &&
1343 		    ISSET(ifp0->if_flags, IFF_RUNNING)) {
1344 			/* port is going down */
1345 			if (p->p_selected == AGGR_PORT_SELECTED) {
1346 				aggr_unselected(p);
1347 				aggr_ntt_transmit(p); /* XXX */
1348 			}
1349 		}
1350 		/* FALLTHROUGH */
1351 	default:
1352 		error = (*p->p_ioctl)(ifp0, cmd, data);
1353 		break;
1354 	}
1355 
1356 	return (error);
1357 }
1358 
1359 static int
1360 aggr_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst,
1361     struct rtentry *rt)
1362 {
1363 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1364 	struct aggr_port *p = ac0->ac_trunkport;
1365 
1366 	/* restrict transmission to bpf only */
1367 	if ((m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL)) {
1368 		m_freem(m);
1369 		return (EBUSY);
1370 	}
1371 
1372 	return ((*p->p_output)(ifp0, m, dst, rt));
1373 }
1374 
1375 static void
1376 aggr_p_dtor(struct aggr_softc *sc, struct aggr_port *p, const char *op)
1377 {
1378 	struct ifnet *ifp = &sc->sc_if;
1379 	struct ifnet *ifp0 = p->p_ifp0;
1380 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1381 	struct aggr_multiaddr *ma;
1382 	enum aggr_port_selected selected;
1383 	int error;
1384 
1385 	DPRINTF(sc, "%s %s %s: destroying port\n",
1386 	    ifp->if_xname, ifp0->if_xname, op);
1387 
1388 	selected = p->p_selected;
1389 	aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED);
1390 	aggr_unselected(p);
1391 	if (aggr_port_enabled(p) && selected == AGGR_PORT_SELECTED)
1392 		aggr_ntt_transmit(p);
1393 
1394 	timeout_del(&p->p_ptm_tx);
1395 	timeout_del_barrier(&p->p_txm_ntt); /* XXX */
1396 	timeout_del(&p->p_current_while_timer);
1397 	timeout_del(&p->p_wait_while_timer);
1398 
1399 	/*
1400          * use (and modification) of ifp->if_input and ac->ac_trunkport
1401          * is protected by NET_LOCK.
1402 	 */
1403 
1404 	ac0->ac_trunkport = NULL;
1405 	ifp0->if_input = p->p_input;
1406 	ifp0->if_ioctl = p->p_ioctl;
1407 	ifp0->if_output = p->p_output;
1408 
1409 	TAILQ_REMOVE(&sc->sc_ports, p, p_entry);
1410 	sc->sc_nports--;
1411 
1412 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
1413 		error = aggr_multi(sc, p, ma, SIOCDELMULTI);
1414 		if (error != 0) {
1415 			log(LOG_WARNING, "%s %s %s: "
1416 			    "unable to remove multicast address (%d)\n",
1417 			    ifp->if_xname, op, ifp0->if_xname, error);
1418 		}
1419 	}
1420 
1421 	if (sc->sc_promisc) {
1422 		error = ifpromisc(ifp0, 0);
1423 		if (error != 0) {
1424 			log(LOG_WARNING, "%s %s %s: "
1425 			    "unable to disable promisc (%d)\n",
1426 			    ifp->if_xname, op, ifp0->if_xname, error);
1427 		}
1428 	}
1429 
1430 	error = aggr_p_set_mtu(p, p->p_mtu);
1431 	if (error != 0) {
1432 		log(LOG_WARNING, "%s %s %s: unable to restore mtu %u (%d)\n",
1433 		    ifp->if_xname, op, ifp0->if_xname, p->p_mtu, error);
1434 	}
1435 
1436 	error = aggr_p_setlladdr(p, p->p_lladdr);
1437 	if (error != 0) {
1438 		log(LOG_WARNING, "%s %s %s: unable to restore lladdr (%d)\n",
1439 		    ifp->if_xname, op, ifp0->if_xname, error);
1440 	}
1441 
1442 	error = aggr_group(sc, p, SIOCDELMULTI);
1443 	if (error != 0) {
1444 		log(LOG_WARNING, "%s %s %s: "
1445 		    "unable to remove LACP group address (%d)\n",
1446 		    ifp->if_xname, op, ifp0->if_xname, error);
1447 	}
1448 
1449 	if_detachhook_del(ifp0, &p->p_dhook);
1450 	if_linkstatehook_del(ifp0, &p->p_lhook);
1451 
1452 	if_put(ifp0);
1453 	free(p, M_DEVBUF, sizeof(*p));
1454 
1455 	/* XXX this is a pretty ugly place to update this */
1456 	aggr_update_capabilities(sc);
1457 }
1458 
1459 static void
1460 aggr_p_detach(void *arg)
1461 {
1462 	struct aggr_port *p = arg;
1463 	struct aggr_softc *sc = p->p_aggr;
1464 
1465 	aggr_p_dtor(sc, p, "detach");
1466 
1467 	NET_ASSERT_LOCKED();
1468 }
1469 
1470 static void
1471 aggr_p_linkch(void *arg)
1472 {
1473 	struct aggr_port *p = arg;
1474 	struct aggr_softc *sc = p->p_aggr;
1475 
1476 	NET_ASSERT_LOCKED();
1477 
1478 	if (aggr_port_enabled(p)) {
1479 		aggr_rxm(sc, p, LACP_RXM_E_PORT_ENABLED);
1480 
1481 		if (aggr_lacp_enabled(sc)) {
1482 			timeout_add_sec(&p->p_ptm_tx,
1483 			    aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]);
1484 		}
1485 	} else {
1486 		aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED);
1487 		aggr_unselected(p);
1488 		aggr_record_default(sc, p);
1489 		timeout_del(&p->p_ptm_tx);
1490 	}
1491 }
1492 
1493 static void
1494 aggr_map(struct aggr_softc *sc)
1495 {
1496 	struct ifnet *ifp = &sc->sc_if;
1497 	struct aggr_map *map = NULL;
1498 	struct aggr_port *p;
1499 	unsigned int gen;
1500 	unsigned int i;
1501 	int link_state = LINK_STATE_DOWN;
1502 
1503 	p = TAILQ_FIRST(&sc->sc_distributing);
1504 	if (p != NULL) {
1505 		gen = sc->sc_map_gen++;
1506 		map = &sc->sc_maps[gen % nitems(sc->sc_maps)];
1507 
1508 		for (i = 0; i < nitems(map->m_ifp0s); i++) {
1509 			map->m_ifp0s[i] = p->p_ifp0;
1510 
1511 			p = TAILQ_NEXT(p, p_entry_distributing);
1512 			if (p == NULL)
1513 				p = TAILQ_FIRST(&sc->sc_distributing);
1514 		}
1515 
1516 		link_state = LINK_STATE_FULL_DUPLEX;
1517 	}
1518 
1519 	SMR_PTR_SET_LOCKED(&sc->sc_map, map);
1520 	smr_barrier();
1521 
1522 	if (ifp->if_link_state != link_state) {
1523 		ifp->if_link_state = link_state;
1524 		if_link_state_change(ifp);
1525 	}
1526 }
1527 
1528 static void
1529 aggr_current_while_timer(void *arg)
1530 {
1531 	struct aggr_port *p = arg;
1532 	struct aggr_softc *sc = p->p_aggr;
1533 
1534 	aggr_rxm(sc, p, LACP_RXM_E_TIMER_EXPIRED);
1535 }
1536 
1537 static void
1538 aggr_wait_while_timer(void *arg)
1539 {
1540 	struct aggr_port *p = arg;
1541 	struct aggr_softc *sc = p->p_aggr;
1542 
1543 	aggr_selection_logic(sc, p);
1544 }
1545 
1546 static void
1547 aggr_start_current_while_timer(struct aggr_port *p, unsigned int t)
1548 {
1549 	timeout_add_sec(&p->p_current_while_timer,
1550 		aggr_periodic_times[t] * LACP_TIMEOUT_FACTOR);
1551 }
1552 
1553 static void
1554 aggr_input_lacpdu(struct aggr_port *p, struct mbuf *m)
1555 {
1556 	struct aggr_softc *sc = p->p_aggr;
1557 	struct lacp_du *lacpdu;
1558 
1559 	if (m->m_len < sizeof(*lacpdu)) {
1560 		m = m_pullup(m, sizeof(*lacpdu));
1561 		if (m == NULL)
1562 			return;
1563 	}
1564 
1565 	/*
1566 	 * In the process of executing the recordPDU function, a Receive
1567 	 * machine compliant to this standard shall not validate the
1568 	 * Version Number, TLV_type, or Reserved fields in received
1569 	 * LACPDUs. The same actions are taken regardless of the values
1570 	 * received in these fields. A Receive machine may validate
1571 	 * the Actor_Information_Length, Partner_Information_Length,
1572 	 * Collector_Information_Length, or Terminator_Length fields.
1573 	 */
1574 
1575 	lacpdu = mtod(m, struct lacp_du *);
1576 	aggr_rxm_lacpdu(sc, p, lacpdu);
1577 
1578 	m_freem(m);
1579 }
1580 
1581 static void
1582 aggr_update_selected(struct aggr_softc *sc, struct aggr_port *p,
1583     const struct lacp_du *lacpdu)
1584 {
1585 	const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info;
1586 	const struct lacp_port_info *lpi = &p->p_partner;
1587 
1588 	if ((rpi->lacp_portid.lacp_portid_number ==
1589 	     lpi->lacp_portid.lacp_portid_number) &&
1590 	    (rpi->lacp_portid.lacp_portid_priority ==
1591 	     lpi->lacp_portid.lacp_portid_priority) &&
1592 	    ETHER_IS_EQ(rpi->lacp_sysid.lacp_sysid_mac,
1593 	     lpi->lacp_sysid.lacp_sysid_mac) &&
1594 	    (rpi->lacp_sysid.lacp_sysid_priority ==
1595 	     lpi->lacp_sysid.lacp_sysid_priority) &&
1596 	    (rpi->lacp_key == lpi->lacp_key) &&
1597 	    (ISSET(rpi->lacp_state, LACP_STATE_AGGREGATION) ==
1598 	     ISSET(lpi->lacp_state, LACP_STATE_AGGREGATION)))
1599 		return;
1600 
1601 	aggr_unselected(p);
1602 }
1603 
1604 static void
1605 aggr_record_default(struct aggr_softc *sc, struct aggr_port *p)
1606 {
1607 	struct lacp_port_info *pi = &p->p_partner;
1608 
1609 	pi->lacp_sysid.lacp_sysid_priority = htons(0);
1610 	memset(pi->lacp_sysid.lacp_sysid_mac, 0,
1611 	    sizeof(pi->lacp_sysid.lacp_sysid_mac));
1612 
1613 	pi->lacp_key = htons(0);
1614 
1615 	pi->lacp_portid.lacp_portid_priority = htons(0);
1616 	pi->lacp_portid.lacp_portid_number = htons(0);
1617 
1618 	SET(p->p_actor_state, LACP_STATE_DEFAULTED);
1619 
1620 	pi->lacp_state = LACP_STATE_AGGREGATION | LACP_STATE_SYNC;
1621 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
1622 		SET(pi->lacp_state, LACP_STATE_TIMEOUT);
1623 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
1624 		SET(pi->lacp_state, LACP_STATE_ACTIVITY);
1625 
1626 	/* notify Mux */
1627 	aggr_mux(sc, p, LACP_MUX_E_NOT_COLLECTING);
1628 	aggr_mux(sc, p, LACP_MUX_E_SYNC);
1629 }
1630 
1631 static void
1632 aggr_update_default_selected(struct aggr_softc *sc, struct aggr_port *p)
1633 {
1634 	const struct lacp_port_info *pi = &p->p_partner;
1635 
1636 	if ((pi->lacp_portid.lacp_portid_number == htons(0)) &&
1637 	    (pi->lacp_portid.lacp_portid_priority == htons(0)) &&
1638 	    ETHER_IS_ANYADDR(pi->lacp_sysid.lacp_sysid_mac) &&
1639 	    (pi->lacp_sysid.lacp_sysid_priority == htons(0)) &&
1640 	    (pi->lacp_key == htons(0)) &&
1641 	    ISSET(pi->lacp_state, LACP_STATE_AGGREGATION))
1642 		return;
1643 
1644 	aggr_unselected(p);
1645 	aggr_selection_logic(sc, p); /* restart */
1646 }
1647 
1648 static int
1649 aggr_update_ntt(struct aggr_port *p, const struct lacp_du *lacpdu)
1650 {
1651 	struct aggr_softc *sc = p->p_aggr;
1652 	struct arpcom *ac = &sc->sc_ac;
1653 	struct ifnet *ifp = &ac->ac_if;
1654 	struct ifnet *ifp0 = p->p_ifp0;
1655 	const struct lacp_port_info *pi = &lacpdu->lacp_partner_info;
1656 	uint8_t bits = LACP_STATE_ACTIVITY | LACP_STATE_TIMEOUT |
1657 	    LACP_STATE_SYNC | LACP_STATE_AGGREGATION;
1658 	uint8_t state = p->p_actor_state;
1659 	int sync = 0;
1660 
1661 	if (pi->lacp_portid.lacp_portid_number != htons(ifp0->if_index))
1662 		goto ntt;
1663 	if (pi->lacp_portid.lacp_portid_priority !=
1664 	    htons(sc->sc_lacp_port_prio))
1665 		goto ntt;
1666 	if (!ETHER_IS_EQ(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr))
1667 		goto ntt;
1668 	if (pi->lacp_sysid.lacp_sysid_priority !=
1669 	    htons(sc->sc_lacp_prio))
1670 		goto ntt;
1671 	if (pi->lacp_key != htons(ifp->if_index))
1672 		goto ntt;
1673 	if (ISSET(pi->lacp_state, LACP_STATE_SYNC) !=
1674 	    ISSET(state, LACP_STATE_SYNC))
1675 		goto ntt;
1676 	sync = 1;
1677 
1678 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
1679 		SET(state, LACP_STATE_TIMEOUT);
1680 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
1681 		SET(state, LACP_STATE_ACTIVITY);
1682 
1683 	if (ISSET(pi->lacp_state, bits) != ISSET(state, bits))
1684 		goto ntt;
1685 
1686 	return (1);
1687 
1688 ntt:
1689 	aggr_ntt(p);
1690 
1691 	return (sync);
1692 }
1693 
1694 static void
1695 aggr_recordpdu(struct aggr_port *p, const struct lacp_du *lacpdu, int sync)
1696 {
1697 	struct aggr_softc *sc = p->p_aggr;
1698 	const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info;
1699 	struct lacp_port_info *lpi = &p->p_partner;
1700 	int active = ISSET(rpi->lacp_state, LACP_STATE_ACTIVITY) ||
1701 	    (ISSET(p->p_actor_state, LACP_STATE_ACTIVITY) &&
1702 	     ISSET(lacpdu->lacp_partner_info.lacp_state, LACP_STATE_ACTIVITY));
1703 
1704 	lpi->lacp_portid.lacp_portid_number =
1705 	    rpi->lacp_portid.lacp_portid_number;
1706 	lpi->lacp_portid.lacp_portid_priority =
1707 	    rpi->lacp_portid.lacp_portid_priority;
1708 	memcpy(lpi->lacp_sysid.lacp_sysid_mac,
1709 	    rpi->lacp_sysid.lacp_sysid_mac,
1710 	    sizeof(lpi->lacp_sysid.lacp_sysid_mac));
1711 	lpi->lacp_sysid.lacp_sysid_priority =
1712 	    rpi->lacp_sysid.lacp_sysid_priority;
1713 	lpi->lacp_key = rpi->lacp_key;
1714 	lpi->lacp_state = rpi->lacp_state & ~LACP_STATE_SYNC;
1715 
1716 	CLR(p->p_actor_state, LACP_STATE_DEFAULTED);
1717 
1718 	if (active && ISSET(rpi->lacp_state, LACP_STATE_SYNC) && sync) {
1719 		SET(p->p_partner_state, LACP_STATE_SYNC);
1720 		aggr_mux(sc, p, LACP_MUX_E_SYNC);
1721 	} else {
1722 		CLR(p->p_partner_state, LACP_STATE_SYNC);
1723 		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
1724 	}
1725 }
1726 
1727 static void
1728 aggr_marker_response(struct aggr_port *p, struct mbuf *m)
1729 {
1730 	struct aggr_softc *sc = p->p_aggr;
1731 	struct arpcom *ac = &sc->sc_ac;
1732 	struct ifnet *ifp0 = p->p_ifp0;
1733 	struct marker_pdu *mpdu;
1734 	struct ether_header *eh;
1735 
1736 	mpdu = mtod(m, struct marker_pdu *);
1737 	mpdu->marker_info_tlv.lacp_tlv_type = MARKER_T_RESPONSE;
1738 
1739 	m = m_prepend(m, sizeof(*eh), M_DONTWAIT);
1740 	if (m == NULL)
1741 		return;
1742 
1743 	eh = mtod(m, struct ether_header *);
1744 	memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost));
1745 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
1746 	eh->ether_type = htons(ETHERTYPE_SLOW);
1747 
1748 	(void)if_enqueue(ifp0, m);
1749 }
1750 
1751 static void
1752 aggr_input_marker(struct aggr_port *p, struct mbuf *m)
1753 {
1754 	struct marker_pdu *mpdu;
1755 
1756 	if (m->m_len < sizeof(*mpdu)) {
1757 		m = m_pullup(m, sizeof(*mpdu));
1758 		if (m == NULL)
1759 			return;
1760 	}
1761 
1762 	mpdu = mtod(m, struct marker_pdu *);
1763 	switch (mpdu->marker_info_tlv.lacp_tlv_type) {
1764 	case MARKER_T_INFORMATION:
1765 		aggr_marker_response(p, m);
1766 		break;
1767 	default:
1768 		m_freem(m);
1769 		break;
1770 	}
1771 }
1772 
1773 static void
1774 aggr_rx(void *arg)
1775 {
1776 	struct aggr_port *p = arg;
1777 	struct mbuf_list ml;
1778 	struct mbuf *m;
1779 
1780 	mq_delist(&p->p_rxm_mq, &ml);
1781 
1782 	while ((m = ml_dequeue(&ml)) != NULL) {
1783 		struct ether_slowproto_hdr *sph;
1784 
1785 		/* aggr_input has checked eh already */
1786 		m_adj(m, sizeof(struct ether_header));
1787 
1788 		sph = mtod(m, struct ether_slowproto_hdr *);
1789 		switch (sph->sph_subtype) {
1790 		case SLOWPROTOCOLS_SUBTYPE_LACP:
1791 			aggr_input_lacpdu(p, m);
1792 			break;
1793 		case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER:
1794 			aggr_input_marker(p, m);
1795 			break;
1796 		default:
1797 			panic("unexpected slow protocol subtype");
1798 			/* NOTREACHED */
1799 		}
1800 	}
1801 }
1802 
1803 static void
1804 aggr_set_selected(struct aggr_port *p, enum aggr_port_selected s,
1805     enum lacp_mux_event ev)
1806 {
1807 	struct aggr_softc *sc = p->p_aggr;
1808 
1809 	if (p->p_selected != s) {
1810 		DPRINTF(sc, "%s %s: Selected %s -> %s\n",
1811 		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
1812 		    aggr_port_selected_names[p->p_selected],
1813 		    aggr_port_selected_names[s]);
1814 		p->p_selected = s;
1815 	}
1816 	aggr_mux(sc, p, ev);
1817 }
1818 
1819 static void
1820 aggr_unselected(struct aggr_port *p)
1821 {
1822 	aggr_set_selected(p, AGGR_PORT_UNSELECTED, LACP_MUX_E_UNSELECTED);
1823 }
1824 
1825 static inline void
1826 aggr_selected(struct aggr_port *p)
1827 {
1828 	aggr_set_selected(p, AGGR_PORT_SELECTED, LACP_MUX_E_SELECTED);
1829 }
1830 
#ifdef notyet
/* Mark the port STANDBY and tell its mux machine. */
static inline void
aggr_standby(struct aggr_port *p)
{
	const enum aggr_port_selected s = AGGR_PORT_STANDBY;

	aggr_set_selected(p, s, LACP_MUX_E_STANDBY);
}
#endif
1838 
1839 static void
1840 aggr_selection_logic(struct aggr_softc *sc, struct aggr_port *p)
1841 {
1842 	const struct lacp_port_info *pi;
1843 	struct arpcom *ac = &sc->sc_ac;
1844 	struct ifnet *ifp = &ac->ac_if;
1845 	const uint8_t *mac;
1846 
1847 	if (p->p_rxm_state != LACP_RXM_S_CURRENT) {
1848 		DPRINTF(sc, "%s %s: selection logic: unselected (rxm !%s)\n",
1849 		    ifp->if_xname, p->p_ifp0->if_xname,
1850 		    lacp_rxm_state_names[LACP_RXM_S_CURRENT]);
1851 		goto unselected;
1852 	}
1853 
1854 	pi = &p->p_partner;
1855 	if (pi->lacp_key == htons(0)) {
1856 		DPRINTF(sc, "%s %s: selection logic: unselected "
1857 		    "(partner key == 0)\n",
1858 		    ifp->if_xname, p->p_ifp0->if_xname);
1859 		goto unselected;
1860 	}
1861 
1862 	/*
1863 	 * aggr(4) does not support individual interfaces
1864 	 */
1865 	if (!ISSET(pi->lacp_state, LACP_STATE_AGGREGATION)) {
1866 		DPRINTF(sc, "%s %s: selection logic: unselected "
1867 		    "(partner state is Individual)\n",
1868 		    ifp->if_xname, p->p_ifp0->if_xname);
1869 		goto unselected;
1870 	}
1871 
1872 	/*
1873 	 * Any pair of Aggregation Ports that are members of the same
1874 	 * LAG, but are connected together by the same link, shall not
1875 	 * select the same Aggregator
1876 	 */
1877 
1878 	mac = pi->lacp_sysid.lacp_sysid_mac;
1879 	if (ETHER_IS_EQ(mac, ac->ac_enaddr) &&
1880 	    pi->lacp_key == htons(ifp->if_index)) {
1881 		DPRINTF(sc, "%s %s: selection logic: unselected "
1882 		    "(partner sysid !eq)\n",
1883 		    ifp->if_xname, p->p_ifp0->if_xname);
1884 		goto unselected;
1885 	}
1886 
1887 	if (!TAILQ_EMPTY(&sc->sc_muxen)) {
1888 		/* an aggregation has already been selected */
1889 		if (!ETHER_IS_EQ(mac, sc->sc_partner_system.lacp_sysid_mac) ||
1890 		    sc->sc_partner_key != pi->lacp_key) {
1891 			DPRINTF(sc, "%s %s: selection logic: unselected "
1892 			    "(partner sysid != selection)\n",
1893 			    ifp->if_xname, p->p_ifp0->if_xname);
1894 			goto unselected;
1895 		}
1896 	}
1897 
1898 	aggr_selected(p);
1899 	return;
1900 
1901 unselected:
1902 	aggr_unselected(p);
1903 }
1904 
1905 static void
1906 aggr_mux(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev)
1907 {
1908 	int ntt = 0;
1909 
1910 	/*
1911 	 * the mux can move through multiple states based on a
1912 	 * single event, so loop until the event is completely consumed.
1913 	 * debounce NTT = TRUE through the multiple state transitions.
1914 	 */
1915 
1916 	while (aggr_mux_ev(sc, p, ev, &ntt) != 0)
1917 		;
1918 
1919 	if (ntt)
1920 		aggr_ntt(p);
1921 }
1922 
#ifdef notyet
/*
 * Per-port Ready_N: true when the mux is WAITING and the port's
 * wait_while timer is no longer pending.
 */
static int
aggr_ready_n(struct aggr_port *p)
{
	if (p->p_mux_state != LACP_MUX_S_WAITING)
		return (0);

	return (!timeout_pending(&p->p_wait_while_timer));
}
#endif
1931 
/*
 * Ready check used by the mux machine's WAITING state. It currently
 * reports ready unconditionally; sc is not consulted.
 */
static inline int
aggr_ready(struct aggr_softc *sc)
{
	const int ready = 1;

	return (ready);
}
1937 
1938 static void
1939 aggr_disable_distributing(struct aggr_softc *sc, struct aggr_port *p)
1940 {
1941 	if (!p->p_distributing)
1942 		return;
1943 
1944 	sc->sc_ndistributing--;
1945 	TAILQ_REMOVE(&sc->sc_distributing, p, p_entry_distributing);
1946 	p->p_distributing = 0;
1947 
1948 	aggr_map(sc);
1949 
1950 	DPRINTF(sc, "%s %s: distributing disabled\n",
1951 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1952 }
1953 
1954 static void
1955 aggr_enable_distributing(struct aggr_softc *sc, struct aggr_port *p)
1956 {
1957 	if (p->p_distributing)
1958 		return;
1959 
1960 	/* check the LAG ID? */
1961 
1962 	p->p_distributing = 1;
1963 	TAILQ_INSERT_TAIL(&sc->sc_distributing, p, p_entry_distributing);
1964 	sc->sc_ndistributing++;
1965 
1966 	aggr_map(sc);
1967 
1968 	DPRINTF(sc, "%s %s: distributing enabled\n",
1969 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1970 }
1971 
1972 static void
1973 aggr_disable_collecting(struct aggr_softc *sc, struct aggr_port *p)
1974 {
1975 	if (!p->p_collecting)
1976 		return;
1977 
1978 	p->p_collecting = 0;
1979 
1980 	DPRINTF(sc, "%s %s: collecting disabled\n",
1981 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1982 }
1983 
1984 static void
1985 aggr_enable_collecting(struct aggr_softc *sc, struct aggr_port *p)
1986 {
1987 	if (p->p_collecting)
1988 		return;
1989 
1990 	p->p_collecting = 1;
1991 
1992 	DPRINTF(sc, "%s %s: collecting enabled\n",
1993 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1994 }
1995 
1996 static void
1997 aggr_attach_mux(struct aggr_softc *sc, struct aggr_port *p)
1998 {
1999 	const struct lacp_port_info *pi = &p->p_partner;
2000 
2001 	if (p->p_muxed)
2002 		return;
2003 
2004 	p->p_muxed = 1;
2005 	if (TAILQ_EMPTY(&sc->sc_muxen)) {
2006 		KASSERT(sc->sc_partner_key == htons(0));
2007 		sc->sc_partner_system = pi->lacp_sysid;
2008 		sc->sc_partner_key = pi->lacp_key;
2009 	}
2010 
2011 	TAILQ_INSERT_TAIL(&sc->sc_muxen, p, p_entry_muxen);
2012 
2013 	DPRINTF(sc, "%s %s: mux attached\n",
2014 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
2015 }
2016 
2017 static void
2018 aggr_detach_mux(struct aggr_softc *sc, struct aggr_port *p)
2019 {
2020 	if (!p->p_muxed)
2021 		return;
2022 
2023 	p->p_muxed = 0;
2024 
2025 	TAILQ_REMOVE(&sc->sc_muxen, p, p_entry_muxen);
2026 	if (TAILQ_EMPTY(&sc->sc_muxen)) {
2027 		memset(&sc->sc_partner_system.lacp_sysid_mac, 0,
2028 		    sizeof(sc->sc_partner_system.lacp_sysid_mac));
2029 		sc->sc_partner_system.lacp_sysid_priority = htons(0);
2030 		sc->sc_partner_key = htons(0);
2031 	}
2032 
2033 	DPRINTF(sc, "%s %s: mux detached\n",
2034 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
2035 }
2036 
2037 static int
2038 aggr_mux_ev(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev,
2039     int *ntt)
2040 {
2041 	enum lacp_mux_state nstate = LACP_MUX_S_DETACHED;
2042 
2043 	switch (p->p_mux_state) {
2044 	case LACP_MUX_S_BEGIN:
2045 		KASSERT(ev == LACP_MUX_E_BEGIN);
2046 		nstate = LACP_MUX_S_DETACHED;
2047 		break;
2048 	case LACP_MUX_S_DETACHED:
2049 		switch (ev) {
2050 		case LACP_MUX_E_SELECTED:
2051 		case LACP_MUX_E_STANDBY:
2052 			nstate = LACP_MUX_S_WAITING;
2053 			break;
2054 		default:
2055 			return (0);
2056 		}
2057 		break;
2058 	case LACP_MUX_S_WAITING:
2059 		switch (ev) {
2060 		case LACP_MUX_E_UNSELECTED:
2061 			nstate = LACP_MUX_S_DETACHED;
2062 			break;
2063 		case LACP_MUX_E_SELECTED:
2064 		case LACP_MUX_E_READY:
2065 			if (aggr_ready(sc) &&
2066 			    p->p_selected == AGGR_PORT_SELECTED) {
2067 				nstate = LACP_MUX_S_ATTACHED;
2068 				break;
2069 			}
2070 			/* FALLTHROUGH */
2071 		default:
2072 			return (0);
2073 		}
2074 		break;
2075 	case LACP_MUX_S_ATTACHED:
2076 		switch (ev) {
2077 		case LACP_MUX_E_UNSELECTED:
2078 		case LACP_MUX_E_STANDBY:
2079 			nstate = LACP_MUX_S_DETACHED;
2080 			break;
2081 		case LACP_MUX_E_SELECTED:
2082 		case LACP_MUX_E_SYNC:
2083 			if (p->p_selected == AGGR_PORT_SELECTED &&
2084 			    ISSET(p->p_partner_state, LACP_STATE_SYNC)) {
2085 				nstate = LACP_MUX_S_COLLECTING;
2086 				break;
2087 			}
2088 			/* FALLTHROUGH */
2089 		default:
2090 			return (0);
2091 		}
2092 		break;
2093 	case LACP_MUX_S_COLLECTING:
2094 		switch (ev) {
2095 		case LACP_MUX_E_UNSELECTED:
2096 		case LACP_MUX_E_STANDBY:
2097 		case LACP_MUX_E_NOT_SYNC:
2098 			nstate = LACP_MUX_S_ATTACHED;
2099 			break;
2100 		case LACP_MUX_E_SELECTED:
2101 		case LACP_MUX_E_SYNC:
2102 		case LACP_MUX_E_COLLECTING:
2103 			if (p->p_selected == AGGR_PORT_SELECTED &&
2104 			    ISSET(p->p_partner_state, LACP_STATE_SYNC) &&
2105 			    ISSET(p->p_partner_state, LACP_STATE_COLLECTING)) {
2106 				nstate = LACP_MUX_S_DISTRIBUTING;
2107 				break;
2108 			}
2109 			/* FALLTHROUGH */
2110 		default:
2111 			return (0);
2112 		}
2113 		break;
2114 	case LACP_MUX_S_DISTRIBUTING:
2115 		switch (ev) {
2116 		case LACP_MUX_E_UNSELECTED:
2117 		case LACP_MUX_E_STANDBY:
2118 		case LACP_MUX_E_NOT_SYNC:
2119 		case LACP_MUX_E_NOT_COLLECTING:
2120 			nstate = LACP_MUX_S_COLLECTING;
2121 			break;
2122 		default:
2123 			return (0);
2124 		}
2125 		break;
2126 	}
2127 
2128 	DPRINTF(sc, "%s %s mux: %s (%s) -> %s\n",
2129 	    sc->sc_if.if_xname, p->p_ifp0->if_xname,
2130 	    lacp_mux_state_names[p->p_mux_state], lacp_mux_event_names[ev],
2131 	    lacp_mux_state_names[nstate]);
2132 
2133 	/* act on the new state */
2134 	switch (nstate) {
2135 	case LACP_MUX_S_BEGIN:
2136 		panic("unexpected mux nstate BEGIN");
2137 		/* NOTREACHED */
2138 	case LACP_MUX_S_DETACHED:
2139 		/*
2140 		 * Detach_Mux_From_Aggregator();
2141 		 * Actor.Sync = FALSE;
2142 		 * Disable_Distributing();
2143 		 * Actor.Distributing = FALSE;
2144 		 * Actor.Collecting = FALSE;
2145 		 * Disable_Collecting();
2146 		 * NTT = TRUE;
2147 		 */
2148 		aggr_detach_mux(sc, p);
2149 		CLR(p->p_actor_state, LACP_STATE_SYNC);
2150 		aggr_disable_distributing(sc, p);
2151 		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
2152 		CLR(p->p_actor_state, LACP_STATE_COLLECTING);
2153 		aggr_disable_collecting(sc, p);
2154 		*ntt = 1;
2155 		break;
2156 	case LACP_MUX_S_WAITING:
2157 		/*
2158 		 * Start wait_while_timer
2159 		 */
2160 		timeout_add_sec(&p->p_wait_while_timer,
2161 		    LACP_AGGREGATION_WAIT_TIME);
2162 		break;
2163 	case LACP_MUX_S_ATTACHED:
2164 		/*
2165 		 * Attach_Mux_To_Aggregator();
2166 		 * Actor.Sync = TRUE;
2167 		 * Actor.Collecting = FALSE;
2168 		 * Disable_Collecting();
2169 		 * NTT = TRUE;
2170 		 */
2171 		aggr_attach_mux(sc, p);
2172 		SET(p->p_actor_state, LACP_STATE_SYNC);
2173 		CLR(p->p_actor_state, LACP_STATE_COLLECTING);
2174 		aggr_disable_collecting(sc, p);
2175 		*ntt = 1;
2176 		break;
2177 
2178 	case LACP_MUX_S_COLLECTING:
2179 		/*
2180 		 * Enable_Collecting();
2181 		 * Actor.Collecting = TRUE;
2182 		 * Disable_Distributing();
2183 		 * Actor.Distributing = FALSE;
2184 		 * NTT = TRUE;
2185 		 */
2186 		aggr_enable_collecting(sc, p);
2187 		SET(p->p_actor_state, LACP_STATE_COLLECTING);
2188 		aggr_disable_distributing(sc, p);
2189 		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
2190 		*ntt = 1;
2191 		break;
2192 	case LACP_MUX_S_DISTRIBUTING:
2193 		/*
2194 		 * Actor.Distributing = TRUE;
2195 		 * Enable_Distributing();
2196 		 */
2197 		SET(p->p_actor_state, LACP_STATE_DISTRIBUTING);
2198 		aggr_enable_distributing(sc, p);
2199 		break;
2200 	}
2201 
2202 	p->p_mux_state = nstate;
2203 
2204 	return (1);
2205 }
2206 
2207 static void
2208 aggr_rxm_ev(struct aggr_softc *sc, struct aggr_port *p,
2209     enum lacp_rxm_event ev, const struct lacp_du *lacpdu)
2210 {
2211 	unsigned int port_disabled = 0;
2212 	enum lacp_rxm_state nstate = LACP_RXM_S_BEGIN;
2213 
2214 	KASSERT((ev == LACP_RXM_E_LACPDU) == (lacpdu != NULL));
2215 
2216 	/* global transitions */
2217 
2218 	switch (ev) {
2219 	case LACP_RXM_E_NOT_PORT_ENABLED:
2220 		port_disabled = !aggr_port_moved(sc, p);
2221 		break;
2222 	case LACP_RXM_E_NOT_PORT_MOVED:
2223 		port_disabled = !aggr_port_enabled(p);
2224 		break;
2225 	default:
2226 		break;
2227 	}
2228 
2229 	if (port_disabled)
2230 		nstate = LACP_RXM_S_PORT_DISABLED;
2231 	else switch (p->p_rxm_state) { /* local state transitions */
2232 	case LACP_RXM_S_BEGIN:
2233 		KASSERT(ev == LACP_RXM_E_BEGIN);
2234 		nstate = LACP_RXM_S_INITIALIZE;
2235 		break;
2236 	case LACP_RXM_S_INITIALIZE:
2237 		/* this should only be handled via UCT in nstate handling */
2238 		panic("unexpected rxm state INITIALIZE");
2239 
2240 	case LACP_RXM_S_PORT_DISABLED:
2241 		switch (ev) {
2242 		case LACP_RXM_E_PORT_MOVED:
2243 			nstate = LACP_RXM_S_INITIALIZE;
2244 			break;
2245 		case LACP_RXM_E_PORT_ENABLED:
2246 			nstate = aggr_lacp_enabled(sc) ?
2247 			    LACP_RXM_S_EXPIRED : LACP_RXM_S_LACP_DISABLED;
2248 			break;
2249 		case LACP_RXM_E_LACP_ENABLED:
2250 			if (!aggr_port_enabled(p))
2251 				return;
2252 			nstate = LACP_RXM_S_EXPIRED;
2253 			break;
2254 		case LACP_RXM_E_NOT_LACP_ENABLED:
2255 			if (!aggr_port_enabled(p))
2256 				return;
2257 			nstate = LACP_RXM_S_LACP_DISABLED;
2258 			break;
2259 		default:
2260 			return;
2261 		}
2262 		break;
2263 	case LACP_RXM_S_EXPIRED:
2264 		switch (ev) {
2265 		case LACP_RXM_E_LACPDU:
2266 			nstate = LACP_RXM_S_CURRENT;
2267 			break;
2268 		case LACP_RXM_E_TIMER_EXPIRED:
2269 			nstate = LACP_RXM_S_DEFAULTED;
2270 			break;
2271 		default:
2272 			return;
2273 		}
2274 		break;
2275 	case LACP_RXM_S_LACP_DISABLED:
2276 		switch (ev) {
2277 		case LACP_RXM_E_LACP_ENABLED:
2278 			nstate = LACP_RXM_S_PORT_DISABLED;
2279 			break;
2280 		default:
2281 			return;
2282 		}
2283 		break;
2284 	case LACP_RXM_S_DEFAULTED:
2285 		switch (ev) {
2286 		case LACP_RXM_E_LACPDU:
2287 			nstate = LACP_RXM_S_CURRENT;
2288 			break;
2289 		default:
2290 			return;
2291 		}
2292 		break;
2293 	case LACP_RXM_S_CURRENT:
2294 		switch (ev) {
2295 		case LACP_RXM_E_TIMER_EXPIRED:
2296 			nstate = LACP_RXM_S_EXPIRED;
2297 			break;
2298 		case LACP_RXM_E_LACPDU:
2299 			nstate = LACP_RXM_S_CURRENT;
2300 			break;
2301 		default:
2302 			return;
2303 		}
2304 		break;
2305 	}
2306 
2307 uct:
2308 	if (p->p_rxm_state != nstate) {
2309 		DPRINTF(sc, "%s %s rxm: %s (%s) -> %s\n",
2310 		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
2311 		    lacp_rxm_state_names[p->p_rxm_state],
2312 		    lacp_rxm_event_names[ev],
2313 		    lacp_rxm_state_names[nstate]);
2314 	}
2315 
2316 	/* record the new state */
2317 	p->p_rxm_state = nstate;
2318 
2319 	/* act on the new state */
2320 	switch (nstate) {
2321 	case LACP_RXM_S_BEGIN:
2322 		panic("unexpected rxm nstate BEGIN");
2323 		/* NOTREACHED */
2324 	case LACP_RXM_S_INITIALIZE:
2325 		/*
2326 		 * Selected = UNSELECTED;
2327 		 * recordDefault();
2328 		 * Actor_Oper_Port_State.Expired = FALSE;
2329 		 * port_moved = FALSE;
2330 		 */
2331 		aggr_unselected(p);
2332 		aggr_record_default(sc, p);
2333 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2334 
2335 		ev = LACP_RXM_E_UCT;
2336 		nstate = LACP_RXM_S_PORT_DISABLED;
2337 		goto uct;
2338 		/* NOTREACHED */
2339 	case LACP_RXM_S_PORT_DISABLED:
2340 		/*
2341 		 * Partner_Oper_Port_State.Synchronization = FALSE;
2342 		 */
2343 		CLR(p->p_partner_state, LACP_STATE_SYNC);
2344 		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
2345 		break;
2346 	case LACP_RXM_S_EXPIRED:
2347 		/*
2348 		 * Partner_Oper_Port_State.Synchronization = FALSE;
2349 		 * Partner_Oper_Port_State.LACP_Timeout = Short Timeout;
2350 		 * start current_while_timer(Short Timeout);
2351 		 * Actor_Oper_Port_State.Expired = TRUE;
2352 		 */
2353 
2354 		CLR(p->p_partner_state, LACP_STATE_SYNC);
2355 		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
2356 		aggr_set_partner_timeout(p, AGGR_LACP_TIMEOUT_FAST);
2357 		aggr_start_current_while_timer(p, AGGR_LACP_TIMEOUT_FAST);
2358 		SET(p->p_actor_state, LACP_STATE_EXPIRED);
2359 
2360 		break;
2361 	case LACP_RXM_S_LACP_DISABLED:
2362 		/*
2363 		 * Selected = UNSELECTED;
2364 		 * recordDefault();
2365 		 * Partner_Oper_Port_State.Aggregation = FALSE;
2366 		 * Actor_Oper_Port_State.Expired = FALSE;
2367 		 */
2368 		aggr_unselected(p);
2369 		aggr_record_default(sc, p);
2370 		CLR(p->p_partner_state, LACP_STATE_AGGREGATION);
2371 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2372 		break;
2373 	case LACP_RXM_S_DEFAULTED:
2374 		/*
2375 		 * update_Default_Selected();
2376 		 * recordDefault();
2377 		 * Actor_Oper_Port_State.Expired = FALSE;
2378 		 */
2379 		aggr_update_default_selected(sc, p);
2380 		aggr_record_default(sc, p);
2381 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2382 		break;
2383 	case LACP_RXM_S_CURRENT: {
2384 		/*
2385 		 * update_Selected();
2386 		 * update_NTT();
2387 		 * if (Actor_System_LACP_Version >=2 ) recordVersionNumber();
2388 		 * recordPDU();
2389 		 * start current_while_timer(
2390 		 *     Actor_Oper_Port_State.LACP_Timeout);
2391 		 * Actor_Oper_Port_State.Expired = FALSE;
2392 		 */
2393 		int sync;
2394 
2395 		aggr_update_selected(sc, p, lacpdu);
2396 		sync = aggr_update_ntt(p, lacpdu);
2397 		/* don't support v2 yet */
2398 		aggr_recordpdu(p, lacpdu, sync);
2399 		aggr_start_current_while_timer(p, sc->sc_lacp_timeout);
2400 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2401 
2402 		if (p->p_selected == AGGR_PORT_UNSELECTED)
2403 			aggr_selection_logic(sc, p); /* restart */
2404 
2405 		}
2406 		break;
2407 	}
2408 }
2409 
2410 static int
2411 aggr_up(struct aggr_softc *sc)
2412 {
2413 	struct ifnet *ifp = &sc->sc_if;
2414 	struct aggr_port *p;
2415 
2416 	NET_ASSERT_LOCKED();
2417 	KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
2418 
2419 	SET(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = TRUE */
2420 
2421 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2422 		aggr_rxm(sc, p, LACP_RXM_E_LACP_ENABLED);
2423 		aggr_p_linkch(p);
2424 	}
2425 
2426 	/* start the Periodic Transmission machine */
2427 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) {
2428 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2429 			if (!aggr_port_enabled(p))
2430 				continue;
2431 
2432 			timeout_add_sec(&p->p_ptm_tx,
2433 			    aggr_periodic_times[sc->sc_lacp_timeout]);
2434 		}
2435 	}
2436 
2437 	return (ENETRESET);
2438 }
2439 
2440 static int
2441 aggr_iff(struct aggr_softc *sc)
2442 {
2443 	struct ifnet *ifp = &sc->sc_if;
2444 	unsigned int promisc = ISSET(ifp->if_flags, IFF_PROMISC);
2445 
2446 	NET_ASSERT_LOCKED();
2447 
2448 	if (promisc != sc->sc_promisc) {
2449 		struct aggr_port *p;
2450 
2451 		rw_enter_read(&sc->sc_lock);
2452 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2453 			struct ifnet *ifp0 = p->p_ifp0;
2454 			if (ifpromisc(ifp0, promisc) != 0) {
2455 				log(LOG_WARNING, "%s iff %s: "
2456 				    "unable to turn promisc %s\n",
2457 				    ifp->if_xname, ifp0->if_xname,
2458 				    promisc ? "on" : "off");
2459 			}
2460 		}
2461 		rw_exit_read(&sc->sc_lock);
2462 
2463 		sc->sc_promisc = promisc;
2464 	}
2465 
2466 	return (0);
2467 }
2468 
2469 static int
2470 aggr_down(struct aggr_softc *sc)
2471 {
2472 	struct ifnet *ifp = &sc->sc_if;
2473 	struct aggr_port *p;
2474 
2475 	NET_ASSERT_LOCKED();
2476 	CLR(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = FALSE */
2477 
2478 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2479 		aggr_rxm(sc, p, LACP_RXM_E_NOT_LACP_ENABLED);
2480 
2481 		/* stop the Periodic Transmission machine */
2482 		timeout_del(&p->p_ptm_tx);
2483 
2484 		/* stop the Mux machine */
2485 		aggr_mux(sc, p, LACP_MUX_E_UNSELECTED);
2486 
2487 		/* stop the Transmit machine */
2488 		timeout_del(&p->p_txm_ntt);
2489 	}
2490 
2491 	KASSERT(TAILQ_EMPTY(&sc->sc_distributing));
2492 	KASSERT(sc->sc_ndistributing == 0);
2493 	KASSERT(SMR_PTR_GET_LOCKED(&sc->sc_map) == NULL);
2494 
2495 	return (ENETRESET);
2496 }
2497 
2498 static int
2499 aggr_set_lladdr(struct aggr_softc *sc, const struct ifreq *ifr)
2500 {
2501 	struct ifnet *ifp = &sc->sc_if;
2502 	struct aggr_port *p;
2503 	const uint8_t *lladdr = ifr->ifr_addr.sa_data;
2504 
2505 	rw_enter_read(&sc->sc_lock);
2506 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2507 		if (aggr_p_setlladdr(p, lladdr) != 0) {
2508 			struct ifnet *ifp0 = p->p_ifp0;
2509 			log(LOG_WARNING, "%s setlladdr %s: "
2510 			    "unable to set lladdr\n",
2511 			    ifp->if_xname, ifp0->if_xname);
2512 		}
2513 	}
2514 	rw_exit_read(&sc->sc_lock);
2515 
2516 	return (0);
2517 }
2518 
2519 static int
2520 aggr_set_mtu(struct aggr_softc *sc, uint32_t mtu)
2521 {
2522 	struct ifnet *ifp = &sc->sc_if;
2523 	struct aggr_port *p;
2524 
2525 	if (mtu < ETHERMIN || mtu > ifp->if_hardmtu)
2526 		return (EINVAL);
2527 
2528 	ifp->if_mtu = mtu;
2529 
2530 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2531 		if (aggr_p_set_mtu(p, mtu) != 0) {
2532 			struct ifnet *ifp0 = p->p_ifp0;
2533 			log(LOG_WARNING, "%s %s: unable to set mtu %u\n",
2534 			    ifp->if_xname, ifp0->if_xname, mtu);
2535 		}
2536 	}
2537 
2538 	return (0);
2539 }
2540 
2541 static int
2542 aggr_group(struct aggr_softc *sc, struct aggr_port *p, u_long cmd)
2543 {
2544 	struct ifnet *ifp0 = p->p_ifp0;
2545 	struct ifreq ifr;
2546 	struct sockaddr *sa;
2547 
2548 	memset(&ifr, 0, sizeof(ifr));
2549 
2550 	/* make it convincing */
2551 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
2552 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
2553 
2554 	sa = &ifr.ifr_addr;
2555 	CTASSERT(sizeof(sa->sa_data) >= sizeof(lacp_address_slow));
2556 
2557 	sa->sa_family = AF_UNSPEC;
2558 	memcpy(sa->sa_data, lacp_address_slow, sizeof(lacp_address_slow));
2559 
2560 	return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr));
2561 }
2562 
2563 static int
2564 aggr_multi(struct aggr_softc *sc, struct aggr_port *p,
2565     const struct aggr_multiaddr *ma, u_long cmd)
2566 {
2567 	struct ifnet *ifp0 = p->p_ifp0;
2568 	struct {
2569 		char			if_name[IFNAMSIZ];
2570 		struct sockaddr_storage if_addr;
2571 	} ifr;
2572 
2573 	memset(&ifr, 0, sizeof(ifr));
2574 
2575 	/* make it convincing */
2576 	CTASSERT(sizeof(ifr.if_name) == sizeof(ifp0->if_xname));
2577 	memcpy(ifr.if_name, ifp0->if_xname, sizeof(ifr.if_name));
2578 
2579 	ifr.if_addr = ma->m_addr;
2580 
2581 	return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr));
2582 }
2583 
2584 static void
2585 aggr_media_status(struct ifnet *ifp, struct ifmediareq *imr)
2586 {
2587 	struct aggr_softc *sc = ifp->if_softc;
2588 
2589 	imr->ifm_status = IFM_AVALID;
2590 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
2591 
2592 	smr_read_enter(); /* there's no reason to block... */
2593 	if (SMR_PTR_GET(&sc->sc_map) != NULL)
2594 		imr->ifm_status |= IFM_ACTIVE;
2595 	smr_read_leave();
2596 }
2597 
/*
 * Media change callback.  Changing the media of an aggr interface is
 * not supported, so this unconditionally fails with EOPNOTSUPP.
 */
static int
aggr_media_change(struct ifnet *ifp)
{
	return (EOPNOTSUPP);
}
2603 
2604 static void
2605 aggr_update_capabilities(struct aggr_softc *sc)
2606 {
2607 	struct aggr_port *p;
2608 	uint32_t hardmtu = ETHER_MAX_HARDMTU_LEN;
2609 	uint32_t capabilities = ~0;
2610 	int set = 0;
2611 
2612 	rw_enter_read(&sc->sc_lock);
2613 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2614 		struct ifnet *ifp0 = p->p_ifp0;
2615 
2616 		set = 1;
2617 		capabilities &= ifp0->if_capabilities;
2618 		if (ifp0->if_hardmtu < hardmtu)
2619 			hardmtu = ifp0->if_hardmtu;
2620 	}
2621 	rw_exit_read(&sc->sc_lock);
2622 
2623 	sc->sc_if.if_hardmtu = hardmtu;
2624 	sc->sc_if.if_capabilities = (set ? capabilities : 0);
2625 }
2626 
2627 static void
2628 aggr_ptm_tx(void *arg)
2629 {
2630 	struct aggr_port *p = arg;
2631 	unsigned int timeout;
2632 
2633 	aggr_ntt(p);
2634 
2635 	timeout = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT) ?
2636 	    AGGR_LACP_TIMEOUT_FAST : AGGR_LACP_TIMEOUT_SLOW;
2637 	timeout_add_sec(&p->p_ptm_tx, aggr_periodic_times[timeout]);
2638 }
2639 
2640 static inline void
2641 aggr_lacp_tlv_set(struct lacp_tlv_hdr *tlv, uint8_t type, uint8_t len)
2642 {
2643 	tlv->lacp_tlv_type = type;
2644 	tlv->lacp_tlv_length = sizeof(*tlv) + len;
2645 }
2646 
2647 static void
2648 aggr_ntt_transmit(struct aggr_port *p)
2649 {
2650 	struct aggr_softc *sc = p->p_aggr;
2651 	struct arpcom *ac = &sc->sc_ac;
2652 	struct ifnet *ifp = &sc->sc_if;
2653 	struct ifnet *ifp0 = p->p_ifp0;
2654 	struct mbuf *m;
2655 	struct lacp_du *lacpdu;
2656 	struct lacp_port_info *pi;
2657 	struct lacp_collector_info *ci;
2658 	struct ether_header *eh;
2659 	int linkhdr = max_linkhdr + ETHER_ALIGN;
2660 	int len = linkhdr + sizeof(*eh) + sizeof(*lacpdu);
2661 
2662 	m = m_gethdr(M_DONTWAIT, MT_DATA);
2663 	if (m == NULL)
2664 		return;
2665 
2666 	if (len > MHLEN) {
2667 		MCLGETI(m, M_DONTWAIT, NULL, len);
2668 		if (!ISSET(m->m_flags, M_EXT)) {
2669 			m_freem(m);
2670 			return;
2671 		}
2672 	}
2673 
2674 	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
2675 	m->m_pkthdr.len = m->m_len = len;
2676 	memset(m->m_data, 0, m->m_len);
2677 	m_adj(m, linkhdr);
2678 
2679 	eh = mtod(m, struct ether_header *);
2680 
2681 	CTASSERT(sizeof(eh->ether_dhost) == sizeof(lacp_address_slow));
2682 	CTASSERT(sizeof(eh->ether_shost) == sizeof(ac->ac_enaddr));
2683 
2684 	memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost));
2685 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
2686 	eh->ether_type = htons(ETHERTYPE_SLOW);
2687 
2688 	lacpdu = (struct lacp_du *)(eh + 1);
2689 	lacpdu->lacp_du_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
2690 	lacpdu->lacp_du_sph.sph_version = LACP_VERSION;
2691 
2692 	pi = &lacpdu->lacp_actor_info;
2693 	aggr_lacp_tlv_set(&lacpdu->lacp_actor_info_tlv,
2694 	    LACP_T_ACTOR, sizeof(*pi));
2695 
2696 	pi->lacp_sysid.lacp_sysid_priority = htons(sc->sc_lacp_prio);
2697 	CTASSERT(sizeof(pi->lacp_sysid.lacp_sysid_mac) ==
2698 	    sizeof(ac->ac_enaddr));
2699 	memcpy(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr,
2700 	    sizeof(pi->lacp_sysid.lacp_sysid_mac));
2701 
2702 	pi->lacp_key = htons(ifp->if_index);
2703 
2704 	pi->lacp_portid.lacp_portid_priority = htons(sc->sc_lacp_port_prio);
2705 	pi->lacp_portid.lacp_portid_number = htons(ifp0->if_index);
2706 
2707 	pi->lacp_state = p->p_actor_state;
2708 	if (sc->sc_lacp_mode)
2709 		SET(pi->lacp_state, LACP_STATE_ACTIVITY);
2710 	if (sc->sc_lacp_timeout)
2711 		SET(pi->lacp_state, LACP_STATE_TIMEOUT);
2712 
2713 	pi = &lacpdu->lacp_partner_info;
2714 	aggr_lacp_tlv_set(&lacpdu->lacp_partner_info_tlv,
2715 	    LACP_T_PARTNER, sizeof(*pi));
2716 
2717 	*pi = p->p_partner;
2718 
2719 	ci = &lacpdu->lacp_collector_info;
2720 	aggr_lacp_tlv_set(&lacpdu->lacp_collector_info_tlv,
2721 	    LACP_T_COLLECTOR, sizeof(*ci));
2722 	ci->lacp_maxdelay = htons(0);
2723 
2724 	lacpdu->lacp_terminator.lacp_tlv_type = LACP_T_TERMINATOR;
2725 	lacpdu->lacp_terminator.lacp_tlv_length = 0;
2726 
2727 	(void)if_enqueue(ifp0, m);
2728 }
2729 
2730 static void
2731 aggr_ntt(struct aggr_port *p)
2732 {
2733 	if (!timeout_pending(&p->p_txm_ntt))
2734 		timeout_add(&p->p_txm_ntt, 0);
2735 }
2736 
2737 static void
2738 aggr_transmit_machine(void *arg)
2739 {
2740 	struct aggr_port *p = arg;
2741 	struct aggr_softc *sc = p->p_aggr;
2742 	unsigned int slot;
2743 	int *log;
2744 	int period = hz * LACP_FAST_PERIODIC_TIME;
2745 	int diff;
2746 
2747 	if (!aggr_lacp_enabled(sc) || !aggr_port_enabled(p))
2748 		return;
2749 
2750 	slot = p->p_txm_slot;
2751 	log = &p->p_txm_log[slot % nitems(p->p_txm_log)];
2752 
2753 	diff = ticks - *log;
2754 	if (diff < period) {
2755 		timeout_add(&p->p_txm_ntt, period - diff);
2756 		return;
2757 	}
2758 
2759 	*log = ticks;
2760 	p->p_txm_slot = ++slot;
2761 
2762 #if 0
2763 	DPRINTF(sc, "%s %s ntt\n", sc->sc_if.if_xname, p->p_ifp0->if_xname);
2764 #endif
2765 
2766 	aggr_ntt_transmit(p);
2767 }
2768 
2769 static void
2770 aggr_set_lacp_mode(struct aggr_softc *sc, int mode)
2771 {
2772 	sc->sc_lacp_mode = mode;
2773 
2774 	if (mode == AGGR_LACP_MODE_PASSIVE) {
2775 		struct aggr_port *p;
2776 
2777 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2778 			if (!ISSET(p->p_partner_state, LACP_STATE_ACTIVITY))
2779 				timeout_del(&p->p_ptm_tx);
2780 		}
2781 	}
2782 }
2783 
2784 static void
2785 aggr_set_partner_timeout(struct aggr_port *p, int timeout)
2786 {
2787 	uint8_t ostate = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT);
2788 	uint8_t nstate = (timeout == AGGR_LACP_TIMEOUT_FAST) ?
2789 	    LACP_STATE_TIMEOUT : 0;
2790 
2791 	if (ostate == nstate)
2792 		return;
2793 
2794 	if (timeout == AGGR_LACP_TIMEOUT_FAST) {
2795 		SET(p->p_partner_state, LACP_STATE_TIMEOUT);
2796 		timeout_add_sec(&p->p_ptm_tx,
2797 		    aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]);
2798 	} else
2799 		CLR(p->p_partner_state, LACP_STATE_TIMEOUT);
2800 }
2801 
2802 static void
2803 aggr_set_lacp_timeout(struct aggr_softc *sc, int timeout)
2804 {
2805 	struct aggr_port *p;
2806 
2807 	sc->sc_lacp_timeout = timeout;
2808 
2809 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2810 		if (!ISSET(p->p_actor_state, LACP_STATE_DEFAULTED))
2811 			continue;
2812 
2813 		aggr_set_partner_timeout(p, timeout);
2814 	}
2815 }
2816 
2817 static int
2818 aggr_multi_eq(const struct aggr_multiaddr *ma,
2819     const uint8_t *addrlo, const uint8_t *addrhi)
2820 {
2821 	return (ETHER_IS_EQ(ma->m_addrlo, addrlo) &&
2822 	    ETHER_IS_EQ(ma->m_addrhi, addrhi));
2823 }
2824 
2825 static int
2826 aggr_multi_add(struct aggr_softc *sc, struct ifreq *ifr)
2827 {
2828 	struct ifnet *ifp = &sc->sc_if;
2829 	struct aggr_port *p;
2830 	struct aggr_multiaddr *ma;
2831 	uint8_t addrlo[ETHER_ADDR_LEN];
2832 	uint8_t addrhi[ETHER_ADDR_LEN];
2833 	int error;
2834 
2835 	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2836 	if (error != 0)
2837 		return (error);
2838 
2839 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
2840 		if (aggr_multi_eq(ma, addrlo, addrhi)) {
2841 			ma->m_refs++;
2842 			return (0);
2843 		}
2844 	}
2845 
2846 	ma = malloc(sizeof(*ma), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
2847 	if (ma == NULL)
2848 		return (ENOMEM);
2849 
2850 	ma->m_refs = 1;
2851 	memcpy(&ma->m_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2852 	memcpy(ma->m_addrlo, addrlo, sizeof(ma->m_addrlo));
2853 	memcpy(ma->m_addrhi, addrhi, sizeof(ma->m_addrhi));
2854 	TAILQ_INSERT_TAIL(&sc->sc_multiaddrs, ma, m_entry);
2855 
2856 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2857 		struct ifnet *ifp0 = p->p_ifp0;
2858 
2859 		if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) {
2860 			log(LOG_WARNING, "%s %s: "
2861 			    "unable to add multicast address\n",
2862 			    ifp->if_xname, ifp0->if_xname);
2863 		}
2864 	}
2865 
2866 	return (0);
2867 }
2868 
2869 int
2870 aggr_multi_del(struct aggr_softc *sc, struct ifreq *ifr)
2871 {
2872 	struct ifnet *ifp = &sc->sc_if;
2873 	struct aggr_port *p;
2874 	struct aggr_multiaddr *ma;
2875 	uint8_t addrlo[ETHER_ADDR_LEN];
2876 	uint8_t addrhi[ETHER_ADDR_LEN];
2877 	int error;
2878 
2879 	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2880 	if (error != 0)
2881 		return (error);
2882 
2883 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
2884 		if (aggr_multi_eq(ma, addrlo, addrhi))
2885 			break;
2886 	}
2887 
2888 	if (ma == NULL)
2889 		return (EINVAL);
2890 
2891 	if (--ma->m_refs > 0)
2892 		return (0);
2893 
2894 	TAILQ_REMOVE(&sc->sc_multiaddrs, ma, m_entry);
2895 
2896 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2897 		struct ifnet *ifp0 = p->p_ifp0;
2898 
2899 		if (aggr_multi(sc, p, ma, SIOCDELMULTI) != 0) {
2900 			log(LOG_WARNING, "%s %s: "
2901 			    "unable to delete multicast address\n",
2902 			    ifp->if_xname, ifp0->if_xname);
2903 		}
2904 	}
2905 
2906 	free(ma, M_DEVBUF, sizeof(*ma));
2907 
2908 	return (0);
2909 }
2910