xref: /openbsd-src/sys/net/if_aggr.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: if_aggr.c,v 1.27 2019/12/23 09:03:24 dlg Exp $ */
2 
3 /*
4  * Copyright (c) 2019 The University of Queensland
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /*
20  * This driver implements 802.1AX Link Aggregation (formerly 802.3ad)
21  *
 * The specification describes systems with multiple ports that
 * can dynamically form aggregations. The relationship between ports
 * and aggregations is such that arbitrary ports connected to ports
25  * on other systems may move between aggregations, and there can be
26  * as many aggregations as ports. An aggregation in this model is
27  * effectively an interface, and becomes the point that Ethernet traffic
28  * enters and leaves the system. The spec also contains a description
29  * of the Link Aggregation Control Protocol (LACP) for use on the wire,
30  * and how to process it and select ports and aggregations based on
31  * it.
32  *
33  * This driver implements a simplified or constrained model where each
34  * aggr(4) interface is effectively an independent system, and will
35  * only support one aggregation. This supports the use of the kernel
36  * interface as a static entity that is created and configured once,
37  * and has the link "come up" when that one aggregation is selected
38  * by the LACP protocol.
39  */
40 
41 /*
42  * This code was written by David Gwynne <dlg@uq.edu.au> as part
43  * of the Information Technology Infrastructure Group (ITIG) in the
44  * Faculty of Engineering, Architecture and Information Technology
45  * (EAIT).
46  */
47 
48 /*
49  * TODO:
50  *
51  * - add locking
52  * - figure out the Ready_N and Ready logic
53  */
54 
55 #include "bpfilter.h"
56 
57 #include <sys/param.h>
58 #include <sys/kernel.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/queue.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/systm.h>
65 #include <sys/syslog.h>
66 #include <sys/rwlock.h>
67 #include <sys/percpu.h>
68 #include <sys/smr.h>
69 #include <sys/task.h>
70 
71 #include <net/if.h>
72 #include <net/if_dl.h>
73 #include <net/if_types.h>
74 
75 #include <net/if_media.h>
76 
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 
80 #include <crypto/siphash.h> /* if_trunk.h uses siphash bits */
81 #include <net/if_trunk.h>
82 
83 #if NBPFILTER > 0
84 #include <net/bpf.h>
85 #endif
86 
87 /*
88  * Link Aggregation Control Protocol (LACP)
89  */
90 
/*
 * Slow Protocols header. aggr_input() reads it from directly behind
 * the Ethernet header and switches on sph_subtype to pick out LACP
 * and Marker PDUs.
 */
struct ether_slowproto_hdr {
	uint8_t		sph_subtype;
	uint8_t		sph_version;
} __packed;
95 
/* Values of sph_subtype that aggr_input() queues for the receive task. */
#define SLOWPROTOCOLS_SUBTYPE_LACP	1
#define SLOWPROTOCOLS_SUBTYPE_LACP_MARKER \
					2

#define LACP_VERSION			1

/*
 * Protocol timer constants. NOTE(review): presumably in seconds
 * (aggr_add_port() scales LACP_TIMEOUT_FACTOR by hz) -- confirm
 * against the timer code.
 */
#define LACP_FAST_PERIODIC_TIME		1
#define LACP_SLOW_PERIODIC_TIME		30
#define LACP_TIMEOUT_FACTOR		3
#define LACP_AGGREGATION_WAIT_TIME	2

/* also sizes the per-port p_txm_log[] array */
#define LACP_TX_MACHINE_RATE		3 /* per LACP_FAST_PERIODIC_TIME */

/*
 * Ethernet addresses written as byte-array initialisers.
 * LACP_ADDR_SLOW initialises lacp_address_slow below.
 */
#define LACP_ADDR_C_BRIDGE		{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }
#define LACP_ADDR_SLOW			{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }
#define LACP_ADDR_NON_TPMR_BRIDGE	{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03 }
112 
/*
 * Type/length header that precedes every TLV in the LACP and Marker
 * PDU layouts below.
 */
struct lacp_tlv_hdr {
	uint8_t			lacp_tlv_type;
	uint8_t			lacp_tlv_length;
} __packed __aligned(2);
117 
118 /* LACP TLV types */
119 
/* values for lacp_tlv_type in a LACPDU */
#define LACP_T_TERMINATOR		0x00
#define LACP_T_ACTOR			0x01
#define LACP_T_PARTNER			0x02
#define LACP_T_COLLECTOR		0x03

/* LACPv2 TLV types */

#define LACP_T_PORT_ALGORITHM		0x04
#define LACP_T_PORT_CONVERSATION_ID_DIGEST \
					0x05
#define LACP_T_PORT_CONVERSATION_MASK	0x06
#define LACP_T_PORT_CONVERSATION_SERVICE_MAPPING \
					0x0a
133 
/*
 * System identifier: a priority plus a MAC address.
 * aggr_get_trunk() converts lacp_sysid_priority with ntohs(), so it is
 * kept in network byte order.
 */
struct lacp_sysid {
	uint16_t		lacp_sysid_priority;
	uint8_t			lacp_sysid_mac[ETHER_ADDR_LEN];
} __packed __aligned(2);
138 
/*
 * Port identifier: a priority plus a port number. Both fields are
 * converted with ntohs() in aggr_get_trunk(), i.e. they are kept in
 * network byte order.
 */
struct lacp_portid {
	uint16_t		lacp_portid_priority;
	uint16_t		lacp_portid_number;
} __packed __aligned(2);
143 
/*
 * Body shared by the Actor and Partner information TLVs of a LACPDU;
 * also used to store the partner's information in struct aggr_port.
 */
struct lacp_port_info {
	struct lacp_sysid	lacp_sysid;
	uint16_t		lacp_key;	/* network order, see aggr_get_trunk() */
	struct lacp_portid	lacp_portid;
	uint8_t			lacp_state;	/* LACP_STATE_* bits */
	uint8_t			lacp_reserved[3];
} __packed __aligned(2);
151 
/*
 * Bits of lacp_port_info.lacp_state and aggr_port.p_actor_state.
 * aggr_get_trunk() sets ACTIVITY for active mode and TIMEOUT for the
 * fast timeout.
 */
#define LACP_STATE_ACTIVITY		(1 << 0)
#define LACP_STATE_TIMEOUT		(1 << 1)
#define LACP_STATE_AGGREGATION		(1 << 2)
#define LACP_STATE_SYNC			(1 << 3)
#define LACP_STATE_COLLECTING		(1 << 4)
#define LACP_STATE_DISTRIBUTING		(1 << 5)
#define LACP_STATE_DEFAULTED		(1 << 6)
#define LACP_STATE_EXPIRED		(1 << 7)
160 
/* Body of the Collector information TLV of a LACPDU. */
struct lacp_collector_info {
	uint16_t		lacp_maxdelay;
	uint8_t			lacp_reserved[12];
} __packed __aligned(2);
165 
/*
 * LACPDU layout as it follows the Ethernet header: slow protocols
 * header, then Actor, Partner and Collector TLVs, a terminator TLV and
 * trailing padding. The struct is packed, so field order and sizes
 * define the wire format; do not reorder.
 */
struct lacp_du {
	struct ether_slowproto_hdr
				lacp_du_sph;
	struct lacp_tlv_hdr	lacp_actor_info_tlv;
	struct lacp_port_info	lacp_actor_info;
	struct lacp_tlv_hdr	lacp_partner_info_tlv;
	struct lacp_port_info	lacp_partner_info;
	struct lacp_tlv_hdr	lacp_collector_info_tlv;
	struct lacp_collector_info
				lacp_collector_info;
	/* other TLVs go here */
	struct lacp_tlv_hdr	lacp_terminator;
	uint8_t			lacp_pad[50];
} __packed __aligned(2);
180 
181 /* Marker TLV types */
182 
/* values for the TLV type field of a Marker PDU */
#define MARKER_T_INFORMATION		0x01
#define MARKER_T_RESPONSE		0x02
185 
/* Body of the Marker information/response TLV (requester port, system, txid). */
struct marker_info {
	uint16_t		marker_requester_port;
	uint8_t			marker_requester_system[ETHER_ADDR_LEN];
	uint8_t			marker_requester_txid[4];
	uint8_t			marker_pad[2];
} __packed __aligned(2);
192 
/*
 * Marker PDU layout as it follows the Ethernet header: slow protocols
 * header, one marker TLV, a terminator TLV and trailing padding.
 * Packed wire format; do not reorder.
 */
struct marker_pdu {
	struct ether_slowproto_hdr
				marker_sph;

	struct lacp_tlv_hdr	marker_info_tlv;
	struct marker_info	marker_info;
	struct lacp_tlv_hdr	marker_terminator;
	uint8_t			marker_pad[90];
} __packed __aligned(2);
202 
/*
 * States of the per-port Receive machine (aggr_port.p_rxm_state).
 * The order matches lacp_rxm_state_names[]; presumably that table is
 * indexed by these values, so keep the two in sync.
 */
enum lacp_rxm_state {
	LACP_RXM_S_BEGIN = 0,
	LACP_RXM_S_INITIALIZE,
	LACP_RXM_S_PORT_DISABLED,
	LACP_RXM_S_EXPIRED,
	LACP_RXM_S_LACP_DISABLED,
	LACP_RXM_S_DEFAULTED,
	LACP_RXM_S_CURRENT,
};
212 
/*
 * Events fed to the Receive machine through aggr_rxm_ev().
 * The order matches lacp_rxm_event_names[]; keep the two in sync.
 */
enum lacp_rxm_event {
	LACP_RXM_E_BEGIN,
	LACP_RXM_E_UCT,
	LACP_RXM_E_PORT_MOVED,
	LACP_RXM_E_NOT_PORT_MOVED,
	LACP_RXM_E_PORT_ENABLED,
	LACP_RXM_E_NOT_PORT_ENABLED,
	LACP_RXM_E_LACP_ENABLED,
	LACP_RXM_E_NOT_LACP_ENABLED,
	LACP_RXM_E_LACPDU, /* CtrlMuxN:M_UNITDATA.indication(LACPDU) */
	LACP_RXM_E_TIMER_EXPIRED, /* current_while_timer expired */
};
225 
/*
 * States of the per-port Mux machine (aggr_port.p_mux_state).
 * The order matches lacp_mux_state_names[]; keep the two in sync.
 */
enum lacp_mux_state {
	LACP_MUX_S_BEGIN = 0,
	LACP_MUX_S_DETACHED,
	LACP_MUX_S_WAITING,
	LACP_MUX_S_ATTACHED,
	LACP_MUX_S_DISTRIBUTING,
	LACP_MUX_S_COLLECTING,
};
234 
/*
 * Events fed to the Mux machine through aggr_mux()/aggr_mux_ev().
 * The order matches lacp_mux_event_names[]; keep the two in sync.
 */
enum lacp_mux_event {
	LACP_MUX_E_BEGIN,
	LACP_MUX_E_SELECTED,
	LACP_MUX_E_STANDBY,
	LACP_MUX_E_UNSELECTED,
	LACP_MUX_E_READY,
	LACP_MUX_E_SYNC,
	LACP_MUX_E_NOT_SYNC,
	LACP_MUX_E_COLLECTING,
	LACP_MUX_E_NOT_COLLECTING,
};
246 
247 /*
248  * LACP variables
249  */
250 
/* Slow Protocols destination address compared against by ETHER_IS_SLOWADDR(). */
static const uint8_t lacp_address_slow[ETHER_ADDR_LEN] = LACP_ADDR_SLOW;
252 
/*
 * Printable names for enum lacp_rxm_state, listed in enum order.
 * NOTE(review): presumably indexed by the enum value for debug
 * output -- the users are outside this view.
 */
static const char *lacp_rxm_state_names[] = {
	"BEGIN",
	"INITIALIZE",
	"PORT_DISABLED",
	"EXPIRED",
	"LACP_DISABLED",
	"DEFAULTED",
	"CURRENT",
};
262 
/* Printable names for enum lacp_rxm_event, listed in enum order. */
static const char *lacp_rxm_event_names[] = {
	"BEGIN",
	"UCT",
	"port_moved",
	"!port_moved",
	"port_enabled",
	"!port_enabled",
	"LACP_Enabled",
	"!LACP_Enabled",
	"LACPDU",
	"current_while_timer expired",
};
275 
/* Printable names for enum lacp_mux_state, listed in enum order. */
static const char *lacp_mux_state_names[] = {
	"BEGIN",
	"DETACHED",
	"WAITING",
	"ATTACHED",
	"DISTRIBUTING",
	"COLLECTING",
};
284 
/* Printable names for enum lacp_mux_event, listed in enum order. */
static const char *lacp_mux_event_names[] = {
	"BEGIN",
	"Selected == SELECTED",
	"Selected == STANDBY",
	"Selected == UNSELECTED",
	"Ready",
	"Partner.Sync",
	"! Partner.Sync",
	"Partner.Collecting",
	"! Partner.Collecting",
};
296 
297 /*
298  * aggr interface
299  */
300 
/*
 * AGGR_MAX_PORTS sizes aggr_map.m_ifp0s[] and is the modulus
 * aggr_transmit() applies to the flow id when picking an output port.
 */
#define AGGR_MAX_PORTS		32
#define AGGR_MAX_SLOW_PKTS	(AGGR_MAX_PORTS * 3)
303 
/*
 * One entry on the softc's sc_multiaddrs list. aggr_add_port() walks
 * that list and passes each entry to aggr_multi(SIOCADDMULTI) for a
 * newly added port.
 */
struct aggr_multiaddr {
	TAILQ_ENTRY(aggr_multiaddr)
				m_entry;
	unsigned int		m_refs;
	uint8_t			m_addrlo[ETHER_ADDR_LEN];
	uint8_t			m_addrhi[ETHER_ADDR_LEN];
	struct sockaddr_storage m_addr;
};
TAILQ_HEAD(aggr_multiaddrs, aggr_multiaddr);
313 
314 struct aggr_softc;
315 
/*
 * Value of a port's "Selected" variable (p_selected, p_muxed).
 * UNSELECTED is 0, which aggr_get_trunk() relies on when it tests
 * p_muxed for truth. Order matches aggr_port_selected_names[].
 */
enum aggr_port_selected {
	AGGR_PORT_UNSELECTED,
	AGGR_PORT_SELECTED,
	AGGR_PORT_STANDBY,
};
321 
/* Printable names for enum aggr_port_selected, listed in enum order. */
static const char *aggr_port_selected_names[] = {
	"UNSELECTED",
	"SELECTED",
	"STANDBY",
};
327 
/*
 * Per-port state for one Ethernet interface that is a member of an
 * aggr(4) interface. Allocated and initialised by aggr_add_port().
 */
struct aggr_port {
	struct ifnet		*p_ifp0;	/* the member interface */

	/* values saved from ifp0 in aggr_add_port() before it is modified */
	uint8_t			 p_lladdr[ETHER_ADDR_LEN];
	uint32_t		 p_mtu;

	int (*p_ioctl)(struct ifnet *, u_long, caddr_t);
	int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);

	struct task		 p_lhook;	/* link state hook: aggr_p_linkch */
	struct task		 p_dhook;	/* detach hook: aggr_p_detach */

	struct aggr_softc	*p_aggr;	/* owning aggr(4) softc */
	TAILQ_ENTRY(aggr_port)	 p_entry;	/* on sc_ports */

	unsigned int		 p_collecting;	/* aggr_input() drops data if 0 */
	unsigned int		 p_distributing;
	TAILQ_ENTRY(aggr_port)	 p_entry_distributing;
	TAILQ_ENTRY(aggr_port)	 p_entry_muxen;

	/* Partner information */
	enum aggr_port_selected	 p_muxed;
	enum aggr_port_selected	 p_selected;		/* Selected */
	struct lacp_port_info	 p_partner;
#define p_partner_state		 p_partner.lacp_state

	uint8_t			 p_actor_state;	/* LACP_STATE_* bits */
	uint8_t			 p_lacp_timeout;

	struct timeout		 p_current_while_timer;
	struct timeout		 p_wait_while_timer;

	/* Receive machine */
	enum lacp_rxm_state	 p_rxm_state;
	struct mbuf_queue	 p_rxm_mq;	/* slow frames queued by aggr_input() */
	struct task		 p_rxm_task;	/* runs aggr_rx() on systq */

	/* Periodic Transmission machine */
	struct timeout		 p_ptm_tx;

	/* Mux machine */
	enum lacp_mux_state	 p_mux_state;

	/* Transmit machine */
	int			 p_txm_log[LACP_TX_MACHINE_RATE];
	unsigned int		 p_txm_slot;
	struct timeout		 p_txm_ntt;
};

TAILQ_HEAD(aggr_port_list, aggr_port);
379 
/*
 * Output map: aggr_transmit() indexes m_ifp0s[] with
 * (flow id % AGGR_MAX_PORTS) to choose the interface to send on.
 */
struct aggr_map {
	struct ifnet		*m_ifp0s[AGGR_MAX_PORTS];
};
383 
/*
 * Per-interface state of one aggr(4) instance. Allocated zeroed in
 * aggr_clone_create() and freed in aggr_clone_destroy().
 */
struct aggr_softc {
	struct arpcom		 sc_ac;
#define sc_if			 sc_ac.ac_if
	unsigned int		 sc_dead;	/* set on destroy; ioctls then fail ENXIO */
	unsigned int		 sc_promisc;
	struct ifmedia		 sc_media;

	struct aggr_multiaddrs	 sc_multiaddrs;

	unsigned int		 sc_mix;	/* random salt for generated flow ids */

	struct aggr_map		 sc_maps[2];
	unsigned int		 sc_map_gen;
	struct aggr_map		*sc_map;	/* SMR-read by tx path; NULL = no links */

	struct rwlock		 sc_lock;
	struct aggr_port_list	 sc_ports;
	struct aggr_port_list	 sc_distributing;
	struct aggr_port_list	 sc_muxen;
	unsigned int		 sc_nports;
	unsigned int		 sc_ndistributing;

	struct timeout		 sc_tick;

	uint8_t			 sc_lacp_mode;
#define AGGR_LACP_MODE_PASSIVE		0
#define AGGR_LACP_MODE_ACTIVE		1
	uint8_t			 sc_lacp_timeout;	/* index into aggr_periodic_times[] */
#define AGGR_LACP_TIMEOUT_SLOW		0
#define AGGR_LACP_TIMEOUT_FAST		1
	uint16_t		 sc_lacp_prio;
	uint16_t		 sc_lacp_port_prio;

	struct lacp_sysid	 sc_partner_system;
	uint16_t		 sc_partner_key;	/* network order, see aggr_get_trunk() */
};
420 
/*
 * Debug printf: only prints while IFF_DEBUG is set on the aggr
 * interface of the given softc.
 */
#define DPRINTF(_sc, fmt...)	do { \
	if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \
		printf(fmt); \
} while (0)
425 
/*
 * Periodic time for each AGGR_LACP_TIMEOUT_* setting, indexed by that
 * setting. aggr_set_options() validates user input against this table.
 */
static const unsigned int aggr_periodic_times[] = {
	[AGGR_LACP_TIMEOUT_SLOW] = LACP_SLOW_PERIODIC_TIME,
	[AGGR_LACP_TIMEOUT_FAST] = LACP_FAST_PERIODIC_TIME,
};
430 
431 static int	aggr_clone_create(struct if_clone *, int);
432 static int	aggr_clone_destroy(struct ifnet *);
433 
434 static int	aggr_ioctl(struct ifnet *, u_long, caddr_t);
435 static void	aggr_start(struct ifqueue *);
436 static int	aggr_enqueue(struct ifnet *, struct mbuf *);
437 
438 static int	aggr_media_change(struct ifnet *);
439 static void	aggr_media_status(struct ifnet *, struct ifmediareq *);
440 
441 static int	aggr_up(struct aggr_softc *);
442 static int	aggr_down(struct aggr_softc *);
443 static int	aggr_iff(struct aggr_softc *);
444 
445 static void	aggr_p_linkch(void *);
446 static void	aggr_p_detach(void *);
447 static int	aggr_p_ioctl(struct ifnet *, u_long, caddr_t);
448 static int	aggr_p_output(struct ifnet *, struct mbuf *,
449 		    struct sockaddr *, struct rtentry *);
450 
451 static int	aggr_get_trunk(struct aggr_softc *, struct trunk_reqall *);
452 static int	aggr_set_options(struct aggr_softc *,
453 		    const struct trunk_opts *);
454 static int	aggr_get_options(struct aggr_softc *, struct trunk_opts *);
455 static int	aggr_set_lladdr(struct aggr_softc *, const struct ifreq *);
456 static int	aggr_set_mtu(struct aggr_softc *, uint32_t);
457 static void	aggr_p_dtor(struct aggr_softc *, struct aggr_port *,
458 		    const char *);
459 static int	aggr_p_setlladdr(struct aggr_port *, const uint8_t *);
460 static int	aggr_p_set_mtu(struct aggr_port *, uint32_t);
461 static int	aggr_add_port(struct aggr_softc *,
462 		    const struct trunk_reqport *);
463 static int	aggr_get_port(struct aggr_softc *, struct trunk_reqport *);
464 static int	aggr_del_port(struct aggr_softc *,
465 		    const struct trunk_reqport *);
466 static int	aggr_group(struct aggr_softc *, struct aggr_port *, u_long);
467 static int	aggr_multi(struct aggr_softc *, struct aggr_port *,
468 		    const struct aggr_multiaddr *, u_long);
469 static uint32_t	aggr_hardmtu(struct aggr_softc *);
470 static void	aggr_set_lacp_mode(struct aggr_softc *, int);
471 static void	aggr_set_lacp_timeout(struct aggr_softc *, int);
472 static int	aggr_multi_add(struct aggr_softc *, struct ifreq *);
473 static int	aggr_multi_del(struct aggr_softc *, struct ifreq *);
474 
475 static void	aggr_map(struct aggr_softc *);
476 
477 static void	aggr_record_default(struct aggr_softc *, struct aggr_port *);
478 static void	aggr_current_while_timer(void *);
479 static void	aggr_wait_while_timer(void *);
480 static void	aggr_rx(void *);
481 static void	aggr_rxm_ev(struct aggr_softc *, struct aggr_port *,
482 		    enum lacp_rxm_event, const struct lacp_du *);
/* Feed an event that carries no LACPDU to the Receive machine. */
#define aggr_rxm(_sc, _p, _ev) \
		aggr_rxm_ev((_sc), (_p), (_ev), NULL)
/* Feed a received LACPDU to the Receive machine as LACP_RXM_E_LACPDU. */
#define aggr_rxm_lacpdu(_sc, _p, _lacpdu) \
		aggr_rxm_ev((_sc), (_p), LACP_RXM_E_LACPDU, (_lacpdu))
487 
488 static void	aggr_mux(struct aggr_softc *, struct aggr_port *,
489 		    enum lacp_mux_event);
490 static int	aggr_mux_ev(struct aggr_softc *, struct aggr_port *,
491 		    enum lacp_mux_event, int *);
492 
493 static void	aggr_set_partner_timeout(struct aggr_port *, int);
494 
495 static void	aggr_ptm_tx(void *);
496 
497 static void	aggr_transmit_machine(void *);
498 static void	aggr_ntt(struct aggr_port *);
499 static void	aggr_ntt_transmit(struct aggr_port *);
500 
501 static void	aggr_set_selected(struct aggr_port *, enum aggr_port_selected,
502 		    enum lacp_mux_event);
503 static void	aggr_unselected(struct aggr_port *);
504 
505 static void	aggr_selection_logic(struct aggr_softc *, struct aggr_port *);
506 
/* True when the Ethernet address _a equals lacp_address_slow. */
#define ETHER_IS_SLOWADDR(_a)	ETHER_IS_EQ((_a), lacp_address_slow)
508 
/* Cloner for "aggr" interfaces; registered by aggrattach(). */
static struct if_clone aggr_cloner =
    IF_CLONE_INITIALIZER("aggr", aggr_clone_create, aggr_clone_destroy);
511 
/*
 * Pseudo-device attach entry point: registers the aggr interface
 * cloner. The count argument is not used.
 */
void
aggrattach(int count)
{
	if_clone_attach(&aggr_cloner);
}
517 
518 static int
519 aggr_clone_create(struct if_clone *ifc, int unit)
520 {
521 	struct aggr_softc *sc;
522 	struct ifnet *ifp;
523 
524 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
525 	if (sc == NULL)
526 		return (ENOMEM);
527 
528 	sc->sc_mix = arc4random();
529 
530 	ifp = &sc->sc_if;
531 
532 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
533 	    ifc->ifc_name, unit);
534 
535 	TAILQ_INIT(&sc->sc_multiaddrs);
536 	rw_init(&sc->sc_lock, "aggrlk");
537 	TAILQ_INIT(&sc->sc_ports);
538 	sc->sc_nports = 0;
539 	TAILQ_INIT(&sc->sc_distributing);
540 	sc->sc_ndistributing = 0;
541 	TAILQ_INIT(&sc->sc_muxen);
542 
543 	sc->sc_map_gen = 0;
544 	sc->sc_map = NULL; /* no links yet */
545 
546 	sc->sc_lacp_mode = AGGR_LACP_MODE_ACTIVE;
547 	sc->sc_lacp_timeout = AGGR_LACP_TIMEOUT_SLOW;
548 	sc->sc_lacp_prio = 0x8000; /* medium */
549 	sc->sc_lacp_port_prio = 0x8000; /* medium */
550 
551 	ifmedia_init(&sc->sc_media, 0, aggr_media_change, aggr_media_status);
552 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
553 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
554 
555 	ifp->if_softc = sc;
556 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
557 	ifp->if_ioctl = aggr_ioctl;
558 	ifp->if_qstart = aggr_start;
559 	ifp->if_enqueue = aggr_enqueue;
560 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
561 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
562 	ifp->if_link_state = LINK_STATE_DOWN;
563 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
564 	ether_fakeaddr(ifp);
565 
566 	if_counters_alloc(ifp);
567 	if_attach(ifp);
568 	ether_ifattach(ifp);
569 
570 	ifp->if_llprio = IFQ_MAXPRIO;
571 
572 	return (0);
573 }
574 
575 static int
576 aggr_clone_destroy(struct ifnet *ifp)
577 {
578 	struct aggr_softc *sc = ifp->if_softc;
579 	struct aggr_port *p;
580 
581 	NET_LOCK();
582 	sc->sc_dead = 1;
583 
584 	if (ISSET(ifp->if_flags, IFF_RUNNING))
585 		aggr_down(sc);
586 	NET_UNLOCK();
587 
588 	ether_ifdetach(ifp);
589 	if_detach(ifp);
590 
591 	/* last ref, no need to lock. aggr_p_dtor locks anyway */
592 	while ((p = TAILQ_FIRST(&sc->sc_ports)) != NULL)
593 		aggr_p_dtor(sc, p, "destroy");
594 
595 	free(sc, M_DEVBUF, sizeof(*sc));
596 
597 	return (0);
598 }
599 
600 /*
601  * LACP_Enabled
602  */
603 static inline int
604 aggr_lacp_enabled(struct aggr_softc *sc)
605 {
606 	struct ifnet *ifp = &sc->sc_if;
607 	return (ISSET(ifp->if_flags, IFF_RUNNING));
608 }
609 
610 /*
611  * port_enabled
612  */
613 static int
614 aggr_port_enabled(struct aggr_port *p)
615 {
616 	struct ifnet *ifp0 = p->p_ifp0;
617 
618 	if (!ISSET(ifp0->if_flags, IFF_RUNNING))
619 		return (0);
620 
621 	if (!LINK_STATE_IS_UP(ifp0->if_link_state))
622 		return (0);
623 
624 	return (1);
625 }
626 
/*
 * port_moved
 *
 * This variable is set to TRUE if the Receive machine for an Aggregation
 * Port is in the PORT_DISABLED state, and the combination of
 * Partner_Oper_System and Partner_Oper_Port_Number in use by that
 * Aggregation Port has been received in an incoming LACPDU on a
 * different Aggregation Port. This variable is set to FALSE once the
 * INITIALIZE state of the Receive machine has set the Partner information
 * for the Aggregation Port to administrative default values.
 *
 * Value: Boolean
 *
 * The detection described above is not implemented here: this function
 * ignores both arguments and always reports FALSE (0).
 */
static int
aggr_port_moved(struct aggr_softc *sc, struct aggr_port *p)
{
	return (0);
}
645 
646 static void
647 aggr_transmit(struct aggr_softc *sc, const struct aggr_map *map, struct mbuf *m)
648 {
649 	struct ifnet *ifp = &sc->sc_if;
650 	struct ifnet *ifp0;
651 	uint16_t flow = 0;
652 
653 #if NBPFILTER > 0
654 	{
655 		caddr_t if_bpf = ifp->if_bpf;
656 		if (if_bpf)
657 			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
658 	}
659 #endif
660 
661 	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
662 		flow = m->m_pkthdr.ph_flowid;
663 
664 	ifp0 = map->m_ifp0s[flow % AGGR_MAX_PORTS];
665 
666 	if (if_enqueue(ifp0, m) != 0)
667 		counters_inc(ifp->if_counters, ifc_oerrors);
668 }
669 
670 static int
671 aggr_enqueue(struct ifnet *ifp, struct mbuf *m)
672 {
673 	struct aggr_softc *sc;
674 	const struct aggr_map *map;
675 	int error = 0;
676 
677 	if (!ifq_is_priq(&ifp->if_snd))
678 		return (if_enqueue_ifq(ifp, m));
679 
680 	sc = ifp->if_softc;
681 
682 	smr_read_enter();
683 	map = SMR_PTR_GET(&sc->sc_map);
684 	if (__predict_false(map == NULL)) {
685 		m_freem(m);
686 		error = ENETDOWN;
687 	} else {
688 		counters_pkt(ifp->if_counters,
689 		    ifc_opackets, ifc_obytes, m->m_pkthdr.len);
690 		aggr_transmit(sc, map, m);
691 	}
692 	smr_read_leave();
693 
694 	return (error);
695 }
696 
697 static void
698 aggr_start(struct ifqueue *ifq)
699 {
700 	struct ifnet *ifp = ifq->ifq_if;
701 	struct aggr_softc *sc = ifp->if_softc;
702 	const struct aggr_map *map;
703 
704 	smr_read_enter();
705 	map = SMR_PTR_GET(&sc->sc_map);
706 	if (__predict_false(map == NULL))
707 		ifq_purge(ifq);
708 	else {
709 		struct mbuf *m;
710 
711 		while ((m = ifq_dequeue(ifq)) != NULL)
712 			aggr_transmit(sc, map, m);
713 	}
714 	smr_read_leave();
715 }
716 
717 static inline int
718 aggr_eh_is_slow(const struct ether_header *eh)
719 {
720 	return (ETHER_IS_SLOWADDR(eh->ether_dhost) &&
721 	    eh->ether_type == htons(ETHERTYPE_SLOW));
722 }
723 
724 static int
725 aggr_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
726 {
727 	struct ether_header *eh;
728 	struct aggr_port *p = cookie;
729 	struct aggr_softc *sc = p->p_aggr;
730 	struct ifnet *ifp = &sc->sc_if;
731 	int hlen = sizeof(*eh);
732 
733 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
734 		goto drop;
735 
736 	eh = mtod(m, struct ether_header *);
737 	if (!ISSET(m->m_flags, M_VLANTAG) &&
738 	    __predict_false(aggr_eh_is_slow(eh))) {
739 		struct ether_slowproto_hdr *sph;
740 
741 		hlen += sizeof(*sph);
742 		if (m->m_len < hlen) {
743 			m = m_pullup(m, hlen);
744 			if (m == NULL) {
745 				/* short++ */
746 				return (1);
747 			}
748 			eh = mtod(m, struct ether_header *);
749 		}
750 
751 		sph = (struct ether_slowproto_hdr *)(eh + 1);
752 		switch (sph->sph_subtype) {
753 		case SLOWPROTOCOLS_SUBTYPE_LACP:
754 		case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER:
755 			if (mq_enqueue(&p->p_rxm_mq, m) == 0)
756 				task_add(systq, &p->p_rxm_task);
757 			return (1);
758 		default:
759 			break;
760 		}
761 	}
762 
763 	if (__predict_false(!p->p_collecting))
764 		goto drop;
765 
766 	if (!ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) {
767 		m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
768 		    (ifp0->if_index ^ sc->sc_mix);
769 	}
770 
771 	if_vinput(ifp, m);
772 
773 	return (1);
774 
775 drop:
776 	m_freem(m);
777 	return (1);
778 }
779 
780 static int
781 aggr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
782 {
783 	struct aggr_softc *sc = ifp->if_softc;
784 	struct ifreq *ifr = (struct ifreq *)data;
785 	int error = 0;
786 
787 	if (sc->sc_dead)
788 		return (ENXIO);
789 
790 	switch (cmd) {
791 	case SIOCSIFADDR:
792 		break;
793 
794 	case SIOCSIFFLAGS:
795 		if (ISSET(ifp->if_flags, IFF_UP)) {
796 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
797 				error = aggr_up(sc);
798 			else
799 				error = ENETRESET;
800 		} else {
801 			if (ISSET(ifp->if_flags, IFF_RUNNING))
802 				error = aggr_down(sc);
803 		}
804 		break;
805 
806 	case SIOCSIFLLADDR:
807 		error = aggr_set_lladdr(sc, ifr);
808 		break;
809 
810 	case SIOCSTRUNK:
811 		error = suser(curproc);
812 		if (error != 0)
813 			break;
814 
815 		if (((struct trunk_reqall *)data)->ra_proto !=
816 		    TRUNK_PROTO_LACP) {
817 			error = EPROTONOSUPPORT;
818 			break;
819 		}
820 
821 		/* nop */
822 		break;
823 	case SIOCGTRUNK:
824 		error = aggr_get_trunk(sc, (struct trunk_reqall *)data);
825 		break;
826 
827 	case SIOCSTRUNKOPTS:
828 		error = suser(curproc);
829 		if (error != 0)
830 			break;
831 
832 		error = aggr_set_options(sc, (struct trunk_opts *)data);
833 		break;
834 
835 	case SIOCGTRUNKOPTS:
836 		error = aggr_get_options(sc, (struct trunk_opts *)data);
837 		break;
838 
839 	case SIOCGTRUNKPORT:
840 		error = aggr_get_port(sc, (struct trunk_reqport *)data);
841 		break;
842 	case SIOCSTRUNKPORT:
843 		error = suser(curproc);
844 		if (error != 0)
845 			break;
846 
847 		error = aggr_add_port(sc, (struct trunk_reqport *)data);
848 		break;
849 	case SIOCSTRUNKDELPORT:
850 		error = suser(curproc);
851 		if (error != 0)
852 			break;
853 
854 		error = aggr_del_port(sc, (struct trunk_reqport *)data);
855 		break;
856 
857 	case SIOCSIFMTU:
858 		error = aggr_set_mtu(sc, ifr->ifr_mtu);
859 		break;
860 
861 	case SIOCADDMULTI:
862 		error = aggr_multi_add(sc, ifr);
863 		break;
864 	case SIOCDELMULTI:
865 		error = aggr_multi_del(sc, ifr);
866 		break;
867 
868 	case SIOCSIFMEDIA:
869 		error = EOPNOTSUPP;
870 		break;
871 	case SIOCGIFMEDIA:
872 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
873 		break;
874 
875 	default:
876 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
877 		break;
878 	}
879 
880 	if (error == ENETRESET)
881 		error = aggr_iff(sc);
882 
883 	return (error);
884 }
885 
886 static int
887 aggr_get_trunk(struct aggr_softc *sc, struct trunk_reqall *ra)
888 {
889 	struct ifnet *ifp = &sc->sc_if;
890 	struct trunk_reqport rp;
891 	struct aggr_port *p;
892 	size_t size = ra->ra_size;
893 	caddr_t ubuf = (caddr_t)ra->ra_port;
894 	struct lacp_opreq *req;
895 	uint8_t state = 0;
896 	int error = 0;
897 
898 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
899 		SET(state, LACP_STATE_ACTIVITY);
900 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
901 		SET(state, LACP_STATE_TIMEOUT);
902 
903 	ra->ra_proto = TRUNK_PROTO_LACP;
904 	memset(&ra->ra_psc, 0, sizeof(ra->ra_psc));
905 
906 	/*
907 	 * aggr(4) does not support Individual links so don't bother
908 	 * with portprio, portno, and state, as per the spec.
909 	 */
910 
911 	req = &ra->ra_lacpreq;
912 	req->actor_prio = sc->sc_lacp_prio;
913 	CTASSERT(sizeof(req->actor_mac) == sizeof(sc->sc_ac.ac_enaddr));
914 	memcpy(req->actor_mac, &sc->sc_ac.ac_enaddr, sizeof(req->actor_mac));
915 	req->actor_key = ifp->if_index;
916 	req->actor_state = state;
917 
918 	req->partner_prio = ntohs(sc->sc_partner_system.lacp_sysid_priority);
919 	CTASSERT(sizeof(req->partner_mac) ==
920 	    sizeof(sc->sc_partner_system.lacp_sysid_mac));
921 	memcpy(req->partner_mac, sc->sc_partner_system.lacp_sysid_mac,
922 	    sizeof(req->partner_mac));
923 	req->partner_key = ntohs(sc->sc_partner_key);
924 
925 	ra->ra_ports = sc->sc_nports;
926 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
927 		struct ifnet *ifp0;
928 		struct lacp_opreq *opreq;
929 
930 		if (size < sizeof(rp))
931 			break;
932 
933 		ifp0 = p->p_ifp0;
934 
935 		CTASSERT(sizeof(rp.rp_ifname) == sizeof(ifp->if_xname));
936 		CTASSERT(sizeof(rp.rp_portname) == sizeof(ifp0->if_xname));
937 
938 		memset(&rp, 0, sizeof(rp));
939 		memcpy(rp.rp_ifname, ifp->if_xname, sizeof(rp.rp_ifname));
940 		memcpy(rp.rp_portname, ifp0->if_xname, sizeof(rp.rp_portname));
941 
942 		if (p->p_muxed)
943 			SET(rp.rp_flags, TRUNK_PORT_ACTIVE);
944 		if (p->p_collecting)
945 			SET(rp.rp_flags, TRUNK_PORT_COLLECTING);
946 		if (p->p_distributing)
947 			SET(rp.rp_flags, TRUNK_PORT_DISTRIBUTING);
948 		if (!aggr_port_enabled(p))
949 			SET(rp.rp_flags, TRUNK_PORT_DISABLED);
950 
951 		opreq = &rp.rp_lacpreq;
952 
953 		opreq->actor_prio = sc->sc_lacp_prio;
954 		memcpy(opreq->actor_mac, &sc->sc_ac.ac_enaddr,
955 		    sizeof(req->actor_mac));
956 		opreq->actor_key = ifp->if_index;
957 		opreq->actor_portprio = sc->sc_lacp_port_prio;
958 		opreq->actor_portno = ifp0->if_index;
959 		opreq->actor_state = state | p->p_actor_state;
960 
961 		opreq->partner_prio =
962 		    ntohs(p->p_partner.lacp_sysid.lacp_sysid_priority);
963 		CTASSERT(sizeof(opreq->partner_mac) ==
964 		    sizeof(p->p_partner.lacp_sysid.lacp_sysid_mac));
965 		memcpy(opreq->partner_mac,
966 		    p->p_partner.lacp_sysid.lacp_sysid_mac,
967 		    sizeof(opreq->partner_mac));
968 		opreq->partner_key = ntohs(p->p_partner.lacp_key);
969 		opreq->partner_portprio =
970 		    ntohs(p->p_partner.lacp_portid.lacp_portid_priority);
971 		opreq->partner_portno =
972 		    ntohs(p->p_partner.lacp_portid.lacp_portid_number);
973 		opreq->partner_state = p->p_partner_state;
974 
975 		error = copyout(&rp, ubuf, sizeof(rp));
976 		if (error != 0)
977 			break;
978 
979 		ubuf += sizeof(rp);
980 		size -= sizeof(rp);
981 	}
982 
983 	return (error);
984 }
985 
986 static int
987 aggr_get_options(struct aggr_softc *sc, struct trunk_opts *tro)
988 {
989 	struct lacp_adminopts *opt = &tro->to_lacpopts;
990 
991 	if (tro->to_proto != TRUNK_PROTO_LACP)
992 		return (EPROTONOSUPPORT);
993 
994 	opt->lacp_mode = sc->sc_lacp_mode;
995 	opt->lacp_timeout = sc->sc_lacp_timeout;
996 	opt->lacp_prio = sc->sc_lacp_prio;
997 	opt->lacp_portprio = sc->sc_lacp_port_prio;
998 	opt->lacp_ifqprio = sc->sc_if.if_llprio;
999 
1000 	return (0);
1001 }
1002 
1003 static int
1004 aggr_set_options(struct aggr_softc *sc, const struct trunk_opts *tro)
1005 {
1006 	const struct lacp_adminopts *opt = &tro->to_lacpopts;
1007 
1008 	if (tro->to_proto != TRUNK_PROTO_LACP)
1009 		return (EPROTONOSUPPORT);
1010 
1011 	switch (tro->to_opts) {
1012 	case TRUNK_OPT_LACP_MODE:
1013 		switch (opt->lacp_mode) {
1014 		case AGGR_LACP_MODE_PASSIVE:
1015 		case AGGR_LACP_MODE_ACTIVE:
1016 			break;
1017 		default:
1018 			return (EINVAL);
1019 		}
1020 
1021 		aggr_set_lacp_mode(sc, opt->lacp_mode);
1022 		break;
1023 
1024 	case TRUNK_OPT_LACP_TIMEOUT:
1025 		if (opt->lacp_timeout > nitems(aggr_periodic_times) ||
1026 		    aggr_periodic_times[opt->lacp_timeout] == 0)
1027 			return (EINVAL);
1028 
1029 		aggr_set_lacp_timeout(sc, opt->lacp_timeout);
1030 		break;
1031 
1032 	case TRUNK_OPT_LACP_SYS_PRIO:
1033 		if (opt->lacp_prio == 0)
1034 			return (EINVAL);
1035 
1036 		sc->sc_lacp_prio = opt->lacp_prio;
1037 		break;
1038 
1039 	case TRUNK_OPT_LACP_PORT_PRIO:
1040 		if (opt->lacp_portprio == 0)
1041 			return (EINVAL);
1042 
1043 		sc->sc_lacp_port_prio = opt->lacp_portprio;
1044 		break;
1045 
1046 	default:
1047 		return (ENODEV);
1048 	}
1049 
1050 	return (0);
1051 }
1052 
1053 static int
1054 aggr_add_port(struct aggr_softc *sc, const struct trunk_reqport *rp)
1055 {
1056 	struct ifnet *ifp = &sc->sc_if;
1057 	struct ifnet *ifp0;
1058 	struct arpcom *ac0;
1059 	struct aggr_port *p;
1060 	struct aggr_multiaddr *ma;
1061 	uint32_t hardmtu;
1062 	int past = ticks - (hz * LACP_TIMEOUT_FACTOR);
1063 	int i;
1064 	int error;
1065 
1066 	NET_ASSERT_LOCKED();
1067 	if (sc->sc_nports > AGGR_MAX_PORTS)
1068 		return (ENOSPC);
1069 
1070 	ifp0 = ifunit(rp->rp_portname);
1071 	if (ifp0 == NULL || ifp0->if_index == ifp->if_index)
1072 		return (EINVAL);
1073 
1074 	if (ifp0->if_type != IFT_ETHER)
1075 		return (EPROTONOSUPPORT);
1076 
1077 	hardmtu = ifp0->if_hardmtu;
1078 	if (hardmtu < ifp->if_mtu)
1079 		return (ENOBUFS);
1080 	if (ifp->if_hardmtu < hardmtu)
1081 		hardmtu = ifp->if_hardmtu;
1082 
1083 	ac0 = (struct arpcom *)ifp0;
1084 	if (ac0->ac_trunkport != NULL)
1085 		return (EBUSY);
1086 
1087 	/* let's try */
1088 
1089 	ifp0 = if_get(ifp0->if_index); /* get an actual reference */
1090 	if (ifp0 == NULL) {
1091 		/* XXX this should never happen */
1092 		return (EINVAL);
1093 	}
1094 
1095 	p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
1096 	if (p == NULL) {
1097 		error = ENOMEM;
1098 		goto put;
1099 	}
1100 
1101 	for (i = 0; i < nitems(p->p_txm_log); i++)
1102 		p->p_txm_log[i] = past;
1103 
1104 	p->p_ifp0 = ifp0;
1105 	p->p_aggr = sc;
1106 	p->p_mtu = ifp0->if_mtu;
1107 
1108 	CTASSERT(sizeof(p->p_lladdr) == sizeof(ac0->ac_enaddr));
1109 	memcpy(p->p_lladdr, ac0->ac_enaddr, sizeof(p->p_lladdr));
1110 	p->p_ioctl = ifp0->if_ioctl;
1111 	p->p_output = ifp0->if_output;
1112 
1113 	error = aggr_group(sc, p, SIOCADDMULTI);
1114 	if (error != 0)
1115 		goto free;
1116 
1117 	error = aggr_p_setlladdr(p, sc->sc_ac.ac_enaddr);
1118 	if (error != 0)
1119 		goto ungroup;
1120 
1121 	error = aggr_p_set_mtu(p, ifp->if_mtu);
1122 	if (error != 0)
1123 		goto resetlladdr;
1124 
1125 	if (sc->sc_promisc) {
1126 		error = ifpromisc(ifp0, 1);
1127 		if (error != 0)
1128 			goto unmtu;
1129 	}
1130 
1131 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
1132 		if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) {
1133 			log(LOG_WARNING, "%s %s: "
1134 			    "unable to add multicast address\n",
1135 			    ifp->if_xname, ifp0->if_xname);
1136 		}
1137 	}
1138 
1139 	task_set(&p->p_lhook, aggr_p_linkch, p);
1140 	if_linkstatehook_add(ifp0, &p->p_lhook);
1141 
1142 	task_set(&p->p_dhook, aggr_p_detach, p);
1143 	if_detachhook_add(ifp0, &p->p_dhook);
1144 
1145 	task_set(&p->p_rxm_task, aggr_rx, p);
1146 	mq_init(&p->p_rxm_mq, 3, IPL_NET);
1147 
1148 	timeout_set_proc(&p->p_ptm_tx, aggr_ptm_tx, p);
1149 	timeout_set_proc(&p->p_txm_ntt, aggr_transmit_machine, p);
1150 	timeout_set_proc(&p->p_current_while_timer,
1151 	    aggr_current_while_timer, p);
1152 	timeout_set_proc(&p->p_wait_while_timer, aggr_wait_while_timer, p);
1153 
1154 	p->p_muxed = 0;
1155 	p->p_collecting = 0;
1156 	p->p_distributing = 0;
1157 	p->p_selected = AGGR_PORT_UNSELECTED;
1158 	p->p_actor_state = LACP_STATE_AGGREGATION;
1159 
1160 	/* commit */
1161 	DPRINTF(sc, "%s %s trunkport: creating port\n",
1162 	    ifp->if_xname, ifp0->if_xname);
1163 
1164 	ifp->if_hardmtu = hardmtu;
1165 
1166 	TAILQ_INSERT_TAIL(&sc->sc_ports, p, p_entry);
1167 	sc->sc_nports++;
1168 
1169 	ac0->ac_trunkport = p;
1170 	/* make sure p is visible before handlers can run */
1171 	membar_producer();
1172 	ifp0->if_ioctl = aggr_p_ioctl;
1173 	ifp0->if_output = aggr_p_output;
1174 	if_ih_insert(ifp0, aggr_input, p);
1175 
1176 	aggr_mux(sc, p, LACP_MUX_E_BEGIN);
1177 	aggr_rxm(sc, p, LACP_RXM_E_BEGIN);
1178 	aggr_p_linkch(p);
1179 
1180 	return (0);
1181 
1182 unmtu:
1183 	if (aggr_p_set_mtu(p, p->p_mtu) != 0) {
1184 		log(LOG_WARNING, "%s add %s: unable to reset mtu %u\n",
1185 		    ifp->if_xname, ifp0->if_xname, p->p_mtu);
1186 	}
1187 resetlladdr:
1188 	if (aggr_p_setlladdr(p, p->p_lladdr) != 0) {
1189 		log(LOG_WARNING, "%s add %s: unable to reset lladdr\n",
1190 		    ifp->if_xname, ifp0->if_xname);
1191 	}
1192 ungroup:
1193 	if (aggr_group(sc, p, SIOCDELMULTI) != 0) {
1194 		log(LOG_WARNING, "%s add %s: "
1195 		    "unable to remove LACP group address\n",
1196 		    ifp->if_xname, ifp0->if_xname);
1197 	}
1198 free:
1199 	free(p, M_DEVBUF, sizeof(*p));
1200 put:
1201 	if_put(ifp0);
1202 	return (error);
1203 }
1204 
1205 static struct aggr_port *
1206 aggr_trunkport(struct aggr_softc *sc, const char *name)
1207 {
1208 	struct aggr_port *p;
1209 
1210 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
1211 		if (strcmp(p->p_ifp0->if_xname, name) == 0)
1212 			return (p);
1213 	}
1214 
1215 	return (NULL);
1216 }
1217 
1218 static int
1219 aggr_get_port(struct aggr_softc *sc, struct trunk_reqport *rp)
1220 {
1221 	struct aggr_port *p;
1222 
1223 	NET_ASSERT_LOCKED();
1224 	p = aggr_trunkport(sc, rp->rp_portname);
1225 	if (p == NULL)
1226 		return (EINVAL);
1227 
1228 	/* XXX */
1229 
1230 	return (0);
1231 }
1232 
1233 static int
1234 aggr_del_port(struct aggr_softc *sc, const struct trunk_reqport *rp)
1235 {
1236 	struct aggr_port *p;
1237 
1238 	NET_ASSERT_LOCKED();
1239 	p = aggr_trunkport(sc, rp->rp_portname);
1240 	if (p == NULL)
1241 		return (EINVAL);
1242 
1243 	aggr_p_dtor(sc, p, "del");
1244 
1245 	return (0);
1246 }
1247 
1248 static int
1249 aggr_p_setlladdr(struct aggr_port *p, const uint8_t *addr)
1250 {
1251 	struct ifnet *ifp0 = p->p_ifp0;
1252 	struct ifreq ifr;
1253 	struct sockaddr *sa;
1254 	int error;
1255 
1256 	memset(&ifr, 0, sizeof(ifr));
1257 
1258 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
1259 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
1260 
1261 	sa = &ifr.ifr_addr;
1262 
1263 	/* wtf is this? */
1264 	sa->sa_len = ETHER_ADDR_LEN;
1265 	sa->sa_family = AF_LINK;
1266 	CTASSERT(sizeof(sa->sa_data) >= ETHER_ADDR_LEN);
1267 	memcpy(sa->sa_data, addr, ETHER_ADDR_LEN);
1268 
1269 	error = (*p->p_ioctl)(ifp0, SIOCSIFLLADDR, (caddr_t)&ifr);
1270 	switch (error) {
1271 	case ENOTTY:
1272 	case 0:
1273 		break;
1274 	default:
1275 		return (error);
1276 	}
1277 
1278 	error = if_setlladdr(ifp0, addr);
1279 	if (error != 0)
1280 		return (error);
1281 
1282 	ifnewlladdr(ifp0);
1283 
1284 	return (0);
1285 }
1286 
1287 static int
1288 aggr_p_set_mtu(struct aggr_port *p, uint32_t mtu)
1289 {
1290 	struct ifnet *ifp0 = p->p_ifp0;
1291 	struct ifreq ifr;
1292 
1293 	memset(&ifr, 0, sizeof(ifr));
1294 
1295 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
1296 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
1297 
1298 	ifr.ifr_mtu = mtu;
1299 
1300 	return ((*p->p_ioctl)(ifp0, SIOCSIFMTU, (caddr_t)&ifr));
1301 }
1302 
1303 static int
1304 aggr_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data)
1305 {
1306 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1307 	struct aggr_port *p = ac0->ac_trunkport;
1308 	struct ifreq *ifr = (struct ifreq *)data;
1309 	int error = 0;
1310 
1311 	switch (cmd) {
1312 	case SIOCGTRUNKPORT: {
1313 		struct trunk_reqport *rp = (struct trunk_reqport *)data;
1314 		struct aggr_softc *sc = p->p_aggr;
1315 		struct ifnet *ifp = &sc->sc_if;
1316 
1317 		if (strncmp(rp->rp_ifname, rp->rp_portname,
1318 		    sizeof(rp->rp_ifname)) != 0)
1319 			return (EINVAL);
1320 
1321 		CTASSERT(sizeof(rp->rp_ifname) == sizeof(ifp->if_xname));
1322 		memcpy(rp->rp_ifname, ifp->if_xname, sizeof(rp->rp_ifname));
1323 		break;
1324 	}
1325 
1326 	case SIOCSIFMTU:
1327 		if (ifr->ifr_mtu == ifp0->if_mtu)
1328 			break; /* nop */
1329 
1330 		/* FALLTHROUGH */
1331 	case SIOCSIFLLADDR:
1332 		error = EBUSY;
1333 		break;
1334 
1335 	case SIOCSIFFLAGS:
1336 		if (!ISSET(ifp0->if_flags, IFF_UP) &&
1337 		    ISSET(ifp0->if_flags, IFF_RUNNING)) {
1338 			/* port is going down */
1339 			if (p->p_selected == AGGR_PORT_SELECTED) {
1340 				aggr_unselected(p);
1341 				aggr_ntt_transmit(p); /* XXX */
1342 			}
1343 		}
1344 		/* FALLTHROUGH */
1345 	default:
1346 		error = (*p->p_ioctl)(ifp0, cmd, data);
1347 		break;
1348 	}
1349 
1350 	return (error);
1351 }
1352 
1353 static int
1354 aggr_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst,
1355     struct rtentry *rt)
1356 {
1357 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1358 	struct aggr_port *p = ac0->ac_trunkport;
1359 
1360 	/* restrict transmission to bpf only */
1361 	if ((m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL)) {
1362 		m_freem(m);
1363 		return (EBUSY);
1364 	}
1365 
1366 	return ((*p->p_output)(ifp0, m, dst, rt));
1367 }
1368 
1369 static void
1370 aggr_p_dtor(struct aggr_softc *sc, struct aggr_port *p, const char *op)
1371 {
1372 	struct ifnet *ifp = &sc->sc_if;
1373 	struct ifnet *ifp0 = p->p_ifp0;
1374 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1375 	struct aggr_multiaddr *ma;
1376 	enum aggr_port_selected selected;
1377 	int error;
1378 
1379 	DPRINTF(sc, "%s %s %s: destroying port\n",
1380 	    ifp->if_xname, ifp0->if_xname, op);
1381 
1382 	selected = p->p_selected;
1383 	aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED);
1384 	aggr_unselected(p);
1385 	if (aggr_port_enabled(p) && selected == AGGR_PORT_SELECTED)
1386 		aggr_ntt_transmit(p);
1387 
1388 	timeout_del(&p->p_ptm_tx);
1389 	timeout_del_barrier(&p->p_txm_ntt); /* XXX */
1390 	timeout_del(&p->p_current_while_timer);
1391 	timeout_del(&p->p_wait_while_timer);
1392 
1393 	if_ih_remove(ifp0, aggr_input, p);
1394 
1395 	ac0->ac_trunkport = NULL;
1396 
1397 	ifp0->if_ioctl = p->p_ioctl;
1398 	ifp0->if_output = p->p_output;
1399 
1400 	TAILQ_REMOVE(&sc->sc_ports, p, p_entry);
1401 	sc->sc_nports--;
1402 
1403 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
1404 		error = aggr_multi(sc, p, ma, SIOCDELMULTI);
1405 		if (error != 0) {
1406 			log(LOG_WARNING, "%s %s %s: "
1407 			    "unable to remove multicast address (%d)\n",
1408 			    ifp->if_xname, op, ifp0->if_xname, error);
1409 		}
1410 	}
1411 
1412 	if (sc->sc_promisc) {
1413 		error = ifpromisc(ifp0, 0);
1414 		if (error != 0) {
1415 			log(LOG_WARNING, "%s %s %s: "
1416 			    "unable to disable promisc (%d)\n",
1417 			    ifp->if_xname, op, ifp0->if_xname, error);
1418 		}
1419 	}
1420 
1421 	error = aggr_p_set_mtu(p, p->p_mtu);
1422 	if (error != 0) {
1423 		log(LOG_WARNING, "%s %s %s: unable to restore mtu %u (%d)\n",
1424 		    ifp->if_xname, op, ifp0->if_xname, p->p_mtu, error);
1425 	}
1426 
1427 	error = aggr_p_setlladdr(p, p->p_lladdr);
1428 	if (error != 0) {
1429 		log(LOG_WARNING, "%s %s %s: unable to restore lladdr (%d)\n",
1430 		    ifp->if_xname, op, ifp0->if_xname, error);
1431 	}
1432 
1433 	error = aggr_group(sc, p, SIOCDELMULTI);
1434 	if (error != 0) {
1435 		log(LOG_WARNING, "%s %s %s: "
1436 		    "unable to remove LACP group address (%d)\n",
1437 		    ifp->if_xname, op, ifp0->if_xname, error);
1438 	}
1439 
1440 	if_detachhook_del(ifp0, &p->p_dhook);
1441 	if_linkstatehook_del(ifp0, &p->p_lhook);
1442 
1443 	if_put(ifp0);
1444 	free(p, M_DEVBUF, sizeof(*p));
1445 
1446 	/* XXX this is a pretty ugly place to update this */
1447 	ifp->if_hardmtu = aggr_hardmtu(sc);
1448 }
1449 
1450 static void
1451 aggr_p_detach(void *arg)
1452 {
1453 	struct aggr_port *p = arg;
1454 	struct aggr_softc *sc = p->p_aggr;
1455 
1456 	aggr_p_dtor(sc, p, "detach");
1457 
1458 	NET_ASSERT_LOCKED();
1459 }
1460 
1461 static void
1462 aggr_p_linkch(void *arg)
1463 {
1464 	struct aggr_port *p = arg;
1465 	struct aggr_softc *sc = p->p_aggr;
1466 
1467 	NET_ASSERT_LOCKED();
1468 
1469 	if (aggr_port_enabled(p)) {
1470 		aggr_rxm(sc, p, LACP_RXM_E_PORT_ENABLED);
1471 
1472 		if (aggr_lacp_enabled(sc)) {
1473 			timeout_add_sec(&p->p_ptm_tx,
1474 			    aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]);
1475 		}
1476 	} else {
1477 		aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED);
1478 		aggr_unselected(p);
1479 		aggr_record_default(sc, p);
1480 		timeout_del(&p->p_ptm_tx);
1481 	}
1482 }
1483 
1484 static void
1485 aggr_map(struct aggr_softc *sc)
1486 {
1487 	struct ifnet *ifp = &sc->sc_if;
1488 	struct aggr_map *map = NULL;
1489 	struct aggr_port *p;
1490 	unsigned int gen;
1491 	unsigned int i;
1492 	int link_state = LINK_STATE_DOWN;
1493 
1494 	p = TAILQ_FIRST(&sc->sc_distributing);
1495 	if (p != NULL) {
1496 		gen = sc->sc_map_gen++;
1497 		map = &sc->sc_maps[gen % nitems(sc->sc_maps)];
1498 
1499 		for (i = 0; i < nitems(map->m_ifp0s); i++) {
1500 			map->m_ifp0s[i] = p->p_ifp0;
1501 
1502 			p = TAILQ_NEXT(p, p_entry_distributing);
1503 			if (p == NULL)
1504 				p = TAILQ_FIRST(&sc->sc_distributing);
1505 		}
1506 
1507 		link_state = LINK_STATE_FULL_DUPLEX;
1508 	}
1509 
1510 	SMR_PTR_SET_LOCKED(&sc->sc_map, map);
1511 	smr_barrier();
1512 
1513 	if (ifp->if_link_state != link_state) {
1514 		ifp->if_link_state = link_state;
1515 		if_link_state_change(ifp);
1516 	}
1517 }
1518 
1519 static void
1520 aggr_current_while_timer(void *arg)
1521 {
1522 	struct aggr_port *p = arg;
1523 	struct aggr_softc *sc = p->p_aggr;
1524 
1525 	aggr_rxm(sc, p, LACP_RXM_E_TIMER_EXPIRED);
1526 }
1527 
1528 static void
1529 aggr_wait_while_timer(void *arg)
1530 {
1531 	struct aggr_port *p = arg;
1532 	struct aggr_softc *sc = p->p_aggr;
1533 
1534 	aggr_selection_logic(sc, p);
1535 }
1536 
1537 static void
1538 aggr_start_current_while_timer(struct aggr_port *p, unsigned int t)
1539 {
1540 	timeout_add_sec(&p->p_current_while_timer,
1541 		aggr_periodic_times[t] * LACP_TIMEOUT_FACTOR);
1542 }
1543 
1544 static void
1545 aggr_input_lacpdu(struct aggr_port *p, struct mbuf *m)
1546 {
1547 	struct aggr_softc *sc = p->p_aggr;
1548 	struct lacp_du *lacpdu;
1549 
1550 	if (m->m_len < sizeof(*lacpdu)) {
1551 		m = m_pullup(m, sizeof(*lacpdu));
1552 		if (m == NULL)
1553 			return;
1554 	}
1555 
1556 	/*
1557 	 * In the process of executing the recordPDU function, a Receive
1558 	 * machine compliant to this standard shall not validate the
1559 	 * Version Number, TLV_type, or Reserved fields in received
1560 	 * LACPDUs. The same actions are taken regardless of the values
1561 	 * received in these fields. A Receive machine may validate
1562 	 * the Actor_Information_Length, Partner_Information_Length,
1563 	 * Collector_Information_Length, or Terminator_Length fields.
1564 	 */
1565 
1566 	lacpdu = mtod(m, struct lacp_du *);
1567 	aggr_rxm_lacpdu(sc, p, lacpdu);
1568 
1569 	m_freem(m);
1570 }
1571 
1572 static void
1573 aggr_update_selected(struct aggr_softc *sc, struct aggr_port *p,
1574     const struct lacp_du *lacpdu)
1575 {
1576 	const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info;
1577 	const struct lacp_port_info *lpi = &p->p_partner;
1578 
1579 	if ((rpi->lacp_portid.lacp_portid_number ==
1580 	     lpi->lacp_portid.lacp_portid_number) &&
1581 	    (rpi->lacp_portid.lacp_portid_priority ==
1582 	     lpi->lacp_portid.lacp_portid_priority) &&
1583 	    ETHER_IS_EQ(rpi->lacp_sysid.lacp_sysid_mac,
1584 	     lpi->lacp_sysid.lacp_sysid_mac) &&
1585 	    (rpi->lacp_sysid.lacp_sysid_priority ==
1586 	     lpi->lacp_sysid.lacp_sysid_priority) &&
1587 	    (rpi->lacp_key == lpi->lacp_key) &&
1588 	    (ISSET(rpi->lacp_state, LACP_STATE_AGGREGATION) ==
1589 	     ISSET(lpi->lacp_state, LACP_STATE_AGGREGATION)))
1590 		return;
1591 
1592 	aggr_unselected(p);
1593 }
1594 
1595 static void
1596 aggr_record_default(struct aggr_softc *sc, struct aggr_port *p)
1597 {
1598 	struct lacp_port_info *pi = &p->p_partner;
1599 
1600 	pi->lacp_sysid.lacp_sysid_priority = htons(0);
1601 	memset(pi->lacp_sysid.lacp_sysid_mac, 0,
1602 	    sizeof(pi->lacp_sysid.lacp_sysid_mac));
1603 
1604 	pi->lacp_key = htons(0);
1605 
1606 	pi->lacp_portid.lacp_portid_priority = htons(0);
1607 	pi->lacp_portid.lacp_portid_number = htons(0);
1608 
1609 	SET(p->p_actor_state, LACP_STATE_DEFAULTED);
1610 
1611 	pi->lacp_state = LACP_STATE_AGGREGATION | LACP_STATE_SYNC;
1612 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
1613 		SET(pi->lacp_state, LACP_STATE_TIMEOUT);
1614 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
1615 		SET(pi->lacp_state, LACP_STATE_ACTIVITY);
1616 
1617 	/* notify Mux */
1618 	aggr_mux(sc, p, LACP_MUX_E_NOT_COLLECTING);
1619 	aggr_mux(sc, p, LACP_MUX_E_SYNC);
1620 }
1621 
1622 static void
1623 aggr_update_default_selected(struct aggr_softc *sc, struct aggr_port *p)
1624 {
1625 	const struct lacp_port_info *pi = &p->p_partner;
1626 
1627 	if ((pi->lacp_portid.lacp_portid_number == htons(0)) &&
1628 	    (pi->lacp_portid.lacp_portid_priority == htons(0)) &&
1629 	    ETHER_IS_ANYADDR(pi->lacp_sysid.lacp_sysid_mac) &&
1630 	    (pi->lacp_sysid.lacp_sysid_priority == htons(0)) &&
1631 	    (pi->lacp_key == htons(0)) &&
1632 	    ISSET(pi->lacp_state, LACP_STATE_AGGREGATION))
1633 		return;
1634 
1635 	aggr_unselected(p);
1636 	aggr_selection_logic(sc, p); /* restart */
1637 }
1638 
1639 static int
1640 aggr_update_ntt(struct aggr_port *p, const struct lacp_du *lacpdu)
1641 {
1642 	struct aggr_softc *sc = p->p_aggr;
1643 	struct arpcom *ac = &sc->sc_ac;
1644 	struct ifnet *ifp = &ac->ac_if;
1645 	struct ifnet *ifp0 = p->p_ifp0;
1646 	const struct lacp_port_info *pi = &lacpdu->lacp_partner_info;
1647 	uint8_t bits = LACP_STATE_ACTIVITY | LACP_STATE_TIMEOUT |
1648 	    LACP_STATE_SYNC | LACP_STATE_AGGREGATION;
1649 	uint8_t state = p->p_actor_state;
1650 	int sync = 0;
1651 
1652 	if (pi->lacp_portid.lacp_portid_number != htons(ifp0->if_index))
1653 		goto ntt;
1654 	if (pi->lacp_portid.lacp_portid_priority !=
1655 	    htons(sc->sc_lacp_port_prio))
1656 		goto ntt;
1657 	if (!ETHER_IS_EQ(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr))
1658 		goto ntt;
1659 	if (pi->lacp_sysid.lacp_sysid_priority !=
1660 	    htons(sc->sc_lacp_prio))
1661 		goto ntt;
1662 	if (pi->lacp_key != htons(ifp->if_index))
1663 		goto ntt;
1664 	if (ISSET(pi->lacp_state, LACP_STATE_SYNC) !=
1665 	    ISSET(state, LACP_STATE_SYNC))
1666 		goto ntt;
1667 	sync = 1;
1668 
1669 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
1670 		SET(state, LACP_STATE_TIMEOUT);
1671 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
1672 		SET(state, LACP_STATE_ACTIVITY);
1673 
1674 	if (ISSET(pi->lacp_state, bits) != ISSET(state, bits))
1675 		goto ntt;
1676 
1677 	return (1);
1678 
1679 ntt:
1680 	aggr_ntt(p);
1681 
1682 	return (sync);
1683 }
1684 
1685 static void
1686 aggr_recordpdu(struct aggr_port *p, const struct lacp_du *lacpdu, int sync)
1687 {
1688 	struct aggr_softc *sc = p->p_aggr;
1689 	const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info;
1690 	struct lacp_port_info *lpi = &p->p_partner;
1691 	int active = ISSET(rpi->lacp_state, LACP_STATE_ACTIVITY) ||
1692 	    (ISSET(p->p_actor_state, LACP_STATE_ACTIVITY) &&
1693 	     ISSET(lacpdu->lacp_partner_info.lacp_state, LACP_STATE_ACTIVITY));
1694 
1695 	lpi->lacp_portid.lacp_portid_number =
1696 	    rpi->lacp_portid.lacp_portid_number;
1697 	lpi->lacp_portid.lacp_portid_priority =
1698 	    rpi->lacp_portid.lacp_portid_priority;
1699 	memcpy(lpi->lacp_sysid.lacp_sysid_mac,
1700 	    rpi->lacp_sysid.lacp_sysid_mac,
1701 	    sizeof(lpi->lacp_sysid.lacp_sysid_mac));
1702 	lpi->lacp_sysid.lacp_sysid_priority =
1703 	    rpi->lacp_sysid.lacp_sysid_priority;
1704 	lpi->lacp_key = rpi->lacp_key;
1705 	lpi->lacp_state = rpi->lacp_state & ~LACP_STATE_SYNC;
1706 
1707 	CLR(p->p_actor_state, LACP_STATE_DEFAULTED);
1708 
1709 	if (active && ISSET(rpi->lacp_state, LACP_STATE_SYNC) && sync) {
1710 		SET(p->p_partner_state, LACP_STATE_SYNC);
1711 		aggr_mux(sc, p, LACP_MUX_E_SYNC);
1712 	} else {
1713 		CLR(p->p_partner_state, LACP_STATE_SYNC);
1714 		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
1715 	}
1716 }
1717 
1718 static void
1719 aggr_marker_response(struct aggr_port *p, struct mbuf *m)
1720 {
1721 	struct aggr_softc *sc = p->p_aggr;
1722 	struct arpcom *ac = &sc->sc_ac;
1723 	struct ifnet *ifp0 = p->p_ifp0;
1724 	struct marker_pdu *mpdu;
1725 	struct ether_header *eh;
1726 
1727 	mpdu = mtod(m, struct marker_pdu *);
1728 	mpdu->marker_info_tlv.lacp_tlv_type = MARKER_T_RESPONSE;
1729 
1730 	m = m_prepend(m, sizeof(*eh), M_DONTWAIT);
1731 	if (m == NULL)
1732 		return;
1733 
1734 	eh = mtod(m, struct ether_header *);
1735 	memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost));
1736 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
1737 	eh->ether_type = htons(ETHERTYPE_SLOW);
1738 
1739 	(void)if_enqueue(ifp0, m);
1740 }
1741 
1742 static void
1743 aggr_input_marker(struct aggr_port *p, struct mbuf *m)
1744 {
1745 	struct marker_pdu *mpdu;
1746 
1747 	if (m->m_len < sizeof(*mpdu)) {
1748 		m = m_pullup(m, sizeof(*mpdu));
1749 		if (m == NULL)
1750 			return;
1751 	}
1752 
1753 	mpdu = mtod(m, struct marker_pdu *);
1754 	switch (mpdu->marker_info_tlv.lacp_tlv_type) {
1755 	case MARKER_T_INFORMATION:
1756 		aggr_marker_response(p, m);
1757 		break;
1758 	default:
1759 		m_freem(m);
1760 		break;
1761 	}
1762 }
1763 
1764 static void
1765 aggr_rx(void *arg)
1766 {
1767 	struct aggr_port *p = arg;
1768 	struct mbuf_list ml;
1769 	struct mbuf *m;
1770 
1771 	mq_delist(&p->p_rxm_mq, &ml);
1772 
1773 	while ((m = ml_dequeue(&ml)) != NULL) {
1774 		struct ether_slowproto_hdr *sph;
1775 
1776 		/* aggr_input has checked eh already */
1777 		m_adj(m, sizeof(struct ether_header));
1778 
1779 		sph = mtod(m, struct ether_slowproto_hdr *);
1780 		switch (sph->sph_subtype) {
1781 		case SLOWPROTOCOLS_SUBTYPE_LACP:
1782 			aggr_input_lacpdu(p, m);
1783 			break;
1784 		case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER:
1785 			aggr_input_marker(p, m);
1786 			break;
1787 		default:
1788 			panic("unexpected slow protocol subtype");
1789 			/* NOTREACHED */
1790 		}
1791 	}
1792 }
1793 
1794 static void
1795 aggr_set_selected(struct aggr_port *p, enum aggr_port_selected s,
1796     enum lacp_mux_event ev)
1797 {
1798 	struct aggr_softc *sc = p->p_aggr;
1799 
1800 	if (p->p_selected != s) {
1801 		DPRINTF(sc, "%s %s: Selected %s -> %s\n",
1802 		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
1803 		    aggr_port_selected_names[p->p_selected],
1804 		    aggr_port_selected_names[s]);
1805 		p->p_selected = s;
1806 	}
1807 	aggr_mux(sc, p, ev);
1808 }
1809 
1810 static void
1811 aggr_unselected(struct aggr_port *p)
1812 {
1813 	aggr_set_selected(p, AGGR_PORT_UNSELECTED, LACP_MUX_E_UNSELECTED);
1814 }
1815 
1816 static inline void
1817 aggr_selected(struct aggr_port *p)
1818 {
1819 	aggr_set_selected(p, AGGR_PORT_SELECTED, LACP_MUX_E_SELECTED);
1820 }
1821 
#ifdef notyet
/*
 * Mark port p as standby and send the Mux machine the matching
 * STANDBY event. Only compiled when notyet is defined.
 */
static inline void
aggr_standby(struct aggr_port *p)
{
	const enum aggr_port_selected s = AGGR_PORT_STANDBY;

	aggr_set_selected(p, s, LACP_MUX_E_STANDBY);
}
#endif
1829 
1830 static void
1831 aggr_selection_logic(struct aggr_softc *sc, struct aggr_port *p)
1832 {
1833 	const struct lacp_port_info *pi;
1834 	struct arpcom *ac = &sc->sc_ac;
1835 	struct ifnet *ifp = &ac->ac_if;
1836 	const uint8_t *mac;
1837 
1838 	if (p->p_rxm_state != LACP_RXM_S_CURRENT) {
1839 		DPRINTF(sc, "%s %s: selection logic: unselected (rxm !%s)\n",
1840 		    ifp->if_xname, p->p_ifp0->if_xname,
1841 		    lacp_rxm_state_names[LACP_RXM_S_CURRENT]);
1842 		goto unselected;
1843 	}
1844 
1845 	pi = &p->p_partner;
1846 	if (pi->lacp_key == htons(0)) {
1847 		DPRINTF(sc, "%s %s: selection logic: unselected "
1848 		    "(partner key == 0)\n",
1849 		    ifp->if_xname, p->p_ifp0->if_xname);
1850 		goto unselected;
1851 	}
1852 
1853 	/*
1854 	 * aggr(4) does not support individual interfaces
1855 	 */
1856 	if (!ISSET(pi->lacp_state, LACP_STATE_AGGREGATION)) {
1857 		DPRINTF(sc, "%s %s: selection logic: unselected "
1858 		    "(partner state is Individual)\n",
1859 		    ifp->if_xname, p->p_ifp0->if_xname);
1860 		goto unselected;
1861 	}
1862 
1863 	/*
1864 	 * Any pair of Aggregation Ports that are members of the same
1865 	 * LAG, but are connected together by the same link, shall not
1866 	 * select the same Aggregator
1867 	 */
1868 
1869 	mac = pi->lacp_sysid.lacp_sysid_mac;
1870 	if (ETHER_IS_EQ(mac, ac->ac_enaddr) &&
1871 	    pi->lacp_key == htons(ifp->if_index)) {
1872 		DPRINTF(sc, "%s %s: selection logic: unselected "
1873 		    "(partner sysid !eq)\n",
1874 		    ifp->if_xname, p->p_ifp0->if_xname);
1875 		goto unselected;
1876 	}
1877 
1878 	if (!TAILQ_EMPTY(&sc->sc_muxen)) {
1879 		/* an aggregation has already been selected */
1880 		if (!ETHER_IS_EQ(mac, sc->sc_partner_system.lacp_sysid_mac) ||
1881 		    sc->sc_partner_key != pi->lacp_key) {
1882 			DPRINTF(sc, "%s %s: selection logic: unselected "
1883 			    "(partner sysid != selection)\n",
1884 			    ifp->if_xname, p->p_ifp0->if_xname);
1885 			goto unselected;
1886 		}
1887 	}
1888 
1889 	aggr_selected(p);
1890 	return;
1891 
1892 unselected:
1893 	aggr_unselected(p);
1894 }
1895 
1896 static void
1897 aggr_mux(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev)
1898 {
1899 	int ntt = 0;
1900 
1901 	/*
1902 	 * the mux can move through multiple states based on a
1903 	 * single event, so loop until the event is completely consumed.
1904 	 * debounce NTT = TRUE through the multiple state transitions.
1905 	 */
1906 
1907 	while (aggr_mux_ev(sc, p, ev, &ntt) != 0)
1908 		;
1909 
1910 	if (ntt)
1911 		aggr_ntt(p);
1912 }
1913 
#ifdef notyet
/*
 * Returns 1 if port p's Mux machine is in the WAITING state and its
 * wait while timer is no longer pending, 0 otherwise. Only compiled
 * when notyet is defined.
 */
static int
aggr_ready_n(struct aggr_port *p)
{
	if (p->p_mux_state != LACP_MUX_S_WAITING)
		return (0);

	return (!timeout_pending(&p->p_wait_while_timer));
}
#endif
1922 
/*
 * Ready check used by the Mux machine in its WAITING state. It does
 * not look at sc and always reports ready.
 */
static inline int
aggr_ready(struct aggr_softc *sc)
{
	(void)sc;

	return (1);
}
1928 
1929 static void
1930 aggr_disable_distributing(struct aggr_softc *sc, struct aggr_port *p)
1931 {
1932 	if (!p->p_distributing)
1933 		return;
1934 
1935 	sc->sc_ndistributing--;
1936 	TAILQ_REMOVE(&sc->sc_distributing, p, p_entry_distributing);
1937 	p->p_distributing = 0;
1938 
1939 	aggr_map(sc);
1940 
1941 	DPRINTF(sc, "%s %s: distributing disabled\n",
1942 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1943 }
1944 
1945 static void
1946 aggr_enable_distributing(struct aggr_softc *sc, struct aggr_port *p)
1947 {
1948 	if (p->p_distributing)
1949 		return;
1950 
1951 	/* check the LAG ID? */
1952 
1953 	p->p_distributing = 1;
1954 	TAILQ_INSERT_TAIL(&sc->sc_distributing, p, p_entry_distributing);
1955 	sc->sc_ndistributing++;
1956 
1957 	aggr_map(sc);
1958 
1959 	DPRINTF(sc, "%s %s: distributing enabled\n",
1960 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1961 }
1962 
1963 static void
1964 aggr_disable_collecting(struct aggr_softc *sc, struct aggr_port *p)
1965 {
1966 	if (!p->p_collecting)
1967 		return;
1968 
1969 	p->p_collecting = 0;
1970 
1971 	DPRINTF(sc, "%s %s: collecting disabled\n",
1972 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1973 }
1974 
1975 static void
1976 aggr_enable_collecting(struct aggr_softc *sc, struct aggr_port *p)
1977 {
1978 	if (p->p_collecting)
1979 		return;
1980 
1981 	p->p_collecting = 1;
1982 
1983 	DPRINTF(sc, "%s %s: collecting enabled\n",
1984 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1985 }
1986 
1987 static void
1988 aggr_attach_mux(struct aggr_softc *sc, struct aggr_port *p)
1989 {
1990 	const struct lacp_port_info *pi = &p->p_partner;
1991 
1992 	if (p->p_muxed)
1993 		return;
1994 
1995 	p->p_muxed = 1;
1996 	if (TAILQ_EMPTY(&sc->sc_muxen)) {
1997 		KASSERT(sc->sc_partner_key == htons(0));
1998 		sc->sc_partner_system = pi->lacp_sysid;
1999 		sc->sc_partner_key = pi->lacp_key;
2000 	}
2001 
2002 	TAILQ_INSERT_TAIL(&sc->sc_muxen, p, p_entry_muxen);
2003 
2004 	DPRINTF(sc, "%s %s: mux attached\n",
2005 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
2006 }
2007 
2008 static void
2009 aggr_detach_mux(struct aggr_softc *sc, struct aggr_port *p)
2010 {
2011 	if (!p->p_muxed)
2012 		return;
2013 
2014 	p->p_muxed = 0;
2015 
2016 	TAILQ_REMOVE(&sc->sc_muxen, p, p_entry_muxen);
2017 	if (TAILQ_EMPTY(&sc->sc_muxen)) {
2018 		memset(&sc->sc_partner_system.lacp_sysid_mac, 0,
2019 		    sizeof(sc->sc_partner_system.lacp_sysid_mac));
2020 		sc->sc_partner_system.lacp_sysid_priority = htons(0);
2021 		sc->sc_partner_key = htons(0);
2022 	}
2023 
2024 	DPRINTF(sc, "%s %s: mux detached\n",
2025 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
2026 }
2027 
2028 static int
2029 aggr_mux_ev(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev,
2030     int *ntt)
2031 {
2032 	enum lacp_mux_state nstate = LACP_MUX_S_DETACHED;
2033 
2034 	switch (p->p_mux_state) {
2035 	case LACP_MUX_S_BEGIN:
2036 		KASSERT(ev == LACP_MUX_E_BEGIN);
2037 		nstate = LACP_MUX_S_DETACHED;
2038 		break;
2039 	case LACP_MUX_S_DETACHED:
2040 		switch (ev) {
2041 		case LACP_MUX_E_SELECTED:
2042 		case LACP_MUX_E_STANDBY:
2043 			nstate = LACP_MUX_S_WAITING;
2044 			break;
2045 		default:
2046 			return (0);
2047 		}
2048 		break;
2049 	case LACP_MUX_S_WAITING:
2050 		switch (ev) {
2051 		case LACP_MUX_E_UNSELECTED:
2052 			nstate = LACP_MUX_S_DETACHED;
2053 			break;
2054 		case LACP_MUX_E_SELECTED:
2055 		case LACP_MUX_E_READY:
2056 			if (aggr_ready(sc) &&
2057 			    p->p_selected == AGGR_PORT_SELECTED) {
2058 				nstate = LACP_MUX_S_ATTACHED;
2059 				break;
2060 			}
2061 			/* FALLTHROUGH */
2062 		default:
2063 			return (0);
2064 		}
2065 		break;
2066 	case LACP_MUX_S_ATTACHED:
2067 		switch (ev) {
2068 		case LACP_MUX_E_UNSELECTED:
2069 		case LACP_MUX_E_STANDBY:
2070 			nstate = LACP_MUX_S_DETACHED;
2071 			break;
2072 		case LACP_MUX_E_SELECTED:
2073 		case LACP_MUX_E_SYNC:
2074 			if (p->p_selected == AGGR_PORT_SELECTED &&
2075 			    ISSET(p->p_partner_state, LACP_STATE_SYNC)) {
2076 				nstate = LACP_MUX_S_COLLECTING;
2077 				break;
2078 			}
2079 			/* FALLTHROUGH */
2080 		default:
2081 			return (0);
2082 		}
2083 		break;
2084 	case LACP_MUX_S_COLLECTING:
2085 		switch (ev) {
2086 		case LACP_MUX_E_UNSELECTED:
2087 		case LACP_MUX_E_STANDBY:
2088 		case LACP_MUX_E_NOT_SYNC:
2089 			nstate = LACP_MUX_S_ATTACHED;
2090 			break;
2091 		case LACP_MUX_E_SELECTED:
2092 		case LACP_MUX_E_SYNC:
2093 		case LACP_MUX_E_COLLECTING:
2094 			if (p->p_selected == AGGR_PORT_SELECTED &&
2095 			    ISSET(p->p_partner_state, LACP_STATE_SYNC) &&
2096 			    ISSET(p->p_partner_state, LACP_STATE_COLLECTING)) {
2097 				nstate = LACP_MUX_S_DISTRIBUTING;
2098 				break;
2099 			}
2100 			/* FALLTHROUGH */
2101 		default:
2102 			return (0);
2103 		}
2104 		break;
2105 	case LACP_MUX_S_DISTRIBUTING:
2106 		switch (ev) {
2107 		case LACP_MUX_E_UNSELECTED:
2108 		case LACP_MUX_E_STANDBY:
2109 		case LACP_MUX_E_NOT_SYNC:
2110 		case LACP_MUX_E_NOT_COLLECTING:
2111 			nstate = LACP_MUX_S_COLLECTING;
2112 			break;
2113 		default:
2114 			return (0);
2115 		}
2116 		break;
2117 	}
2118 
2119 	DPRINTF(sc, "%s %s mux: %s (%s) -> %s\n",
2120 	    sc->sc_if.if_xname, p->p_ifp0->if_xname,
2121 	    lacp_mux_state_names[p->p_mux_state], lacp_mux_event_names[ev],
2122 	    lacp_mux_state_names[nstate]);
2123 
2124 	/* act on the new state */
2125 	switch (nstate) {
2126 	case LACP_MUX_S_BEGIN:
2127 		panic("unexpected mux nstate BEGIN");
2128 		/* NOTREACHED */
2129 	case LACP_MUX_S_DETACHED:
2130 		/*
2131 		 * Detach_Mux_From_Aggregator();
2132 		 * Actor.Sync = FALSE;
2133 		 * Disable_Distributing();
2134 		 * Actor.Distributing = FALSE;
2135 		 * Actor.Collecting = FALSE;
2136 		 * Disable_Collecting();
2137 		 * NTT = TRUE;
2138 		 */
2139 		aggr_detach_mux(sc, p);
2140 		CLR(p->p_actor_state, LACP_STATE_SYNC);
2141 		aggr_disable_distributing(sc, p);
2142 		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
2143 		CLR(p->p_actor_state, LACP_STATE_COLLECTING);
2144 		aggr_disable_collecting(sc, p);
2145 		*ntt = 1;
2146 		break;
2147 	case LACP_MUX_S_WAITING:
2148 		/*
2149 		 * Start wait_while_timer
2150 		 */
2151 		timeout_add_sec(&p->p_wait_while_timer,
2152 		    LACP_AGGREGATION_WAIT_TIME);
2153 		break;
2154 	case LACP_MUX_S_ATTACHED:
2155 		/*
2156 		 * Attach_Mux_To_Aggregator();
2157 		 * Actor.Sync = TRUE;
2158 		 * Actor.Collecting = FALSE;
2159 		 * Disable_Collecting();
2160 		 * NTT = TRUE;
2161 		 */
2162 		aggr_attach_mux(sc, p);
2163 		SET(p->p_actor_state, LACP_STATE_SYNC);
2164 		CLR(p->p_actor_state, LACP_STATE_COLLECTING);
2165 		aggr_disable_collecting(sc, p);
2166 		*ntt = 1;
2167 		break;
2168 
2169 	case LACP_MUX_S_COLLECTING:
2170 		/*
2171 		 * Enable_Collecting();
2172 		 * Actor.Collecting = TRUE;
2173 		 * Disable_Distributing();
2174 		 * Actor.Distributing = FALSE;
2175 		 * NTT = TRUE;
2176 		 */
2177 		aggr_enable_collecting(sc, p);
2178 		SET(p->p_actor_state, LACP_STATE_COLLECTING);
2179 		aggr_disable_distributing(sc, p);
2180 		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
2181 		*ntt = 1;
2182 		break;
2183 	case LACP_MUX_S_DISTRIBUTING:
2184 		/*
2185 		 * Actor.Distributing = TRUE;
2186 		 * Enable_Distributing();
2187 		 */
2188 		SET(p->p_actor_state, LACP_STATE_DISTRIBUTING);
2189 		aggr_enable_distributing(sc, p);
2190 		break;
2191 	}
2192 
2193 	p->p_mux_state = nstate;
2194 
2195 	return (1);
2196 }
2197 
2198 static void
2199 aggr_rxm_ev(struct aggr_softc *sc, struct aggr_port *p,
2200     enum lacp_rxm_event ev, const struct lacp_du *lacpdu)
2201 {
2202 	unsigned int port_disabled = 0;
2203 	enum lacp_rxm_state nstate = LACP_RXM_S_BEGIN;
2204 
2205 	KASSERT((ev == LACP_RXM_E_LACPDU) == (lacpdu != NULL));
2206 
2207 	/* global transitions */
2208 
2209 	switch (ev) {
2210 	case LACP_RXM_E_NOT_PORT_ENABLED:
2211 		port_disabled = !aggr_port_moved(sc, p);
2212 		break;
2213 	case LACP_RXM_E_NOT_PORT_MOVED:
2214 		port_disabled = !aggr_port_enabled(p);
2215 		break;
2216 	default:
2217 		break;
2218 	}
2219 
2220 	if (port_disabled)
2221 		nstate = LACP_RXM_S_PORT_DISABLED;
2222 	else switch (p->p_rxm_state) { /* local state transitions */
2223 	case LACP_RXM_S_BEGIN:
2224 		KASSERT(ev == LACP_RXM_E_BEGIN);
2225 		nstate = LACP_RXM_S_INITIALIZE;
2226 		break;
2227 	case LACP_RXM_S_INITIALIZE:
2228 		/* this should only be handled via UCT in nstate handling */
2229 		panic("unexpected rxm state INITIALIZE");
2230 
2231 	case LACP_RXM_S_PORT_DISABLED:
2232 		switch (ev) {
2233 		case LACP_RXM_E_PORT_MOVED:
2234 			nstate = LACP_RXM_S_INITIALIZE;
2235 			break;
2236 		case LACP_RXM_E_PORT_ENABLED:
2237 			nstate = aggr_lacp_enabled(sc) ?
2238 			    LACP_RXM_S_EXPIRED : LACP_RXM_S_LACP_DISABLED;
2239 			break;
2240 		case LACP_RXM_E_LACP_ENABLED:
2241 			if (!aggr_port_enabled(p))
2242 				return;
2243 			nstate = LACP_RXM_S_EXPIRED;
2244 			break;
2245 		case LACP_RXM_E_NOT_LACP_ENABLED:
2246 			if (!aggr_port_enabled(p))
2247 				return;
2248 			nstate = LACP_RXM_S_LACP_DISABLED;
2249 			break;
2250 		default:
2251 			return;
2252 		}
2253 		break;
2254 	case LACP_RXM_S_EXPIRED:
2255 		switch (ev) {
2256 		case LACP_RXM_E_LACPDU:
2257 			nstate = LACP_RXM_S_CURRENT;
2258 			break;
2259 		case LACP_RXM_E_TIMER_EXPIRED:
2260 			nstate = LACP_RXM_S_DEFAULTED;
2261 			break;
2262 		default:
2263 			return;
2264 		}
2265 		break;
2266 	case LACP_RXM_S_LACP_DISABLED:
2267 		switch (ev) {
2268 		case LACP_RXM_E_LACP_ENABLED:
2269 			nstate = LACP_RXM_S_PORT_DISABLED;
2270 			break;
2271 		default:
2272 			return;
2273 		}
2274 		break;
2275 	case LACP_RXM_S_DEFAULTED:
2276 		switch (ev) {
2277 		case LACP_RXM_E_LACPDU:
2278 			nstate = LACP_RXM_S_CURRENT;
2279 			break;
2280 		default:
2281 			return;
2282 		}
2283 		break;
2284 	case LACP_RXM_S_CURRENT:
2285 		switch (ev) {
2286 		case LACP_RXM_E_TIMER_EXPIRED:
2287 			nstate = LACP_RXM_S_EXPIRED;
2288 			break;
2289 		case LACP_RXM_E_LACPDU:
2290 			nstate = LACP_RXM_S_CURRENT;
2291 			break;
2292 		default:
2293 			return;
2294 		}
2295 		break;
2296 	}
2297 
2298 uct:
2299 	if (p->p_rxm_state != nstate) {
2300 		DPRINTF(sc, "%s %s rxm: %s (%s) -> %s\n",
2301 		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
2302 		    lacp_rxm_state_names[p->p_rxm_state],
2303 		    lacp_rxm_event_names[ev],
2304 		    lacp_rxm_state_names[nstate]);
2305 	}
2306 
2307 	/* record the new state */
2308 	p->p_rxm_state = nstate;
2309 
2310 	/* act on the new state */
2311 	switch (nstate) {
2312 	case LACP_RXM_S_BEGIN:
2313 		panic("unexpected rxm nstate BEGIN");
2314 		/* NOTREACHED */
2315 	case LACP_RXM_S_INITIALIZE:
2316 		/*
2317 		 * Selected = UNSELECTED;
2318 		 * recordDefault();
2319 		 * Actor_Oper_Port_State.Expired = FALSE;
2320 		 * port_moved = FALSE;
2321 		 */
2322 		aggr_unselected(p);
2323 		aggr_record_default(sc, p);
2324 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2325 
2326 		ev = LACP_RXM_E_UCT;
2327 		nstate = LACP_RXM_S_PORT_DISABLED;
2328 		goto uct;
2329 		/* NOTREACHED */
2330 	case LACP_RXM_S_PORT_DISABLED:
2331 		/*
2332 		 * Partner_Oper_Port_State.Synchronization = FALSE;
2333 		 */
2334 		CLR(p->p_partner_state, LACP_STATE_SYNC);
2335 		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
2336 		break;
2337 	case LACP_RXM_S_EXPIRED:
2338 		/*
2339 		 * Partner_Oper_Port_State.Synchronization = FALSE;
2340 		 * Partner_Oper_Port_State.LACP_Timeout = Short Timeout;
2341 		 * start current_while_timer(Short Timeout);
2342 		 * Actor_Oper_Port_State.Expired = TRUE;
2343 		 */
2344 
2345 		CLR(p->p_partner_state, LACP_STATE_SYNC);
2346 		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
2347 		aggr_set_partner_timeout(p, AGGR_LACP_TIMEOUT_FAST);
2348 		aggr_start_current_while_timer(p, AGGR_LACP_TIMEOUT_FAST);
2349 		SET(p->p_actor_state, LACP_STATE_EXPIRED);
2350 
2351 		break;
2352 	case LACP_RXM_S_LACP_DISABLED:
2353 		/*
2354 		 * Selected = UNSELECTED;
2355 		 * recordDefault();
2356 		 * Partner_Oper_Port_State.Aggregation = FALSE;
2357 		 * Actor_Oper_Port_State.Expired = FALSE;
2358 		 */
2359 		aggr_unselected(p);
2360 		aggr_record_default(sc, p);
2361 		CLR(p->p_partner_state, LACP_STATE_AGGREGATION);
2362 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2363 		break;
2364 	case LACP_RXM_S_DEFAULTED:
2365 		/*
2366 		 * update_Default_Selected();
2367 		 * recordDefault();
2368 		 * Actor_Oper_Port_State.Expired = FALSE;
2369 		 */
2370 		aggr_update_default_selected(sc, p);
2371 		aggr_record_default(sc, p);
2372 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2373 		break;
2374 	case LACP_RXM_S_CURRENT: {
2375 		/*
2376 		 * update_Selected();
2377 		 * update_NTT();
2378 		 * if (Actor_System_LACP_Version >=2 ) recordVersionNumber();
2379 		 * recordPDU();
2380 		 * start current_while_timer(
2381 		 *     Actor_Oper_Port_State.LACP_Timeout);
2382 		 * Actor_Oper_Port_State.Expired = FALSE;
2383 		 */
2384 		int sync;
2385 
2386 		aggr_update_selected(sc, p, lacpdu);
2387 		sync = aggr_update_ntt(p, lacpdu);
2388 		/* don't support v2 yet */
2389 		aggr_recordpdu(p, lacpdu, sync);
2390 		aggr_start_current_while_timer(p, sc->sc_lacp_timeout);
2391 		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
2392 
2393 		if (p->p_selected == AGGR_PORT_UNSELECTED)
2394 			aggr_selection_logic(sc, p); /* restart */
2395 
2396 		}
2397 		break;
2398 	}
2399 }
2400 
2401 static int
2402 aggr_up(struct aggr_softc *sc)
2403 {
2404 	struct ifnet *ifp = &sc->sc_if;
2405 	struct aggr_port *p;
2406 
2407 	NET_ASSERT_LOCKED();
2408 	KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
2409 
2410 	SET(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = TRUE */
2411 
2412 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2413 		aggr_rxm(sc, p, LACP_RXM_E_LACP_ENABLED);
2414 		aggr_p_linkch(p);
2415 	}
2416 
2417 	/* start the Periodic Transmission machine */
2418 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) {
2419 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2420 			if (!aggr_port_enabled(p))
2421 				continue;
2422 
2423 			timeout_add_sec(&p->p_ptm_tx,
2424 			    aggr_periodic_times[sc->sc_lacp_timeout]);
2425 		}
2426 	}
2427 
2428 	return (ENETRESET);
2429 }
2430 
2431 static int
2432 aggr_iff(struct aggr_softc *sc)
2433 {
2434 	struct ifnet *ifp = &sc->sc_if;
2435 	unsigned int promisc = ISSET(ifp->if_flags, IFF_PROMISC);
2436 
2437 	NET_ASSERT_LOCKED();
2438 
2439 	if (promisc != sc->sc_promisc) {
2440 		struct aggr_port *p;
2441 
2442 		rw_enter_read(&sc->sc_lock);
2443 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2444 			struct ifnet *ifp0 = p->p_ifp0;
2445 			if (ifpromisc(ifp0, promisc) != 0) {
2446 				log(LOG_WARNING, "%s iff %s: "
2447 				    "unable to turn promisc %s\n",
2448 				    ifp->if_xname, ifp0->if_xname,
2449 				    promisc ? "on" : "off");
2450 			}
2451 		}
2452 		rw_exit_read(&sc->sc_lock);
2453 
2454 		sc->sc_promisc = promisc;
2455 	}
2456 
2457 	return (0);
2458 }
2459 
2460 static int
2461 aggr_down(struct aggr_softc *sc)
2462 {
2463 	struct ifnet *ifp = &sc->sc_if;
2464 	struct aggr_port *p;
2465 
2466 	NET_ASSERT_LOCKED();
2467 	CLR(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = FALSE */
2468 
2469 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2470 		aggr_rxm(sc, p, LACP_RXM_E_NOT_LACP_ENABLED);
2471 
2472 		/* stop the Periodic Transmission machine */
2473 		timeout_del(&p->p_ptm_tx);
2474 
2475 		/* stop the Mux machine */
2476 		aggr_mux(sc, p, LACP_MUX_E_UNSELECTED);
2477 
2478 		/* stop the Transmit machine */
2479 		timeout_del(&p->p_txm_ntt);
2480 	}
2481 
2482 	KASSERT(TAILQ_EMPTY(&sc->sc_distributing));
2483 	KASSERT(sc->sc_ndistributing == 0);
2484 	KASSERT(SMR_PTR_GET_LOCKED(&sc->sc_map) == NULL);
2485 
2486 	return (ENETRESET);
2487 }
2488 
2489 static int
2490 aggr_set_lladdr(struct aggr_softc *sc, const struct ifreq *ifr)
2491 {
2492 	struct ifnet *ifp = &sc->sc_if;
2493 	struct aggr_port *p;
2494 	const uint8_t *lladdr = ifr->ifr_addr.sa_data;
2495 
2496 	rw_enter_read(&sc->sc_lock);
2497 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2498 		if (aggr_p_setlladdr(p, lladdr) != 0) {
2499 			struct ifnet *ifp0 = p->p_ifp0;
2500 			log(LOG_WARNING, "%s setlladdr %s: "
2501 			    "unable to set lladdr\n",
2502 			    ifp->if_xname, ifp0->if_xname);
2503 		}
2504 	}
2505 	rw_exit_read(&sc->sc_lock);
2506 
2507 	return (0);
2508 }
2509 
2510 static int
2511 aggr_set_mtu(struct aggr_softc *sc, uint32_t mtu)
2512 {
2513 	struct ifnet *ifp = &sc->sc_if;
2514 	struct aggr_port *p;
2515 
2516 	if (mtu < ETHERMIN || mtu > ifp->if_hardmtu)
2517 		return (EINVAL);
2518 
2519 	ifp->if_mtu = mtu;
2520 
2521 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2522 		if (aggr_p_set_mtu(p, mtu) != 0) {
2523 			struct ifnet *ifp0 = p->p_ifp0;
2524 			log(LOG_WARNING, "%s %s: unable to set mtu %u\n",
2525 			    ifp->if_xname, ifp0->if_xname, mtu);
2526 		}
2527 	}
2528 
2529 	return (0);
2530 }
2531 
2532 static int
2533 aggr_group(struct aggr_softc *sc, struct aggr_port *p, u_long cmd)
2534 {
2535 	struct ifnet *ifp0 = p->p_ifp0;
2536 	struct ifreq ifr;
2537 	struct sockaddr *sa;
2538 
2539 	memset(&ifr, 0, sizeof(ifr));
2540 
2541 	/* make it convincing */
2542 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
2543 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
2544 
2545 	sa = &ifr.ifr_addr;
2546 	CTASSERT(sizeof(sa->sa_data) >= sizeof(lacp_address_slow));
2547 
2548 	sa->sa_family = AF_UNSPEC;
2549 	memcpy(sa->sa_data, lacp_address_slow, sizeof(lacp_address_slow));
2550 
2551 	return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr));
2552 }
2553 
2554 static int
2555 aggr_multi(struct aggr_softc *sc, struct aggr_port *p,
2556     const struct aggr_multiaddr *ma, u_long cmd)
2557 {
2558 	struct ifnet *ifp0 = p->p_ifp0;
2559 	struct {
2560 		char			if_name[IFNAMSIZ];
2561 		struct sockaddr_storage if_addr;
2562 	} ifr;
2563 
2564 	memset(&ifr, 0, sizeof(ifr));
2565 
2566 	/* make it convincing */
2567 	CTASSERT(sizeof(ifr.if_name) == sizeof(ifp0->if_xname));
2568 	memcpy(ifr.if_name, ifp0->if_xname, sizeof(ifr.if_name));
2569 
2570 	ifr.if_addr = ma->m_addr;
2571 
2572 	return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr));
2573 }
2574 
2575 static void
2576 aggr_media_status(struct ifnet *ifp, struct ifmediareq *imr)
2577 {
2578 	struct aggr_softc *sc = ifp->if_softc;
2579 
2580 	imr->ifm_status = IFM_AVALID;
2581 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
2582 
2583 	smr_read_enter(); /* there's no reason to block... */
2584 	if (SMR_PTR_GET(&sc->sc_map) != NULL)
2585 		imr->ifm_status |= IFM_ACTIVE;
2586 	smr_read_leave();
2587 }
2588 
/*
 * Media change handler: changing the media of the aggr interface is not
 * supported, so every request is rejected with EOPNOTSUPP.
 */
static int
aggr_media_change(struct ifnet *ifp)
{
	const int error = EOPNOTSUPP;

	return (error);
}
2594 
2595 static uint32_t
2596 aggr_hardmtu(struct aggr_softc *sc)
2597 {
2598 	struct aggr_port *p;
2599 	uint32_t hardmtu = ETHER_MAX_HARDMTU_LEN;
2600 
2601 	rw_enter_read(&sc->sc_lock);
2602 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2603 		struct ifnet *ifp0 = p->p_ifp0;
2604 
2605 		if (ifp0->if_hardmtu < hardmtu)
2606 			hardmtu = ifp0->if_hardmtu;
2607 	}
2608 	rw_exit_read(&sc->sc_lock);
2609 
2610 	return (hardmtu);
2611 }
2612 
/*
 * Periodic transmit timeout handler; arg is the struct aggr_port.
 *
 * Requests a transmission via aggr_ntt() and re-arms itself, using the
 * fast period when the partner state has LACP_STATE_TIMEOUT set and the
 * slow period otherwise.
 */
static void
aggr_ptm_tx(void *arg)
{
	struct aggr_port *p = arg;
	unsigned int timeout;

	aggr_ntt(p);

	if (ISSET(p->p_partner_state, LACP_STATE_TIMEOUT))
		timeout = AGGR_LACP_TIMEOUT_FAST;
	else
		timeout = AGGR_LACP_TIMEOUT_SLOW;

	timeout_add_sec(&p->p_ptm_tx, aggr_periodic_times[timeout]);
}
2625 
2626 static inline void
2627 aggr_lacp_tlv_set(struct lacp_tlv_hdr *tlv, uint8_t type, uint8_t len)
2628 {
2629 	tlv->lacp_tlv_type = type;
2630 	tlv->lacp_tlv_length = sizeof(*tlv) + len;
2631 }
2632 
2633 static void
2634 aggr_ntt_transmit(struct aggr_port *p)
2635 {
2636 	struct aggr_softc *sc = p->p_aggr;
2637 	struct arpcom *ac = &sc->sc_ac;
2638 	struct ifnet *ifp = &sc->sc_if;
2639 	struct ifnet *ifp0 = p->p_ifp0;
2640 	struct mbuf *m;
2641 	struct lacp_du *lacpdu;
2642 	struct lacp_port_info *pi;
2643 	struct lacp_collector_info *ci;
2644 	struct ether_header *eh;
2645 	int linkhdr = max_linkhdr + ETHER_ALIGN;
2646 	int len = linkhdr + sizeof(*eh) + sizeof(*lacpdu);
2647 
2648 	m = m_gethdr(M_DONTWAIT, MT_DATA);
2649 	if (m == NULL)
2650 		return;
2651 
2652 	if (len > MHLEN) {
2653 		MCLGETI(m, M_DONTWAIT, NULL, len);
2654 		if (!ISSET(m->m_flags, M_EXT)) {
2655 			m_freem(m);
2656 			return;
2657 		}
2658 	}
2659 
2660 	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
2661 	m->m_pkthdr.len = m->m_len = len;
2662 	memset(m->m_data, 0, m->m_len);
2663 	m_adj(m, linkhdr);
2664 
2665 	eh = mtod(m, struct ether_header *);
2666 
2667 	CTASSERT(sizeof(eh->ether_dhost) == sizeof(lacp_address_slow));
2668 	CTASSERT(sizeof(eh->ether_shost) == sizeof(ac->ac_enaddr));
2669 
2670 	memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost));
2671 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
2672 	eh->ether_type = htons(ETHERTYPE_SLOW);
2673 
2674 	lacpdu = (struct lacp_du *)(eh + 1);
2675 	lacpdu->lacp_du_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
2676 	lacpdu->lacp_du_sph.sph_version = LACP_VERSION;
2677 
2678 	pi = &lacpdu->lacp_actor_info;
2679 	aggr_lacp_tlv_set(&lacpdu->lacp_actor_info_tlv,
2680 	    LACP_T_ACTOR, sizeof(*pi));
2681 
2682 	pi->lacp_sysid.lacp_sysid_priority = htons(sc->sc_lacp_prio);
2683 	CTASSERT(sizeof(pi->lacp_sysid.lacp_sysid_mac) ==
2684 	    sizeof(ac->ac_enaddr));
2685 	memcpy(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr,
2686 	    sizeof(pi->lacp_sysid.lacp_sysid_mac));
2687 
2688 	pi->lacp_key = htons(ifp->if_index);
2689 
2690 	pi->lacp_portid.lacp_portid_priority = htons(sc->sc_lacp_port_prio);
2691 	pi->lacp_portid.lacp_portid_number = htons(ifp0->if_index);
2692 
2693 	pi->lacp_state = p->p_actor_state;
2694 	if (sc->sc_lacp_mode)
2695 		SET(pi->lacp_state, LACP_STATE_ACTIVITY);
2696 	if (sc->sc_lacp_timeout)
2697 		SET(pi->lacp_state, LACP_STATE_TIMEOUT);
2698 
2699 	pi = &lacpdu->lacp_partner_info;
2700 	aggr_lacp_tlv_set(&lacpdu->lacp_partner_info_tlv,
2701 	    LACP_T_PARTNER, sizeof(*pi));
2702 
2703 	*pi = p->p_partner;
2704 
2705 	ci = &lacpdu->lacp_collector_info;
2706 	aggr_lacp_tlv_set(&lacpdu->lacp_collector_info_tlv,
2707 	    LACP_T_COLLECTOR, sizeof(*ci));
2708 	ci->lacp_maxdelay = htons(0);
2709 
2710 	lacpdu->lacp_terminator.lacp_tlv_type = LACP_T_TERMINATOR;
2711 	lacpdu->lacp_terminator.lacp_tlv_length = 0;
2712 
2713 	(void)if_enqueue(ifp0, m);
2714 }
2715 
2716 static void
2717 aggr_ntt(struct aggr_port *p)
2718 {
2719 	if (!timeout_pending(&p->p_txm_ntt))
2720 		timeout_add(&p->p_txm_ntt, 0);
2721 }
2722 
2723 static void
2724 aggr_transmit_machine(void *arg)
2725 {
2726 	struct aggr_port *p = arg;
2727 	struct aggr_softc *sc = p->p_aggr;
2728 	unsigned int slot;
2729 	int *log;
2730 	int period = hz * LACP_FAST_PERIODIC_TIME;
2731 	int diff;
2732 
2733 	if (!aggr_lacp_enabled(sc) || !aggr_port_enabled(p))
2734 		return;
2735 
2736 	slot = p->p_txm_slot;
2737 	log = &p->p_txm_log[slot % nitems(p->p_txm_log)];
2738 
2739 	diff = ticks - *log;
2740 	if (diff < period) {
2741 		timeout_add(&p->p_txm_ntt, period - diff);
2742 		return;
2743 	}
2744 
2745 	*log = ticks;
2746 	p->p_txm_slot = ++slot;
2747 
2748 #if 0
2749 	DPRINTF(sc, "%s %s ntt\n", sc->sc_if.if_xname, p->p_ifp0->if_xname);
2750 #endif
2751 
2752 	aggr_ntt_transmit(p);
2753 }
2754 
2755 static void
2756 aggr_set_lacp_mode(struct aggr_softc *sc, int mode)
2757 {
2758 	sc->sc_lacp_mode = mode;
2759 
2760 	if (mode == AGGR_LACP_MODE_PASSIVE) {
2761 		struct aggr_port *p;
2762 
2763 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2764 			if (!ISSET(p->p_partner_state, LACP_STATE_ACTIVITY))
2765 				timeout_del(&p->p_ptm_tx);
2766 		}
2767 	}
2768 }
2769 
2770 static void
2771 aggr_set_partner_timeout(struct aggr_port *p, int timeout)
2772 {
2773 	uint8_t ostate = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT);
2774 	uint8_t nstate = (timeout == AGGR_LACP_TIMEOUT_FAST) ?
2775 	    LACP_STATE_TIMEOUT : 0;
2776 
2777 	if (ostate == nstate)
2778 		return;
2779 
2780 	if (timeout == AGGR_LACP_TIMEOUT_FAST) {
2781 		SET(p->p_partner_state, LACP_STATE_TIMEOUT);
2782 		timeout_add_sec(&p->p_ptm_tx,
2783 		    aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]);
2784 	} else
2785 		CLR(p->p_partner_state, LACP_STATE_TIMEOUT);
2786 }
2787 
2788 static void
2789 aggr_set_lacp_timeout(struct aggr_softc *sc, int timeout)
2790 {
2791 	struct aggr_port *p;
2792 
2793 	sc->sc_lacp_timeout = timeout;
2794 
2795 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2796 		if (!ISSET(p->p_actor_state, LACP_STATE_DEFAULTED))
2797 			continue;
2798 
2799 		aggr_set_partner_timeout(p, timeout);
2800 	}
2801 }
2802 
2803 static int
2804 aggr_multi_eq(const struct aggr_multiaddr *ma,
2805     const uint8_t *addrlo, const uint8_t *addrhi)
2806 {
2807 	return (ETHER_IS_EQ(ma->m_addrlo, addrlo) &&
2808 	    ETHER_IS_EQ(ma->m_addrhi, addrhi));
2809 }
2810 
2811 static int
2812 aggr_multi_add(struct aggr_softc *sc, struct ifreq *ifr)
2813 {
2814 	struct ifnet *ifp = &sc->sc_if;
2815 	struct aggr_port *p;
2816 	struct aggr_multiaddr *ma;
2817 	uint8_t addrlo[ETHER_ADDR_LEN];
2818 	uint8_t addrhi[ETHER_ADDR_LEN];
2819 	int error;
2820 
2821 	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2822 	if (error != 0)
2823 		return (error);
2824 
2825 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
2826 		if (aggr_multi_eq(ma, addrlo, addrhi)) {
2827 			ma->m_refs++;
2828 			return (0);
2829 		}
2830 	}
2831 
2832 	ma = malloc(sizeof(*ma), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
2833 	if (ma == NULL)
2834 		return (ENOMEM);
2835 
2836 	ma->m_refs = 1;
2837 	memcpy(&ma->m_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2838 	memcpy(ma->m_addrlo, addrlo, sizeof(ma->m_addrlo));
2839 	memcpy(ma->m_addrhi, addrhi, sizeof(ma->m_addrhi));
2840 	TAILQ_INSERT_TAIL(&sc->sc_multiaddrs, ma, m_entry);
2841 
2842 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2843 		struct ifnet *ifp0 = p->p_ifp0;
2844 
2845 		if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) {
2846 			log(LOG_WARNING, "%s %s: "
2847 			    "unable to add multicast address\n",
2848 			    ifp->if_xname, ifp0->if_xname);
2849 		}
2850 	}
2851 
2852 	return (0);
2853 }
2854 
2855 int
2856 aggr_multi_del(struct aggr_softc *sc, struct ifreq *ifr)
2857 {
2858 	struct ifnet *ifp = &sc->sc_if;
2859 	struct aggr_port *p;
2860 	struct aggr_multiaddr *ma;
2861 	uint8_t addrlo[ETHER_ADDR_LEN];
2862 	uint8_t addrhi[ETHER_ADDR_LEN];
2863 	int error;
2864 
2865 	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2866 	if (error != 0)
2867 		return (error);
2868 
2869 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
2870 		if (aggr_multi_eq(ma, addrlo, addrhi))
2871 			break;
2872 	}
2873 
2874 	if (ma == NULL)
2875 		return (EINVAL);
2876 
2877 	if (--ma->m_refs > 0)
2878 		return (0);
2879 
2880 	TAILQ_REMOVE(&sc->sc_multiaddrs, ma, m_entry);
2881 
2882 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2883 		struct ifnet *ifp0 = p->p_ifp0;
2884 
2885 		if (aggr_multi(sc, p, ma, SIOCDELMULTI) != 0) {
2886 			log(LOG_WARNING, "%s %s: "
2887 			    "unable to delete multicast address\n",
2888 			    ifp->if_xname, ifp0->if_xname);
2889 		}
2890 	}
2891 
2892 	free(ma, M_DEVBUF, sizeof(*ma));
2893 
2894 	return (0);
2895 }
2896