xref: /dpdk/drivers/net/bonding/rte_eth_bond_pmd.c (revision 4da0705bf896327af062212b5a1e6cb1f1366aa5)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <stdbool.h>
6 #include <netinet/in.h>
7 
8 #include <rte_bitops.h>
9 #include <rte_mbuf.h>
10 #include <rte_malloc.h>
11 #include <ethdev_driver.h>
12 #include <ethdev_vdev.h>
13 #include <rte_tcp.h>
14 #include <rte_udp.h>
15 #include <rte_ip.h>
16 #include <rte_ip_frag.h>
17 #include <rte_devargs.h>
18 #include <rte_kvargs.h>
19 #include <bus_vdev_driver.h>
20 #include <rte_alarm.h>
21 #include <rte_cycles.h>
22 #include <rte_string_fns.h>
23 
24 #include "rte_eth_bond.h"
25 #include "eth_bond_private.h"
26 #include "eth_bond_8023ad_private.h"
27 
28 #define REORDER_PERIOD_MS 10
29 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
30 #define BOND_MAX_MAC_ADDRS 16
31 
32 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
33 
34 /* Table for statistics in mode 5 TLB */
35 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
36 
37 static inline size_t
38 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
39 {
40 	size_t vlan_offset = 0;
41 
42 	if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
43 		rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
44 		struct rte_vlan_hdr *vlan_hdr =
45 			(struct rte_vlan_hdr *)(eth_hdr + 1);
46 
47 		vlan_offset = sizeof(struct rte_vlan_hdr);
48 		*proto = vlan_hdr->eth_proto;
49 
50 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
51 			vlan_hdr = vlan_hdr + 1;
52 			*proto = vlan_hdr->eth_proto;
53 			vlan_offset += sizeof(struct rte_vlan_hdr);
54 		}
55 	}
56 	return vlan_offset;
57 }
58 
59 static uint16_t
60 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
61 {
62 	struct bond_dev_private *internals;
63 
64 	uint16_t num_rx_total = 0;
65 	uint16_t member_count;
66 	uint16_t active_member;
67 	int i;
68 
69 	/* Cast to structure, containing bonding device's port id and queue id */
70 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
71 	internals = bd_rx_q->dev_private;
72 	member_count = internals->active_member_count;
73 	active_member = bd_rx_q->active_member;
74 
75 	for (i = 0; i < member_count && nb_pkts; i++) {
76 		uint16_t num_rx_member;
77 
78 		/*
79 		 * Offset of pointer to *bufs increases as packets are received
80 		 * from other members.
81 		 */
82 		num_rx_member =
83 			rte_eth_rx_burst(internals->active_members[active_member],
84 					 bd_rx_q->queue_id,
85 					 bufs + num_rx_total, nb_pkts);
86 		num_rx_total += num_rx_member;
87 		nb_pkts -= num_rx_member;
88 		if (++active_member >= member_count)
89 			active_member = 0;
90 	}
91 
92 	if (++bd_rx_q->active_member >= member_count)
93 		bd_rx_q->active_member = 0;
94 	return num_rx_total;
95 }
96 
97 static uint16_t
98 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
99 		uint16_t nb_pkts)
100 {
101 	struct bond_dev_private *internals;
102 
103 	/* Cast to structure, containing bonding device's port id and queue id */
104 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
105 
106 	internals = bd_rx_q->dev_private;
107 
108 	return rte_eth_rx_burst(internals->current_primary_port,
109 			bd_rx_q->queue_id, bufs, nb_pkts);
110 }
111 
112 static inline uint8_t
113 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
114 {
115 	const uint16_t ether_type_slow_be =
116 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
117 
118 	return !((mbuf->ol_flags & RTE_MBUF_F_RX_VLAN) ? mbuf->vlan_tci : 0) &&
119 		(ethertype == ether_type_slow_be &&
120 		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
121 }
122 
123 /*****************************************************************************
124  * Flow director's setup for mode 4 optimization
125  */
126 
127 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
128 	.hdr.dst_addr.addr_bytes = { 0 },
129 	.hdr.src_addr.addr_bytes = { 0 },
130 	.hdr.ether_type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
131 };
132 
133 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
134 	.hdr.dst_addr.addr_bytes = { 0 },
135 	.hdr.src_addr.addr_bytes = { 0 },
136 	.hdr.ether_type = 0xFFFF,
137 };
138 
139 static struct rte_flow_item flow_item_8023ad[] = {
140 	{
141 		.type = RTE_FLOW_ITEM_TYPE_ETH,
142 		.spec = &flow_item_eth_type_8023ad,
143 		.last = NULL,
144 		.mask = &flow_item_eth_mask_type_8023ad,
145 	},
146 	{
147 		.type = RTE_FLOW_ITEM_TYPE_END,
148 		.spec = NULL,
149 		.last = NULL,
150 		.mask = NULL,
151 	}
152 };
153 
154 const struct rte_flow_attr flow_attr_8023ad = {
155 	.group = 0,
156 	.priority = 0,
157 	.ingress = 1,
158 	.egress = 0,
159 	.reserved = 0,
160 };
161 
162 int
163 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
164 		uint16_t member_port) {
165 	struct rte_eth_dev_info member_info;
166 	struct rte_flow_error error;
167 	struct bond_dev_private *internals = bond_dev->data->dev_private;
168 
169 	const struct rte_flow_action_queue lacp_queue_conf = {
170 		.index = 0,
171 	};
172 
173 	const struct rte_flow_action actions[] = {
174 		{
175 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
176 			.conf = &lacp_queue_conf
177 		},
178 		{
179 			.type = RTE_FLOW_ACTION_TYPE_END,
180 		}
181 	};
182 
183 	int ret = rte_flow_validate(member_port, &flow_attr_8023ad,
184 			flow_item_8023ad, actions, &error);
185 	if (ret < 0) {
186 		RTE_BOND_LOG(ERR, "%s: %s (member_port=%d queue_id=%d)",
187 				__func__, error.message, member_port,
188 				internals->mode4.dedicated_queues.rx_qid);
189 		return -1;
190 	}
191 
192 	ret = rte_eth_dev_info_get(member_port, &member_info);
193 	if (ret != 0) {
194 		RTE_BOND_LOG(ERR,
195 			"%s: Error during getting device (port %u) info: %s",
196 			__func__, member_port, strerror(-ret));
197 
198 		return ret;
199 	}
200 
201 	if (member_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
202 			member_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
203 		RTE_BOND_LOG(ERR,
204 			"%s: Member %d capabilities doesn't allow allocating additional queues",
205 			__func__, member_port);
206 		return -1;
207 	}
208 
209 	return 0;
210 }
211 
212 int
213 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
214 	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
215 	struct bond_dev_private *internals = bond_dev->data->dev_private;
216 	struct rte_eth_dev_info bond_info;
217 	uint16_t idx;
218 	int ret;
219 
220 	/* Verify if all members in bonding supports flow director and */
221 	if (internals->member_count > 0) {
222 		ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
223 		if (ret != 0) {
224 			RTE_BOND_LOG(ERR,
225 				"%s: Error during getting device (port %u) info: %s",
226 				__func__, bond_dev->data->port_id,
227 				strerror(-ret));
228 
229 			return ret;
230 		}
231 
232 		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
233 		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
234 
235 		for (idx = 0; idx < internals->member_count; idx++) {
236 			if (bond_ethdev_8023ad_flow_verify(bond_dev,
237 					internals->members[idx].port_id) != 0)
238 				return -1;
239 		}
240 	}
241 
242 	return 0;
243 }
244 
245 int
246 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t member_port) {
247 
248 	struct rte_flow_error error;
249 	struct bond_dev_private *internals = bond_dev->data->dev_private;
250 	struct rte_flow_action_queue lacp_queue_conf = {
251 		.index = internals->mode4.dedicated_queues.rx_qid,
252 	};
253 
254 	const struct rte_flow_action actions[] = {
255 		{
256 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
257 			.conf = &lacp_queue_conf
258 		},
259 		{
260 			.type = RTE_FLOW_ACTION_TYPE_END,
261 		}
262 	};
263 
264 	internals->mode4.dedicated_queues.flow[member_port] = rte_flow_create(member_port,
265 			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
266 	if (internals->mode4.dedicated_queues.flow[member_port] == NULL) {
267 		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
268 				"(member_port=%d queue_id=%d)",
269 				error.message, member_port,
270 				internals->mode4.dedicated_queues.rx_qid);
271 		return -1;
272 	}
273 
274 	return 0;
275 }
276 
277 static bool
278 is_bond_mac_addr(const struct rte_ether_addr *ea,
279 		 const struct rte_ether_addr *mac_addrs, uint32_t max_mac_addrs)
280 {
281 	uint32_t i;
282 
283 	for (i = 0; i < max_mac_addrs; i++) {
284 		/* skip zero address */
285 		if (rte_is_zero_ether_addr(&mac_addrs[i]))
286 			continue;
287 
288 		if (rte_is_same_ether_addr(ea, &mac_addrs[i]))
289 			return true;
290 	}
291 
292 	return false;
293 }
294 
295 static inline uint16_t
296 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
297 		bool dedicated_rxq)
298 {
299 	/* Cast to structure, containing bonding device's port id and queue id */
300 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
301 	struct bond_dev_private *internals = bd_rx_q->dev_private;
302 	struct rte_eth_dev *bonding_eth_dev =
303 					&rte_eth_devices[internals->port_id];
304 	struct rte_ether_addr *bond_mac = bonding_eth_dev->data->mac_addrs;
305 	struct rte_ether_hdr *hdr;
306 
307 	const uint16_t ether_type_slow_be =
308 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
309 	uint16_t num_rx_total = 0;	/* Total number of received packets */
310 	uint16_t members[RTE_MAX_ETHPORTS];
311 	uint16_t member_count, idx;
312 
313 	uint8_t collecting;  /* current member collecting status */
314 	const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
315 	const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
316 	uint8_t subtype;
317 	uint16_t i;
318 	uint16_t j;
319 	uint16_t k;
320 
321 	/* Copy member list to protect against member up/down changes during tx
322 	 * bursting */
323 	member_count = internals->active_member_count;
324 	memcpy(members, internals->active_members,
325 			sizeof(internals->active_members[0]) * member_count);
326 
327 	idx = bd_rx_q->active_member;
328 	if (idx >= member_count) {
329 		bd_rx_q->active_member = 0;
330 		idx = 0;
331 	}
332 	for (i = 0; i < member_count && num_rx_total < nb_pkts; i++) {
333 		j = num_rx_total;
334 		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[members[idx]],
335 					 COLLECTING);
336 
337 		/* Read packets from this member */
338 		num_rx_total += rte_eth_rx_burst(members[idx], bd_rx_q->queue_id,
339 				&bufs[num_rx_total], nb_pkts - num_rx_total);
340 
341 		for (k = j; k < 2 && k < num_rx_total; k++)
342 			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
343 
344 		/* Handle slow protocol packets. */
345 		while (j < num_rx_total) {
346 			if (j + 3 < num_rx_total)
347 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
348 
349 			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
350 			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
351 
352 			/* Remove packet from array if:
353 			 * - it is slow packet but no dedicated rxq is present,
354 			 * - member is not in collecting state,
355 			 * - bonding interface is not in promiscuous mode and
356 			 *   packet address isn't in mac_addrs array:
357 			 *   - packet is unicast,
358 			 *   - packet is multicast and bonding interface
359 			 *     is not in allmulti,
360 			 */
361 			if (unlikely(
362 				(!dedicated_rxq &&
363 				 is_lacp_packets(hdr->ether_type, subtype,
364 						 bufs[j])) ||
365 				!collecting ||
366 				(!promisc &&
367 				 !is_bond_mac_addr(&hdr->dst_addr, bond_mac,
368 						   BOND_MAX_MAC_ADDRS) &&
369 				 (rte_is_unicast_ether_addr(&hdr->dst_addr) ||
370 				  !allmulti)))) {
371 				if (hdr->ether_type == ether_type_slow_be) {
372 					bond_mode_8023ad_handle_slow_pkt(
373 					    internals, members[idx], bufs[j]);
374 				} else
375 					rte_pktmbuf_free(bufs[j]);
376 
377 				/* Packet is managed by mode 4 or dropped, shift the array */
378 				num_rx_total--;
379 				if (j < num_rx_total) {
380 					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
381 						(num_rx_total - j));
382 				}
383 			} else
384 				j++;
385 		}
386 		if (unlikely(++idx == member_count))
387 			idx = 0;
388 	}
389 
390 	if (++bd_rx_q->active_member >= member_count)
391 		bd_rx_q->active_member = 0;
392 
393 	return num_rx_total;
394 }
395 
396 static uint16_t
397 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
398 		uint16_t nb_pkts)
399 {
400 	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
401 }
402 
403 static uint16_t
404 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
405 		uint16_t nb_pkts)
406 {
407 	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
408 }
409 
410 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
411 uint32_t burstnumberRX;
412 uint32_t burst_number_TX;
413 
414 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
415 
416 static void
417 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
418 {
419 	switch (arp_op) {
420 	case RTE_ARP_OP_REQUEST:
421 		strlcpy(buf, "ARP Request", buf_len);
422 		return;
423 	case RTE_ARP_OP_REPLY:
424 		strlcpy(buf, "ARP Reply", buf_len);
425 		return;
426 	case RTE_ARP_OP_REVREQUEST:
427 		strlcpy(buf, "Reverse ARP Request", buf_len);
428 		return;
429 	case RTE_ARP_OP_REVREPLY:
430 		strlcpy(buf, "Reverse ARP Reply", buf_len);
431 		return;
432 	case RTE_ARP_OP_INVREQUEST:
433 		strlcpy(buf, "Peer Identify Request", buf_len);
434 		return;
435 	case RTE_ARP_OP_INVREPLY:
436 		strlcpy(buf, "Peer Identify Reply", buf_len);
437 		return;
438 	default:
439 		break;
440 	}
441 	strlcpy(buf, "Unknown", buf_len);
442 	return;
443 }
444 #endif
445 #define MaxIPv4String	16
446 static void
447 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
448 {
449 	uint32_t ipv4_addr;
450 
451 	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
452 	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
453 		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
454 		ipv4_addr & 0xFF);
455 }
456 
457 #define MAX_CLIENTS_NUMBER	128
458 uint8_t active_clients;
459 struct client_stats_t {
460 	uint16_t port;
461 	uint32_t ipv4_addr;
462 	uint32_t ipv4_rx_packets;
463 	uint32_t ipv4_tx_packets;
464 };
465 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
466 
467 static void
468 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
469 {
470 	int i = 0;
471 
472 	for (; i < MAX_CLIENTS_NUMBER; i++)	{
473 		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
474 			/* Just update RX packets number for this client */
475 			if (TXorRXindicator == &burstnumberRX)
476 				client_stats[i].ipv4_rx_packets++;
477 			else
478 				client_stats[i].ipv4_tx_packets++;
479 			return;
480 		}
481 	}
482 	/* We have a new client. Insert him to the table, and increment stats */
483 	if (TXorRXindicator == &burstnumberRX)
484 		client_stats[active_clients].ipv4_rx_packets++;
485 	else
486 		client_stats[active_clients].ipv4_tx_packets++;
487 	client_stats[active_clients].ipv4_addr = addr;
488 	client_stats[active_clients].port = port;
489 	active_clients++;
490 
491 }
492 
493 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
494 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
495 	RTE_LOG_LINE(DEBUG, BOND,				\
496 		"%s port:%d SrcMAC:" RTE_ETHER_ADDR_PRT_FMT " SrcIP:%s " \
497 		"DstMAC:" RTE_ETHER_ADDR_PRT_FMT " DstIP:%s %s %d", \
498 		info,							\
499 		port,							\
500 		RTE_ETHER_ADDR_BYTES(&eth_h->src_addr),                  \
501 		src_ip,							\
502 		RTE_ETHER_ADDR_BYTES(&eth_h->dst_addr),                  \
503 		dst_ip,							\
504 		arp_op, ++burstnumber)
505 #endif
506 
507 static void
508 mode6_debug(const char __rte_unused *info,
509 	struct rte_ether_hdr *eth_h, uint16_t port,
510 	uint32_t __rte_unused *burstnumber)
511 {
512 	struct rte_ipv4_hdr *ipv4_h;
513 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
514 	struct rte_arp_hdr *arp_h;
515 	char dst_ip[16];
516 	char ArpOp[24];
517 	char buf[16];
518 #endif
519 	char src_ip[16];
520 
521 	uint16_t ether_type = eth_h->ether_type;
522 	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
523 
524 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
525 	strlcpy(buf, info, 16);
526 #endif
527 
528 	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
529 		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
530 		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
531 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
532 		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
533 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
534 #endif
535 		update_client_stats(ipv4_h->src_addr, port, burstnumber);
536 	}
537 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
538 	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
539 		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
540 		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
541 		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
542 		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
543 				ArpOp, sizeof(ArpOp));
544 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
545 	}
546 #endif
547 }
548 #endif
549 
550 static uint16_t
551 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
552 {
553 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
554 	struct bond_dev_private *internals = bd_rx_q->dev_private;
555 	struct rte_ether_hdr *eth_h;
556 	uint16_t ether_type, offset;
557 	uint16_t nb_recv_pkts;
558 	int i;
559 
560 	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
561 
562 	for (i = 0; i < nb_recv_pkts; i++) {
563 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
564 		ether_type = eth_h->ether_type;
565 		offset = get_vlan_offset(eth_h, &ether_type);
566 
567 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
568 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
569 			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
570 #endif
571 			bond_mode_alb_arp_recv(eth_h, offset, internals);
572 		}
573 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
574 		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
575 			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
576 #endif
577 	}
578 
579 	return nb_recv_pkts;
580 }
581 
582 static uint16_t
583 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
584 		uint16_t nb_pkts)
585 {
586 	struct bond_dev_private *internals;
587 	struct bond_tx_queue *bd_tx_q;
588 
589 	struct rte_mbuf *member_bufs[RTE_MAX_ETHPORTS][nb_pkts];
590 	uint16_t member_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
591 
592 	uint16_t num_of_members;
593 	uint16_t members[RTE_MAX_ETHPORTS];
594 
595 	uint16_t num_tx_total = 0, num_tx_member;
596 
597 	static int member_idx;
598 	int i, cmember_idx = 0, tx_fail_total = 0;
599 
600 	bd_tx_q = (struct bond_tx_queue *)queue;
601 	internals = bd_tx_q->dev_private;
602 
603 	/* Copy member list to protect against member up/down changes during tx
604 	 * bursting */
605 	num_of_members = internals->active_member_count;
606 	memcpy(members, internals->active_members,
607 			sizeof(internals->active_members[0]) * num_of_members);
608 
609 	if (num_of_members < 1)
610 		return num_tx_total;
611 
612 	/* Populate members mbuf with which packets are to be sent on it  */
613 	for (i = 0; i < nb_pkts; i++) {
614 		cmember_idx = (member_idx + i) % num_of_members;
615 		member_bufs[cmember_idx][(member_nb_pkts[cmember_idx])++] = bufs[i];
616 	}
617 
618 	/*
619 	 * increment current member index so the next call to tx burst starts on the
620 	 * next member.
621 	 */
622 	member_idx = ++cmember_idx;
623 
624 	/* Send packet burst on each member device */
625 	for (i = 0; i < num_of_members; i++) {
626 		if (member_nb_pkts[i] > 0) {
627 			num_tx_member = rte_eth_tx_prepare(members[i],
628 					bd_tx_q->queue_id, member_bufs[i],
629 					member_nb_pkts[i]);
630 			num_tx_member = rte_eth_tx_burst(members[i], bd_tx_q->queue_id,
631 					member_bufs[i], num_tx_member);
632 
633 			/* if tx burst fails move packets to end of bufs */
634 			if (unlikely(num_tx_member < member_nb_pkts[i])) {
635 				int tx_fail_member = member_nb_pkts[i] - num_tx_member;
636 
637 				tx_fail_total += tx_fail_member;
638 
639 				memcpy(&bufs[nb_pkts - tx_fail_total],
640 				       &member_bufs[i][num_tx_member],
641 				       tx_fail_member * sizeof(bufs[0]));
642 			}
643 			num_tx_total += num_tx_member;
644 		}
645 	}
646 
647 	return num_tx_total;
648 }
649 
650 static uint16_t
651 bond_ethdev_tx_burst_active_backup(void *queue,
652 		struct rte_mbuf **bufs, uint16_t nb_pkts)
653 {
654 	struct bond_dev_private *internals;
655 	struct bond_tx_queue *bd_tx_q;
656 	uint16_t nb_prep_pkts;
657 
658 	bd_tx_q = (struct bond_tx_queue *)queue;
659 	internals = bd_tx_q->dev_private;
660 
661 	if (internals->active_member_count < 1)
662 		return 0;
663 
664 	nb_prep_pkts = rte_eth_tx_prepare(internals->current_primary_port,
665 				bd_tx_q->queue_id, bufs, nb_pkts);
666 
667 	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
668 			bufs, nb_prep_pkts);
669 }
670 
671 static inline uint16_t
672 ether_hash(struct rte_ether_hdr *eth_hdr)
673 {
674 	unaligned_uint16_t *word_src_addr =
675 		(unaligned_uint16_t *)eth_hdr->src_addr.addr_bytes;
676 	unaligned_uint16_t *word_dst_addr =
677 		(unaligned_uint16_t *)eth_hdr->dst_addr.addr_bytes;
678 
679 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
680 			(word_src_addr[1] ^ word_dst_addr[1]) ^
681 			(word_src_addr[2] ^ word_dst_addr[2]);
682 }
683 
684 static inline uint32_t
685 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
686 {
687 	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
688 }
689 
690 static inline uint32_t
691 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
692 {
693 	unaligned_uint32_t *word_src_addr = (unaligned_uint32_t *)&ipv6_hdr->src_addr;
694 	unaligned_uint32_t *word_dst_addr = (unaligned_uint32_t *)&ipv6_hdr->dst_addr;
695 
696 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
697 			(word_src_addr[1] ^ word_dst_addr[1]) ^
698 			(word_src_addr[2] ^ word_dst_addr[2]) ^
699 			(word_src_addr[3] ^ word_dst_addr[3]);
700 }
701 
702 
703 void
704 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
705 		uint16_t member_count, uint16_t *members)
706 {
707 	struct rte_ether_hdr *eth_hdr;
708 	uint32_t hash;
709 	int i;
710 
711 	for (i = 0; i < nb_pkts; i++) {
712 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
713 
714 		hash = ether_hash(eth_hdr);
715 
716 		members[i] = (hash ^= hash >> 8) % member_count;
717 	}
718 }
719 
720 void
721 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
722 		uint16_t member_count, uint16_t *members)
723 {
724 	uint16_t i;
725 	struct rte_ether_hdr *eth_hdr;
726 	uint16_t proto;
727 	size_t vlan_offset;
728 	uint32_t hash, l3hash;
729 
730 	for (i = 0; i < nb_pkts; i++) {
731 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
732 		l3hash = 0;
733 
734 		proto = eth_hdr->ether_type;
735 		hash = ether_hash(eth_hdr);
736 
737 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
738 
739 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
740 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
741 					((char *)(eth_hdr + 1) + vlan_offset);
742 			l3hash = ipv4_hash(ipv4_hdr);
743 
744 		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
745 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
746 					((char *)(eth_hdr + 1) + vlan_offset);
747 			l3hash = ipv6_hash(ipv6_hdr);
748 		}
749 
750 		hash = hash ^ l3hash;
751 		hash ^= hash >> 16;
752 		hash ^= hash >> 8;
753 
754 		members[i] = hash % member_count;
755 	}
756 }
757 
758 void
759 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
760 		uint16_t member_count, uint16_t *members)
761 {
762 	struct rte_ether_hdr *eth_hdr;
763 	uint16_t proto;
764 	size_t vlan_offset;
765 	int i;
766 
767 	struct rte_udp_hdr *udp_hdr;
768 	struct rte_tcp_hdr *tcp_hdr;
769 	uint32_t hash, l3hash, l4hash;
770 
771 	for (i = 0; i < nb_pkts; i++) {
772 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
773 		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
774 		proto = eth_hdr->ether_type;
775 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
776 		l3hash = 0;
777 		l4hash = 0;
778 
779 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
780 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
781 					((char *)(eth_hdr + 1) + vlan_offset);
782 			size_t ip_hdr_offset;
783 
784 			l3hash = ipv4_hash(ipv4_hdr);
785 
786 			/* there is no L4 header in fragmented packet */
787 			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
788 								== 0)) {
789 				ip_hdr_offset = (ipv4_hdr->version_ihl
790 					& RTE_IPV4_HDR_IHL_MASK) *
791 					RTE_IPV4_IHL_MULTIPLIER;
792 
793 				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
794 					tcp_hdr = (struct rte_tcp_hdr *)
795 						((char *)ipv4_hdr +
796 							ip_hdr_offset);
797 					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
798 							<= pkt_end)
799 						l4hash = HASH_L4_PORTS(tcp_hdr);
800 				} else if (ipv4_hdr->next_proto_id ==
801 								IPPROTO_UDP) {
802 					udp_hdr = (struct rte_udp_hdr *)
803 						((char *)ipv4_hdr +
804 							ip_hdr_offset);
805 					if ((size_t)udp_hdr + sizeof(*udp_hdr)
806 							< pkt_end)
807 						l4hash = HASH_L4_PORTS(udp_hdr);
808 				}
809 			}
810 		} else if  (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
811 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
812 					((char *)(eth_hdr + 1) + vlan_offset);
813 			l3hash = ipv6_hash(ipv6_hdr);
814 
815 			if (ipv6_hdr->proto == IPPROTO_TCP) {
816 				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
817 				l4hash = HASH_L4_PORTS(tcp_hdr);
818 			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
819 				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
820 				l4hash = HASH_L4_PORTS(udp_hdr);
821 			}
822 		}
823 
824 		hash = l3hash ^ l4hash;
825 		hash ^= hash >> 16;
826 		hash ^= hash >> 8;
827 
828 		members[i] = hash % member_count;
829 	}
830 }
831 
832 struct bwg_member {
833 	uint64_t bwg_left_int;
834 	uint64_t bwg_left_remainder;
835 	uint16_t member;
836 };
837 
838 void
839 bond_tlb_activate_member(struct bond_dev_private *internals) {
840 	int i;
841 
842 	for (i = 0; i < internals->active_member_count; i++)
843 		tlb_last_obytets[internals->active_members[i]] = 0;
844 }
845 
846 static int
847 bandwidth_cmp(const void *a, const void *b)
848 {
849 	const struct bwg_member *bwg_a = a;
850 	const struct bwg_member *bwg_b = b;
851 	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
852 	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
853 			(int64_t)bwg_a->bwg_left_remainder;
854 	if (diff > 0)
855 		return 1;
856 	else if (diff < 0)
857 		return -1;
858 	else if (diff2 > 0)
859 		return 1;
860 	else if (diff2 < 0)
861 		return -1;
862 	else
863 		return 0;
864 }
865 
866 static void
867 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
868 		struct bwg_member *bwg_member)
869 {
870 	struct rte_eth_link link_status;
871 	int ret;
872 
873 	ret = rte_eth_link_get_nowait(port_id, &link_status);
874 	if (ret < 0) {
875 		RTE_BOND_LOG(ERR, "Member (port %u) link get failed: %s",
876 			     port_id, rte_strerror(-ret));
877 		return;
878 	}
879 	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
880 	if (link_bwg == 0)
881 		return;
882 	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
883 	bwg_member->bwg_left_int = (link_bwg - 1000 * load) / link_bwg;
884 	bwg_member->bwg_left_remainder = (link_bwg - 1000 * load) % link_bwg;
885 }
886 
887 static void
888 bond_ethdev_update_tlb_member_cb(void *arg)
889 {
890 	struct bond_dev_private *internals = arg;
891 	struct rte_eth_stats member_stats;
892 	struct bwg_member bwg_array[RTE_MAX_ETHPORTS];
893 	uint16_t member_count;
894 	uint64_t tx_bytes;
895 
896 	uint8_t update_stats = 0;
897 	uint16_t member_id;
898 	uint16_t i;
899 
900 	internals->member_update_idx++;
901 
902 
903 	if (internals->member_update_idx >= REORDER_PERIOD_MS)
904 		update_stats = 1;
905 
906 	for (i = 0; i < internals->active_member_count; i++) {
907 		member_id = internals->active_members[i];
908 		rte_eth_stats_get(member_id, &member_stats);
909 		tx_bytes = member_stats.obytes - tlb_last_obytets[member_id];
910 		bandwidth_left(member_id, tx_bytes,
911 				internals->member_update_idx, &bwg_array[i]);
912 		bwg_array[i].member = member_id;
913 
914 		if (update_stats) {
915 			tlb_last_obytets[member_id] = member_stats.obytes;
916 		}
917 	}
918 
919 	if (update_stats == 1)
920 		internals->member_update_idx = 0;
921 
922 	member_count = i;
923 	qsort(bwg_array, member_count, sizeof(bwg_array[0]), bandwidth_cmp);
924 	for (i = 0; i < member_count; i++)
925 		internals->tlb_members_order[i] = bwg_array[i].member;
926 
927 	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_member_cb,
928 			(struct bond_dev_private *)internals);
929 }
930 
931 static uint16_t
932 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
933 {
934 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
935 	struct bond_dev_private *internals = bd_tx_q->dev_private;
936 
937 	struct rte_eth_dev *primary_port =
938 			&rte_eth_devices[internals->primary_port];
939 	uint16_t num_tx_total = 0, num_tx_prep;
940 	uint16_t i, j;
941 
942 	uint16_t num_of_members = internals->active_member_count;
943 	uint16_t members[RTE_MAX_ETHPORTS];
944 
945 	struct rte_ether_hdr *ether_hdr;
946 	struct rte_ether_addr primary_member_addr;
947 	struct rte_ether_addr active_member_addr;
948 
949 	if (num_of_members < 1)
950 		return num_tx_total;
951 
952 	memcpy(members, internals->tlb_members_order,
953 				sizeof(internals->tlb_members_order[0]) * num_of_members);
954 
955 
956 	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_member_addr);
957 
958 	if (nb_pkts > 3) {
959 		for (i = 0; i < 3; i++)
960 			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
961 	}
962 
963 	for (i = 0; i < num_of_members; i++) {
964 		rte_eth_macaddr_get(members[i], &active_member_addr);
965 		for (j = num_tx_total; j < nb_pkts; j++) {
966 			if (j + 3 < nb_pkts)
967 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
968 
969 			ether_hdr = rte_pktmbuf_mtod(bufs[j],
970 						struct rte_ether_hdr *);
971 			if (rte_is_same_ether_addr(&ether_hdr->src_addr,
972 							&primary_member_addr))
973 				rte_ether_addr_copy(&active_member_addr,
974 						&ether_hdr->src_addr);
975 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
976 					mode6_debug("TX IPv4:", ether_hdr, members[i],
977 						&burst_number_TX);
978 #endif
979 		}
980 
981 		num_tx_prep = rte_eth_tx_prepare(members[i], bd_tx_q->queue_id,
982 				bufs + num_tx_total, nb_pkts - num_tx_total);
983 		num_tx_total += rte_eth_tx_burst(members[i], bd_tx_q->queue_id,
984 				bufs + num_tx_total, num_tx_prep);
985 
986 		if (num_tx_total == nb_pkts)
987 			break;
988 	}
989 
990 	return num_tx_total;
991 }
992 
993 void
994 bond_tlb_disable(struct bond_dev_private *internals)
995 {
996 	rte_eal_alarm_cancel(bond_ethdev_update_tlb_member_cb, internals);
997 }
998 
999 void
1000 bond_tlb_enable(struct bond_dev_private *internals)
1001 {
1002 	bond_ethdev_update_tlb_member_cb(internals);
1003 }
1004 
1005 static uint16_t
1006 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1007 {
1008 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1009 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1010 
1011 	struct rte_ether_hdr *eth_h;
1012 	uint16_t ether_type, offset;
1013 
1014 	struct client_data *client_info;
1015 
1016 	/*
1017 	 * We create transmit buffers for every member and one additional to send
1018 	 * through tlb. In worst case every packet will be send on one port.
1019 	 */
1020 	struct rte_mbuf *member_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1021 	uint16_t member_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1022 
1023 	/*
1024 	 * We create separate transmit buffers for update packets as they won't
1025 	 * be counted in num_tx_total.
1026 	 */
1027 	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1028 	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1029 
1030 	struct rte_mbuf *upd_pkt;
1031 	size_t pkt_size;
1032 
1033 	uint16_t num_send, num_not_send = 0;
1034 	uint16_t num_tx_total = 0;
1035 	uint16_t member_idx;
1036 
1037 	int i, j;
1038 
1039 	/* Search tx buffer for ARP packets and forward them to alb */
1040 	for (i = 0; i < nb_pkts; i++) {
1041 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1042 		ether_type = eth_h->ether_type;
1043 		offset = get_vlan_offset(eth_h, &ether_type);
1044 
1045 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1046 			member_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1047 
1048 			/* Change src mac in eth header */
1049 			rte_eth_macaddr_get(member_idx, &eth_h->src_addr);
1050 
1051 			/* Add packet to member tx buffer */
1052 			member_bufs[member_idx][member_bufs_pkts[member_idx]] = bufs[i];
1053 			member_bufs_pkts[member_idx]++;
1054 		} else {
1055 			/* If packet is not ARP, send it with TLB policy */
1056 			member_bufs[RTE_MAX_ETHPORTS][member_bufs_pkts[RTE_MAX_ETHPORTS]] =
1057 					bufs[i];
1058 			member_bufs_pkts[RTE_MAX_ETHPORTS]++;
1059 		}
1060 	}
1061 
1062 	/* Update connected client ARP tables */
1063 	if (internals->mode6.ntt) {
1064 		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1065 			client_info = &internals->mode6.client_table[i];
1066 
1067 			if (client_info->in_use) {
1068 				/* Allocate new packet to send ARP update on current member */
1069 				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1070 				if (upd_pkt == NULL) {
1071 					RTE_BOND_LOG(ERR,
1072 						     "Failed to allocate ARP packet from pool");
1073 					continue;
1074 				}
1075 				pkt_size = sizeof(struct rte_ether_hdr) +
1076 					sizeof(struct rte_arp_hdr) +
1077 					client_info->vlan_count *
1078 					sizeof(struct rte_vlan_hdr);
1079 				upd_pkt->data_len = pkt_size;
1080 				upd_pkt->pkt_len = pkt_size;
1081 
1082 				member_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1083 						internals);
1084 
1085 				/* Add packet to update tx buffer */
1086 				update_bufs[member_idx][update_bufs_pkts[member_idx]] = upd_pkt;
1087 				update_bufs_pkts[member_idx]++;
1088 			}
1089 		}
1090 		internals->mode6.ntt = 0;
1091 	}
1092 
1093 	/* Send ARP packets on proper members */
1094 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1095 		if (member_bufs_pkts[i] > 0) {
1096 			num_send = rte_eth_tx_prepare(i, bd_tx_q->queue_id,
1097 					member_bufs[i], member_bufs_pkts[i]);
1098 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1099 					member_bufs[i], num_send);
1100 			for (j = 0; j < member_bufs_pkts[i] - num_send; j++) {
1101 				bufs[nb_pkts - 1 - num_not_send - j] =
1102 						member_bufs[i][nb_pkts - 1 - j];
1103 			}
1104 
1105 			num_tx_total += num_send;
1106 			num_not_send += member_bufs_pkts[i] - num_send;
1107 
1108 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1109 	/* Print TX stats including update packets */
1110 			for (j = 0; j < member_bufs_pkts[i]; j++) {
1111 				eth_h = rte_pktmbuf_mtod(member_bufs[i][j],
1112 							struct rte_ether_hdr *);
1113 				mode6_debug("TX ARP:", eth_h, i, &burst_number_TX);
1114 			}
1115 #endif
1116 		}
1117 	}
1118 
1119 	/* Send update packets on proper members */
1120 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1121 		if (update_bufs_pkts[i] > 0) {
1122 			num_send = rte_eth_tx_prepare(i, bd_tx_q->queue_id,
1123 					update_bufs[i], update_bufs_pkts[i]);
1124 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1125 					num_send);
1126 			for (j = num_send; j < update_bufs_pkts[i]; j++) {
1127 				rte_pktmbuf_free(update_bufs[i][j]);
1128 			}
1129 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1130 			for (j = 0; j < update_bufs_pkts[i]; j++) {
1131 				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1132 							struct rte_ether_hdr *);
1133 				mode6_debug("TX ARPupd:", eth_h, i, &burst_number_TX);
1134 			}
1135 #endif
1136 		}
1137 	}
1138 
1139 	/* Send non-ARP packets using tlb policy */
1140 	if (member_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1141 		num_send = bond_ethdev_tx_burst_tlb(queue,
1142 				member_bufs[RTE_MAX_ETHPORTS],
1143 				member_bufs_pkts[RTE_MAX_ETHPORTS]);
1144 
1145 		for (j = 0; j < member_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1146 			bufs[nb_pkts - 1 - num_not_send - j] =
1147 					member_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1148 		}
1149 
1150 		num_tx_total += num_send;
1151 	}
1152 
1153 	return num_tx_total;
1154 }
1155 
1156 static inline uint16_t
1157 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1158 		 uint16_t *member_port_ids, uint16_t member_count)
1159 {
1160 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1161 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1162 
1163 	/* Array to sort mbufs for transmission on each member into */
1164 	struct rte_mbuf *member_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1165 	/* Number of mbufs for transmission on each member */
1166 	uint16_t member_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1167 	/* Mapping array generated by hash function to map mbufs to members */
1168 	uint16_t bufs_member_port_idxs[nb_bufs];
1169 
1170 	uint16_t member_tx_count;
1171 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1172 
1173 	uint16_t i;
1174 
1175 	/*
1176 	 * Populate members mbuf with the packets which are to be sent on it
1177 	 * selecting output member using hash based on xmit policy
1178 	 */
1179 	internals->burst_xmit_hash(bufs, nb_bufs, member_count,
1180 			bufs_member_port_idxs);
1181 
1182 	for (i = 0; i < nb_bufs; i++) {
1183 		/* Populate member mbuf arrays with mbufs for that member. */
1184 		uint16_t member_idx = bufs_member_port_idxs[i];
1185 
1186 		member_bufs[member_idx][member_nb_bufs[member_idx]++] = bufs[i];
1187 	}
1188 
1189 	/* Send packet burst on each member device */
1190 	for (i = 0; i < member_count; i++) {
1191 		if (member_nb_bufs[i] == 0)
1192 			continue;
1193 
1194 		member_tx_count = rte_eth_tx_prepare(member_port_ids[i],
1195 				bd_tx_q->queue_id, member_bufs[i],
1196 				member_nb_bufs[i]);
1197 		member_tx_count = rte_eth_tx_burst(member_port_ids[i],
1198 				bd_tx_q->queue_id, member_bufs[i],
1199 				member_tx_count);
1200 
1201 		total_tx_count += member_tx_count;
1202 
1203 		/* If tx burst fails move packets to end of bufs */
1204 		if (unlikely(member_tx_count < member_nb_bufs[i])) {
1205 			int member_tx_fail_count = member_nb_bufs[i] -
1206 					member_tx_count;
1207 			total_tx_fail_count += member_tx_fail_count;
1208 			memcpy(&bufs[nb_bufs - total_tx_fail_count],
1209 			       &member_bufs[i][member_tx_count],
1210 			       member_tx_fail_count * sizeof(bufs[0]));
1211 		}
1212 	}
1213 
1214 	return total_tx_count;
1215 }
1216 
1217 static uint16_t
1218 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1219 		uint16_t nb_bufs)
1220 {
1221 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1222 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1223 
1224 	uint16_t member_port_ids[RTE_MAX_ETHPORTS];
1225 	uint16_t member_count;
1226 
1227 	if (unlikely(nb_bufs == 0))
1228 		return 0;
1229 
1230 	/* Copy member list to protect against member up/down changes during tx
1231 	 * bursting
1232 	 */
1233 	member_count = internals->active_member_count;
1234 	if (unlikely(member_count < 1))
1235 		return 0;
1236 
1237 	memcpy(member_port_ids, internals->active_members,
1238 			sizeof(member_port_ids[0]) * member_count);
1239 	return tx_burst_balance(queue, bufs, nb_bufs, member_port_ids,
1240 				member_count);
1241 }
1242 
1243 static inline uint16_t
1244 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1245 		bool dedicated_txq)
1246 {
1247 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1248 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1249 
1250 	uint16_t member_port_ids[RTE_MAX_ETHPORTS];
1251 	uint16_t member_count;
1252 
1253 	uint16_t dist_member_port_ids[RTE_MAX_ETHPORTS];
1254 	uint16_t dist_member_count;
1255 
1256 	uint16_t member_tx_count;
1257 
1258 	uint16_t i;
1259 
1260 	/* Copy member list to protect against member up/down changes during tx
1261 	 * bursting */
1262 	member_count = internals->active_member_count;
1263 	if (unlikely(member_count < 1))
1264 		return 0;
1265 
1266 	memcpy(member_port_ids, internals->active_members,
1267 			sizeof(member_port_ids[0]) * member_count);
1268 
1269 	if (dedicated_txq)
1270 		goto skip_tx_ring;
1271 
1272 	/* Check for LACP control packets and send if available */
1273 	for (i = 0; i < member_count; i++) {
1274 		struct port *port = &bond_mode_8023ad_ports[member_port_ids[i]];
1275 		struct rte_mbuf *ctrl_pkt = NULL;
1276 
1277 		if (likely(rte_ring_empty(port->tx_ring)))
1278 			continue;
1279 
1280 		if (rte_ring_dequeue(port->tx_ring,
1281 				     (void **)&ctrl_pkt) != -ENOENT) {
1282 			member_tx_count = rte_eth_tx_prepare(member_port_ids[i],
1283 					bd_tx_q->queue_id, &ctrl_pkt, 1);
1284 			member_tx_count = rte_eth_tx_burst(member_port_ids[i],
1285 					bd_tx_q->queue_id, &ctrl_pkt, member_tx_count);
1286 			/*
1287 			 * re-enqueue LAG control plane packets to buffering
1288 			 * ring if transmission fails so the packet isn't lost.
1289 			 */
1290 			if (member_tx_count != 1)
1291 				rte_ring_enqueue(port->tx_ring,	ctrl_pkt);
1292 		}
1293 	}
1294 
1295 skip_tx_ring:
1296 	if (unlikely(nb_bufs == 0))
1297 		return 0;
1298 
1299 	dist_member_count = 0;
1300 	for (i = 0; i < member_count; i++) {
1301 		struct port *port = &bond_mode_8023ad_ports[member_port_ids[i]];
1302 
1303 		if (ACTOR_STATE(port, DISTRIBUTING))
1304 			dist_member_port_ids[dist_member_count++] =
1305 					member_port_ids[i];
1306 	}
1307 
1308 	if (unlikely(dist_member_count < 1))
1309 		return 0;
1310 
1311 	return tx_burst_balance(queue, bufs, nb_bufs, dist_member_port_ids,
1312 				dist_member_count);
1313 }
1314 
1315 static uint16_t
1316 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1317 		uint16_t nb_bufs)
1318 {
1319 	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1320 }
1321 
1322 static uint16_t
1323 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1324 		uint16_t nb_bufs)
1325 {
1326 	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
1327 }
1328 
1329 static uint16_t
1330 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1331 		uint16_t nb_pkts)
1332 {
1333 	struct bond_dev_private *internals;
1334 	struct bond_tx_queue *bd_tx_q;
1335 
1336 	uint16_t members[RTE_MAX_ETHPORTS];
1337 	uint8_t tx_failed_flag = 0;
1338 	uint16_t num_of_members;
1339 
1340 	uint16_t max_nb_of_tx_pkts = 0;
1341 
1342 	int member_tx_total[RTE_MAX_ETHPORTS];
1343 	int i, most_successful_tx_member = -1;
1344 
1345 	bd_tx_q = (struct bond_tx_queue *)queue;
1346 	internals = bd_tx_q->dev_private;
1347 
1348 	/* Copy member list to protect against member up/down changes during tx
1349 	 * bursting */
1350 	num_of_members = internals->active_member_count;
1351 	memcpy(members, internals->active_members,
1352 			sizeof(internals->active_members[0]) * num_of_members);
1353 
1354 	if (num_of_members < 1)
1355 		return 0;
1356 
1357 	/* It is rare that bond different PMDs together, so just call tx-prepare once */
1358 	nb_pkts = rte_eth_tx_prepare(members[0], bd_tx_q->queue_id, bufs, nb_pkts);
1359 
1360 	/* Increment reference count on mbufs */
1361 	for (i = 0; i < nb_pkts; i++)
1362 		rte_pktmbuf_refcnt_update(bufs[i], num_of_members - 1);
1363 
1364 	/* Transmit burst on each active member */
1365 	for (i = 0; i < num_of_members; i++) {
1366 		member_tx_total[i] = rte_eth_tx_burst(members[i], bd_tx_q->queue_id,
1367 					bufs, nb_pkts);
1368 
1369 		if (unlikely(member_tx_total[i] < nb_pkts))
1370 			tx_failed_flag = 1;
1371 
1372 		/* record the value and member index for the member which transmits the
1373 		 * maximum number of packets */
1374 		if (member_tx_total[i] > max_nb_of_tx_pkts) {
1375 			max_nb_of_tx_pkts = member_tx_total[i];
1376 			most_successful_tx_member = i;
1377 		}
1378 	}
1379 
1380 	/* if members fail to transmit packets from burst, the calling application
1381 	 * is not expected to know about multiple references to packets so we must
1382 	 * handle failures of all packets except those of the most successful member
1383 	 */
1384 	if (unlikely(tx_failed_flag))
1385 		for (i = 0; i < num_of_members; i++)
1386 			if (i != most_successful_tx_member)
1387 				while (member_tx_total[i] < nb_pkts)
1388 					rte_pktmbuf_free(bufs[member_tx_total[i]++]);
1389 
1390 	return max_nb_of_tx_pkts;
1391 }
1392 
1393 static void
1394 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *member_link)
1395 {
1396 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1397 
1398 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1399 		/**
1400 		 * If in mode 4 then save the link properties of the first
1401 		 * member, all subsequent members must match these properties
1402 		 */
1403 		struct rte_eth_link *bond_link = &bond_ctx->mode4.member_link;
1404 
1405 		bond_link->link_autoneg = member_link->link_autoneg;
1406 		bond_link->link_duplex = member_link->link_duplex;
1407 		bond_link->link_speed = member_link->link_speed;
1408 	} else {
1409 		/**
1410 		 * In any other mode the link properties are set to default
1411 		 * values of AUTONEG/DUPLEX
1412 		 */
1413 		ethdev->data->dev_link.link_autoneg = RTE_ETH_LINK_AUTONEG;
1414 		ethdev->data->dev_link.link_duplex = RTE_ETH_LINK_FULL_DUPLEX;
1415 	}
1416 }
1417 
1418 static int
1419 link_properties_valid(struct rte_eth_dev *ethdev,
1420 		struct rte_eth_link *member_link)
1421 {
1422 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1423 
1424 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1425 		struct rte_eth_link *bond_link = &bond_ctx->mode4.member_link;
1426 
1427 		if (bond_link->link_duplex != member_link->link_duplex ||
1428 			bond_link->link_autoneg != member_link->link_autoneg ||
1429 			bond_link->link_speed != member_link->link_speed)
1430 			return -1;
1431 	}
1432 
1433 	return 0;
1434 }
1435 
1436 int
1437 mac_address_get(struct rte_eth_dev *eth_dev,
1438 		struct rte_ether_addr *dst_mac_addr)
1439 {
1440 	struct rte_ether_addr *mac_addr;
1441 
1442 	if (eth_dev == NULL) {
1443 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1444 		return -1;
1445 	}
1446 
1447 	if (dst_mac_addr == NULL) {
1448 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1449 		return -1;
1450 	}
1451 
1452 	mac_addr = eth_dev->data->mac_addrs;
1453 
1454 	rte_ether_addr_copy(mac_addr, dst_mac_addr);
1455 	return 0;
1456 }
1457 
1458 int
1459 mac_address_set(struct rte_eth_dev *eth_dev,
1460 		struct rte_ether_addr *new_mac_addr)
1461 {
1462 	struct rte_ether_addr *mac_addr;
1463 
1464 	if (eth_dev == NULL) {
1465 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1466 		return -1;
1467 	}
1468 
1469 	if (new_mac_addr == NULL) {
1470 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1471 		return -1;
1472 	}
1473 
1474 	mac_addr = eth_dev->data->mac_addrs;
1475 
1476 	/* If new MAC is different to current MAC then update */
1477 	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1478 		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1479 
1480 	return 0;
1481 }
1482 
1483 static const struct rte_ether_addr null_mac_addr;
1484 
1485 /*
1486  * Add additional MAC addresses to the member
1487  */
1488 int
1489 member_add_mac_addresses(struct rte_eth_dev *bonding_eth_dev,
1490 		uint16_t member_port_id)
1491 {
1492 	int i, ret;
1493 	struct rte_ether_addr *mac_addr;
1494 
1495 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1496 		mac_addr = &bonding_eth_dev->data->mac_addrs[i];
1497 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1498 			break;
1499 
1500 		ret = rte_eth_dev_mac_addr_add(member_port_id, mac_addr, 0);
1501 		if (ret < 0) {
1502 			/* rollback */
1503 			for (i--; i > 0; i--)
1504 				rte_eth_dev_mac_addr_remove(member_port_id,
1505 					&bonding_eth_dev->data->mac_addrs[i]);
1506 			return ret;
1507 		}
1508 	}
1509 
1510 	return 0;
1511 }
1512 
1513 /*
1514  * Remove additional MAC addresses from the member
1515  */
1516 int
1517 member_remove_mac_addresses(struct rte_eth_dev *bonding_eth_dev,
1518 		uint16_t member_port_id)
1519 {
1520 	int i, rc, ret;
1521 	struct rte_ether_addr *mac_addr;
1522 
1523 	rc = 0;
1524 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1525 		mac_addr = &bonding_eth_dev->data->mac_addrs[i];
1526 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1527 			break;
1528 
1529 		ret = rte_eth_dev_mac_addr_remove(member_port_id, mac_addr);
1530 		/* save only the first error */
1531 		if (ret < 0 && rc == 0)
1532 			rc = ret;
1533 	}
1534 
1535 	return rc;
1536 }
1537 
1538 int
1539 mac_address_members_update(struct rte_eth_dev *bonding_eth_dev)
1540 {
1541 	struct bond_dev_private *internals = bonding_eth_dev->data->dev_private;
1542 	bool set;
1543 	int i;
1544 
1545 	/* Update member devices MAC addresses */
1546 	if (internals->member_count < 1)
1547 		return -1;
1548 
1549 	switch (internals->mode) {
1550 	case BONDING_MODE_ROUND_ROBIN:
1551 	case BONDING_MODE_BALANCE:
1552 	case BONDING_MODE_BROADCAST:
1553 		for (i = 0; i < internals->member_count; i++) {
1554 			if (rte_eth_dev_default_mac_addr_set(
1555 					internals->members[i].port_id,
1556 					bonding_eth_dev->data->mac_addrs)) {
1557 				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1558 						internals->members[i].port_id);
1559 				return -1;
1560 			}
1561 		}
1562 		break;
1563 	case BONDING_MODE_8023AD:
1564 		bond_mode_8023ad_mac_address_update(bonding_eth_dev);
1565 		break;
1566 	case BONDING_MODE_ACTIVE_BACKUP:
1567 	case BONDING_MODE_TLB:
1568 	case BONDING_MODE_ALB:
1569 	default:
1570 		set = true;
1571 		for (i = 0; i < internals->member_count; i++) {
1572 			if (internals->members[i].port_id ==
1573 					internals->current_primary_port) {
1574 				if (rte_eth_dev_default_mac_addr_set(
1575 						internals->current_primary_port,
1576 						bonding_eth_dev->data->mac_addrs)) {
1577 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1578 							internals->current_primary_port);
1579 					set = false;
1580 				}
1581 			} else {
1582 				if (rte_eth_dev_default_mac_addr_set(
1583 						internals->members[i].port_id,
1584 						&internals->members[i].persisted_mac_addr)) {
1585 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1586 							internals->members[i].port_id);
1587 				}
1588 			}
1589 		}
1590 		if (!set)
1591 			return -1;
1592 	}
1593 
1594 	return 0;
1595 }
1596 
1597 int
1598 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, uint8_t mode)
1599 {
1600 	struct bond_dev_private *internals;
1601 
1602 	internals = eth_dev->data->dev_private;
1603 
1604 	switch (mode) {
1605 	case BONDING_MODE_ROUND_ROBIN:
1606 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1607 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1608 		break;
1609 	case BONDING_MODE_ACTIVE_BACKUP:
1610 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1611 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1612 		break;
1613 	case BONDING_MODE_BALANCE:
1614 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1615 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1616 		break;
1617 	case BONDING_MODE_BROADCAST:
1618 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1619 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1620 		break;
1621 	case BONDING_MODE_8023AD:
1622 		if (bond_mode_8023ad_enable(eth_dev) != 0)
1623 			return -1;
1624 
1625 		if (internals->mode4.dedicated_queues.enabled == 0) {
1626 			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1627 			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1628 			RTE_BOND_LOG(WARNING,
1629 				"Using mode 4, it is necessary to do TX burst "
1630 				"and RX burst at least every 100ms.");
1631 		} else {
1632 			/* Use flow director's optimization */
1633 			eth_dev->rx_pkt_burst =
1634 					bond_ethdev_rx_burst_8023ad_fast_queue;
1635 			eth_dev->tx_pkt_burst =
1636 					bond_ethdev_tx_burst_8023ad_fast_queue;
1637 		}
1638 		break;
1639 	case BONDING_MODE_TLB:
1640 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1641 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1642 		break;
1643 	case BONDING_MODE_ALB:
1644 		if (bond_mode_alb_enable(eth_dev) != 0)
1645 			return -1;
1646 
1647 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1648 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1649 		break;
1650 	default:
1651 		return -1;
1652 	}
1653 
1654 	internals->mode = mode;
1655 
1656 	return 0;
1657 }
1658 
1659 
1660 static int
1661 member_configure_slow_queue(struct rte_eth_dev *bonding_eth_dev,
1662 		struct rte_eth_dev *member_eth_dev)
1663 {
1664 	int errval = 0;
1665 	struct bond_dev_private *internals = bonding_eth_dev->data->dev_private;
1666 	struct port *port = &bond_mode_8023ad_ports[member_eth_dev->data->port_id];
1667 
1668 	if (port->slow_pool == NULL) {
1669 		char mem_name[256];
1670 		int member_id = member_eth_dev->data->port_id;
1671 
1672 		snprintf(mem_name, RTE_DIM(mem_name), "member_port%u_slow_pool",
1673 				member_id);
1674 		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1675 			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1676 			member_eth_dev->data->numa_node);
1677 
1678 		/* Any memory allocation failure in initialization is critical because
1679 		 * resources can't be free, so reinitialization is impossible. */
1680 		if (port->slow_pool == NULL) {
1681 			rte_panic("Member %u: Failed to create memory pool '%s': %s\n",
1682 				member_id, mem_name, rte_strerror(rte_errno));
1683 		}
1684 	}
1685 
1686 	if (internals->mode4.dedicated_queues.enabled == 1) {
1687 		struct rte_eth_dev_info member_info = {};
1688 		uint16_t nb_rx_desc = SLOW_RX_QUEUE_HW_DEFAULT_SIZE;
1689 		uint16_t nb_tx_desc = SLOW_TX_QUEUE_HW_DEFAULT_SIZE;
1690 
1691 		errval = rte_eth_dev_info_get(member_eth_dev->data->port_id,
1692 				&member_info);
1693 		if (errval != 0) {
1694 			RTE_BOND_LOG(ERR,
1695 					"rte_eth_dev_info_get: port=%d, err (%d)",
1696 					member_eth_dev->data->port_id,
1697 					errval);
1698 			return errval;
1699 		}
1700 
1701 		if (member_info.rx_desc_lim.nb_min != 0)
1702 			nb_rx_desc = member_info.rx_desc_lim.nb_min;
1703 
1704 		/* Configure slow Rx queue */
1705 		errval = rte_eth_rx_queue_setup(member_eth_dev->data->port_id,
1706 				internals->mode4.dedicated_queues.rx_qid, nb_rx_desc,
1707 				rte_eth_dev_socket_id(member_eth_dev->data->port_id),
1708 				NULL, port->slow_pool);
1709 		if (errval != 0) {
1710 			RTE_BOND_LOG(ERR,
1711 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1712 					member_eth_dev->data->port_id,
1713 					internals->mode4.dedicated_queues.rx_qid,
1714 					errval);
1715 			return errval;
1716 		}
1717 
1718 		if (member_info.tx_desc_lim.nb_min != 0)
1719 			nb_tx_desc = member_info.tx_desc_lim.nb_min;
1720 
1721 		errval = rte_eth_tx_queue_setup(member_eth_dev->data->port_id,
1722 				internals->mode4.dedicated_queues.tx_qid, nb_tx_desc,
1723 				rte_eth_dev_socket_id(member_eth_dev->data->port_id),
1724 				NULL);
1725 		if (errval != 0) {
1726 			RTE_BOND_LOG(ERR,
1727 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1728 				member_eth_dev->data->port_id,
1729 				internals->mode4.dedicated_queues.tx_qid,
1730 				errval);
1731 			return errval;
1732 		}
1733 	}
1734 	return 0;
1735 }
1736 
1737 int
1738 member_configure(struct rte_eth_dev *bonding_eth_dev,
1739 		struct rte_eth_dev *member_eth_dev)
1740 {
1741 	uint16_t nb_rx_queues;
1742 	uint16_t nb_tx_queues;
1743 
1744 	int errval;
1745 
1746 	struct bond_dev_private *internals = bonding_eth_dev->data->dev_private;
1747 
1748 	/* Stop member */
1749 	errval = rte_eth_dev_stop(member_eth_dev->data->port_id);
1750 	if (errval != 0)
1751 		RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1752 			     member_eth_dev->data->port_id, errval);
1753 
1754 	/* Enable interrupts on member device if supported */
1755 	if (member_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1756 		member_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1757 
1758 	/* If RSS is enabled for bonding, try to enable it for members  */
1759 	if (bonding_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG) {
1760 		/* rss_key won't be empty if RSS is configured in bonding dev */
1761 		member_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1762 					internals->rss_key_len;
1763 		member_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1764 					internals->rss_key;
1765 
1766 		member_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1767 				bonding_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1768 		member_eth_dev->data->dev_conf.rxmode.mq_mode =
1769 				bonding_eth_dev->data->dev_conf.rxmode.mq_mode;
1770 	} else {
1771 		member_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
1772 		member_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1773 		member_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0;
1774 		member_eth_dev->data->dev_conf.rxmode.mq_mode =
1775 				bonding_eth_dev->data->dev_conf.rxmode.mq_mode;
1776 	}
1777 
1778 	member_eth_dev->data->dev_conf.rxmode.mtu =
1779 			bonding_eth_dev->data->dev_conf.rxmode.mtu;
1780 	member_eth_dev->data->dev_conf.link_speeds =
1781 			bonding_eth_dev->data->dev_conf.link_speeds;
1782 
1783 	member_eth_dev->data->dev_conf.txmode.offloads =
1784 			bonding_eth_dev->data->dev_conf.txmode.offloads;
1785 
1786 	member_eth_dev->data->dev_conf.rxmode.offloads =
1787 			bonding_eth_dev->data->dev_conf.rxmode.offloads;
1788 
1789 	nb_rx_queues = bonding_eth_dev->data->nb_rx_queues;
1790 	nb_tx_queues = bonding_eth_dev->data->nb_tx_queues;
1791 
1792 	if (internals->mode == BONDING_MODE_8023AD) {
1793 		if (internals->mode4.dedicated_queues.enabled == 1) {
1794 			nb_rx_queues++;
1795 			nb_tx_queues++;
1796 		}
1797 	}
1798 
1799 	/* Configure device */
1800 	errval = rte_eth_dev_configure(member_eth_dev->data->port_id,
1801 			nb_rx_queues, nb_tx_queues,
1802 			&member_eth_dev->data->dev_conf);
1803 	if (errval != 0) {
1804 		RTE_BOND_LOG(ERR, "Cannot configure member device: port %u, err (%d)",
1805 				member_eth_dev->data->port_id, errval);
1806 		return errval;
1807 	}
1808 
1809 	errval = rte_eth_dev_set_mtu(member_eth_dev->data->port_id,
1810 				     bonding_eth_dev->data->mtu);
1811 	if (errval != 0 && errval != -ENOTSUP) {
1812 		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1813 				member_eth_dev->data->port_id, errval);
1814 		return errval;
1815 	}
1816 	return 0;
1817 }
1818 
1819 int
1820 member_start(struct rte_eth_dev *bonding_eth_dev,
1821 		struct rte_eth_dev *member_eth_dev)
1822 {
1823 	int errval = 0;
1824 	struct bond_rx_queue *bd_rx_q;
1825 	struct bond_tx_queue *bd_tx_q;
1826 	uint16_t q_id;
1827 	struct rte_flow_error flow_error;
1828 	struct bond_dev_private *internals = bonding_eth_dev->data->dev_private;
1829 	uint16_t member_port_id = member_eth_dev->data->port_id;
1830 
1831 	/* Setup Rx Queues */
1832 	for (q_id = 0; q_id < bonding_eth_dev->data->nb_rx_queues; q_id++) {
1833 		bd_rx_q = (struct bond_rx_queue *)bonding_eth_dev->data->rx_queues[q_id];
1834 
1835 		errval = rte_eth_rx_queue_setup(member_port_id, q_id,
1836 				bd_rx_q->nb_rx_desc,
1837 				rte_eth_dev_socket_id(member_port_id),
1838 				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1839 		if (errval != 0) {
1840 			RTE_BOND_LOG(ERR,
1841 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1842 					member_port_id, q_id, errval);
1843 			return errval;
1844 		}
1845 	}
1846 
1847 	/* Setup Tx Queues */
1848 	for (q_id = 0; q_id < bonding_eth_dev->data->nb_tx_queues; q_id++) {
1849 		bd_tx_q = (struct bond_tx_queue *)bonding_eth_dev->data->tx_queues[q_id];
1850 
1851 		errval = rte_eth_tx_queue_setup(member_port_id, q_id,
1852 				bd_tx_q->nb_tx_desc,
1853 				rte_eth_dev_socket_id(member_port_id),
1854 				&bd_tx_q->tx_conf);
1855 		if (errval != 0) {
1856 			RTE_BOND_LOG(ERR,
1857 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1858 				member_port_id, q_id, errval);
1859 			return errval;
1860 		}
1861 	}
1862 
1863 	if (internals->mode == BONDING_MODE_8023AD &&
1864 			internals->mode4.dedicated_queues.enabled == 1) {
1865 		if (member_configure_slow_queue(bonding_eth_dev, member_eth_dev)
1866 				!= 0)
1867 			return errval;
1868 
1869 		errval = bond_ethdev_8023ad_flow_verify(bonding_eth_dev,
1870 				member_port_id);
1871 		if (errval != 0) {
1872 			RTE_BOND_LOG(ERR,
1873 				"bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1874 				member_port_id, errval);
1875 			return errval;
1876 		}
1877 
1878 		if (internals->mode4.dedicated_queues.flow[member_port_id] != NULL) {
1879 			errval = rte_flow_destroy(member_port_id,
1880 					internals->mode4.dedicated_queues.flow[member_port_id],
1881 					&flow_error);
1882 			RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_destroy: port=%d, err (%d)",
1883 				member_port_id, errval);
1884 		}
1885 	}
1886 
1887 	/* Start device */
1888 	errval = rte_eth_dev_start(member_port_id);
1889 	if (errval != 0) {
1890 		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1891 				member_port_id, errval);
1892 		return -1;
1893 	}
1894 
1895 	if (internals->mode == BONDING_MODE_8023AD &&
1896 			internals->mode4.dedicated_queues.enabled == 1) {
1897 		errval = bond_ethdev_8023ad_flow_set(bonding_eth_dev,
1898 				member_port_id);
1899 		if (errval != 0) {
1900 			RTE_BOND_LOG(ERR,
1901 				"bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
1902 				member_port_id, errval);
1903 			return errval;
1904 		}
1905 	}
1906 
1907 	/* If RSS is enabled for bonding, synchronize RETA */
1908 	if (bonding_eth_dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
1909 		int i;
1910 		struct bond_dev_private *internals;
1911 
1912 		internals = bonding_eth_dev->data->dev_private;
1913 
1914 		for (i = 0; i < internals->member_count; i++) {
1915 			if (internals->members[i].port_id == member_port_id) {
1916 				errval = rte_eth_dev_rss_reta_update(
1917 						member_port_id,
1918 						&internals->reta_conf[0],
1919 						internals->members[i].reta_size);
1920 				if (errval != 0) {
1921 					RTE_BOND_LOG(WARNING,
1922 						     "rte_eth_dev_rss_reta_update on member port %d fails (err %d)."
1923 						     " RSS Configuration for bonding may be inconsistent.",
1924 						     member_port_id, errval);
1925 				}
1926 				break;
1927 			}
1928 		}
1929 	}
1930 
1931 	/* If lsc interrupt is set, check initial member's link status */
1932 	if (member_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1933 		member_eth_dev->dev_ops->link_update(member_eth_dev, 0);
1934 		bond_ethdev_lsc_event_callback(member_port_id,
1935 			RTE_ETH_EVENT_INTR_LSC, &bonding_eth_dev->data->port_id,
1936 			NULL);
1937 	}
1938 
1939 	return 0;
1940 }
1941 
1942 void
1943 member_remove(struct bond_dev_private *internals,
1944 		struct rte_eth_dev *member_eth_dev)
1945 {
1946 	uint16_t i;
1947 
1948 	for (i = 0; i < internals->member_count; i++)
1949 		if (internals->members[i].port_id ==
1950 				member_eth_dev->data->port_id)
1951 			break;
1952 
1953 	if (i < (internals->member_count - 1)) {
1954 		struct rte_flow *flow;
1955 
1956 		memmove(&internals->members[i], &internals->members[i + 1],
1957 				sizeof(internals->members[0]) *
1958 				(internals->member_count - i - 1));
1959 		TAILQ_FOREACH(flow, &internals->flow_list, next) {
1960 			memmove(&flow->flows[i], &flow->flows[i + 1],
1961 				sizeof(flow->flows[0]) *
1962 				(internals->member_count - i - 1));
1963 			flow->flows[internals->member_count - 1] = NULL;
1964 		}
1965 	}
1966 
1967 	internals->member_count--;
1968 
1969 	/* force reconfiguration of member interfaces */
1970 	rte_eth_dev_internal_reset(member_eth_dev);
1971 }
1972 
1973 static void
1974 bond_ethdev_member_link_status_change_monitor(void *cb_arg);
1975 
1976 void
1977 member_add(struct bond_dev_private *internals,
1978 		struct rte_eth_dev *member_eth_dev)
1979 {
1980 	struct bond_member_details *member_details =
1981 			&internals->members[internals->member_count];
1982 
1983 	member_details->port_id = member_eth_dev->data->port_id;
1984 	member_details->last_link_status = 0;
1985 
1986 	/* Mark member devices that don't support interrupts so we can
1987 	 * compensate when we start the bond
1988 	 */
1989 	if (!(member_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))
1990 		member_details->link_status_poll_enabled = 1;
1991 
1992 	member_details->link_status_wait_to_complete = 0;
1993 	/* clean tlb_last_obytes when adding port for bonding device */
1994 	memcpy(&member_details->persisted_mac_addr, member_eth_dev->data->mac_addrs,
1995 			sizeof(struct rte_ether_addr));
1996 }
1997 
1998 void
1999 bond_ethdev_primary_set(struct bond_dev_private *internals,
2000 		uint16_t member_port_id)
2001 {
2002 	int i;
2003 
2004 	if (internals->active_member_count < 1)
2005 		internals->current_primary_port = member_port_id;
2006 	else
2007 		/* Search bonding device member ports for new proposed primary port */
2008 		for (i = 0; i < internals->active_member_count; i++) {
2009 			if (internals->active_members[i] == member_port_id)
2010 				internals->current_primary_port = member_port_id;
2011 		}
2012 }
2013 
2014 static int
2015 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2016 
2017 static int
2018 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2019 {
2020 	struct bond_dev_private *internals;
2021 	int i;
2022 
2023 	/* member eth dev will be started by bonding device */
2024 	if (check_for_bonding_ethdev(eth_dev)) {
2025 		RTE_BOND_LOG(ERR, "User tried to explicitly start a member eth_dev (%d)",
2026 				eth_dev->data->port_id);
2027 		return -1;
2028 	}
2029 
2030 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
2031 	eth_dev->data->dev_started = 1;
2032 
2033 	internals = eth_dev->data->dev_private;
2034 
2035 	if (internals->member_count == 0) {
2036 		RTE_BOND_LOG(ERR, "Cannot start port since there are no member devices");
2037 		goto out_err;
2038 	}
2039 
2040 	if (internals->user_defined_mac == 0) {
2041 		struct rte_ether_addr *new_mac_addr = NULL;
2042 
2043 		for (i = 0; i < internals->member_count; i++)
2044 			if (internals->members[i].port_id == internals->primary_port)
2045 				new_mac_addr = &internals->members[i].persisted_mac_addr;
2046 
2047 		if (new_mac_addr == NULL)
2048 			goto out_err;
2049 
2050 		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2051 			RTE_BOND_LOG(ERR, "bonding port (%d) failed to update MAC address",
2052 					eth_dev->data->port_id);
2053 			goto out_err;
2054 		}
2055 	}
2056 
2057 	if (internals->mode == BONDING_MODE_8023AD) {
2058 		if (internals->mode4.dedicated_queues.enabled == 1) {
2059 			internals->mode4.dedicated_queues.rx_qid =
2060 					eth_dev->data->nb_rx_queues;
2061 			internals->mode4.dedicated_queues.tx_qid =
2062 					eth_dev->data->nb_tx_queues;
2063 		}
2064 	}
2065 
2066 
2067 	/* Reconfigure each member device if starting bonding device */
2068 	for (i = 0; i < internals->member_count; i++) {
2069 		struct rte_eth_dev *member_ethdev =
2070 				&(rte_eth_devices[internals->members[i].port_id]);
2071 		if (member_configure(eth_dev, member_ethdev) != 0) {
2072 			RTE_BOND_LOG(ERR,
2073 				"bonding port (%d) failed to reconfigure member device (%d)",
2074 				eth_dev->data->port_id,
2075 				internals->members[i].port_id);
2076 			goto out_err;
2077 		}
2078 		if (member_start(eth_dev, member_ethdev) != 0) {
2079 			RTE_BOND_LOG(ERR,
2080 				"bonding port (%d) failed to start member device (%d)",
2081 				eth_dev->data->port_id,
2082 				internals->members[i].port_id);
2083 			goto out_err;
2084 		}
2085 		/* We will need to poll for link status if any member doesn't
2086 		 * support interrupts
2087 		 */
2088 		if (internals->members[i].link_status_poll_enabled)
2089 			internals->link_status_polling_enabled = 1;
2090 	}
2091 
2092 	/* start polling if needed */
2093 	if (internals->link_status_polling_enabled) {
2094 		rte_eal_alarm_set(
2095 			internals->link_status_polling_interval_ms * 1000,
2096 			bond_ethdev_member_link_status_change_monitor,
2097 			(void *)&rte_eth_devices[internals->port_id]);
2098 	}
2099 
2100 	/* Update all member devices MACs*/
2101 	if (mac_address_members_update(eth_dev) != 0)
2102 		goto out_err;
2103 
2104 	if (internals->user_defined_primary_port)
2105 		bond_ethdev_primary_set(internals, internals->primary_port);
2106 
2107 	if (internals->mode == BONDING_MODE_8023AD)
2108 		bond_mode_8023ad_start(eth_dev);
2109 
2110 	if (internals->mode == BONDING_MODE_TLB ||
2111 			internals->mode == BONDING_MODE_ALB)
2112 		bond_tlb_enable(internals);
2113 
2114 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
2115 		eth_dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
2116 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
2117 		eth_dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
2118 
2119 	return 0;
2120 
2121 out_err:
2122 	eth_dev->data->dev_started = 0;
2123 	return -1;
2124 }
2125 
2126 static void
2127 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2128 {
2129 	uint16_t i;
2130 
2131 	if (dev->data->rx_queues != NULL) {
2132 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
2133 			rte_free(dev->data->rx_queues[i]);
2134 			dev->data->rx_queues[i] = NULL;
2135 		}
2136 		dev->data->nb_rx_queues = 0;
2137 	}
2138 
2139 	if (dev->data->tx_queues != NULL) {
2140 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
2141 			rte_free(dev->data->tx_queues[i]);
2142 			dev->data->tx_queues[i] = NULL;
2143 		}
2144 		dev->data->nb_tx_queues = 0;
2145 	}
2146 }
2147 
2148 int
2149 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2150 {
2151 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2152 	uint16_t i;
2153 	int ret;
2154 
2155 	if (internals->mode == BONDING_MODE_8023AD) {
2156 		struct port *port;
2157 		void *pkt = NULL;
2158 
2159 		bond_mode_8023ad_stop(eth_dev);
2160 
2161 		/* Discard all messages to/from mode 4 state machines */
2162 		for (i = 0; i < internals->active_member_count; i++) {
2163 			port = &bond_mode_8023ad_ports[internals->active_members[i]];
2164 
2165 			RTE_ASSERT(port->rx_ring != NULL);
2166 			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2167 				rte_pktmbuf_free(pkt);
2168 
2169 			RTE_ASSERT(port->tx_ring != NULL);
2170 			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2171 				rte_pktmbuf_free(pkt);
2172 		}
2173 	}
2174 
2175 	if (internals->mode == BONDING_MODE_TLB ||
2176 			internals->mode == BONDING_MODE_ALB) {
2177 		bond_tlb_disable(internals);
2178 		for (i = 0; i < internals->active_member_count; i++)
2179 			tlb_last_obytets[internals->active_members[i]] = 0;
2180 	}
2181 
2182 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
2183 	eth_dev->data->dev_started = 0;
2184 
2185 	if (internals->link_status_polling_enabled) {
2186 		rte_eal_alarm_cancel(bond_ethdev_member_link_status_change_monitor,
2187 			(void *)&rte_eth_devices[internals->port_id]);
2188 	}
2189 	internals->link_status_polling_enabled = 0;
2190 	for (i = 0; i < internals->member_count; i++) {
2191 		uint16_t member_id = internals->members[i].port_id;
2192 
2193 		internals->members[i].last_link_status = 0;
2194 		ret = rte_eth_dev_stop(member_id);
2195 		if (ret != 0) {
2196 			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2197 				     member_id);
2198 			return ret;
2199 		}
2200 
2201 		/* active members need to be deactivated. */
2202 		if (find_member_by_id(internals->active_members,
2203 				internals->active_member_count, member_id) !=
2204 					internals->active_member_count)
2205 			deactivate_member(eth_dev, member_id);
2206 	}
2207 
2208 	for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
2209 		eth_dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
2210 	for (i = 0; i < eth_dev->data->nb_tx_queues; i++)
2211 		eth_dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
2212 
2213 	return 0;
2214 }
2215 
2216 static void
2217 bond_ethdev_cfg_cleanup(struct rte_eth_dev *dev, bool remove)
2218 {
2219 	struct bond_dev_private *internals = dev->data->dev_private;
2220 	uint16_t bond_port_id = internals->port_id;
2221 	int skipped = 0;
2222 	struct rte_flow_error ferror;
2223 
2224 	/* Flush flows in all back-end devices before removing them */
2225 	bond_flow_ops.flush(dev, &ferror);
2226 
2227 	while (internals->member_count != skipped) {
2228 		uint16_t port_id = internals->members[skipped].port_id;
2229 		int ret;
2230 
2231 		ret = rte_eth_dev_stop(port_id);
2232 		if (ret != 0) {
2233 			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2234 				     port_id);
2235 		}
2236 
2237 		if (ret != 0 || !remove) {
2238 			skipped++;
2239 			continue;
2240 		}
2241 
2242 		if (rte_eth_bond_member_remove(bond_port_id, port_id) != 0) {
2243 			RTE_BOND_LOG(ERR,
2244 				     "Failed to remove port %d from bonding device %s",
2245 				     port_id, dev->device->name);
2246 			skipped++;
2247 		}
2248 	}
2249 }
2250 
2251 int
2252 bond_ethdev_close(struct rte_eth_dev *dev)
2253 {
2254 	struct bond_dev_private *internals = dev->data->dev_private;
2255 
2256 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2257 		return 0;
2258 
2259 	RTE_BOND_LOG(INFO, "Closing bonding device %s", dev->device->name);
2260 
2261 	bond_ethdev_cfg_cleanup(dev, true);
2262 
2263 	bond_ethdev_free_queues(dev);
2264 	rte_bitmap_reset(internals->vlan_filter_bmp);
2265 	rte_bitmap_free(internals->vlan_filter_bmp);
2266 	rte_free(internals->vlan_filter_bmpmem);
2267 
2268 	/* Try to release mempool used in mode6. If the bond
2269 	 * device is not mode6, free the NULL is not problem.
2270 	 */
2271 	rte_mempool_free(internals->mode6.mempool);
2272 
2273 	rte_kvargs_free(internals->kvlist);
2274 
2275 	return 0;
2276 }
2277 
2278 /* forward declaration */
2279 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2280 
2281 static int
2282 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2283 {
2284 	struct bond_dev_private *internals = dev->data->dev_private;
2285 	struct bond_member_details member;
2286 	int ret;
2287 
2288 	uint16_t max_nb_rx_queues = UINT16_MAX;
2289 	uint16_t max_nb_tx_queues = UINT16_MAX;
2290 
2291 	dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2292 
2293 	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2294 			internals->candidate_max_rx_pktlen :
2295 			RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2296 
2297 	/* Max number of tx/rx queues that the bonding device can support is the
2298 	 * minimum values of the bonding members, as all members must be capable
2299 	 * of supporting the same number of tx/rx queues.
2300 	 */
2301 	if (internals->member_count > 0) {
2302 		struct rte_eth_dev_info member_info;
2303 		uint16_t idx;
2304 
2305 		for (idx = 0; idx < internals->member_count; idx++) {
2306 			member = internals->members[idx];
2307 			ret = rte_eth_dev_info_get(member.port_id, &member_info);
2308 			if (ret != 0) {
2309 				RTE_BOND_LOG(ERR,
2310 					"%s: Error during getting device (port %u) info: %s",
2311 					__func__,
2312 					member.port_id,
2313 					strerror(-ret));
2314 
2315 				return ret;
2316 			}
2317 
2318 			if (member_info.max_rx_queues < max_nb_rx_queues)
2319 				max_nb_rx_queues = member_info.max_rx_queues;
2320 
2321 			if (member_info.max_tx_queues < max_nb_tx_queues)
2322 				max_nb_tx_queues = member_info.max_tx_queues;
2323 		}
2324 	}
2325 
2326 	dev_info->max_rx_queues = max_nb_rx_queues;
2327 	dev_info->max_tx_queues = max_nb_tx_queues;
2328 
2329 	memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2330 	       sizeof(dev_info->default_rxconf));
2331 	memcpy(&dev_info->default_txconf, &internals->default_txconf,
2332 	       sizeof(dev_info->default_txconf));
2333 
2334 	memcpy(&dev_info->rx_desc_lim, &internals->rx_desc_lim,
2335 	       sizeof(dev_info->rx_desc_lim));
2336 	memcpy(&dev_info->tx_desc_lim, &internals->tx_desc_lim,
2337 	       sizeof(dev_info->tx_desc_lim));
2338 
2339 	/**
2340 	 * If dedicated hw queues enabled for link bonding device in LACP mode
2341 	 * then we need to reduce the maximum number of data path queues by 1.
2342 	 */
2343 	if (internals->mode == BONDING_MODE_8023AD &&
2344 		internals->mode4.dedicated_queues.enabled == 1) {
2345 		dev_info->max_rx_queues--;
2346 		dev_info->max_tx_queues--;
2347 	}
2348 
2349 	dev_info->min_rx_bufsize = 0;
2350 
2351 	dev_info->rx_offload_capa = internals->rx_offload_capa;
2352 	dev_info->tx_offload_capa = internals->tx_offload_capa;
2353 	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2354 	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2355 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2356 
2357 	dev_info->reta_size = internals->reta_size;
2358 	dev_info->hash_key_size = internals->rss_key_len;
2359 	dev_info->speed_capa = internals->speed_capa;
2360 
2361 	return 0;
2362 }
2363 
2364 static int
2365 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2366 {
2367 	int res;
2368 	uint16_t i;
2369 	struct bond_dev_private *internals = dev->data->dev_private;
2370 
2371 	/* don't do this while a member is being added */
2372 	rte_spinlock_lock(&internals->lock);
2373 
2374 	if (on)
2375 		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2376 	else
2377 		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2378 
2379 	for (i = 0; i < internals->member_count; i++) {
2380 		uint16_t port_id = internals->members[i].port_id;
2381 
2382 		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2383 		if (res == ENOTSUP)
2384 			RTE_BOND_LOG(WARNING,
2385 				     "Setting VLAN filter on member port %u not supported.",
2386 				     port_id);
2387 	}
2388 
2389 	rte_spinlock_unlock(&internals->lock);
2390 	return 0;
2391 }
2392 
2393 static int
2394 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2395 		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2396 		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2397 {
2398 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2399 			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2400 					0, dev->data->numa_node);
2401 	if (bd_rx_q == NULL)
2402 		return -1;
2403 
2404 	bd_rx_q->queue_id = rx_queue_id;
2405 	bd_rx_q->dev_private = dev->data->dev_private;
2406 
2407 	bd_rx_q->nb_rx_desc = nb_rx_desc;
2408 
2409 	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2410 	bd_rx_q->mb_pool = mb_pool;
2411 
2412 	dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2413 
2414 	return 0;
2415 }
2416 
2417 static int
2418 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2419 		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2420 		const struct rte_eth_txconf *tx_conf)
2421 {
2422 	struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2423 			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2424 					0, dev->data->numa_node);
2425 
2426 	if (bd_tx_q == NULL)
2427 		return -1;
2428 
2429 	bd_tx_q->queue_id = tx_queue_id;
2430 	bd_tx_q->dev_private = dev->data->dev_private;
2431 
2432 	bd_tx_q->nb_tx_desc = nb_tx_desc;
2433 	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2434 
2435 	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2436 
2437 	return 0;
2438 }
2439 
2440 static void
2441 bond_ethdev_rx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
2442 {
2443 	void *queue = dev->data->rx_queues[queue_id];
2444 
2445 	if (queue == NULL)
2446 		return;
2447 
2448 	rte_free(queue);
2449 }
2450 
2451 static void
2452 bond_ethdev_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_id)
2453 {
2454 	void *queue = dev->data->tx_queues[queue_id];
2455 
2456 	if (queue == NULL)
2457 		return;
2458 
2459 	rte_free(queue);
2460 }
2461 
2462 static void
2463 bond_ethdev_member_link_status_change_monitor(void *cb_arg)
2464 {
2465 	struct rte_eth_dev *bonding_ethdev, *member_ethdev;
2466 	struct bond_dev_private *internals;
2467 
2468 	/* Default value for polling member found is true as we don't want to
2469 	 * disable the polling thread if we cannot get the lock */
2470 	int i, polling_member_found = 1;
2471 
2472 	if (cb_arg == NULL)
2473 		return;
2474 
2475 	bonding_ethdev = cb_arg;
2476 	internals = bonding_ethdev->data->dev_private;
2477 
2478 	if (!bonding_ethdev->data->dev_started ||
2479 		!internals->link_status_polling_enabled)
2480 		return;
2481 
2482 	/* If device is currently being configured then don't check members link
2483 	 * status, wait until next period */
2484 	if (rte_spinlock_trylock(&internals->lock)) {
2485 		if (internals->member_count > 0)
2486 			polling_member_found = 0;
2487 
2488 		for (i = 0; i < internals->member_count; i++) {
2489 			if (!internals->members[i].link_status_poll_enabled)
2490 				continue;
2491 
2492 			member_ethdev = &rte_eth_devices[internals->members[i].port_id];
2493 			polling_member_found = 1;
2494 
2495 			/* Update member link status */
2496 			(*member_ethdev->dev_ops->link_update)(member_ethdev,
2497 					internals->members[i].link_status_wait_to_complete);
2498 
2499 			/* if link status has changed since last checked then call lsc
2500 			 * event callback */
2501 			if (member_ethdev->data->dev_link.link_status !=
2502 					internals->members[i].last_link_status) {
2503 				bond_ethdev_lsc_event_callback(internals->members[i].port_id,
2504 						RTE_ETH_EVENT_INTR_LSC,
2505 						&bonding_ethdev->data->port_id,
2506 						NULL);
2507 			}
2508 		}
2509 		rte_spinlock_unlock(&internals->lock);
2510 	}
2511 
2512 	if (polling_member_found)
2513 		/* Set alarm to continue monitoring link status of member ethdev's */
2514 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2515 				bond_ethdev_member_link_status_change_monitor, cb_arg);
2516 }
2517 
2518 static int
2519 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2520 {
2521 	int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2522 
2523 	struct bond_dev_private *bond_ctx;
2524 	struct rte_eth_link member_link;
2525 
2526 	bool one_link_update_succeeded;
2527 	uint32_t idx;
2528 	int ret;
2529 
2530 	bond_ctx = ethdev->data->dev_private;
2531 
2532 	ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2533 
2534 	if (ethdev->data->dev_started == 0 ||
2535 			bond_ctx->active_member_count == 0) {
2536 		ethdev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
2537 		return 0;
2538 	}
2539 
2540 	ethdev->data->dev_link.link_status = RTE_ETH_LINK_UP;
2541 
2542 	if (wait_to_complete)
2543 		link_update = rte_eth_link_get;
2544 	else
2545 		link_update = rte_eth_link_get_nowait;
2546 
2547 	switch (bond_ctx->mode) {
2548 	case BONDING_MODE_BROADCAST:
2549 		/**
2550 		 * Setting link speed to UINT32_MAX to ensure we pick up the
2551 		 * value of the first active member
2552 		 */
2553 		ethdev->data->dev_link.link_speed = UINT32_MAX;
2554 
2555 		/**
2556 		 * link speed is minimum value of all the members link speed as
2557 		 * packet loss will occur on this member if transmission at rates
2558 		 * greater than this are attempted
2559 		 */
2560 		for (idx = 0; idx < bond_ctx->active_member_count; idx++) {
2561 			ret = link_update(bond_ctx->active_members[idx],
2562 					  &member_link);
2563 			if (ret < 0) {
2564 				ethdev->data->dev_link.link_speed =
2565 					RTE_ETH_SPEED_NUM_NONE;
2566 				RTE_BOND_LOG(ERR,
2567 					"Member (port %u) link get failed: %s",
2568 					bond_ctx->active_members[idx],
2569 					rte_strerror(-ret));
2570 				return 0;
2571 			}
2572 
2573 			if (member_link.link_speed <
2574 					ethdev->data->dev_link.link_speed)
2575 				ethdev->data->dev_link.link_speed =
2576 						member_link.link_speed;
2577 		}
2578 		break;
2579 	case BONDING_MODE_ACTIVE_BACKUP:
2580 		/* Current primary member */
2581 		ret = link_update(bond_ctx->current_primary_port, &member_link);
2582 		if (ret < 0) {
2583 			RTE_BOND_LOG(ERR, "Member (port %u) link get failed: %s",
2584 				bond_ctx->current_primary_port,
2585 				rte_strerror(-ret));
2586 			return 0;
2587 		}
2588 
2589 		ethdev->data->dev_link.link_speed = member_link.link_speed;
2590 		break;
2591 	case BONDING_MODE_8023AD:
2592 		ethdev->data->dev_link.link_autoneg =
2593 				bond_ctx->mode4.member_link.link_autoneg;
2594 		ethdev->data->dev_link.link_duplex =
2595 				bond_ctx->mode4.member_link.link_duplex;
2596 		/* fall through */
2597 		/* to update link speed */
2598 	case BONDING_MODE_ROUND_ROBIN:
2599 	case BONDING_MODE_BALANCE:
2600 	case BONDING_MODE_TLB:
2601 	case BONDING_MODE_ALB:
2602 	default:
2603 		/**
2604 		 * In theses mode the maximum theoretical link speed is the sum
2605 		 * of all the members
2606 		 */
2607 		ethdev->data->dev_link.link_speed = RTE_ETH_SPEED_NUM_NONE;
2608 		one_link_update_succeeded = false;
2609 
2610 		for (idx = 0; idx < bond_ctx->active_member_count; idx++) {
2611 			ret = link_update(bond_ctx->active_members[idx],
2612 					&member_link);
2613 			if (ret < 0) {
2614 				RTE_BOND_LOG(ERR,
2615 					"Member (port %u) link get failed: %s",
2616 					bond_ctx->active_members[idx],
2617 					rte_strerror(-ret));
2618 				continue;
2619 			}
2620 
2621 			one_link_update_succeeded = true;
2622 			ethdev->data->dev_link.link_speed +=
2623 					member_link.link_speed;
2624 		}
2625 
2626 		if (!one_link_update_succeeded) {
2627 			RTE_BOND_LOG(ERR, "All members link get failed");
2628 			return 0;
2629 		}
2630 	}
2631 
2632 
2633 	return 0;
2634 }
2635 
2636 
2637 static int
2638 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2639 {
2640 	struct bond_dev_private *internals = dev->data->dev_private;
2641 	struct rte_eth_stats member_stats;
2642 	int i, j;
2643 
2644 	for (i = 0; i < internals->member_count; i++) {
2645 		rte_eth_stats_get(internals->members[i].port_id, &member_stats);
2646 
2647 		stats->ipackets += member_stats.ipackets;
2648 		stats->opackets += member_stats.opackets;
2649 		stats->ibytes += member_stats.ibytes;
2650 		stats->obytes += member_stats.obytes;
2651 		stats->imissed += member_stats.imissed;
2652 		stats->ierrors += member_stats.ierrors;
2653 		stats->oerrors += member_stats.oerrors;
2654 		stats->rx_nombuf += member_stats.rx_nombuf;
2655 
2656 		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2657 			stats->q_ipackets[j] += member_stats.q_ipackets[j];
2658 			stats->q_opackets[j] += member_stats.q_opackets[j];
2659 			stats->q_ibytes[j] += member_stats.q_ibytes[j];
2660 			stats->q_obytes[j] += member_stats.q_obytes[j];
2661 			stats->q_errors[j] += member_stats.q_errors[j];
2662 		}
2663 
2664 	}
2665 
2666 	return 0;
2667 }
2668 
2669 static int
2670 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2671 {
2672 	struct bond_dev_private *internals = dev->data->dev_private;
2673 	int i;
2674 	int err;
2675 	int ret;
2676 
2677 	for (i = 0, err = 0; i < internals->member_count; i++) {
2678 		ret = rte_eth_stats_reset(internals->members[i].port_id);
2679 		if (ret != 0)
2680 			err = ret;
2681 	}
2682 
2683 	return err;
2684 }
2685 
2686 static int
2687 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2688 {
2689 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2690 	int i;
2691 	int ret = 0;
2692 	uint16_t port_id;
2693 
2694 	switch (internals->mode) {
2695 	/* Promiscuous mode is propagated to all members */
2696 	case BONDING_MODE_ROUND_ROBIN:
2697 	case BONDING_MODE_BALANCE:
2698 	case BONDING_MODE_BROADCAST:
2699 	case BONDING_MODE_8023AD: {
2700 		unsigned int member_ok = 0;
2701 
2702 		for (i = 0; i < internals->member_count; i++) {
2703 			port_id = internals->members[i].port_id;
2704 
2705 			ret = rte_eth_promiscuous_enable(port_id);
2706 			if (ret != 0)
2707 				RTE_BOND_LOG(ERR,
2708 					"Failed to enable promiscuous mode for port %u: %s",
2709 					port_id, rte_strerror(-ret));
2710 			else
2711 				member_ok++;
2712 		}
2713 		/*
2714 		 * Report success if operation is successful on at least
2715 		 * on one member. Otherwise return last error code.
2716 		 */
2717 		if (member_ok > 0)
2718 			ret = 0;
2719 		break;
2720 	}
2721 	/* Promiscuous mode is propagated only to primary member */
2722 	case BONDING_MODE_ACTIVE_BACKUP:
2723 	case BONDING_MODE_TLB:
2724 	case BONDING_MODE_ALB:
2725 	default:
2726 		/* Do not touch promisc when there cannot be primary ports */
2727 		if (internals->member_count == 0)
2728 			break;
2729 		port_id = internals->current_primary_port;
2730 		ret = rte_eth_promiscuous_enable(port_id);
2731 		if (ret != 0)
2732 			RTE_BOND_LOG(ERR,
2733 				"Failed to enable promiscuous mode for port %u: %s",
2734 				port_id, rte_strerror(-ret));
2735 	}
2736 
2737 	return ret;
2738 }
2739 
2740 static int
2741 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2742 {
2743 	struct bond_dev_private *internals = dev->data->dev_private;
2744 	int i;
2745 	int ret = 0;
2746 	uint16_t port_id;
2747 
2748 	switch (internals->mode) {
2749 	/* Promiscuous mode is propagated to all members */
2750 	case BONDING_MODE_ROUND_ROBIN:
2751 	case BONDING_MODE_BALANCE:
2752 	case BONDING_MODE_BROADCAST:
2753 	case BONDING_MODE_8023AD: {
2754 		unsigned int member_ok = 0;
2755 
2756 		for (i = 0; i < internals->member_count; i++) {
2757 			port_id = internals->members[i].port_id;
2758 
2759 			if (internals->mode == BONDING_MODE_8023AD &&
2760 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2761 					BOND_8023AD_FORCED_PROMISC) {
2762 				member_ok++;
2763 				continue;
2764 			}
2765 			ret = rte_eth_promiscuous_disable(port_id);
2766 			if (ret != 0)
2767 				RTE_BOND_LOG(ERR,
2768 					"Failed to disable promiscuous mode for port %u: %s",
2769 					port_id, rte_strerror(-ret));
2770 			else
2771 				member_ok++;
2772 		}
2773 		/*
2774 		 * Report success if operation is successful on at least
2775 		 * on one member. Otherwise return last error code.
2776 		 */
2777 		if (member_ok > 0)
2778 			ret = 0;
2779 		break;
2780 	}
2781 	/* Promiscuous mode is propagated only to primary member */
2782 	case BONDING_MODE_ACTIVE_BACKUP:
2783 	case BONDING_MODE_TLB:
2784 	case BONDING_MODE_ALB:
2785 	default:
2786 		/* Do not touch promisc when there cannot be primary ports */
2787 		if (internals->member_count == 0)
2788 			break;
2789 		port_id = internals->current_primary_port;
2790 		ret = rte_eth_promiscuous_disable(port_id);
2791 		if (ret != 0)
2792 			RTE_BOND_LOG(ERR,
2793 				"Failed to disable promiscuous mode for port %u: %s",
2794 				port_id, rte_strerror(-ret));
2795 	}
2796 
2797 	return ret;
2798 }
2799 
2800 static int
2801 bond_ethdev_promiscuous_update(struct rte_eth_dev *dev)
2802 {
2803 	struct bond_dev_private *internals = dev->data->dev_private;
2804 	uint16_t port_id = internals->current_primary_port;
2805 	int ret;
2806 
2807 	switch (internals->mode) {
2808 	case BONDING_MODE_ROUND_ROBIN:
2809 	case BONDING_MODE_BALANCE:
2810 	case BONDING_MODE_BROADCAST:
2811 	case BONDING_MODE_8023AD:
2812 		/* As promiscuous mode is propagated to all members for these
2813 		 * mode, no need to update for bonding device.
2814 		 */
2815 		break;
2816 	case BONDING_MODE_ACTIVE_BACKUP:
2817 	case BONDING_MODE_TLB:
2818 	case BONDING_MODE_ALB:
2819 	default:
2820 		/* As promiscuous mode is propagated only to primary member
2821 		 * for these mode. When active/standby switchover, promiscuous
2822 		 * mode should be set to new primary member according to bonding
2823 		 * device.
2824 		 */
2825 		if (rte_eth_promiscuous_get(internals->port_id) == 1) {
2826 			ret = rte_eth_promiscuous_enable(port_id);
2827 			if (ret != 0)
2828 				RTE_BOND_LOG(ERR,
2829 					     "Failed to enable promiscuous mode for port %u: %s",
2830 					     port_id, rte_strerror(-ret));
2831 		} else {
2832 			ret = rte_eth_promiscuous_disable(port_id);
2833 			if (ret != 0)
2834 				RTE_BOND_LOG(ERR,
2835 					     "Failed to disable promiscuous mode for port %u: %s",
2836 					     port_id, rte_strerror(-ret));
2837 		}
2838 	}
2839 
2840 	return 0;
2841 }
2842 
2843 static int
2844 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2845 {
2846 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2847 	int i;
2848 	int ret = 0;
2849 	uint16_t port_id;
2850 
2851 	switch (internals->mode) {
2852 	/* allmulti mode is propagated to all members */
2853 	case BONDING_MODE_ROUND_ROBIN:
2854 	case BONDING_MODE_BALANCE:
2855 	case BONDING_MODE_BROADCAST:
2856 	case BONDING_MODE_8023AD: {
2857 		unsigned int member_ok = 0;
2858 
2859 		for (i = 0; i < internals->member_count; i++) {
2860 			port_id = internals->members[i].port_id;
2861 
2862 			ret = rte_eth_allmulticast_enable(port_id);
2863 			if (ret != 0)
2864 				RTE_BOND_LOG(ERR,
2865 					"Failed to enable allmulti mode for port %u: %s",
2866 					port_id, rte_strerror(-ret));
2867 			else
2868 				member_ok++;
2869 		}
2870 		/*
2871 		 * Report success if operation is successful on at least
2872 		 * on one member. Otherwise return last error code.
2873 		 */
2874 		if (member_ok > 0)
2875 			ret = 0;
2876 		break;
2877 	}
2878 	/* allmulti mode is propagated only to primary member */
2879 	case BONDING_MODE_ACTIVE_BACKUP:
2880 	case BONDING_MODE_TLB:
2881 	case BONDING_MODE_ALB:
2882 	default:
2883 		/* Do not touch allmulti when there cannot be primary ports */
2884 		if (internals->member_count == 0)
2885 			break;
2886 		port_id = internals->current_primary_port;
2887 		ret = rte_eth_allmulticast_enable(port_id);
2888 		if (ret != 0)
2889 			RTE_BOND_LOG(ERR,
2890 				"Failed to enable allmulti mode for port %u: %s",
2891 				port_id, rte_strerror(-ret));
2892 	}
2893 
2894 	return ret;
2895 }
2896 
2897 static int
2898 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2899 {
2900 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2901 	int i;
2902 	int ret = 0;
2903 	uint16_t port_id;
2904 
2905 	switch (internals->mode) {
2906 	/* allmulti mode is propagated to all members */
2907 	case BONDING_MODE_ROUND_ROBIN:
2908 	case BONDING_MODE_BALANCE:
2909 	case BONDING_MODE_BROADCAST:
2910 	case BONDING_MODE_8023AD: {
2911 		unsigned int member_ok = 0;
2912 
2913 		for (i = 0; i < internals->member_count; i++) {
2914 			uint16_t port_id = internals->members[i].port_id;
2915 
2916 			if (internals->mode == BONDING_MODE_8023AD &&
2917 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2918 					BOND_8023AD_FORCED_ALLMULTI)
2919 				continue;
2920 
2921 			ret = rte_eth_allmulticast_disable(port_id);
2922 			if (ret != 0)
2923 				RTE_BOND_LOG(ERR,
2924 					"Failed to disable allmulti mode for port %u: %s",
2925 					port_id, rte_strerror(-ret));
2926 			else
2927 				member_ok++;
2928 		}
2929 		/*
2930 		 * Report success if operation is successful on at least
2931 		 * on one member. Otherwise return last error code.
2932 		 */
2933 		if (member_ok > 0)
2934 			ret = 0;
2935 		break;
2936 	}
2937 	/* allmulti mode is propagated only to primary member */
2938 	case BONDING_MODE_ACTIVE_BACKUP:
2939 	case BONDING_MODE_TLB:
2940 	case BONDING_MODE_ALB:
2941 	default:
2942 		/* Do not touch allmulti when there cannot be primary ports */
2943 		if (internals->member_count == 0)
2944 			break;
2945 		port_id = internals->current_primary_port;
2946 		ret = rte_eth_allmulticast_disable(port_id);
2947 		if (ret != 0)
2948 			RTE_BOND_LOG(ERR,
2949 				"Failed to disable allmulti mode for port %u: %s",
2950 				port_id, rte_strerror(-ret));
2951 	}
2952 
2953 	return ret;
2954 }
2955 
2956 static int
2957 bond_ethdev_allmulticast_update(struct rte_eth_dev *dev)
2958 {
2959 	struct bond_dev_private *internals = dev->data->dev_private;
2960 	uint16_t port_id = internals->current_primary_port;
2961 
2962 	switch (internals->mode) {
2963 	case BONDING_MODE_ROUND_ROBIN:
2964 	case BONDING_MODE_BALANCE:
2965 	case BONDING_MODE_BROADCAST:
2966 	case BONDING_MODE_8023AD:
2967 		/* As allmulticast mode is propagated to all members for these
2968 		 * mode, no need to update for bonding device.
2969 		 */
2970 		break;
2971 	case BONDING_MODE_ACTIVE_BACKUP:
2972 	case BONDING_MODE_TLB:
2973 	case BONDING_MODE_ALB:
2974 	default:
2975 		/* As allmulticast mode is propagated only to primary member
2976 		 * for these mode. When active/standby switchover, allmulticast
2977 		 * mode should be set to new primary member according to bonding
2978 		 * device.
2979 		 */
2980 		if (rte_eth_allmulticast_get(internals->port_id) == 1)
2981 			rte_eth_allmulticast_enable(port_id);
2982 		else
2983 			rte_eth_allmulticast_disable(port_id);
2984 	}
2985 
2986 	return 0;
2987 }
2988 
2989 static void
2990 bond_ethdev_delayed_lsc_propagation(void *arg)
2991 {
2992 	if (arg == NULL)
2993 		return;
2994 
2995 	rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2996 			RTE_ETH_EVENT_INTR_LSC, NULL);
2997 }
2998 
2999 int
3000 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
3001 		void *param, void *ret_param __rte_unused)
3002 {
3003 	struct rte_eth_dev *bonding_eth_dev;
3004 	struct bond_dev_private *internals;
3005 	struct rte_eth_link link;
3006 	int rc = -1;
3007 	int ret;
3008 
3009 	uint8_t lsc_flag = 0;
3010 	int valid_member = 0;
3011 	uint16_t active_pos, member_idx;
3012 	uint16_t i;
3013 
3014 	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
3015 		return rc;
3016 
3017 	bonding_eth_dev = &rte_eth_devices[*(uint16_t *)param];
3018 
3019 	if (check_for_bonding_ethdev(bonding_eth_dev))
3020 		return rc;
3021 
3022 	internals = bonding_eth_dev->data->dev_private;
3023 
3024 	/* If the device isn't started don't handle interrupts */
3025 	if (!bonding_eth_dev->data->dev_started)
3026 		return rc;
3027 
3028 	/* verify that port_id is a valid member of bonding port */
3029 	for (i = 0; i < internals->member_count; i++) {
3030 		if (internals->members[i].port_id == port_id) {
3031 			valid_member = 1;
3032 			member_idx = i;
3033 			break;
3034 		}
3035 	}
3036 
3037 	if (!valid_member)
3038 		return rc;
3039 
3040 	/* Synchronize lsc callback parallel calls either by real link event
3041 	 * from the members PMDs or by the bonding PMD itself.
3042 	 */
3043 	rte_spinlock_lock(&internals->lsc_lock);
3044 
3045 	/* Search for port in active port list */
3046 	active_pos = find_member_by_id(internals->active_members,
3047 			internals->active_member_count, port_id);
3048 
3049 	ret = rte_eth_link_get_nowait(port_id, &link);
3050 	if (ret < 0)
3051 		RTE_BOND_LOG(ERR, "Member (port %u) link get failed", port_id);
3052 
3053 	if (ret == 0 && link.link_status) {
3054 		if (active_pos < internals->active_member_count)
3055 			goto link_update;
3056 
3057 		/* check link state properties if bonding link is up*/
3058 		if (bonding_eth_dev->data->dev_link.link_status == RTE_ETH_LINK_UP) {
3059 			if (link_properties_valid(bonding_eth_dev, &link) != 0)
3060 				RTE_BOND_LOG(ERR, "Invalid link properties "
3061 					     "for member %d in bonding mode %d",
3062 					     port_id, internals->mode);
3063 		} else {
3064 			/* inherit member link properties */
3065 			link_properties_set(bonding_eth_dev, &link);
3066 		}
3067 
3068 		/* If no active member ports then set this port to be
3069 		 * the primary port.
3070 		 */
3071 		if (internals->active_member_count < 1) {
3072 			/* If first active member, then change link status */
3073 			bonding_eth_dev->data->dev_link.link_status =
3074 								RTE_ETH_LINK_UP;
3075 			internals->current_primary_port = port_id;
3076 			lsc_flag = 1;
3077 
3078 			mac_address_members_update(bonding_eth_dev);
3079 			bond_ethdev_promiscuous_update(bonding_eth_dev);
3080 			bond_ethdev_allmulticast_update(bonding_eth_dev);
3081 		}
3082 
3083 		activate_member(bonding_eth_dev, port_id);
3084 
3085 		/* If the user has defined the primary port then default to
3086 		 * using it.
3087 		 */
3088 		if (internals->user_defined_primary_port &&
3089 				internals->primary_port == port_id)
3090 			bond_ethdev_primary_set(internals, port_id);
3091 	} else {
3092 		if (active_pos == internals->active_member_count)
3093 			goto link_update;
3094 
3095 		/* Remove from active member list */
3096 		deactivate_member(bonding_eth_dev, port_id);
3097 
3098 		if (internals->active_member_count < 1)
3099 			lsc_flag = 1;
3100 
3101 		/* Update primary id, take first active member from list or if none
3102 		 * available set to -1 */
3103 		if (port_id == internals->current_primary_port) {
3104 			if (internals->active_member_count > 0)
3105 				bond_ethdev_primary_set(internals,
3106 						internals->active_members[0]);
3107 			else
3108 				internals->current_primary_port = internals->primary_port;
3109 			mac_address_members_update(bonding_eth_dev);
3110 			bond_ethdev_promiscuous_update(bonding_eth_dev);
3111 			bond_ethdev_allmulticast_update(bonding_eth_dev);
3112 		}
3113 	}
3114 
3115 link_update:
3116 	/**
3117 	 * Update bonding device link properties after any change to active
3118 	 * members
3119 	 */
3120 	bond_ethdev_link_update(bonding_eth_dev, 0);
3121 	internals->members[member_idx].last_link_status = link.link_status;
3122 
3123 	if (lsc_flag) {
3124 		/* Cancel any possible outstanding interrupts if delays are enabled */
3125 		if (internals->link_up_delay_ms > 0 ||
3126 			internals->link_down_delay_ms > 0)
3127 			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
3128 					bonding_eth_dev);
3129 
3130 		if (bonding_eth_dev->data->dev_link.link_status) {
3131 			if (internals->link_up_delay_ms > 0)
3132 				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
3133 						bond_ethdev_delayed_lsc_propagation,
3134 						(void *)bonding_eth_dev);
3135 			else
3136 				rte_eth_dev_callback_process(bonding_eth_dev,
3137 						RTE_ETH_EVENT_INTR_LSC,
3138 						NULL);
3139 
3140 		} else {
3141 			if (internals->link_down_delay_ms > 0)
3142 				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
3143 						bond_ethdev_delayed_lsc_propagation,
3144 						(void *)bonding_eth_dev);
3145 			else
3146 				rte_eth_dev_callback_process(bonding_eth_dev,
3147 						RTE_ETH_EVENT_INTR_LSC,
3148 						NULL);
3149 		}
3150 	}
3151 
3152 	rte_spinlock_unlock(&internals->lsc_lock);
3153 
3154 	return rc;
3155 }
3156 
3157 static int
3158 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
3159 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3160 {
3161 	unsigned i, j;
3162 	int result = 0;
3163 	int member_reta_size;
3164 	unsigned reta_count;
3165 	struct bond_dev_private *internals = dev->data->dev_private;
3166 
3167 	if (reta_size != internals->reta_size)
3168 		return -EINVAL;
3169 
3170 	 /* Copy RETA table */
3171 	reta_count = (reta_size + RTE_ETH_RETA_GROUP_SIZE - 1) /
3172 			RTE_ETH_RETA_GROUP_SIZE;
3173 
3174 	for (i = 0; i < reta_count; i++) {
3175 		internals->reta_conf[i].mask = reta_conf[i].mask;
3176 		for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3177 			if ((reta_conf[i].mask >> j) & 0x01)
3178 				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
3179 	}
3180 
3181 	/* Fill rest of array */
3182 	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
3183 		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
3184 				sizeof(internals->reta_conf[0]) * reta_count);
3185 
3186 	/* Propagate RETA over members */
3187 	for (i = 0; i < internals->member_count; i++) {
3188 		member_reta_size = internals->members[i].reta_size;
3189 		result = rte_eth_dev_rss_reta_update(internals->members[i].port_id,
3190 				&internals->reta_conf[0], member_reta_size);
3191 		if (result < 0)
3192 			return result;
3193 	}
3194 
3195 	return 0;
3196 }
3197 
3198 static int
3199 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
3200 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3201 {
3202 	int i, j;
3203 	struct bond_dev_private *internals = dev->data->dev_private;
3204 
3205 	if (reta_size != internals->reta_size)
3206 		return -EINVAL;
3207 
3208 	 /* Copy RETA table */
3209 	for (i = 0; i < reta_size / RTE_ETH_RETA_GROUP_SIZE; i++)
3210 		for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3211 			if ((reta_conf[i].mask >> j) & 0x01)
3212 				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3213 
3214 	return 0;
3215 }
3216 
3217 static int
3218 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3219 		struct rte_eth_rss_conf *rss_conf)
3220 {
3221 	int i, result = 0;
3222 	struct bond_dev_private *internals = dev->data->dev_private;
3223 	struct rte_eth_rss_conf bond_rss_conf;
3224 
3225 	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3226 
3227 	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3228 
3229 	if (bond_rss_conf.rss_hf != 0)
3230 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3231 
3232 	if (bond_rss_conf.rss_key) {
3233 		if (bond_rss_conf.rss_key_len < internals->rss_key_len)
3234 			return -EINVAL;
3235 		else if (bond_rss_conf.rss_key_len > internals->rss_key_len)
3236 			RTE_BOND_LOG(WARNING, "rss_key will be truncated");
3237 
3238 		memcpy(internals->rss_key, bond_rss_conf.rss_key,
3239 				internals->rss_key_len);
3240 		bond_rss_conf.rss_key_len = internals->rss_key_len;
3241 	}
3242 
3243 	for (i = 0; i < internals->member_count; i++) {
3244 		result = rte_eth_dev_rss_hash_update(internals->members[i].port_id,
3245 				&bond_rss_conf);
3246 		if (result < 0)
3247 			return result;
3248 	}
3249 
3250 	return 0;
3251 }
3252 
3253 static int
3254 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3255 		struct rte_eth_rss_conf *rss_conf)
3256 {
3257 	struct bond_dev_private *internals = dev->data->dev_private;
3258 
3259 	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3260 	rss_conf->rss_key_len = internals->rss_key_len;
3261 	if (rss_conf->rss_key)
3262 		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3263 
3264 	return 0;
3265 }
3266 
3267 static int
3268 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3269 {
3270 	struct rte_eth_dev *member_eth_dev;
3271 	struct bond_dev_private *internals = dev->data->dev_private;
3272 	int ret, i;
3273 
3274 	rte_spinlock_lock(&internals->lock);
3275 
3276 	for (i = 0; i < internals->member_count; i++) {
3277 		member_eth_dev = &rte_eth_devices[internals->members[i].port_id];
3278 		if (*member_eth_dev->dev_ops->mtu_set == NULL) {
3279 			rte_spinlock_unlock(&internals->lock);
3280 			return -ENOTSUP;
3281 		}
3282 	}
3283 	for (i = 0; i < internals->member_count; i++) {
3284 		ret = rte_eth_dev_set_mtu(internals->members[i].port_id, mtu);
3285 		if (ret < 0) {
3286 			rte_spinlock_unlock(&internals->lock);
3287 			return ret;
3288 		}
3289 	}
3290 
3291 	rte_spinlock_unlock(&internals->lock);
3292 	return 0;
3293 }
3294 
3295 static int
3296 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3297 			struct rte_ether_addr *addr)
3298 {
3299 	if (mac_address_set(dev, addr)) {
3300 		RTE_BOND_LOG(ERR, "Failed to update MAC address");
3301 		return -EINVAL;
3302 	}
3303 
3304 	return 0;
3305 }
3306 
3307 static int
3308 bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
3309 		  const struct rte_flow_ops **ops)
3310 {
3311 	*ops = &bond_flow_ops;
3312 	return 0;
3313 }
3314 
3315 static int
3316 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3317 			struct rte_ether_addr *mac_addr,
3318 			__rte_unused uint32_t index, uint32_t vmdq)
3319 {
3320 	struct rte_eth_dev *member_eth_dev;
3321 	struct bond_dev_private *internals = dev->data->dev_private;
3322 	int ret, i;
3323 
3324 	rte_spinlock_lock(&internals->lock);
3325 
3326 	for (i = 0; i < internals->member_count; i++) {
3327 		member_eth_dev = &rte_eth_devices[internals->members[i].port_id];
3328 		if (*member_eth_dev->dev_ops->mac_addr_add == NULL ||
3329 			 *member_eth_dev->dev_ops->mac_addr_remove == NULL) {
3330 			ret = -ENOTSUP;
3331 			goto end;
3332 		}
3333 	}
3334 
3335 	for (i = 0; i < internals->member_count; i++) {
3336 		ret = rte_eth_dev_mac_addr_add(internals->members[i].port_id,
3337 				mac_addr, vmdq);
3338 		if (ret < 0) {
3339 			/* rollback */
3340 			for (i--; i >= 0; i--)
3341 				rte_eth_dev_mac_addr_remove(
3342 					internals->members[i].port_id, mac_addr);
3343 			goto end;
3344 		}
3345 	}
3346 
3347 	ret = 0;
3348 end:
3349 	rte_spinlock_unlock(&internals->lock);
3350 	return ret;
3351 }
3352 
3353 static void
3354 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3355 {
3356 	struct rte_eth_dev *member_eth_dev;
3357 	struct bond_dev_private *internals = dev->data->dev_private;
3358 	int i;
3359 
3360 	rte_spinlock_lock(&internals->lock);
3361 
3362 	for (i = 0; i < internals->member_count; i++) {
3363 		member_eth_dev = &rte_eth_devices[internals->members[i].port_id];
3364 		if (*member_eth_dev->dev_ops->mac_addr_remove == NULL)
3365 			goto end;
3366 	}
3367 
3368 	struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3369 
3370 	for (i = 0; i < internals->member_count; i++)
3371 		rte_eth_dev_mac_addr_remove(internals->members[i].port_id,
3372 				mac_addr);
3373 
3374 end:
3375 	rte_spinlock_unlock(&internals->lock);
3376 }
3377 
3378 static const char *
3379 bond_mode_name(uint8_t mode)
3380 {
3381 	switch (mode) {
3382 	case BONDING_MODE_ROUND_ROBIN:
3383 		return "ROUND_ROBIN";
3384 	case BONDING_MODE_ACTIVE_BACKUP:
3385 		return "ACTIVE_BACKUP";
3386 	case BONDING_MODE_BALANCE:
3387 		return "BALANCE";
3388 	case BONDING_MODE_BROADCAST:
3389 		return "BROADCAST";
3390 	case BONDING_MODE_8023AD:
3391 		return "8023AD";
3392 	case BONDING_MODE_TLB:
3393 		return "TLB";
3394 	case BONDING_MODE_ALB:
3395 		return "ALB";
3396 	default:
3397 		return "Unknown";
3398 	}
3399 }
3400 
3401 static void
3402 dump_basic(const struct rte_eth_dev *dev, FILE *f)
3403 {
3404 	struct bond_dev_private instant_priv;
3405 	const struct bond_dev_private *internals = &instant_priv;
3406 	int mode, i;
3407 
3408 	/* Obtain a instance of dev_private to prevent data from being modified. */
3409 	memcpy(&instant_priv, dev->data->dev_private, sizeof(struct bond_dev_private));
3410 	mode = internals->mode;
3411 
3412 	fprintf(f, "  - Dev basic:\n");
3413 	fprintf(f, "\tBonding mode: %s(%d)\n", bond_mode_name(mode), mode);
3414 
3415 	if (mode == BONDING_MODE_BALANCE || mode == BONDING_MODE_8023AD) {
3416 		fprintf(f, "\tBalance Xmit Policy: ");
3417 		switch (internals->balance_xmit_policy) {
3418 		case BALANCE_XMIT_POLICY_LAYER2:
3419 			fprintf(f, "BALANCE_XMIT_POLICY_LAYER2");
3420 			break;
3421 		case BALANCE_XMIT_POLICY_LAYER23:
3422 			fprintf(f, "BALANCE_XMIT_POLICY_LAYER23");
3423 			break;
3424 		case BALANCE_XMIT_POLICY_LAYER34:
3425 			fprintf(f, "BALANCE_XMIT_POLICY_LAYER34");
3426 			break;
3427 		default:
3428 			fprintf(f, "Unknown");
3429 		}
3430 		fprintf(f, "\n");
3431 	}
3432 
3433 	if (mode == BONDING_MODE_8023AD) {
3434 		fprintf(f, "\tIEEE802.3AD Aggregator Mode: ");
3435 		switch (internals->mode4.agg_selection) {
3436 		case AGG_BANDWIDTH:
3437 			fprintf(f, "bandwidth");
3438 			break;
3439 		case AGG_STABLE:
3440 			fprintf(f, "stable");
3441 			break;
3442 		case AGG_COUNT:
3443 			fprintf(f, "count");
3444 			break;
3445 		default:
3446 			fprintf(f, "unknown");
3447 		}
3448 		fprintf(f, "\n");
3449 	}
3450 
3451 	if (internals->member_count > 0) {
3452 		fprintf(f, "\tMembers (%u): [", internals->member_count);
3453 		for (i = 0; i < internals->member_count - 1; i++)
3454 			fprintf(f, "%u ", internals->members[i].port_id);
3455 
3456 		fprintf(f, "%u]\n", internals->members[internals->member_count - 1].port_id);
3457 	} else {
3458 		fprintf(f, "\tMembers: []\n");
3459 	}
3460 
3461 	if (internals->active_member_count > 0) {
3462 		fprintf(f, "\tActive Members (%u): [", internals->active_member_count);
3463 		for (i = 0; i < internals->active_member_count - 1; i++)
3464 			fprintf(f, "%u ", internals->active_members[i]);
3465 
3466 		fprintf(f, "%u]\n", internals->active_members[internals->active_member_count - 1]);
3467 
3468 	} else {
3469 		fprintf(f, "\tActive Members: []\n");
3470 	}
3471 
3472 	if (internals->user_defined_primary_port)
3473 		fprintf(f, "\tUser Defined Primary: [%u]\n", internals->primary_port);
3474 	if (internals->member_count > 0)
3475 		fprintf(f, "\tCurrent Primary: [%u]\n", internals->current_primary_port);
3476 }
3477 
3478 static void
3479 dump_lacp_conf(const struct rte_eth_bond_8023ad_conf *conf, FILE *f)
3480 {
3481 	fprintf(f, "\tfast period: %u ms\n", conf->fast_periodic_ms);
3482 	fprintf(f, "\tslow period: %u ms\n", conf->slow_periodic_ms);
3483 	fprintf(f, "\tshort timeout: %u ms\n", conf->short_timeout_ms);
3484 	fprintf(f, "\tlong timeout: %u ms\n", conf->long_timeout_ms);
3485 	fprintf(f, "\taggregate wait timeout: %u ms\n",
3486 			conf->aggregate_wait_timeout_ms);
3487 	fprintf(f, "\ttx period: %u ms\n", conf->tx_period_ms);
3488 	fprintf(f, "\trx marker period: %u ms\n", conf->rx_marker_period_ms);
3489 	fprintf(f, "\tupdate timeout: %u ms\n", conf->update_timeout_ms);
3490 	switch (conf->agg_selection) {
3491 	case AGG_BANDWIDTH:
3492 		fprintf(f, "\taggregation mode: bandwidth\n");
3493 		break;
3494 	case AGG_STABLE:
3495 		fprintf(f, "\taggregation mode: stable\n");
3496 		break;
3497 	case AGG_COUNT:
3498 		fprintf(f, "\taggregation mode: count\n");
3499 		break;
3500 	default:
3501 		fprintf(f, "\taggregation mode: invalid\n");
3502 		break;
3503 	}
3504 	fprintf(f, "\n");
3505 }
3506 
3507 static void
3508 dump_lacp_port_param(const struct port_params *params, FILE *f)
3509 {
3510 	char buf[RTE_ETHER_ADDR_FMT_SIZE];
3511 	fprintf(f, "\t\tsystem priority: %u\n", params->system_priority);
3512 	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, &params->system);
3513 	fprintf(f, "\t\tsystem mac address: %s\n", buf);
3514 	fprintf(f, "\t\tport key: %u\n", params->key);
3515 	fprintf(f, "\t\tport priority: %u\n", params->port_priority);
3516 	fprintf(f, "\t\tport number: %u\n", params->port_number);
3517 }
3518 
3519 static void
3520 dump_lacp_member(const struct rte_eth_bond_8023ad_member_info *info, FILE *f)
3521 {
3522 	char a_state[256] = { 0 };
3523 	char p_state[256] = { 0 };
3524 	int a_len = 0;
3525 	int p_len = 0;
3526 	uint32_t i;
3527 
3528 	static const char * const state[] = {
3529 		"ACTIVE",
3530 		"TIMEOUT",
3531 		"AGGREGATION",
3532 		"SYNCHRONIZATION",
3533 		"COLLECTING",
3534 		"DISTRIBUTING",
3535 		"DEFAULTED",
3536 		"EXPIRED"
3537 	};
3538 	static const char * const selection[] = {
3539 		"UNSELECTED",
3540 		"STANDBY",
3541 		"SELECTED"
3542 	};
3543 
3544 	for (i = 0; i < RTE_DIM(state); i++) {
3545 		if ((info->actor_state >> i) & 1)
3546 			a_len += snprintf(&a_state[a_len],
3547 						RTE_DIM(a_state) - a_len, "%s ",
3548 						state[i]);
3549 
3550 		if ((info->partner_state >> i) & 1)
3551 			p_len += snprintf(&p_state[p_len],
3552 						RTE_DIM(p_state) - p_len, "%s ",
3553 						state[i]);
3554 	}
3555 	fprintf(f, "\tAggregator port id: %u\n", info->agg_port_id);
3556 	fprintf(f, "\tselection: %s\n", selection[info->selected]);
3557 	fprintf(f, "\tActor detail info:\n");
3558 	dump_lacp_port_param(&info->actor, f);
3559 	fprintf(f, "\t\tport state: %s\n", a_state);
3560 	fprintf(f, "\tPartner detail info:\n");
3561 	dump_lacp_port_param(&info->partner, f);
3562 	fprintf(f, "\t\tport state: %s\n", p_state);
3563 	fprintf(f, "\n");
3564 }
3565 
3566 static void
3567 dump_lacp(uint16_t port_id, FILE *f)
3568 {
3569 	struct rte_eth_bond_8023ad_member_info member_info;
3570 	struct rte_eth_bond_8023ad_conf port_conf;
3571 	uint16_t members[RTE_MAX_ETHPORTS];
3572 	int num_active_members;
3573 	int i, ret;
3574 
3575 	fprintf(f, "  - Lacp info:\n");
3576 
3577 	num_active_members = rte_eth_bond_active_members_get(port_id, members,
3578 			RTE_MAX_ETHPORTS);
3579 	if (num_active_members < 0) {
3580 		fprintf(f, "\tFailed to get active member list for port %u\n",
3581 				port_id);
3582 		return;
3583 	}
3584 
3585 	fprintf(f, "\tIEEE802.3 port: %u\n", port_id);
3586 	ret = rte_eth_bond_8023ad_conf_get(port_id, &port_conf);
3587 	if (ret) {
3588 		fprintf(f, "\tGet bonding device %u 8023ad config failed\n",
3589 			port_id);
3590 		return;
3591 	}
3592 	dump_lacp_conf(&port_conf, f);
3593 
3594 	for (i = 0; i < num_active_members; i++) {
3595 		ret = rte_eth_bond_8023ad_member_info(port_id, members[i],
3596 				&member_info);
3597 		if (ret) {
3598 			fprintf(f, "\tGet member device %u 8023ad info failed\n",
3599 				members[i]);
3600 			return;
3601 		}
3602 		fprintf(f, "\tMember Port: %u\n", members[i]);
3603 		dump_lacp_member(&member_info, f);
3604 	}
3605 }
3606 
3607 static int
3608 bond_ethdev_priv_dump(struct rte_eth_dev *dev, FILE *f)
3609 {
3610 	const struct bond_dev_private *internals = dev->data->dev_private;
3611 
3612 	dump_basic(dev, f);
3613 	if (internals->mode == BONDING_MODE_8023AD)
3614 		dump_lacp(dev->data->port_id, f);
3615 
3616 	return 0;
3617 }
3618 
3619 const struct eth_dev_ops default_dev_ops = {
3620 	.dev_start            = bond_ethdev_start,
3621 	.dev_stop             = bond_ethdev_stop,
3622 	.dev_close            = bond_ethdev_close,
3623 	.dev_configure        = bond_ethdev_configure,
3624 	.dev_infos_get        = bond_ethdev_info,
3625 	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
3626 	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
3627 	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
3628 	.rx_queue_release     = bond_ethdev_rx_queue_release,
3629 	.tx_queue_release     = bond_ethdev_tx_queue_release,
3630 	.link_update          = bond_ethdev_link_update,
3631 	.stats_get            = bond_ethdev_stats_get,
3632 	.stats_reset          = bond_ethdev_stats_reset,
3633 	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
3634 	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
3635 	.allmulticast_enable  = bond_ethdev_allmulticast_enable,
3636 	.allmulticast_disable = bond_ethdev_allmulticast_disable,
3637 	.reta_update          = bond_ethdev_rss_reta_update,
3638 	.reta_query           = bond_ethdev_rss_reta_query,
3639 	.rss_hash_update      = bond_ethdev_rss_hash_update,
3640 	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3641 	.mtu_set              = bond_ethdev_mtu_set,
3642 	.mac_addr_set         = bond_ethdev_mac_address_set,
3643 	.mac_addr_add         = bond_ethdev_mac_addr_add,
3644 	.mac_addr_remove      = bond_ethdev_mac_addr_remove,
3645 	.flow_ops_get         = bond_flow_ops_get,
3646 	.eth_dev_priv_dump    = bond_ethdev_priv_dump,
3647 };
3648 
3649 static int
3650 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3651 {
3652 	const char *name = rte_vdev_device_name(dev);
3653 	int socket_id = dev->device.numa_node;
3654 	struct bond_dev_private *internals = NULL;
3655 	struct rte_eth_dev *eth_dev = NULL;
3656 	uint32_t vlan_filter_bmp_size;
3657 
3658 	/* now do all data allocation - for eth_dev structure, dummy pci driver
3659 	 * and internal (private) data
3660 	 */
3661 
3662 	/* reserve an ethdev entry */
3663 	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3664 	if (eth_dev == NULL) {
3665 		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3666 		goto err;
3667 	}
3668 
3669 	internals = eth_dev->data->dev_private;
3670 	eth_dev->data->nb_rx_queues = (uint16_t)1;
3671 	eth_dev->data->nb_tx_queues = (uint16_t)1;
3672 
3673 	/* Allocate memory for storing MAC addresses */
3674 	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3675 			BOND_MAX_MAC_ADDRS, 0, socket_id);
3676 	if (eth_dev->data->mac_addrs == NULL) {
3677 		RTE_BOND_LOG(ERR,
3678 			     "Failed to allocate %u bytes needed to store MAC addresses",
3679 			     RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3680 		goto err;
3681 	}
3682 
3683 	eth_dev->dev_ops = &default_dev_ops;
3684 	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3685 					RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3686 
3687 	rte_spinlock_init(&internals->lock);
3688 	rte_spinlock_init(&internals->lsc_lock);
3689 
3690 	internals->port_id = eth_dev->data->port_id;
3691 	internals->mode = BONDING_MODE_INVALID;
3692 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3693 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3694 	internals->burst_xmit_hash = burst_xmit_l2_hash;
3695 	internals->user_defined_mac = 0;
3696 
3697 	internals->link_status_polling_enabled = 0;
3698 
3699 	internals->link_status_polling_interval_ms =
3700 		DEFAULT_POLLING_INTERVAL_10_MS;
3701 	internals->link_down_delay_ms = 0;
3702 	internals->link_up_delay_ms = 0;
3703 
3704 	internals->member_count = 0;
3705 	internals->active_member_count = 0;
3706 	internals->rx_offload_capa = 0;
3707 	internals->tx_offload_capa = 0;
3708 	internals->rx_queue_offload_capa = 0;
3709 	internals->tx_queue_offload_capa = 0;
3710 	internals->candidate_max_rx_pktlen = 0;
3711 	internals->max_rx_pktlen = 0;
3712 
3713 	/* Initially allow to choose any offload type */
3714 	internals->flow_type_rss_offloads = RTE_ETH_RSS_PROTO_MASK;
3715 
3716 	memset(&internals->default_rxconf, 0,
3717 	       sizeof(internals->default_rxconf));
3718 	memset(&internals->default_txconf, 0,
3719 	       sizeof(internals->default_txconf));
3720 
3721 	memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3722 	memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3723 
3724 	/*
3725 	 * Do not restrict descriptor counts until
3726 	 * the first back-end device gets attached.
3727 	 */
3728 	internals->rx_desc_lim.nb_max = UINT16_MAX;
3729 	internals->tx_desc_lim.nb_max = UINT16_MAX;
3730 	internals->rx_desc_lim.nb_align = 1;
3731 	internals->tx_desc_lim.nb_align = 1;
3732 
3733 	memset(internals->active_members, 0, sizeof(internals->active_members));
3734 	memset(internals->members, 0, sizeof(internals->members));
3735 
3736 	TAILQ_INIT(&internals->flow_list);
3737 	internals->flow_isolated_valid = 0;
3738 
3739 	/* Set mode 4 default configuration */
3740 	bond_mode_8023ad_setup(eth_dev, NULL);
3741 	if (bond_ethdev_mode_set(eth_dev, mode)) {
3742 		RTE_BOND_LOG(ERR, "Failed to set bonding device %u mode to %u",
3743 				 eth_dev->data->port_id, mode);
3744 		goto err;
3745 	}
3746 
3747 	vlan_filter_bmp_size =
3748 		rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3749 	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3750 						   RTE_CACHE_LINE_SIZE);
3751 	if (internals->vlan_filter_bmpmem == NULL) {
3752 		RTE_BOND_LOG(ERR,
3753 			     "Failed to allocate vlan bitmap for bonding device %u",
3754 			     eth_dev->data->port_id);
3755 		goto err;
3756 	}
3757 
3758 	internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3759 			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3760 	if (internals->vlan_filter_bmp == NULL) {
3761 		RTE_BOND_LOG(ERR,
3762 			     "Failed to init vlan bitmap for bonding device %u",
3763 			     eth_dev->data->port_id);
3764 		rte_free(internals->vlan_filter_bmpmem);
3765 		goto err;
3766 	}
3767 
3768 	return eth_dev->data->port_id;
3769 
3770 err:
3771 	rte_free(internals);
3772 	if (eth_dev != NULL)
3773 		eth_dev->data->dev_private = NULL;
3774 	rte_eth_dev_release_port(eth_dev);
3775 	return -1;
3776 }
3777 
3778 static int
3779 bond_probe(struct rte_vdev_device *dev)
3780 {
3781 	const char *name;
3782 	struct bond_dev_private *internals;
3783 	struct rte_kvargs *kvlist;
3784 	uint8_t bonding_mode;
3785 	int arg_count, port_id;
3786 	int socket_id;
3787 	uint8_t agg_mode;
3788 	struct rte_eth_dev *eth_dev;
3789 
3790 	if (!dev)
3791 		return -EINVAL;
3792 
3793 	name = rte_vdev_device_name(dev);
3794 	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3795 
3796 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3797 		eth_dev = rte_eth_dev_attach_secondary(name);
3798 		if (!eth_dev) {
3799 			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3800 			return -1;
3801 		}
3802 		/* TODO: request info from primary to set up Rx and Tx */
3803 		eth_dev->dev_ops = &default_dev_ops;
3804 		eth_dev->device = &dev->device;
3805 		rte_eth_dev_probing_finish(eth_dev);
3806 		return 0;
3807 	}
3808 
3809 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3810 		pmd_bond_init_valid_arguments);
3811 	if (kvlist == NULL) {
3812 		RTE_BOND_LOG(ERR, "Invalid args in %s", rte_vdev_device_args(dev));
3813 		return -1;
3814 	}
3815 
3816 	/* Parse link bonding mode */
3817 	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3818 		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3819 				&bond_ethdev_parse_member_mode_kvarg,
3820 				&bonding_mode) != 0) {
3821 			RTE_BOND_LOG(ERR, "Invalid mode for bonding device %s",
3822 					name);
3823 			goto parse_error;
3824 		}
3825 	} else {
3826 		RTE_BOND_LOG(ERR, "Mode must be specified only once for bonding "
3827 				"device %s", name);
3828 		goto parse_error;
3829 	}
3830 
3831 	/* Parse socket id to create bonding device on */
3832 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3833 	if (arg_count == 1) {
3834 		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3835 				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
3836 				!= 0) {
3837 			RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3838 					"bonding device %s", name);
3839 			goto parse_error;
3840 		}
3841 	} else if (arg_count > 1) {
3842 		RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3843 				"bonding device %s", name);
3844 		goto parse_error;
3845 	} else {
3846 		socket_id = rte_socket_id();
3847 	}
3848 
3849 	dev->device.numa_node = socket_id;
3850 
3851 	/* Create link bonding eth device */
3852 	port_id = bond_alloc(dev, bonding_mode);
3853 	if (port_id < 0) {
3854 		RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3855 				"socket %d.",	name, bonding_mode, socket_id);
3856 		goto parse_error;
3857 	}
3858 	internals = rte_eth_devices[port_id].data->dev_private;
3859 	internals->kvlist = kvlist;
3860 
3861 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3862 		if (rte_kvargs_process(kvlist,
3863 				PMD_BOND_AGG_MODE_KVARG,
3864 				&bond_ethdev_parse_member_agg_mode_kvarg,
3865 				&agg_mode) != 0) {
3866 			RTE_BOND_LOG(ERR,
3867 					"Failed to parse agg selection mode for bonding device %s",
3868 					name);
3869 			goto parse_error;
3870 		}
3871 
3872 		if (internals->mode == BONDING_MODE_8023AD)
3873 			internals->mode4.agg_selection = agg_mode;
3874 	} else {
3875 		internals->mode4.agg_selection = AGG_STABLE;
3876 	}
3877 
3878 	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3879 	RTE_BOND_LOG(INFO, "Create bonding device %s on port %d in mode %u on "
3880 			"socket %u.",	name, port_id, bonding_mode, socket_id);
3881 	return 0;
3882 
3883 parse_error:
3884 	rte_kvargs_free(kvlist);
3885 
3886 	return -1;
3887 }
3888 
3889 static int
3890 bond_remove(struct rte_vdev_device *dev)
3891 {
3892 	struct rte_eth_dev *eth_dev;
3893 	struct bond_dev_private *internals;
3894 	const char *name;
3895 	int ret = 0;
3896 
3897 	if (!dev)
3898 		return -EINVAL;
3899 
3900 	name = rte_vdev_device_name(dev);
3901 	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3902 
3903 	/* find an ethdev entry */
3904 	eth_dev = rte_eth_dev_allocated(name);
3905 	if (eth_dev == NULL)
3906 		return 0; /* port already released */
3907 
3908 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3909 		return rte_eth_dev_release_port(eth_dev);
3910 
3911 	RTE_ASSERT(eth_dev->device == &dev->device);
3912 
3913 	internals = eth_dev->data->dev_private;
3914 	if (internals->member_count != 0)
3915 		return -EBUSY;
3916 
3917 	if (eth_dev->data->dev_started == 1) {
3918 		ret = bond_ethdev_stop(eth_dev);
3919 		bond_ethdev_close(eth_dev);
3920 	}
3921 	rte_eth_dev_release_port(eth_dev);
3922 
3923 	return ret;
3924 }
3925 
3926 /* this part will resolve the member portids after all the other pdev and vdev
3927  * have been allocated */
3928 static int
3929 bond_ethdev_configure(struct rte_eth_dev *dev)
3930 {
3931 	const char *name = dev->device->name;
3932 	struct bond_dev_private *internals = dev->data->dev_private;
3933 	struct rte_kvargs *kvlist = internals->kvlist;
3934 	int arg_count;
3935 	uint16_t port_id = dev - rte_eth_devices;
3936 	uint32_t link_speeds;
3937 	uint8_t agg_mode;
3938 
3939 	static const uint8_t default_rss_key[40] = {
3940 		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3941 		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3942 		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3943 		0xBE, 0xAC, 0x01, 0xFA
3944 	};
3945 
3946 	unsigned i, j;
3947 
3948 
3949 	bond_ethdev_cfg_cleanup(dev, false);
3950 
3951 	/*
3952 	 * If RSS is enabled, fill table with default values and
3953 	 * set key to the value specified in port RSS configuration.
3954 	 * Fall back to default RSS key if the key is not specified
3955 	 */
3956 	if (dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS) {
3957 		struct rte_eth_rss_conf *rss_conf =
3958 			&dev->data->dev_conf.rx_adv_conf.rss_conf;
3959 
3960 		if (internals->rss_key_len == 0) {
3961 			internals->rss_key_len = sizeof(default_rss_key);
3962 		}
3963 
3964 		if (rss_conf->rss_key != NULL) {
3965 			if (internals->rss_key_len > rss_conf->rss_key_len) {
3966 				RTE_BOND_LOG(ERR, "Invalid rss key length(%u)",
3967 						rss_conf->rss_key_len);
3968 				return -EINVAL;
3969 			}
3970 
3971 			memcpy(internals->rss_key, rss_conf->rss_key,
3972 			       internals->rss_key_len);
3973 		} else {
3974 			if (internals->rss_key_len > sizeof(default_rss_key)) {
3975 				/*
3976 				 * If the rss_key includes standard_rss_key and
3977 				 * extended_hash_key, the rss key length will be
3978 				 * larger than default rss key length, so it should
3979 				 * re-calculate the hash key.
3980 				 */
3981 				for (i = 0; i < internals->rss_key_len; i++)
3982 					internals->rss_key[i] = (uint8_t)rte_rand();
3983 			} else {
3984 				memcpy(internals->rss_key, default_rss_key,
3985 					internals->rss_key_len);
3986 			}
3987 		}
3988 
3989 		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3990 			internals->reta_conf[i].mask = ~0LL;
3991 			for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++)
3992 				internals->reta_conf[i].reta[j] =
3993 						(i * RTE_ETH_RETA_GROUP_SIZE + j) %
3994 						dev->data->nb_rx_queues;
3995 		}
3996 	}
3997 
3998 	link_speeds = dev->data->dev_conf.link_speeds;
3999 	/*
4000 	 * The default value of 'link_speeds' is zero. From its definition,
4001 	 * this value actually means auto-negotiation. But not all PMDs support
4002 	 * auto-negotiation. So ignore the check for the auto-negotiation and
4003 	 * only consider fixed speed to reduce the impact on PMDs.
4004 	 */
4005 	if (link_speeds & RTE_ETH_LINK_SPEED_FIXED) {
4006 		if ((link_speeds &
4007 		    (internals->speed_capa & ~RTE_ETH_LINK_SPEED_FIXED)) == 0) {
4008 			RTE_BOND_LOG(ERR, "the fixed speed is not supported by all member devices.");
4009 			return -EINVAL;
4010 		}
4011 		/*
4012 		 * Two '1' in binary of 'link_speeds': bit0 and a unique
4013 		 * speed bit.
4014 		 */
4015 		if (rte_popcount64(link_speeds) != 2) {
4016 			RTE_BOND_LOG(ERR, "please set a unique speed.");
4017 			return -EINVAL;
4018 		}
4019 	}
4020 
4021 	/* set the max_rx_pktlen */
4022 	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
4023 
4024 	/*
4025 	 * if no kvlist, it means that this bonding device has been created
4026 	 * through the bonding api.
4027 	 */
4028 	if (!kvlist || internals->kvargs_processing_is_done)
4029 		return 0;
4030 
4031 	internals->kvargs_processing_is_done = true;
4032 
4033 	/* Parse MAC address for bonding device */
4034 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
4035 	if (arg_count == 1) {
4036 		struct rte_ether_addr bond_mac;
4037 
4038 		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
4039 				       &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
4040 			RTE_BOND_LOG(INFO, "Invalid mac address for bonding device %s",
4041 				     name);
4042 			return -1;
4043 		}
4044 
4045 		/* Set MAC address */
4046 		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
4047 			RTE_BOND_LOG(ERR,
4048 				     "Failed to set mac address on bonding device %s",
4049 				     name);
4050 			return -1;
4051 		}
4052 	} else if (arg_count > 1) {
4053 		RTE_BOND_LOG(ERR,
4054 			     "MAC address can be specified only once for bonding device %s",
4055 			     name);
4056 		return -1;
4057 	}
4058 
4059 	/* Parse/set balance mode transmit policy */
4060 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
4061 	if (arg_count == 1) {
4062 		uint8_t xmit_policy;
4063 
4064 		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
4065 				       &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
4066 		    0) {
4067 			RTE_BOND_LOG(INFO,
4068 				     "Invalid xmit policy specified for bonding device %s",
4069 				     name);
4070 			return -1;
4071 		}
4072 
4073 		/* Set balance mode transmit policy*/
4074 		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
4075 			RTE_BOND_LOG(ERR,
4076 				     "Failed to set balance xmit policy on bonding device %s",
4077 				     name);
4078 			return -1;
4079 		}
4080 	} else if (arg_count > 1) {
4081 		RTE_BOND_LOG(ERR,
4082 			     "Transmit policy can be specified only once for bonding device %s",
4083 			     name);
4084 		return -1;
4085 	}
4086 
4087 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
4088 		if (rte_kvargs_process(kvlist,
4089 				       PMD_BOND_AGG_MODE_KVARG,
4090 				       &bond_ethdev_parse_member_agg_mode_kvarg,
4091 				       &agg_mode) != 0) {
4092 			RTE_BOND_LOG(ERR,
4093 				     "Failed to parse agg selection mode for bonding device %s",
4094 				     name);
4095 		}
4096 		if (internals->mode == BONDING_MODE_8023AD) {
4097 			int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
4098 					agg_mode);
4099 			if (ret < 0) {
4100 				RTE_BOND_LOG(ERR,
4101 					"Invalid args for agg selection set for bonding device %s",
4102 					name);
4103 				return -1;
4104 			}
4105 		}
4106 	}
4107 
4108 	/* Parse/add member ports to bonding device */
4109 	if (rte_kvargs_count(kvlist, PMD_BOND_MEMBER_PORT_KVARG) > 0) {
4110 		struct bond_ethdev_member_ports member_ports;
4111 		unsigned i;
4112 
4113 		memset(&member_ports, 0, sizeof(member_ports));
4114 
4115 		if (rte_kvargs_process(kvlist, PMD_BOND_MEMBER_PORT_KVARG,
4116 				       &bond_ethdev_parse_member_port_kvarg, &member_ports) != 0) {
4117 			RTE_BOND_LOG(ERR,
4118 				     "Failed to parse member ports for bonding device %s",
4119 				     name);
4120 			return -1;
4121 		}
4122 
4123 		for (i = 0; i < member_ports.member_count; i++) {
4124 			if (rte_eth_bond_member_add(port_id, member_ports.members[i]) != 0) {
4125 				RTE_BOND_LOG(ERR,
4126 					     "Failed to add port %d as member to bonding device %s",
4127 					     member_ports.members[i], name);
4128 			}
4129 		}
4130 
4131 	} else {
4132 		RTE_BOND_LOG(INFO, "No members specified for bonding device %s", name);
4133 		return -1;
4134 	}
4135 
4136 	/* Parse/set primary member port id*/
4137 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_MEMBER_KVARG);
4138 	if (arg_count == 1) {
4139 		uint16_t primary_member_port_id;
4140 
4141 		if (rte_kvargs_process(kvlist,
4142 				       PMD_BOND_PRIMARY_MEMBER_KVARG,
4143 				       &bond_ethdev_parse_primary_member_port_id_kvarg,
4144 				       &primary_member_port_id) < 0) {
4145 			RTE_BOND_LOG(INFO,
4146 				     "Invalid primary member port id specified for bonding device %s",
4147 				     name);
4148 			return -1;
4149 		}
4150 
4151 		/* Set balance mode transmit policy*/
4152 		if (rte_eth_bond_primary_set(port_id, primary_member_port_id)
4153 		    != 0) {
4154 			RTE_BOND_LOG(ERR,
4155 				     "Failed to set primary member port %d on bonding device %s",
4156 				     primary_member_port_id, name);
4157 			return -1;
4158 		}
4159 	} else if (arg_count > 1) {
4160 		RTE_BOND_LOG(INFO,
4161 			     "Primary member can be specified only once for bonding device %s",
4162 			     name);
4163 		return -1;
4164 	}
4165 
4166 	/* Parse link status monitor polling interval */
4167 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
4168 	if (arg_count == 1) {
4169 		uint32_t lsc_poll_interval_ms;
4170 
4171 		if (rte_kvargs_process(kvlist,
4172 				       PMD_BOND_LSC_POLL_PERIOD_KVARG,
4173 				       &bond_ethdev_parse_time_ms_kvarg,
4174 				       &lsc_poll_interval_ms) < 0) {
4175 			RTE_BOND_LOG(INFO,
4176 				     "Invalid lsc polling interval value specified for bonding"
4177 				     " device %s", name);
4178 			return -1;
4179 		}
4180 
4181 		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
4182 		    != 0) {
4183 			RTE_BOND_LOG(ERR,
4184 				     "Failed to set lsc monitor polling interval (%u ms) on bonding device %s",
4185 				     lsc_poll_interval_ms, name);
4186 			return -1;
4187 		}
4188 	} else if (arg_count > 1) {
4189 		RTE_BOND_LOG(INFO,
4190 			     "LSC polling interval can be specified only once for bonding"
4191 			     " device %s", name);
4192 		return -1;
4193 	}
4194 
4195 	/* Parse link up interrupt propagation delay */
4196 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
4197 	if (arg_count == 1) {
4198 		uint32_t link_up_delay_ms;
4199 
4200 		if (rte_kvargs_process(kvlist,
4201 				       PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
4202 				       &bond_ethdev_parse_time_ms_kvarg,
4203 				       &link_up_delay_ms) < 0) {
4204 			RTE_BOND_LOG(INFO,
4205 				     "Invalid link up propagation delay value specified for"
4206 				     " bonding device %s", name);
4207 			return -1;
4208 		}
4209 
4210 		/* Set balance mode transmit policy*/
4211 		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
4212 		    != 0) {
4213 			RTE_BOND_LOG(ERR,
4214 				     "Failed to set link up propagation delay (%u ms) on bonding"
4215 				     " device %s", link_up_delay_ms, name);
4216 			return -1;
4217 		}
4218 	} else if (arg_count > 1) {
4219 		RTE_BOND_LOG(INFO,
4220 			     "Link up propagation delay can be specified only once for"
4221 			     " bonding device %s", name);
4222 		return -1;
4223 	}
4224 
4225 	/* Parse link down interrupt propagation delay */
4226 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
4227 	if (arg_count == 1) {
4228 		uint32_t link_down_delay_ms;
4229 
4230 		if (rte_kvargs_process(kvlist,
4231 				       PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
4232 				       &bond_ethdev_parse_time_ms_kvarg,
4233 				       &link_down_delay_ms) < 0) {
4234 			RTE_BOND_LOG(INFO,
4235 				     "Invalid link down propagation delay value specified for"
4236 				     " bonding device %s", name);
4237 			return -1;
4238 		}
4239 
4240 		/* Set balance mode transmit policy*/
4241 		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
4242 		    != 0) {
4243 			RTE_BOND_LOG(ERR,
4244 				     "Failed to set link down propagation delay (%u ms) on bonding device %s",
4245 				     link_down_delay_ms, name);
4246 			return -1;
4247 		}
4248 	} else if (arg_count > 1) {
4249 		RTE_BOND_LOG(INFO,
4250 			     "Link down propagation delay can be specified only once for  bonding device %s",
4251 			     name);
4252 		return -1;
4253 	}
4254 
4255 	/* configure members so we can pass mtu setting */
4256 	for (i = 0; i < internals->member_count; i++) {
4257 		struct rte_eth_dev *member_ethdev =
4258 				&(rte_eth_devices[internals->members[i].port_id]);
4259 		if (member_configure(dev, member_ethdev) != 0) {
4260 			RTE_BOND_LOG(ERR,
4261 				"bonding port (%d) failed to configure member device (%d)",
4262 				dev->data->port_id,
4263 				internals->members[i].port_id);
4264 			return -1;
4265 		}
4266 	}
4267 	return 0;
4268 }
4269 
4270 struct rte_vdev_driver pmd_bond_drv = {
4271 	.probe = bond_probe,
4272 	.remove = bond_remove,
4273 };
4274 
4275 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
4276 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
4277 
4278 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
4279 	"member=<ifc> "
4280 	"primary=<ifc> "
4281 	"mode=[0-6] "
4282 	"xmit_policy=[l2 | l23 | l34] "
4283 	"agg_mode=[count | stable | bandwidth] "
4284 	"socket_id=<int> "
4285 	"mac=<mac addr> "
4286 	"lsc_poll_period_ms=<int> "
4287 	"up_delay=<int> "
4288 	"down_delay=<int>");
4289 
4290 /* We can't use RTE_LOG_REGISTER_DEFAULT because of the forced name for
4291  * this library, see meson.build.
4292  */
4293 RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);
4294