1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <stdbool.h>
6 #include <netinet/in.h>
7 
8 #include <rte_mbuf.h>
9 #include <rte_malloc.h>
10 #include <rte_ethdev_driver.h>
11 #include <rte_ethdev_vdev.h>
12 #include <rte_tcp.h>
13 #include <rte_udp.h>
14 #include <rte_ip.h>
15 #include <rte_ip_frag.h>
16 #include <rte_devargs.h>
17 #include <rte_kvargs.h>
18 #include <rte_bus_vdev.h>
19 #include <rte_alarm.h>
20 #include <rte_cycles.h>
21 #include <rte_string_fns.h>
22 
23 #include "rte_eth_bond.h"
24 #include "eth_bond_private.h"
25 #include "eth_bond_8023ad_private.h"
26 
27 #define REORDER_PERIOD_MS 10
28 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
29 #define BOND_MAX_MAC_ADDRS 16
30 
31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
32 
33 /* Table for statistics in mode 5 TLB */
34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
35 
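/*
 * Return the size of the VLAN header(s) (single tag or QinQ) that follow the
 * Ethernet header, if any, and update *proto to the encapsulated EtherType.
 */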
36 static inline size_t
37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
38 {
39 	size_t vlan_offset = 0;
40 
41 	if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 		rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 		struct rte_vlan_hdr *vlan_hdr =
44 			(struct rte_vlan_hdr *)(eth_hdr + 1);
45 
46 		vlan_offset = sizeof(struct rte_vlan_hdr);
47 		*proto = vlan_hdr->eth_proto;
48 
49 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 			vlan_hdr = vlan_hdr + 1;
51 			*proto = vlan_hdr->eth_proto;
52 			vlan_offset += sizeof(struct rte_vlan_hdr);
53 		}
54 	}
55 	return vlan_offset;
56 }
57 
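/*
 * Rx burst for the round-robin, balance and broadcast modes: poll the active
 * slaves in turn, starting from internals->active_slave, until the burst is
 * full or every slave has been polled; the starting slave is advanced by one
 * for the next call.
 */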
58 static uint16_t
59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
60 {
61 	struct bond_dev_private *internals;
62 
63 	uint16_t num_rx_total = 0;
64 	uint16_t slave_count;
65 	uint16_t active_slave;
66 	int i;
67 
68 	/* Cast to structure containing the bonded device's port id and queue id */
69 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
70 	internals = bd_rx_q->dev_private;
71 	slave_count = internals->active_slave_count;
72 	active_slave = internals->active_slave;
73 
74 	for (i = 0; i < slave_count && nb_pkts; i++) {
75 		uint16_t num_rx_slave;
76 
77 		/* The write offset into *bufs advances as packets are received
78 		 * from successive slaves */
79 		num_rx_slave =
80 			rte_eth_rx_burst(internals->active_slaves[active_slave],
81 					 bd_rx_q->queue_id,
82 					 bufs + num_rx_total, nb_pkts);
83 		num_rx_total += num_rx_slave;
84 		nb_pkts -= num_rx_slave;
85 		if (++active_slave == slave_count)
86 			active_slave = 0;
87 	}
88 
89 	if (++internals->active_slave >= slave_count)
90 		internals->active_slave = 0;
91 	return num_rx_total;
92 }
93 
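/*
 * Rx burst for the active-backup and TLB modes: packets are only received
 * from the current primary slave.
 */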
94 static uint16_t
95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
96 		uint16_t nb_pkts)
97 {
98 	struct bond_dev_private *internals;
99 
100 	/* Cast to structure containing the bonded device's port id and queue id */
101 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
102 
103 	internals = bd_rx_q->dev_private;
104 
105 	return rte_eth_rx_burst(internals->current_primary_port,
106 			bd_rx_q->queue_id, bufs, nb_pkts);
107 }
108 
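/*
 * Return non-zero when the frame is an untagged slow protocol frame
 * (a LACPDU or a marker PDU).
 */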
109 static inline uint8_t
110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
111 {
112 	const uint16_t ether_type_slow_be =
113 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
114 
115 	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 		(ethertype == ether_type_slow_be &&
117 		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
118 }
119 
120 /*****************************************************************************
121  * Flow director's setup for mode 4 optimization
122  */
123 
124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
125 	.dst.addr_bytes = { 0 },
126 	.src.addr_bytes = { 0 },
127 	.type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
128 };
129 
130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
131 	.dst.addr_bytes = { 0 },
132 	.src.addr_bytes = { 0 },
133 	.type = 0xFFFF,
134 };
135 
136 static struct rte_flow_item flow_item_8023ad[] = {
137 	{
138 		.type = RTE_FLOW_ITEM_TYPE_ETH,
139 		.spec = &flow_item_eth_type_8023ad,
140 		.last = NULL,
141 		.mask = &flow_item_eth_mask_type_8023ad,
142 	},
143 	{
144 		.type = RTE_FLOW_ITEM_TYPE_END,
145 		.spec = NULL,
146 		.last = NULL,
147 		.mask = NULL,
148 	}
149 };
150 
151 const struct rte_flow_attr flow_attr_8023ad = {
152 	.group = 0,
153 	.priority = 0,
154 	.ingress = 1,
155 	.egress = 0,
156 	.reserved = 0,
157 };
158 
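/*
 * Check that the slow protocol flow rule can be created on the slave port and
 * that the slave has enough Rx/Tx queues to host the dedicated 802.3ad queues.
 */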
159 int
160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
161 		uint16_t slave_port) {
162 	struct rte_eth_dev_info slave_info;
163 	struct rte_flow_error error;
164 	struct bond_dev_private *internals = bond_dev->data->dev_private;
165 
166 	const struct rte_flow_action_queue lacp_queue_conf = {
167 		.index = 0,
168 	};
169 
170 	const struct rte_flow_action actions[] = {
171 		{
172 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
173 			.conf = &lacp_queue_conf
174 		},
175 		{
176 			.type = RTE_FLOW_ACTION_TYPE_END,
177 		}
178 	};
179 
180 	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
181 			flow_item_8023ad, actions, &error);
182 	if (ret < 0) {
183 		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
184 				__func__, error.message, slave_port,
185 				internals->mode4.dedicated_queues.rx_qid);
186 		return -1;
187 	}
188 
189 	ret = rte_eth_dev_info_get(slave_port, &slave_info);
190 	if (ret != 0) {
191 		RTE_BOND_LOG(ERR,
192 			"%s: Failed to get device (port %u) info: %s",
193 			__func__, slave_port, strerror(-ret));
194 
195 		return ret;
196 	}
197 
198 	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
199 			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
200 		RTE_BOND_LOG(ERR,
201 			"%s: Slave %d capabilities do not allow allocation of additional queues",
202 			__func__, slave_port);
203 		return -1;
204 	}
205 
206 	return 0;
207 }
208 
209 int
210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
211 	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
212 	struct bond_dev_private *internals = bond_dev->data->dev_private;
213 	struct rte_eth_dev_info bond_info;
214 	uint16_t idx;
215 	int ret;
216 
217 	/* Verify that all slaves in the bonding device support flow director */
218 	if (internals->slave_count > 0) {
219 		ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
220 		if (ret != 0) {
221 			RTE_BOND_LOG(ERR,
222 				"%s: Failed to get device (port %u) info: %s",
223 				__func__, bond_dev->data->port_id,
224 				strerror(-ret));
225 
226 			return ret;
227 		}
228 
229 		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
230 		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
231 
232 		for (idx = 0; idx < internals->slave_count; idx++) {
233 			if (bond_ethdev_8023ad_flow_verify(bond_dev,
234 					internals->slaves[idx].port_id) != 0)
235 				return -1;
236 		}
237 	}
238 
239 	return 0;
240 }
241 
242 int
243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
244 
245 	struct rte_flow_error error;
246 	struct bond_dev_private *internals = bond_dev->data->dev_private;
247 	struct rte_flow_action_queue lacp_queue_conf = {
248 		.index = internals->mode4.dedicated_queues.rx_qid,
249 	};
250 
251 	const struct rte_flow_action actions[] = {
252 		{
253 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
254 			.conf = &lacp_queue_conf
255 		},
256 		{
257 			.type = RTE_FLOW_ACTION_TYPE_END,
258 		}
259 	};
260 
261 	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
262 			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
263 	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
264 		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
265 				"(slave_port=%d queue_id=%d)",
266 				error.message, slave_port,
267 				internals->mode4.dedicated_queues.rx_qid);
268 		return -1;
269 	}
270 
271 	return 0;
272 }
273 
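/*
 * Rx burst for 802.3AD mode: poll the active slaves round-robin; slow protocol
 * frames are handed to the mode 4 state machine (unless a dedicated Rx queue
 * carries them) and frames that must not reach the application (slave not
 * collecting, or destination address filtered while not in promiscuous or
 * allmulti mode) are removed from the burst.
 */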
274 static inline uint16_t
275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
276 		bool dedicated_rxq)
277 {
278 	/* Cast to structure containing the bonded device's port id and queue id */
279 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
280 	struct bond_dev_private *internals = bd_rx_q->dev_private;
281 	struct rte_eth_dev *bonded_eth_dev =
282 					&rte_eth_devices[internals->port_id];
283 	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
284 	struct rte_ether_hdr *hdr;
285 
286 	const uint16_t ether_type_slow_be =
287 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
288 	uint16_t num_rx_total = 0;	/* Total number of received packets */
289 	uint16_t slaves[RTE_MAX_ETHPORTS];
290 	uint16_t slave_count, idx;
291 
292 	uint8_t collecting;  /* current slave collecting status */
293 	const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
294 	const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
295 	uint8_t subtype;
296 	uint16_t i;
297 	uint16_t j;
298 	uint16_t k;
299 
300 	/* Copy slave list to protect against slave up/down changes during rx
301 	 * bursting */
302 	slave_count = internals->active_slave_count;
303 	memcpy(slaves, internals->active_slaves,
304 			sizeof(internals->active_slaves[0]) * slave_count);
305 
306 	idx = internals->active_slave;
307 	if (idx >= slave_count) {
308 		internals->active_slave = 0;
309 		idx = 0;
310 	}
311 	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
312 		j = num_rx_total;
313 		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
314 					 COLLECTING);
315 
316 		/* Read packets from this slave */
317 		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
318 				&bufs[num_rx_total], nb_pkts - num_rx_total);
319 
320 		for (k = j; k < 2 && k < num_rx_total; k++)
321 			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
322 
323 		/* Handle slow protocol packets. */
324 		while (j < num_rx_total) {
325 			if (j + 3 < num_rx_total)
326 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
327 
328 			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
329 			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
330 
331 			/* Remove packet from array if:
332 			 * - it is a slow protocol packet and no dedicated Rx queue is present,
333 			 * - slave is not in collecting state,
334 			 * - bonding interface is not in promiscuous mode:
335 			 *   - packet is unicast and address does not match,
336 			 *   - packet is multicast and bonding interface
337 			 *     is not in allmulti,
338 			 */
339 			if (unlikely(
340 				(!dedicated_rxq &&
341 				 is_lacp_packets(hdr->ether_type, subtype,
342 						 bufs[j])) ||
343 				!collecting ||
344 				(!promisc &&
345 				 ((rte_is_unicast_ether_addr(&hdr->d_addr) &&
346 				   !rte_is_same_ether_addr(bond_mac,
347 						       &hdr->d_addr)) ||
348 				  (!allmulti &&
349 				   rte_is_multicast_ether_addr(&hdr->d_addr)))))) {
350 
351 				if (hdr->ether_type == ether_type_slow_be) {
352 					bond_mode_8023ad_handle_slow_pkt(
353 					    internals, slaves[idx], bufs[j]);
354 				} else
355 					rte_pktmbuf_free(bufs[j]);
356 
357 				/* Packet is managed by mode 4 or dropped, shift the array */
358 				num_rx_total--;
359 				if (j < num_rx_total) {
360 					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
361 						(num_rx_total - j));
362 				}
363 			} else
364 				j++;
365 		}
366 		if (unlikely(++idx == slave_count))
367 			idx = 0;
368 	}
369 
370 	if (++internals->active_slave >= slave_count)
371 		internals->active_slave = 0;
372 
373 	return num_rx_total;
374 }
375 
376 static uint16_t
377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
378 		uint16_t nb_pkts)
379 {
380 	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
381 }
382 
383 static uint16_t
384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
385 		uint16_t nb_pkts)
386 {
387 	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
388 }
389 
390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
391 uint32_t burstnumberRX;
392 uint32_t burstnumberTX;
393 
394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
395 
396 static void
397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
398 {
399 	switch (arp_op) {
400 	case RTE_ARP_OP_REQUEST:
401 		strlcpy(buf, "ARP Request", buf_len);
402 		return;
403 	case RTE_ARP_OP_REPLY:
404 		strlcpy(buf, "ARP Reply", buf_len);
405 		return;
406 	case RTE_ARP_OP_REVREQUEST:
407 		strlcpy(buf, "Reverse ARP Request", buf_len);
408 		return;
409 	case RTE_ARP_OP_REVREPLY:
410 		strlcpy(buf, "Reverse ARP Reply", buf_len);
411 		return;
412 	case RTE_ARP_OP_INVREQUEST:
413 		strlcpy(buf, "Peer Identify Request", buf_len);
414 		return;
415 	case RTE_ARP_OP_INVREPLY:
416 		strlcpy(buf, "Peer Identify Reply", buf_len);
417 		return;
418 	default:
419 		break;
420 	}
421 	strlcpy(buf, "Unknown", buf_len);
422 	return;
423 }
424 #endif
425 #define MaxIPv4String	16
426 static void
427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
428 {
429 	uint32_t ipv4_addr;
430 
431 	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
432 	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
433 		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
434 		ipv4_addr & 0xFF);
435 }
436 
437 #define MAX_CLIENTS_NUMBER	128
438 uint8_t active_clients;
439 struct client_stats_t {
440 	uint16_t port;
441 	uint32_t ipv4_addr;
442 	uint32_t ipv4_rx_packets;
443 	uint32_t ipv4_tx_packets;
444 };
445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
446 
447 static void
448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
449 {
450 	int i = 0;
451 
452 	for (; i < MAX_CLIENTS_NUMBER; i++)	{
453 		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
454 			/* Existing client: update its RX or TX packet count */
455 			if (TXorRXindicator == &burstnumberRX)
456 				client_stats[i].ipv4_rx_packets++;
457 			else
458 				client_stats[i].ipv4_tx_packets++;
459 			return;
460 		}
461 	}
462 	/* We have a new client. Insert it into the table and update its stats */
463 	if (TXorRXindicator == &burstnumberRX)
464 		client_stats[active_clients].ipv4_rx_packets++;
465 	else
466 		client_stats[active_clients].ipv4_tx_packets++;
467 	client_stats[active_clients].ipv4_addr = addr;
468 	client_stats[active_clients].port = port;
469 	active_clients++;
470 
471 }
472 
473 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
474 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
475 	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
476 		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
477 		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
478 		info,							\
479 		port,							\
480 		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
481 		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
482 		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
483 		src_ip,							\
484 		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
485 		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
486 		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
487 		dst_ip,							\
488 		arp_op, ++burstnumber)
489 #endif
490 
491 static void
492 mode6_debug(const char __rte_unused *info,
493 	struct rte_ether_hdr *eth_h, uint16_t port,
494 	uint32_t __rte_unused *burstnumber)
495 {
496 	struct rte_ipv4_hdr *ipv4_h;
497 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
498 	struct rte_arp_hdr *arp_h;
499 	char dst_ip[16];
500 	char ArpOp[24];
501 	char buf[16];
502 #endif
503 	char src_ip[16];
504 
505 	uint16_t ether_type = eth_h->ether_type;
506 	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
507 
508 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
509 	strlcpy(buf, info, 16);
510 #endif
511 
512 	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
513 		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
514 		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
515 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
516 		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
517 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
518 #endif
519 		update_client_stats(ipv4_h->src_addr, port, burstnumber);
520 	}
521 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
522 	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
523 		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
524 		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
525 		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
526 		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
527 				ArpOp, sizeof(ArpOp));
528 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
529 	}
530 #endif
531 }
532 #endif
533 
534 static uint16_t
535 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
536 {
537 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
538 	struct bond_dev_private *internals = bd_tx_q->dev_private;
539 	struct rte_ether_hdr *eth_h;
540 	uint16_t ether_type, offset;
541 	uint16_t nb_recv_pkts;
542 	int i;
543 
544 	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
545 
546 	for (i = 0; i < nb_recv_pkts; i++) {
547 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
548 		ether_type = eth_h->ether_type;
549 		offset = get_vlan_offset(eth_h, &ether_type);
550 
551 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
552 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
553 			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
554 #endif
555 			bond_mode_alb_arp_recv(eth_h, offset, internals);
556 		}
557 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
558 		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
559 			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
560 #endif
561 	}
562 
563 	return nb_recv_pkts;
564 }
565 
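/*
 * Tx burst for round-robin mode: spread the burst over the active slaves in
 * round-robin order; packets a slave failed to send are moved to the end of
 * bufs so the caller can retry or free them.
 */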
566 static uint16_t
567 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
568 		uint16_t nb_pkts)
569 {
570 	struct bond_dev_private *internals;
571 	struct bond_tx_queue *bd_tx_q;
572 
573 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
574 	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
575 
576 	uint16_t num_of_slaves;
577 	uint16_t slaves[RTE_MAX_ETHPORTS];
578 
579 	uint16_t num_tx_total = 0, num_tx_slave;
580 
581 	static int slave_idx = 0;
582 	int i, cslave_idx = 0, tx_fail_total = 0;
583 
584 	bd_tx_q = (struct bond_tx_queue *)queue;
585 	internals = bd_tx_q->dev_private;
586 
587 	/* Copy slave list to protect against slave up/down changes during tx
588 	 * bursting */
589 	num_of_slaves = internals->active_slave_count;
590 	memcpy(slaves, internals->active_slaves,
591 			sizeof(internals->active_slaves[0]) * num_of_slaves);
592 
593 	if (num_of_slaves < 1)
594 		return num_tx_total;
595 
596 	/* Populate the per-slave mbuf arrays with the packets to be sent on each slave */
597 	for (i = 0; i < nb_pkts; i++) {
598 		cslave_idx = (slave_idx + i) % num_of_slaves;
599 		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
600 	}
601 
602 	/* increment current slave index so the next call to tx burst starts on the
603 	 * next slave */
604 	slave_idx = ++cslave_idx;
605 
606 	/* Send packet burst on each slave device */
607 	for (i = 0; i < num_of_slaves; i++) {
608 		if (slave_nb_pkts[i] > 0) {
609 			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
610 					slave_bufs[i], slave_nb_pkts[i]);
611 
612 			/* if tx burst fails move packets to end of bufs */
613 			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
614 				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
615 
616 				tx_fail_total += tx_fail_slave;
617 
618 				memcpy(&bufs[nb_pkts - tx_fail_total],
619 				       &slave_bufs[i][num_tx_slave],
620 				       tx_fail_slave * sizeof(bufs[0]));
621 			}
622 			num_tx_total += num_tx_slave;
623 		}
624 	}
625 
626 	return num_tx_total;
627 }
628 
629 static uint16_t
630 bond_ethdev_tx_burst_active_backup(void *queue,
631 		struct rte_mbuf **bufs, uint16_t nb_pkts)
632 {
633 	struct bond_dev_private *internals;
634 	struct bond_tx_queue *bd_tx_q;
635 
636 	bd_tx_q = (struct bond_tx_queue *)queue;
637 	internals = bd_tx_q->dev_private;
638 
639 	if (internals->active_slave_count < 1)
640 		return 0;
641 
642 	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
643 			bufs, nb_pkts);
644 }
645 
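/*
 * XOR-folding hash helpers over the Ethernet, IPv4 and IPv6 addresses,
 * used by the burst_xmit_*_hash() transmit policies below.
 */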
646 static inline uint16_t
647 ether_hash(struct rte_ether_hdr *eth_hdr)
648 {
649 	unaligned_uint16_t *word_src_addr =
650 		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
651 	unaligned_uint16_t *word_dst_addr =
652 		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
653 
654 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
655 			(word_src_addr[1] ^ word_dst_addr[1]) ^
656 			(word_src_addr[2] ^ word_dst_addr[2]);
657 }
658 
659 static inline uint32_t
660 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
661 {
662 	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
663 }
664 
665 static inline uint32_t
666 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
667 {
668 	unaligned_uint32_t *word_src_addr =
669 		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
670 	unaligned_uint32_t *word_dst_addr =
671 		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
672 
673 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
674 			(word_src_addr[1] ^ word_dst_addr[1]) ^
675 			(word_src_addr[2] ^ word_dst_addr[2]) ^
676 			(word_src_addr[3] ^ word_dst_addr[3]);
677 }
678 
679 
680 void
681 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
682 		uint16_t slave_count, uint16_t *slaves)
683 {
684 	struct rte_ether_hdr *eth_hdr;
685 	uint32_t hash;
686 	int i;
687 
688 	for (i = 0; i < nb_pkts; i++) {
689 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
690 
691 		hash = ether_hash(eth_hdr);
692 
693 		slaves[i] = (hash ^= hash >> 8) % slave_count;
694 	}
695 }
696 
697 void
698 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
699 		uint16_t slave_count, uint16_t *slaves)
700 {
701 	uint16_t i;
702 	struct rte_ether_hdr *eth_hdr;
703 	uint16_t proto;
704 	size_t vlan_offset;
705 	uint32_t hash, l3hash;
706 
707 	for (i = 0; i < nb_pkts; i++) {
708 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
709 		l3hash = 0;
710 
711 		proto = eth_hdr->ether_type;
712 		hash = ether_hash(eth_hdr);
713 
714 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
715 
716 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
717 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
718 					((char *)(eth_hdr + 1) + vlan_offset);
719 			l3hash = ipv4_hash(ipv4_hdr);
720 
721 		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
722 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
723 					((char *)(eth_hdr + 1) + vlan_offset);
724 			l3hash = ipv6_hash(ipv6_hdr);
725 		}
726 
727 		hash = hash ^ l3hash;
728 		hash ^= hash >> 16;
729 		hash ^= hash >> 8;
730 
731 		slaves[i] = hash % slave_count;
732 	}
733 }
734 
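/*
 * Layer 3+4 transmit policy: map each packet to a slave using a hash of the
 * IP addresses and, when an L4 header is present, the TCP/UDP ports.
 */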
735 void
736 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
737 		uint16_t slave_count, uint16_t *slaves)
738 {
739 	struct rte_ether_hdr *eth_hdr;
740 	uint16_t proto;
741 	size_t vlan_offset;
742 	int i;
743 
744 	struct rte_udp_hdr *udp_hdr;
745 	struct rte_tcp_hdr *tcp_hdr;
746 	uint32_t hash, l3hash, l4hash;
747 
748 	for (i = 0; i < nb_pkts; i++) {
749 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
750 		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
751 		proto = eth_hdr->ether_type;
752 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
753 		l3hash = 0;
754 		l4hash = 0;
755 
756 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
757 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
758 					((char *)(eth_hdr + 1) + vlan_offset);
759 			size_t ip_hdr_offset;
760 
761 			l3hash = ipv4_hash(ipv4_hdr);
762 
763 			/* there is no L4 header in a fragmented packet */
764 			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
765 								== 0)) {
766 				ip_hdr_offset = (ipv4_hdr->version_ihl
767 					& RTE_IPV4_HDR_IHL_MASK) *
768 					RTE_IPV4_IHL_MULTIPLIER;
769 
770 				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
771 					tcp_hdr = (struct rte_tcp_hdr *)
772 						((char *)ipv4_hdr +
773 							ip_hdr_offset);
774 					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
775 							< pkt_end)
776 						l4hash = HASH_L4_PORTS(tcp_hdr);
777 				} else if (ipv4_hdr->next_proto_id ==
778 								IPPROTO_UDP) {
779 					udp_hdr = (struct rte_udp_hdr *)
780 						((char *)ipv4_hdr +
781 							ip_hdr_offset);
782 					if ((size_t)udp_hdr + sizeof(*udp_hdr)
783 							< pkt_end)
784 						l4hash = HASH_L4_PORTS(udp_hdr);
785 				}
786 			}
787 		} else if  (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
788 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
789 					((char *)(eth_hdr + 1) + vlan_offset);
790 			l3hash = ipv6_hash(ipv6_hdr);
791 
792 			if (ipv6_hdr->proto == IPPROTO_TCP) {
793 				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
794 				l4hash = HASH_L4_PORTS(tcp_hdr);
795 			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
796 				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
797 				l4hash = HASH_L4_PORTS(udp_hdr);
798 			}
799 		}
800 
801 		hash = l3hash ^ l4hash;
802 		hash ^= hash >> 16;
803 		hash ^= hash >> 8;
804 
805 		slaves[i] = hash % slave_count;
806 	}
807 }
808 
809 struct bwg_slave {
810 	uint64_t bwg_left_int;
811 	uint64_t bwg_left_remainder;
812 	uint16_t slave;
813 };
814 
815 void
816 bond_tlb_activate_slave(struct bond_dev_private *internals) {
817 	int i;
818 
819 	for (i = 0; i < internals->active_slave_count; i++) {
820 		tlb_last_obytets[internals->active_slaves[i]] = 0;
821 	}
822 }
823 
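/*
 * qsort() comparator ordering slaves by remaining bandwidth, largest first
 * (integer part first, remainder as tie-breaker).
 */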
824 static int
825 bandwidth_cmp(const void *a, const void *b)
826 {
827 	const struct bwg_slave *bwg_a = a;
828 	const struct bwg_slave *bwg_b = b;
829 	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
830 	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
831 			(int64_t)bwg_a->bwg_left_remainder;
832 	if (diff > 0)
833 		return 1;
834 	else if (diff < 0)
835 		return -1;
836 	else if (diff2 > 0)
837 		return 1;
838 	else if (diff2 < 0)
839 		return -1;
840 	else
841 		return 0;
842 }
843 
844 static void
845 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
846 		struct bwg_slave *bwg_slave)
847 {
848 	struct rte_eth_link link_status;
849 	int ret;
850 
851 	ret = rte_eth_link_get_nowait(port_id, &link_status);
852 	if (ret < 0) {
853 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
854 			     port_id, rte_strerror(-ret));
855 		return;
856 	}
857 	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
858 	if (link_bwg == 0)
859 		return;
860 	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
861 	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
862 	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
863 }
864 
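/*
 * Periodic alarm callback for TLB mode: sample each active slave's Tx byte
 * counter, estimate the bandwidth left on each slave and reorder
 * internals->tlb_slaves_order accordingly; re-arms itself every
 * REORDER_PERIOD_MS milliseconds.
 */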
865 static void
866 bond_ethdev_update_tlb_slave_cb(void *arg)
867 {
868 	struct bond_dev_private *internals = arg;
869 	struct rte_eth_stats slave_stats;
870 	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
871 	uint16_t slave_count;
872 	uint64_t tx_bytes;
873 
874 	uint8_t update_stats = 0;
875 	uint16_t slave_id;
876 	uint16_t i;
877 
878 	internals->slave_update_idx++;
879 
880 
881 	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
882 		update_stats = 1;
883 
884 	for (i = 0; i < internals->active_slave_count; i++) {
885 		slave_id = internals->active_slaves[i];
886 		rte_eth_stats_get(slave_id, &slave_stats);
887 		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
888 		bandwidth_left(slave_id, tx_bytes,
889 				internals->slave_update_idx, &bwg_array[i]);
890 		bwg_array[i].slave = slave_id;
891 
892 		if (update_stats) {
893 			tlb_last_obytets[slave_id] = slave_stats.obytes;
894 		}
895 	}
896 
897 	if (update_stats == 1)
898 		internals->slave_update_idx = 0;
899 
900 	slave_count = i;
901 	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
902 	for (i = 0; i < slave_count; i++)
903 		internals->tlb_slaves_order[i] = bwg_array[i].slave;
904 
905 	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
906 			(struct bond_dev_private *)internals);
907 }
908 
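/*
 * Tx burst for TLB mode: transmit on the slaves in the precomputed bandwidth
 * order, rewriting the source MAC of frames that carry the primary slave's
 * address so that the transmitting slave's MAC is used instead.
 */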
909 static uint16_t
910 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
911 {
912 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
913 	struct bond_dev_private *internals = bd_tx_q->dev_private;
914 
915 	struct rte_eth_dev *primary_port =
916 			&rte_eth_devices[internals->primary_port];
917 	uint16_t num_tx_total = 0;
918 	uint16_t i, j;
919 
920 	uint16_t num_of_slaves = internals->active_slave_count;
921 	uint16_t slaves[RTE_MAX_ETHPORTS];
922 
923 	struct rte_ether_hdr *ether_hdr;
924 	struct rte_ether_addr primary_slave_addr;
925 	struct rte_ether_addr active_slave_addr;
926 
927 	if (num_of_slaves < 1)
928 		return num_tx_total;
929 
930 	memcpy(slaves, internals->tlb_slaves_order,
931 				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
932 
933 
934 	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
935 
936 	if (nb_pkts > 3) {
937 		for (i = 0; i < 3; i++)
938 			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
939 	}
940 
941 	for (i = 0; i < num_of_slaves; i++) {
942 		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
943 		for (j = num_tx_total; j < nb_pkts; j++) {
944 			if (j + 3 < nb_pkts)
945 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
946 
947 			ether_hdr = rte_pktmbuf_mtod(bufs[j],
948 						struct rte_ether_hdr *);
949 			if (rte_is_same_ether_addr(&ether_hdr->s_addr,
950 							&primary_slave_addr))
951 				rte_ether_addr_copy(&active_slave_addr,
952 						&ether_hdr->s_addr);
953 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
954 			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
955 #endif
956 		}
957 
958 		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
959 				bufs + num_tx_total, nb_pkts - num_tx_total);
960 
961 		if (num_tx_total == nb_pkts)
962 			break;
963 	}
964 
965 	return num_tx_total;
966 }
967 
968 void
969 bond_tlb_disable(struct bond_dev_private *internals)
970 {
971 	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
972 }
973 
974 void
975 bond_tlb_enable(struct bond_dev_private *internals)
976 {
977 	bond_ethdev_update_tlb_slave_cb(internals);
978 }
979 
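/*
 * Tx burst for ALB mode: ARP packets are assigned to slaves by the ALB client
 * table (with the source MAC rewritten to the chosen slave), ARP update
 * packets are generated for known clients when needed, and all other traffic
 * is sent using the TLB policy.
 */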
980 static uint16_t
981 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
982 {
983 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
984 	struct bond_dev_private *internals = bd_tx_q->dev_private;
985 
986 	struct rte_ether_hdr *eth_h;
987 	uint16_t ether_type, offset;
988 
989 	struct client_data *client_info;
990 
991 	/*
992 	 * We create transmit buffers for every slave and one additional to send
993 	 * through TLB. In the worst case every packet will be sent on one port.
994 	 */
995 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
996 	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
997 
998 	/*
999 	 * We create separate transmit buffers for update packets as they won't
1000 	 * be counted in num_tx_total.
1001 	 */
1002 	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1003 	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1004 
1005 	struct rte_mbuf *upd_pkt;
1006 	size_t pkt_size;
1007 
1008 	uint16_t num_send, num_not_send = 0;
1009 	uint16_t num_tx_total = 0;
1010 	uint16_t slave_idx;
1011 
1012 	int i, j;
1013 
1014 	/* Search tx buffer for ARP packets and forward them to alb */
1015 	for (i = 0; i < nb_pkts; i++) {
1016 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1017 		ether_type = eth_h->ether_type;
1018 		offset = get_vlan_offset(eth_h, &ether_type);
1019 
1020 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1021 			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1022 
1023 			/* Change src mac in eth header */
1024 			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1025 
1026 			/* Add packet to slave tx buffer */
1027 			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1028 			slave_bufs_pkts[slave_idx]++;
1029 		} else {
1030 			/* If packet is not ARP, send it with TLB policy */
1031 			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1032 					bufs[i];
1033 			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1034 		}
1035 	}
1036 
1037 	/* Update connected client ARP tables */
1038 	if (internals->mode6.ntt) {
1039 		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1040 			client_info = &internals->mode6.client_table[i];
1041 
1042 			if (client_info->in_use) {
1043 				/* Allocate new packet to send ARP update on current slave */
1044 				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1045 				if (upd_pkt == NULL) {
1046 					RTE_BOND_LOG(ERR,
1047 						     "Failed to allocate ARP packet from pool");
1048 					continue;
1049 				}
1050 				pkt_size = sizeof(struct rte_ether_hdr) +
1051 					sizeof(struct rte_arp_hdr) +
1052 					client_info->vlan_count *
1053 					sizeof(struct rte_vlan_hdr);
1054 				upd_pkt->data_len = pkt_size;
1055 				upd_pkt->pkt_len = pkt_size;
1056 
1057 				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1058 						internals);
1059 
1060 				/* Add packet to update tx buffer */
1061 				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1062 				update_bufs_pkts[slave_idx]++;
1063 			}
1064 		}
1065 		internals->mode6.ntt = 0;
1066 	}
1067 
1068 	/* Send ARP packets on proper slaves */
1069 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1070 		if (slave_bufs_pkts[i] > 0) {
1071 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1072 					slave_bufs[i], slave_bufs_pkts[i]);
1073 			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1074 				bufs[nb_pkts - 1 - num_not_send - j] =
1075 						slave_bufs[i][nb_pkts - 1 - j];
1076 			}
1077 
1078 			num_tx_total += num_send;
1079 			num_not_send += slave_bufs_pkts[i] - num_send;
1080 
1081 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1082 			/* Print TX stats including update packets */
1083 			for (j = 0; j < slave_bufs_pkts[i]; j++) {
1084 				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1085 							struct rte_ether_hdr *);
1086 				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1087 			}
1088 #endif
1089 		}
1090 	}
1091 
1092 	/* Send update packets on proper slaves */
1093 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1094 		if (update_bufs_pkts[i] > 0) {
1095 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1096 					update_bufs_pkts[i]);
1097 			for (j = num_send; j < update_bufs_pkts[i]; j++) {
1098 				rte_pktmbuf_free(update_bufs[i][j]);
1099 			}
1100 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1101 			for (j = 0; j < update_bufs_pkts[i]; j++) {
1102 				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1103 							struct rte_ether_hdr *);
1104 				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1105 			}
1106 #endif
1107 		}
1108 	}
1109 
1110 	/* Send non-ARP packets using tlb policy */
1111 	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1112 		num_send = bond_ethdev_tx_burst_tlb(queue,
1113 				slave_bufs[RTE_MAX_ETHPORTS],
1114 				slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1115 
1116 		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1117 			bufs[nb_pkts - 1 - num_not_send - j] =
1118 					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1119 		}
1120 
1121 		num_tx_total += num_send;
1122 	}
1123 
1124 	return num_tx_total;
1125 }
1126 
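/*
 * Hash each packet onto one of the supplied slaves using the configured
 * transmit policy and burst the per-slave arrays; packets that could not be
 * sent are moved to the end of bufs.
 */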
1127 static inline uint16_t
1128 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1129 		 uint16_t *slave_port_ids, uint16_t slave_count)
1130 {
1131 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1132 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1133 
1134 	/* Array to sort mbufs for transmission on each slave into */
1135 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1136 	/* Number of mbufs for transmission on each slave */
1137 	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1138 	/* Mapping array generated by hash function to map mbufs to slaves */
1139 	uint16_t bufs_slave_port_idxs[nb_bufs];
1140 
1141 	uint16_t slave_tx_count;
1142 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1143 
1144 	uint16_t i;
1145 
1146 	/*
1147 	 * Populate the per-slave mbuf arrays with the packets to be sent on each
1148 	 * slave, selecting the output slave with a hash based on the xmit policy
1149 	 */
1150 	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1151 			bufs_slave_port_idxs);
1152 
1153 	for (i = 0; i < nb_bufs; i++) {
1154 		/* Populate slave mbuf arrays with mbufs for that slave. */
1155 		uint16_t slave_idx = bufs_slave_port_idxs[i];
1156 
1157 		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1158 	}
1159 
1160 	/* Send packet burst on each slave device */
1161 	for (i = 0; i < slave_count; i++) {
1162 		if (slave_nb_bufs[i] == 0)
1163 			continue;
1164 
1165 		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1166 				bd_tx_q->queue_id, slave_bufs[i],
1167 				slave_nb_bufs[i]);
1168 
1169 		total_tx_count += slave_tx_count;
1170 
1171 		/* If tx burst fails move packets to end of bufs */
1172 		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1173 			int slave_tx_fail_count = slave_nb_bufs[i] -
1174 					slave_tx_count;
1175 			total_tx_fail_count += slave_tx_fail_count;
1176 			memcpy(&bufs[nb_bufs - total_tx_fail_count],
1177 			       &slave_bufs[i][slave_tx_count],
1178 			       slave_tx_fail_count * sizeof(bufs[0]));
1179 		}
1180 	}
1181 
1182 	return total_tx_count;
1183 }
1184 
1185 static uint16_t
1186 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1187 		uint16_t nb_bufs)
1188 {
1189 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1190 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1191 
1192 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1193 	uint16_t slave_count;
1194 
1195 	if (unlikely(nb_bufs == 0))
1196 		return 0;
1197 
1198 	/* Copy slave list to protect against slave up/down changes during tx
1199 	 * bursting
1200 	 */
1201 	slave_count = internals->active_slave_count;
1202 	if (unlikely(slave_count < 1))
1203 		return 0;
1204 
1205 	memcpy(slave_port_ids, internals->active_slaves,
1206 			sizeof(slave_port_ids[0]) * slave_count);
1207 	return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1208 				slave_count);
1209 }
1210 
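/*
 * Tx burst for 802.3AD mode: first drain any pending LACP control packets
 * from each slave's tx_ring (unless a dedicated Tx queue carries them), then
 * balance the data packets across the slaves in DISTRIBUTING state.
 */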
1211 static inline uint16_t
1212 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1213 		bool dedicated_txq)
1214 {
1215 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1216 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1217 
1218 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1219 	uint16_t slave_count;
1220 
1221 	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1222 	uint16_t dist_slave_count;
1223 
1224 	uint16_t slave_tx_count;
1225 
1226 	uint16_t i;
1227 
1228 	/* Copy slave list to protect against slave up/down changes during tx
1229 	 * bursting */
1230 	slave_count = internals->active_slave_count;
1231 	if (unlikely(slave_count < 1))
1232 		return 0;
1233 
1234 	memcpy(slave_port_ids, internals->active_slaves,
1235 			sizeof(slave_port_ids[0]) * slave_count);
1236 
1237 	if (dedicated_txq)
1238 		goto skip_tx_ring;
1239 
1240 	/* Check for LACP control packets and send if available */
1241 	for (i = 0; i < slave_count; i++) {
1242 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1243 		struct rte_mbuf *ctrl_pkt = NULL;
1244 
1245 		if (likely(rte_ring_empty(port->tx_ring)))
1246 			continue;
1247 
1248 		if (rte_ring_dequeue(port->tx_ring,
1249 				     (void **)&ctrl_pkt) != -ENOENT) {
1250 			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1251 					bd_tx_q->queue_id, &ctrl_pkt, 1);
1252 			/*
1253 			 * re-enqueue LAG control plane packets to buffering
1254 			 * ring if transmission fails so the packet isn't lost.
1255 			 */
1256 			if (slave_tx_count != 1)
1257 				rte_ring_enqueue(port->tx_ring,	ctrl_pkt);
1258 		}
1259 	}
1260 
1261 skip_tx_ring:
1262 	if (unlikely(nb_bufs == 0))
1263 		return 0;
1264 
1265 	dist_slave_count = 0;
1266 	for (i = 0; i < slave_count; i++) {
1267 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1268 
1269 		if (ACTOR_STATE(port, DISTRIBUTING))
1270 			dist_slave_port_ids[dist_slave_count++] =
1271 					slave_port_ids[i];
1272 	}
1273 
1274 	if (unlikely(dist_slave_count < 1))
1275 		return 0;
1276 
1277 	return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1278 				dist_slave_count);
1279 }
1280 
1281 static uint16_t
1282 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1283 		uint16_t nb_bufs)
1284 {
1285 	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1286 }
1287 
1288 static uint16_t
1289 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1290 		uint16_t nb_bufs)
1291 {
1292 	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
1293 }
1294 
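/*
 * Tx burst for broadcast mode: bump the mbuf reference counts and transmit
 * the whole burst on every active slave; copies that the less successful
 * slaves failed to send are freed and the count of the most successful slave
 * is returned.
 */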
1295 static uint16_t
1296 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1297 		uint16_t nb_pkts)
1298 {
1299 	struct bond_dev_private *internals;
1300 	struct bond_tx_queue *bd_tx_q;
1301 
1302 	uint16_t slaves[RTE_MAX_ETHPORTS];
1303 	uint8_t tx_failed_flag = 0;
1304 	uint16_t num_of_slaves;
1305 
1306 	uint16_t max_nb_of_tx_pkts = 0;
1307 
1308 	int slave_tx_total[RTE_MAX_ETHPORTS];
1309 	int i, most_successful_tx_slave = -1;
1310 
1311 	bd_tx_q = (struct bond_tx_queue *)queue;
1312 	internals = bd_tx_q->dev_private;
1313 
1314 	/* Copy slave list to protect against slave up/down changes during tx
1315 	 * bursting */
1316 	num_of_slaves = internals->active_slave_count;
1317 	memcpy(slaves, internals->active_slaves,
1318 			sizeof(internals->active_slaves[0]) * num_of_slaves);
1319 
1320 	if (num_of_slaves < 1)
1321 		return 0;
1322 
1323 	/* Increment reference count on mbufs */
1324 	for (i = 0; i < nb_pkts; i++)
1325 		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1326 
1327 	/* Transmit burst on each active slave */
1328 	for (i = 0; i < num_of_slaves; i++) {
1329 		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1330 					bufs, nb_pkts);
1331 
1332 		if (unlikely(slave_tx_total[i] < nb_pkts))
1333 			tx_failed_flag = 1;
1334 
1335 		/* record the value and slave index for the slave which transmits the
1336 		 * maximum number of packets */
1337 		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1338 			max_nb_of_tx_pkts = slave_tx_total[i];
1339 			most_successful_tx_slave = i;
1340 		}
1341 	}
1342 
1343 	/* If slaves fail to transmit packets from the burst, the calling application
1344 	 * is not expected to know about the multiple mbuf references, so we must
1345 	 * handle the failed packets of every slave except the most successful one
1346 	 */
1347 	if (unlikely(tx_failed_flag))
1348 		for (i = 0; i < num_of_slaves; i++)
1349 			if (i != most_successful_tx_slave)
1350 				while (slave_tx_total[i] < nb_pkts)
1351 					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1352 
1353 	return max_nb_of_tx_pkts;
1354 }
1355 
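/*
 * Record the link properties used by the bonded device: in mode 4 the first
 * slave's speed/duplex/autoneg become the reference that subsequent slaves
 * must match; in every other mode the bonded link defaults to
 * autoneg/full-duplex.
 */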
1356 static void
1357 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1358 {
1359 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1360 
1361 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1362 		/**
1363 		 * In mode 4, save the link properties of the first slave;
1364 		 * all subsequent slaves must match these properties
1365 		 */
1366 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1367 
1368 		bond_link->link_autoneg = slave_link->link_autoneg;
1369 		bond_link->link_duplex = slave_link->link_duplex;
1370 		bond_link->link_speed = slave_link->link_speed;
1371 	} else {
1372 		/**
1373 		 * In any other mode the link properties are set to default
1374 		 * values of AUTONEG/DUPLEX
1375 		 */
1376 		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1377 		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1378 	}
1379 }
1380 
1381 static int
1382 link_properties_valid(struct rte_eth_dev *ethdev,
1383 		struct rte_eth_link *slave_link)
1384 {
1385 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1386 
1387 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1388 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1389 
1390 		if (bond_link->link_duplex != slave_link->link_duplex ||
1391 			bond_link->link_autoneg != slave_link->link_autoneg ||
1392 			bond_link->link_speed != slave_link->link_speed)
1393 			return -1;
1394 	}
1395 
1396 	return 0;
1397 }
1398 
1399 int
1400 mac_address_get(struct rte_eth_dev *eth_dev,
1401 		struct rte_ether_addr *dst_mac_addr)
1402 {
1403 	struct rte_ether_addr *mac_addr;
1404 
1405 	if (eth_dev == NULL) {
1406 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1407 		return -1;
1408 	}
1409 
1410 	if (dst_mac_addr == NULL) {
1411 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1412 		return -1;
1413 	}
1414 
1415 	mac_addr = eth_dev->data->mac_addrs;
1416 
1417 	rte_ether_addr_copy(mac_addr, dst_mac_addr);
1418 	return 0;
1419 }
1420 
1421 int
1422 mac_address_set(struct rte_eth_dev *eth_dev,
1423 		struct rte_ether_addr *new_mac_addr)
1424 {
1425 	struct rte_ether_addr *mac_addr;
1426 
1427 	if (eth_dev == NULL) {
1428 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1429 		return -1;
1430 	}
1431 
1432 	if (new_mac_addr == NULL) {
1433 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1434 		return -1;
1435 	}
1436 
1437 	mac_addr = eth_dev->data->mac_addrs;
1438 
1439 	/* If new MAC is different to current MAC then update */
1440 	/* If the new MAC is different from the current MAC then update it */
1441 		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1442 
1443 	return 0;
1444 }
1445 
1446 static const struct rte_ether_addr null_mac_addr;
1447 
1448 /*
1449  * Add additional MAC addresses to the slave
1450  */
1451 int
1452 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1453 		uint16_t slave_port_id)
1454 {
1455 	int i, ret;
1456 	struct rte_ether_addr *mac_addr;
1457 
1458 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1459 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1460 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1461 			break;
1462 
1463 		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1464 		if (ret < 0) {
1465 			/* rollback */
1466 			for (i--; i > 0; i--)
1467 				rte_eth_dev_mac_addr_remove(slave_port_id,
1468 					&bonded_eth_dev->data->mac_addrs[i]);
1469 			return ret;
1470 		}
1471 	}
1472 
1473 	return 0;
1474 }
1475 
1476 /*
1477  * Remove additional MAC addresses from the slave
1478  */
1479 int
1480 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1481 		uint16_t slave_port_id)
1482 {
1483 	int i, rc, ret;
1484 	struct rte_ether_addr *mac_addr;
1485 
1486 	rc = 0;
1487 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1488 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1489 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1490 			break;
1491 
1492 		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1493 		/* save only the first error */
1494 		if (ret < 0 && rc == 0)
1495 			rc = ret;
1496 	}
1497 
1498 	return rc;
1499 }
1500 
1501 int
1502 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1503 {
1504 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1505 	bool set;
1506 	int i;
1507 
1508 	/* Update slave devices MAC addresses */
1509 	if (internals->slave_count < 1)
1510 		return -1;
1511 
1512 	switch (internals->mode) {
1513 	case BONDING_MODE_ROUND_ROBIN:
1514 	case BONDING_MODE_BALANCE:
1515 	case BONDING_MODE_BROADCAST:
1516 		for (i = 0; i < internals->slave_count; i++) {
1517 			if (rte_eth_dev_default_mac_addr_set(
1518 					internals->slaves[i].port_id,
1519 					bonded_eth_dev->data->mac_addrs)) {
1520 				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1521 						internals->slaves[i].port_id);
1522 				return -1;
1523 			}
1524 		}
1525 		break;
1526 	case BONDING_MODE_8023AD:
1527 		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1528 		break;
1529 	case BONDING_MODE_ACTIVE_BACKUP:
1530 	case BONDING_MODE_TLB:
1531 	case BONDING_MODE_ALB:
1532 	default:
1533 		set = true;
1534 		for (i = 0; i < internals->slave_count; i++) {
1535 			if (internals->slaves[i].port_id ==
1536 					internals->current_primary_port) {
1537 				if (rte_eth_dev_default_mac_addr_set(
1538 						internals->current_primary_port,
1539 						bonded_eth_dev->data->mac_addrs)) {
1540 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1541 							internals->current_primary_port);
1542 					set = false;
1543 				}
1544 			} else {
1545 				if (rte_eth_dev_default_mac_addr_set(
1546 						internals->slaves[i].port_id,
1547 						&internals->slaves[i].persisted_mac_addr)) {
1548 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1549 							internals->slaves[i].port_id);
1550 				}
1551 			}
1552 		}
1553 		if (!set)
1554 			return -1;
1555 	}
1556 
1557 	return 0;
1558 }
1559 
1560 int
1561 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1562 {
1563 	struct bond_dev_private *internals;
1564 
1565 	internals = eth_dev->data->dev_private;
1566 
1567 	switch (mode) {
1568 	case BONDING_MODE_ROUND_ROBIN:
1569 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1570 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1571 		break;
1572 	case BONDING_MODE_ACTIVE_BACKUP:
1573 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1574 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1575 		break;
1576 	case BONDING_MODE_BALANCE:
1577 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1578 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579 		break;
1580 	case BONDING_MODE_BROADCAST:
1581 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1582 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1583 		break;
1584 	case BONDING_MODE_8023AD:
1585 		if (bond_mode_8023ad_enable(eth_dev) != 0)
1586 			return -1;
1587 
1588 		if (internals->mode4.dedicated_queues.enabled == 0) {
1589 			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1590 			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1591 			RTE_BOND_LOG(WARNING,
1592 				"Using mode 4, TX and RX bursts must be "
1593 				"invoked at least once every 100ms.");
1594 		} else {
1595 			/* Use flow director's optimization */
1596 			eth_dev->rx_pkt_burst =
1597 					bond_ethdev_rx_burst_8023ad_fast_queue;
1598 			eth_dev->tx_pkt_burst =
1599 					bond_ethdev_tx_burst_8023ad_fast_queue;
1600 		}
1601 		break;
1602 	case BONDING_MODE_TLB:
1603 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1604 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1605 		break;
1606 	case BONDING_MODE_ALB:
1607 		if (bond_mode_alb_enable(eth_dev) != 0)
1608 			return -1;
1609 
1610 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1611 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1612 		break;
1613 	default:
1614 		return -1;
1615 	}
1616 
1617 	internals->mode = mode;
1618 
1619 	return 0;
1620 }
1621 
1622 
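/*
 * Create the slave's slow protocol mempool if it does not exist yet and, when
 * dedicated queues are enabled, set up the extra Rx/Tx queues reserved for
 * LACP traffic.
 */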
1623 static int
1624 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1625 		struct rte_eth_dev *slave_eth_dev)
1626 {
1627 	int errval = 0;
1628 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1629 	struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1630 
1631 	if (port->slow_pool == NULL) {
1632 		char mem_name[256];
1633 		int slave_id = slave_eth_dev->data->port_id;
1634 
1635 		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1636 				slave_id);
1637 		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1638 			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1639 			slave_eth_dev->data->numa_node);
1640 
1641 		/* Any memory allocation failure in initialization is critical because
1642 		 * resources can't be freed, so reinitialization is impossible. */
1643 		if (port->slow_pool == NULL) {
1644 			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1645 				slave_id, mem_name, rte_strerror(rte_errno));
1646 		}
1647 	}
1648 
1649 	if (internals->mode4.dedicated_queues.enabled == 1) {
1650 		/* Configure slow Rx queue */
1651 
1652 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1653 				internals->mode4.dedicated_queues.rx_qid, 128,
1654 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1655 				NULL, port->slow_pool);
1656 		if (errval != 0) {
1657 			RTE_BOND_LOG(ERR,
1658 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1659 					slave_eth_dev->data->port_id,
1660 					internals->mode4.dedicated_queues.rx_qid,
1661 					errval);
1662 			return errval;
1663 		}
1664 
1665 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1666 				internals->mode4.dedicated_queues.tx_qid, 512,
1667 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1668 				NULL);
1669 		if (errval != 0) {
1670 			RTE_BOND_LOG(ERR,
1671 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1672 				slave_eth_dev->data->port_id,
1673 				internals->mode4.dedicated_queues.tx_qid,
1674 				errval);
1675 			return errval;
1676 		}
1677 	}
1678 	return 0;
1679 }
1680 
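/*
 * Stop and reconfigure a slave port so that it mirrors the bonded device's
 * configuration (RSS, VLAN filter offload, MTU and queue setup), set up the
 * mode 4 dedicated queues and flow rules when enabled, then restart the slave
 * and synchronize its RSS RETA and initial link status.
 */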
1681 int
1682 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1683 		struct rte_eth_dev *slave_eth_dev)
1684 {
1685 	struct bond_rx_queue *bd_rx_q;
1686 	struct bond_tx_queue *bd_tx_q;
1687 	uint16_t nb_rx_queues;
1688 	uint16_t nb_tx_queues;
1689 
1690 	int errval;
1691 	uint16_t q_id;
1692 	struct rte_flow_error flow_error;
1693 
1694 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1695 
1696 	/* Stop slave */
1697 	rte_eth_dev_stop(slave_eth_dev->data->port_id);
1698 
1699 	/* Enable interrupts on slave device if supported */
1700 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1701 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1702 
1703 	/* If RSS is enabled for bonding, try to enable it for slaves  */
1704 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1705 		if (internals->rss_key_len != 0) {
1706 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1707 					internals->rss_key_len;
1708 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1709 					internals->rss_key;
1710 		} else {
1711 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1712 		}
1713 
1714 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1715 				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1716 		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1717 				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1718 	}
1719 
1720 	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1721 			DEV_RX_OFFLOAD_VLAN_FILTER)
1722 		slave_eth_dev->data->dev_conf.rxmode.offloads |=
1723 				DEV_RX_OFFLOAD_VLAN_FILTER;
1724 	else
1725 		slave_eth_dev->data->dev_conf.rxmode.offloads &=
1726 				~DEV_RX_OFFLOAD_VLAN_FILTER;
1727 
1728 	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1729 	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1730 
1731 	if (internals->mode == BONDING_MODE_8023AD) {
1732 		if (internals->mode4.dedicated_queues.enabled == 1) {
1733 			nb_rx_queues++;
1734 			nb_tx_queues++;
1735 		}
1736 	}
1737 
1738 	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1739 				     bonded_eth_dev->data->mtu);
1740 	if (errval != 0 && errval != -ENOTSUP) {
1741 		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1742 				slave_eth_dev->data->port_id, errval);
1743 		return errval;
1744 	}
1745 
1746 	/* Configure device */
1747 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1748 			nb_rx_queues, nb_tx_queues,
1749 			&(slave_eth_dev->data->dev_conf));
1750 	if (errval != 0) {
1751 		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1752 				slave_eth_dev->data->port_id, errval);
1753 		return errval;
1754 	}
1755 
1756 	/* Setup Rx Queues */
1757 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1758 		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1759 
1760 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1761 				bd_rx_q->nb_rx_desc,
1762 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1763 				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1764 		if (errval != 0) {
1765 			RTE_BOND_LOG(ERR,
1766 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1767 					slave_eth_dev->data->port_id, q_id, errval);
1768 			return errval;
1769 		}
1770 	}
1771 
1772 	/* Setup Tx Queues */
1773 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1774 		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1775 
1776 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1777 				bd_tx_q->nb_tx_desc,
1778 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1779 				&bd_tx_q->tx_conf);
1780 		if (errval != 0) {
1781 			RTE_BOND_LOG(ERR,
1782 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1783 				slave_eth_dev->data->port_id, q_id, errval);
1784 			return errval;
1785 		}
1786 	}
1787 
1788 	if (internals->mode == BONDING_MODE_8023AD &&
1789 			internals->mode4.dedicated_queues.enabled == 1) {
1790 		errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1791 		if (errval != 0)
1792 			return errval;
1793 
1794 		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1795 				slave_eth_dev->data->port_id) != 0) {
1796 			RTE_BOND_LOG(ERR,
1797 				"bond_ethdev_8023ad_flow_verify: port=%d failed",
1798 				slave_eth_dev->data->port_id);
1799 			return -1;
1800 		}
1800 		}
1801 
1802 		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1803 			rte_flow_destroy(slave_eth_dev->data->port_id,
1804 					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1805 					&flow_error);
1806 
1807 		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1808 				slave_eth_dev->data->port_id);
1809 	}
1810 
1811 	/* Start device */
1812 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1813 	if (errval != 0) {
1814 		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1815 				slave_eth_dev->data->port_id, errval);
1816 		return -1;
1817 	}
1818 
1819 	/* If RSS is enabled for bonding, synchronize RETA */
1820 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1821 		int i;
1822 		struct bond_dev_private *internals;
1823 
1824 		internals = bonded_eth_dev->data->dev_private;
1825 
1826 		for (i = 0; i < internals->slave_count; i++) {
1827 			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1828 				errval = rte_eth_dev_rss_reta_update(
1829 						slave_eth_dev->data->port_id,
1830 						&internals->reta_conf[0],
1831 						internals->slaves[i].reta_size);
1832 				if (errval != 0) {
1833 					RTE_BOND_LOG(WARNING,
1834 						     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1835 						     " RSS Configuration for bonding may be inconsistent.",
1836 						     slave_eth_dev->data->port_id, errval);
1837 				}
1838 				break;
1839 			}
1840 		}
1841 	}
1842 
1843 	/* If lsc interrupt is set, check initial slave's link status */
1844 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1845 		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1846 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1847 			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1848 			NULL);
1849 	}
1850 
1851 	return 0;
1852 }
1853 
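/*
 * Remove a slave from the bonded device's slave table: the slave (and its
 * per-flow entries) is dropped by compacting the remaining array entries,
 * the slave count is decremented and the slave ethdev is reset so it can be
 * reconfigured later.
 */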
1854 void
1855 slave_remove(struct bond_dev_private *internals,
1856 		struct rte_eth_dev *slave_eth_dev)
1857 {
1858 	uint16_t i;
1859 
1860 	for (i = 0; i < internals->slave_count; i++)
1861 		if (internals->slaves[i].port_id ==
1862 				slave_eth_dev->data->port_id)
1863 			break;
1864 
1865 	if (i < (internals->slave_count - 1)) {
1866 		struct rte_flow *flow;
1867 
1868 		memmove(&internals->slaves[i], &internals->slaves[i + 1],
1869 				sizeof(internals->slaves[0]) *
1870 				(internals->slave_count - i - 1));
1871 		TAILQ_FOREACH(flow, &internals->flow_list, next) {
1872 			memmove(&flow->flows[i], &flow->flows[i + 1],
1873 				sizeof(flow->flows[0]) *
1874 				(internals->slave_count - i - 1));
1875 			flow->flows[internals->slave_count - 1] = NULL;
1876 		}
1877 	}
1878 
1879 	internals->slave_count--;
1880 
1881 	/* force reconfiguration of slave interfaces */
1882 	_rte_eth_dev_reset(slave_eth_dev);
1883 }
1884 
1885 static void
1886 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1887 
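/*
 * Record the details of a newly added slave: its port id, whether link-status
 * polling is needed (for slaves without LSC interrupt support) and a copy of
 * its current MAC address.
 */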
1888 void
1889 slave_add(struct bond_dev_private *internals,
1890 		struct rte_eth_dev *slave_eth_dev)
1891 {
1892 	struct bond_slave_details *slave_details =
1893 			&internals->slaves[internals->slave_count];
1894 
1895 	slave_details->port_id = slave_eth_dev->data->port_id;
1896 	slave_details->last_link_status = 0;
1897 
1898 	/* Mark slave devices that don't support interrupts so we can
1899 	 * compensate when we start the bond
1900 	 */
1901 	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1902 		slave_details->link_status_poll_enabled = 1;
1903 	}
1904 
1905 	slave_details->link_status_wait_to_complete = 0;
1906 	/* save the slave's current MAC address so it can be restored later */
1907 	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1908 			sizeof(struct rte_ether_addr));
1909 }
1910 
1911 void
1912 bond_ethdev_primary_set(struct bond_dev_private *internals,
1913 		uint16_t slave_port_id)
1914 {
1915 	int i;
1916 
1917 	if (internals->active_slave_count < 1)
1918 		internals->current_primary_port = slave_port_id;
1919 	else
1920 		/* Search bonded device slave ports for new proposed primary port */
1921 		for (i = 0; i < internals->active_slave_count; i++) {
1922 			if (internals->active_slaves[i] == slave_port_id)
1923 				internals->current_primary_port = slave_port_id;
1924 		}
1925 }
1926 
1927 static int
1928 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1929 
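/*
 * Start the bonded device: pick up the primary slave's MAC address (unless a
 * user-defined MAC is set), reconfigure and start every slave, enable link
 * status polling when required, and start the mode-specific machinery
 * (802.3ad state machines, TLB/ALB balancing).
 */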
1930 static int
1931 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1932 {
1933 	struct bond_dev_private *internals;
1934 	int i;
1935 
1936 	/* slave eth dev will be started by bonded device */
1937 	if (check_for_bonded_ethdev(eth_dev)) {
1938 		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1939 				eth_dev->data->port_id);
1940 		return -1;
1941 	}
1942 
1943 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1944 	eth_dev->data->dev_started = 1;
1945 
1946 	internals = eth_dev->data->dev_private;
1947 
1948 	if (internals->slave_count == 0) {
1949 		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1950 		goto out_err;
1951 	}
1952 
1953 	if (internals->user_defined_mac == 0) {
1954 		struct rte_ether_addr *new_mac_addr = NULL;
1955 
1956 		for (i = 0; i < internals->slave_count; i++)
1957 			if (internals->slaves[i].port_id == internals->primary_port)
1958 				new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1959 
1960 		if (new_mac_addr == NULL)
1961 			goto out_err;
1962 
1963 		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1964 			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1965 					eth_dev->data->port_id);
1966 			goto out_err;
1967 		}
1968 	}
1969 
1970 	if (internals->mode == BONDING_MODE_8023AD) {
1971 		if (internals->mode4.dedicated_queues.enabled == 1) {
1972 			internals->mode4.dedicated_queues.rx_qid =
1973 					eth_dev->data->nb_rx_queues;
1974 			internals->mode4.dedicated_queues.tx_qid =
1975 					eth_dev->data->nb_tx_queues;
1976 		}
1977 	}
1978 
1979 
1980 	/* Reconfigure each slave device if starting bonded device */
1981 	for (i = 0; i < internals->slave_count; i++) {
1982 		struct rte_eth_dev *slave_ethdev =
1983 				&(rte_eth_devices[internals->slaves[i].port_id]);
1984 		if (slave_configure(eth_dev, slave_ethdev) != 0) {
1985 			RTE_BOND_LOG(ERR,
1986 				"bonded port (%d) failed to reconfigure slave device (%d)",
1987 				eth_dev->data->port_id,
1988 				internals->slaves[i].port_id);
1989 			goto out_err;
1990 		}
1991 		/* We will need to poll for link status if any slave doesn't
1992 		 * support interrupts
1993 		 */
1994 		if (internals->slaves[i].link_status_poll_enabled)
1995 			internals->link_status_polling_enabled = 1;
1996 	}
1997 
1998 	/* start polling if needed */
1999 	if (internals->link_status_polling_enabled) {
2000 		rte_eal_alarm_set(
2001 			internals->link_status_polling_interval_ms * 1000,
2002 			bond_ethdev_slave_link_status_change_monitor,
2003 			(void *)&rte_eth_devices[internals->port_id]);
2004 	}
2005 
2006 	/* Update all slave devices' MACs */
2007 	if (mac_address_slaves_update(eth_dev) != 0)
2008 		goto out_err;
2009 
2010 	if (internals->user_defined_primary_port)
2011 		bond_ethdev_primary_set(internals, internals->primary_port);
2012 
2013 	if (internals->mode == BONDING_MODE_8023AD)
2014 		bond_mode_8023ad_start(eth_dev);
2015 
2016 	if (internals->mode == BONDING_MODE_TLB ||
2017 			internals->mode == BONDING_MODE_ALB)
2018 		bond_tlb_enable(internals);
2019 
2020 	return 0;
2021 
2022 out_err:
2023 	eth_dev->data->dev_started = 0;
2024 	return -1;
2025 }
2026 
2027 static void
2028 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2029 {
2030 	uint16_t i;
2031 
2032 	if (dev->data->rx_queues != NULL) {
2033 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
2034 			rte_free(dev->data->rx_queues[i]);
2035 			dev->data->rx_queues[i] = NULL;
2036 		}
2037 		dev->data->nb_rx_queues = 0;
2038 	}
2039 
2040 	if (dev->data->tx_queues != NULL) {
2041 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
2042 			rte_free(dev->data->tx_queues[i]);
2043 			dev->data->tx_queues[i] = NULL;
2044 		}
2045 		dev->data->nb_tx_queues = 0;
2046 	}
2047 }
2048 
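/*
 * Stop the bonded device: halt the mode-specific machinery (802.3ad state
 * machines, TLB/ALB balancing), drain any pending mode 4 control packets,
 * mark the bonded link down and stop/deactivate every currently active slave.
 */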
2049 void
2050 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2051 {
2052 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2053 	uint16_t i;
2054 
2055 	if (internals->mode == BONDING_MODE_8023AD) {
2056 		struct port *port;
2057 		void *pkt = NULL;
2058 
2059 		bond_mode_8023ad_stop(eth_dev);
2060 
2061 		/* Discard all messages to/from mode 4 state machines */
2062 		for (i = 0; i < internals->active_slave_count; i++) {
2063 			port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2064 
2065 			RTE_ASSERT(port->rx_ring != NULL);
2066 			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2067 				rte_pktmbuf_free(pkt);
2068 
2069 			RTE_ASSERT(port->tx_ring != NULL);
2070 			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2071 				rte_pktmbuf_free(pkt);
2072 		}
2073 	}
2074 
2075 	if (internals->mode == BONDING_MODE_TLB ||
2076 			internals->mode == BONDING_MODE_ALB) {
2077 		bond_tlb_disable(internals);
2078 		for (i = 0; i < internals->active_slave_count; i++)
2079 			tlb_last_obytets[internals->active_slaves[i]] = 0;
2080 	}
2081 
2082 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2083 	eth_dev->data->dev_started = 0;
2084 
2085 	internals->link_status_polling_enabled = 0;
2086 	for (i = 0; i < internals->slave_count; i++) {
2087 		uint16_t slave_id = internals->slaves[i].port_id;
2088 		if (find_slave_by_id(internals->active_slaves,
2089 				internals->active_slave_count, slave_id) !=
2090 						internals->active_slave_count) {
2091 			internals->slaves[i].last_link_status = 0;
2092 			rte_eth_dev_stop(slave_id);
2093 			deactivate_slave(eth_dev, slave_id);
2094 		}
2095 	}
2096 }
2097 
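/*
 * Close the bonded device: stop and remove every remaining slave, flush any
 * flow rules created on the bonded port, release its queues and reset the
 * VLAN filter bitmap.
 */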
2098 void
2099 bond_ethdev_close(struct rte_eth_dev *dev)
2100 {
2101 	struct bond_dev_private *internals = dev->data->dev_private;
2102 	uint16_t bond_port_id = internals->port_id;
2103 	int skipped = 0;
2104 	struct rte_flow_error ferror;
2105 
2106 	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2107 	while (internals->slave_count != skipped) {
2108 		uint16_t port_id = internals->slaves[skipped].port_id;
2109 
2110 		rte_eth_dev_stop(port_id);
2111 
2112 		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2113 			RTE_BOND_LOG(ERR,
2114 				     "Failed to remove port %d from bonded device %s",
2115 				     port_id, dev->device->name);
2116 			skipped++;
2117 		}
2118 	}
2119 	bond_flow_ops.flush(dev, &ferror);
2120 	bond_ethdev_free_queues(dev);
2121 	rte_bitmap_reset(internals->vlan_filter_bmp);
2122 }
2123 
2124 /* forward declaration */
2125 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2126 
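/*
 * Report the bonded device's capabilities. Queue counts and descriptor limits
 * are the minimum of the corresponding values across all slaves, since every
 * slave must be able to support the configuration chosen for the bond.
 */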
2127 static int
2128 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2129 {
2130 	struct bond_dev_private *internals = dev->data->dev_private;
2131 	struct bond_slave_details slave;
2132 	int ret;
2133 
2134 	uint16_t max_nb_rx_queues = UINT16_MAX;
2135 	uint16_t max_nb_tx_queues = UINT16_MAX;
2136 	uint16_t max_rx_desc_lim = UINT16_MAX;
2137 	uint16_t max_tx_desc_lim = UINT16_MAX;
2138 
2139 	dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2140 
2141 	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2142 			internals->candidate_max_rx_pktlen :
2143 			RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2144 
2145 	/* The max number of tx/rx queues that the bonded device can support is
2146 	 * the minimum value across the bonded slaves, as all slaves must be
2147 	 * capable of supporting the same number of tx/rx queues.
2148 	 */
2149 	if (internals->slave_count > 0) {
2150 		struct rte_eth_dev_info slave_info;
2151 		uint16_t idx;
2152 
2153 		for (idx = 0; idx < internals->slave_count; idx++) {
2154 			slave = internals->slaves[idx];
2155 			ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2156 			if (ret != 0) {
2157 				RTE_BOND_LOG(ERR,
2158 					"%s: Error during getting device (port %u) info: %s\n",
2159 					__func__,
2160 					slave.port_id,
2161 					strerror(-ret));
2162 
2163 				return ret;
2164 			}
2165 
2166 			if (slave_info.max_rx_queues < max_nb_rx_queues)
2167 				max_nb_rx_queues = slave_info.max_rx_queues;
2168 
2169 			if (slave_info.max_tx_queues < max_nb_tx_queues)
2170 				max_nb_tx_queues = slave_info.max_tx_queues;
2171 
2172 			if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2173 				max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2174 
2175 			if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2176 				max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2177 		}
2178 	}
2179 
2180 	dev_info->max_rx_queues = max_nb_rx_queues;
2181 	dev_info->max_tx_queues = max_nb_tx_queues;
2182 
2183 	memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2184 	       sizeof(dev_info->default_rxconf));
2185 	memcpy(&dev_info->default_txconf, &internals->default_txconf,
2186 	       sizeof(dev_info->default_txconf));
2187 
2188 	dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2189 	dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2190 
2191 	/**
2192 	 * If dedicated HW queues are enabled for the link bonding device in LACP
2193 	 * mode, then we need to reduce the maximum number of data path queues by 1.
2194 	 */
2195 	if (internals->mode == BONDING_MODE_8023AD &&
2196 		internals->mode4.dedicated_queues.enabled == 1) {
2197 		dev_info->max_rx_queues--;
2198 		dev_info->max_tx_queues--;
2199 	}
2200 
2201 	dev_info->min_rx_bufsize = 0;
2202 
2203 	dev_info->rx_offload_capa = internals->rx_offload_capa;
2204 	dev_info->tx_offload_capa = internals->tx_offload_capa;
2205 	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2206 	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2207 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2208 
2209 	dev_info->reta_size = internals->reta_size;
2210 
2211 	return 0;
2212 }
2213 
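/*
 * Set a VLAN filter entry: the VLAN id is recorded in the bonded device's
 * VLAN bitmap and the filter is propagated to all current slaves.
 */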
2214 static int
2215 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2216 {
2217 	int res;
2218 	uint16_t i;
2219 	struct bond_dev_private *internals = dev->data->dev_private;
2220 
2221 	/* don't do this while a slave is being added */
2222 	rte_spinlock_lock(&internals->lock);
2223 
2224 	if (on)
2225 		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2226 	else
2227 		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2228 
2229 	for (i = 0; i < internals->slave_count; i++) {
2230 		uint16_t port_id = internals->slaves[i].port_id;
2231 
2232 		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2233 		if (res == -ENOTSUP)
2234 			RTE_BOND_LOG(WARNING,
2235 				     "Setting VLAN filter on slave port %u not supported.",
2236 				     port_id);
2237 	}
2238 
2239 	rte_spinlock_unlock(&internals->lock);
2240 	return 0;
2241 }
2242 
2243 static int
2244 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2245 		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2246 		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2247 {
2248 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2249 			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2250 					0, dev->data->numa_node);
2251 	if (bd_rx_q == NULL)
2252 		return -1;
2253 
2254 	bd_rx_q->queue_id = rx_queue_id;
2255 	bd_rx_q->dev_private = dev->data->dev_private;
2256 
2257 	bd_rx_q->nb_rx_desc = nb_rx_desc;
2258 
2259 	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2260 	bd_rx_q->mb_pool = mb_pool;
2261 
2262 	dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2263 
2264 	return 0;
2265 }
2266 
2267 static int
2268 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2269 		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2270 		const struct rte_eth_txconf *tx_conf)
2271 {
2272 	struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2273 			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2274 					0, dev->data->numa_node);
2275 
2276 	if (bd_tx_q == NULL)
2277 		return -1;
2278 
2279 	bd_tx_q->queue_id = tx_queue_id;
2280 	bd_tx_q->dev_private = dev->data->dev_private;
2281 
2282 	bd_tx_q->nb_tx_desc = nb_tx_desc;
2283 	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2284 
2285 	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2286 
2287 	return 0;
2288 }
2289 
2290 static void
2291 bond_ethdev_rx_queue_release(void *queue)
2292 {
2293 	if (queue == NULL)
2294 		return;
2295 
2296 	rte_free(queue);
2297 }
2298 
2299 static void
2300 bond_ethdev_tx_queue_release(void *queue)
2301 {
2302 	if (queue == NULL)
2303 		return;
2304 
2305 	rte_free(queue);
2306 }
2307 
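/*
 * Periodic alarm callback that polls the link status of slaves which do not
 * support LSC interrupts and raises the LSC event callback whenever a slave's
 * link status has changed since the last poll.
 */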
2308 static void
2309 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2310 {
2311 	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2312 	struct bond_dev_private *internals;
2313 
2314 	/* Default value for polling slave found is true as we don't want to
2315 	 * disable the polling thread if we cannot get the lock */
2316 	int i, polling_slave_found = 1;
2317 
2318 	if (cb_arg == NULL)
2319 		return;
2320 
2321 	bonded_ethdev = cb_arg;
2322 	internals = bonded_ethdev->data->dev_private;
2323 
2324 	if (!bonded_ethdev->data->dev_started ||
2325 		!internals->link_status_polling_enabled)
2326 		return;
2327 
2328 	/* If the device is currently being configured then don't check slaves'
2329 	 * link status; wait until the next period */
2330 	if (rte_spinlock_trylock(&internals->lock)) {
2331 		if (internals->slave_count > 0)
2332 			polling_slave_found = 0;
2333 
2334 		for (i = 0; i < internals->slave_count; i++) {
2335 			if (!internals->slaves[i].link_status_poll_enabled)
2336 				continue;
2337 
2338 			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2339 			polling_slave_found = 1;
2340 
2341 			/* Update slave link status */
2342 			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2343 					internals->slaves[i].link_status_wait_to_complete);
2344 
2345 			/* if link status has changed since last checked then call lsc
2346 			 * event callback */
2347 			if (slave_ethdev->data->dev_link.link_status !=
2348 					internals->slaves[i].last_link_status) {
2349 				internals->slaves[i].last_link_status =
2350 						slave_ethdev->data->dev_link.link_status;
2351 
2352 				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2353 						RTE_ETH_EVENT_INTR_LSC,
2354 						&bonded_ethdev->data->port_id,
2355 						NULL);
2356 			}
2357 		}
2358 		rte_spinlock_unlock(&internals->lock);
2359 	}
2360 
2361 	if (polling_slave_found)
2362 		/* Set alarm to continue monitoring link status of slave ethdevs */
2363 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2364 				bond_ethdev_slave_link_status_change_monitor, cb_arg);
2365 }
2366 
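/*
 * Update the bonded device's link status and speed. The reported speed
 * depends on the mode: the minimum slave speed for broadcast, the primary
 * slave's speed for active-backup, and the sum of the active slaves' speeds
 * for the remaining modes.
 */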
2367 static int
2368 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2369 {
2370 	int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2371 
2372 	struct bond_dev_private *bond_ctx;
2373 	struct rte_eth_link slave_link;
2374 
2375 	bool one_link_update_succeeded;
2376 	uint32_t idx;
2377 	int ret;
2378 
2379 	bond_ctx = ethdev->data->dev_private;
2380 
2381 	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2382 
2383 	if (ethdev->data->dev_started == 0 ||
2384 			bond_ctx->active_slave_count == 0) {
2385 		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2386 		return 0;
2387 	}
2388 
2389 	ethdev->data->dev_link.link_status = ETH_LINK_UP;
2390 
2391 	if (wait_to_complete)
2392 		link_update = rte_eth_link_get;
2393 	else
2394 		link_update = rte_eth_link_get_nowait;
2395 
2396 	switch (bond_ctx->mode) {
2397 	case BONDING_MODE_BROADCAST:
2398 		/**
2399 		 * Setting link speed to UINT32_MAX to ensure we pick up the
2400 		 * value of the first active slave
2401 		 */
2402 		ethdev->data->dev_link.link_speed = UINT32_MAX;
2403 
2404 		/**
2405 		 * The link speed is the minimum of all the slaves' link speeds,
2406 		 * as packet loss will occur on a slave if transmission is
2407 		 * attempted at a rate greater than it supports.
2408 		 */
2409 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2410 			ret = link_update(bond_ctx->active_slaves[idx],
2411 					  &slave_link);
2412 			if (ret < 0) {
2413 				ethdev->data->dev_link.link_speed =
2414 					ETH_SPEED_NUM_NONE;
2415 				RTE_BOND_LOG(ERR,
2416 					"Slave (port %u) link get failed: %s",
2417 					bond_ctx->active_slaves[idx],
2418 					rte_strerror(-ret));
2419 				return 0;
2420 			}
2421 
2422 			if (slave_link.link_speed <
2423 					ethdev->data->dev_link.link_speed)
2424 				ethdev->data->dev_link.link_speed =
2425 						slave_link.link_speed;
2426 		}
2427 		break;
2428 	case BONDING_MODE_ACTIVE_BACKUP:
2429 		/* Current primary slave */
2430 		ret = link_update(bond_ctx->current_primary_port, &slave_link);
2431 		if (ret < 0) {
2432 			RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2433 				bond_ctx->current_primary_port,
2434 				rte_strerror(-ret));
2435 			return 0;
2436 		}
2437 
2438 		ethdev->data->dev_link.link_speed = slave_link.link_speed;
2439 		break;
2440 	case BONDING_MODE_8023AD:
2441 		ethdev->data->dev_link.link_autoneg =
2442 				bond_ctx->mode4.slave_link.link_autoneg;
2443 		ethdev->data->dev_link.link_duplex =
2444 				bond_ctx->mode4.slave_link.link_duplex;
2445 		/* fall through */
2446 		/* to update link speed */
2447 	case BONDING_MODE_ROUND_ROBIN:
2448 	case BONDING_MODE_BALANCE:
2449 	case BONDING_MODE_TLB:
2450 	case BONDING_MODE_ALB:
2451 	default:
2452 		/**
2453 		 * In these modes the maximum theoretical link speed is the sum
2454 		 * of all the slaves' link speeds.
2455 		 */
2456 		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2457 		one_link_update_succeeded = false;
2458 
2459 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2460 			ret = link_update(bond_ctx->active_slaves[idx],
2461 					&slave_link);
2462 			if (ret < 0) {
2463 				RTE_BOND_LOG(ERR,
2464 					"Slave (port %u) link get failed: %s",
2465 					bond_ctx->active_slaves[idx],
2466 					rte_strerror(-ret));
2467 				continue;
2468 			}
2469 
2470 			one_link_update_succeeded = true;
2471 			ethdev->data->dev_link.link_speed +=
2472 					slave_link.link_speed;
2473 		}
2474 
2475 		if (!one_link_update_succeeded) {
2476 			RTE_BOND_LOG(ERR, "All slaves link get failed");
2477 			return 0;
2478 		}
2479 	}
2480 
2481 
2482 	return 0;
2483 }
2484 
2485 
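/* Return the sum of all slaves' basic and per-queue statistics. */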
2486 static int
2487 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2488 {
2489 	struct bond_dev_private *internals = dev->data->dev_private;
2490 	struct rte_eth_stats slave_stats;
2491 	int i, j;
2492 
2493 	for (i = 0; i < internals->slave_count; i++) {
2494 		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2495 
2496 		stats->ipackets += slave_stats.ipackets;
2497 		stats->opackets += slave_stats.opackets;
2498 		stats->ibytes += slave_stats.ibytes;
2499 		stats->obytes += slave_stats.obytes;
2500 		stats->imissed += slave_stats.imissed;
2501 		stats->ierrors += slave_stats.ierrors;
2502 		stats->oerrors += slave_stats.oerrors;
2503 		stats->rx_nombuf += slave_stats.rx_nombuf;
2504 
2505 		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2506 			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2507 			stats->q_opackets[j] += slave_stats.q_opackets[j];
2508 			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2509 			stats->q_obytes[j] += slave_stats.q_obytes[j];
2510 			stats->q_errors[j] += slave_stats.q_errors[j];
2511 		}
2512 
2513 	}
2514 
2515 	return 0;
2516 }
2517 
2518 static int
2519 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2520 {
2521 	struct bond_dev_private *internals = dev->data->dev_private;
2522 	int i;
2523 	int err;
2524 	int ret;
2525 
2526 	for (i = 0, err = 0; i < internals->slave_count; i++) {
2527 		ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2528 		if (ret != 0)
2529 			err = ret;
2530 	}
2531 
2532 	return err;
2533 }
2534 
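/*
 * Enable promiscuous mode. In round robin, balance, broadcast and 802.3ad
 * modes the setting is propagated to every slave; in the remaining modes it
 * is applied to the current primary slave only.
 */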
2535 static int
2536 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2537 {
2538 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2539 	int i;
2540 	int ret = 0;
2541 	uint16_t port_id;
2542 
2543 	switch (internals->mode) {
2544 	/* Promiscuous mode is propagated to all slaves */
2545 	case BONDING_MODE_ROUND_ROBIN:
2546 	case BONDING_MODE_BALANCE:
2547 	case BONDING_MODE_BROADCAST:
2548 	case BONDING_MODE_8023AD: {
2549 		unsigned int slave_ok = 0;
2550 
2551 		for (i = 0; i < internals->slave_count; i++) {
2552 			port_id = internals->slaves[i].port_id;
2553 
2554 			ret = rte_eth_promiscuous_enable(port_id);
2555 			if (ret != 0)
2556 				RTE_BOND_LOG(ERR,
2557 					"Failed to enable promiscuous mode for port %u: %s",
2558 					port_id, rte_strerror(-ret));
2559 			else
2560 				slave_ok++;
2561 		}
2562 		/*
2563 		 * Report success if the operation succeeded on at least
2564 		 * one slave. Otherwise return the last error code.
2565 		 */
2566 		if (slave_ok > 0)
2567 			ret = 0;
2568 		break;
2569 	}
2570 	/* Promiscuous mode is propagated only to primary slave */
2571 	case BONDING_MODE_ACTIVE_BACKUP:
2572 	case BONDING_MODE_TLB:
2573 	case BONDING_MODE_ALB:
2574 	default:
2575 		/* Do not touch promisc when there cannot be primary ports */
2576 		if (internals->slave_count == 0)
2577 			break;
2578 		port_id = internals->current_primary_port;
2579 		ret = rte_eth_promiscuous_enable(port_id);
2580 		if (ret != 0)
2581 			RTE_BOND_LOG(ERR,
2582 				"Failed to enable promiscuous mode for port %u: %s",
2583 				port_id, rte_strerror(-ret));
2584 	}
2585 
2586 	return ret;
2587 }
2588 
2589 static int
2590 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2591 {
2592 	struct bond_dev_private *internals = dev->data->dev_private;
2593 	int i;
2594 	int ret = 0;
2595 	uint16_t port_id;
2596 
2597 	switch (internals->mode) {
2598 	/* Promiscuous mode is propagated to all slaves */
2599 	case BONDING_MODE_ROUND_ROBIN:
2600 	case BONDING_MODE_BALANCE:
2601 	case BONDING_MODE_BROADCAST:
2602 	case BONDING_MODE_8023AD: {
2603 		unsigned int slave_ok = 0;
2604 
2605 		for (i = 0; i < internals->slave_count; i++) {
2606 			port_id = internals->slaves[i].port_id;
2607 
2608 			if (internals->mode == BONDING_MODE_8023AD &&
2609 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2610 					BOND_8023AD_FORCED_PROMISC) {
2611 				slave_ok++;
2612 				continue;
2613 			}
2614 			ret = rte_eth_promiscuous_disable(port_id);
2615 			if (ret != 0)
2616 				RTE_BOND_LOG(ERR,
2617 					"Failed to disable promiscuous mode for port %u: %s",
2618 					port_id, rte_strerror(-ret));
2619 			else
2620 				slave_ok++;
2621 		}
2622 		/*
2623 		 * Report success if the operation succeeded on at least
2624 		 * one slave. Otherwise return the last error code.
2625 		 */
2626 		if (slave_ok > 0)
2627 			ret = 0;
2628 		break;
2629 	}
2630 	/* Promiscuous mode is propagated only to primary slave */
2631 	case BONDING_MODE_ACTIVE_BACKUP:
2632 	case BONDING_MODE_TLB:
2633 	case BONDING_MODE_ALB:
2634 	default:
2635 		/* Do not touch promisc when there cannot be primary ports */
2636 		if (internals->slave_count == 0)
2637 			break;
2638 		port_id = internals->current_primary_port;
2639 		ret = rte_eth_promiscuous_disable(port_id);
2640 		if (ret != 0)
2641 			RTE_BOND_LOG(ERR,
2642 				"Failed to disable promiscuous mode for port %u: %s",
2643 				port_id, rte_strerror(-ret));
2644 	}
2645 
2646 	return ret;
2647 }
2648 
2649 static int
2650 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2651 {
2652 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2653 	int i;
2654 	int ret = 0;
2655 	uint16_t port_id;
2656 
2657 	switch (internals->mode) {
2658 	/* allmulti mode is propagated to all slaves */
2659 	case BONDING_MODE_ROUND_ROBIN:
2660 	case BONDING_MODE_BALANCE:
2661 	case BONDING_MODE_BROADCAST:
2662 	case BONDING_MODE_8023AD: {
2663 		unsigned int slave_ok = 0;
2664 
2665 		for (i = 0; i < internals->slave_count; i++) {
2666 			port_id = internals->slaves[i].port_id;
2667 
2668 			ret = rte_eth_allmulticast_enable(port_id);
2669 			if (ret != 0)
2670 				RTE_BOND_LOG(ERR,
2671 					"Failed to enable allmulti mode for port %u: %s",
2672 					port_id, rte_strerror(-ret));
2673 			else
2674 				slave_ok++;
2675 		}
2676 		/*
2677 		 * Report success if the operation succeeded on at least
2678 		 * one slave. Otherwise return the last error code.
2679 		 */
2680 		if (slave_ok > 0)
2681 			ret = 0;
2682 		break;
2683 	}
2684 	/* allmulti mode is propagated only to primary slave */
2685 	case BONDING_MODE_ACTIVE_BACKUP:
2686 	case BONDING_MODE_TLB:
2687 	case BONDING_MODE_ALB:
2688 	default:
2689 		/* Do not touch allmulti when there cannot be primary ports */
2690 		if (internals->slave_count == 0)
2691 			break;
2692 		port_id = internals->current_primary_port;
2693 		ret = rte_eth_allmulticast_enable(port_id);
2694 		if (ret != 0)
2695 			RTE_BOND_LOG(ERR,
2696 				"Failed to enable allmulti mode for port %u: %s",
2697 				port_id, rte_strerror(-ret));
2698 	}
2699 
2700 	return ret;
2701 }
2702 
2703 static int
2704 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2705 {
2706 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2707 	int i;
2708 	int ret = 0;
2709 	uint16_t port_id;
2710 
2711 	switch (internals->mode) {
2712 	/* allmulti mode is propagated to all slaves */
2713 	case BONDING_MODE_ROUND_ROBIN:
2714 	case BONDING_MODE_BALANCE:
2715 	case BONDING_MODE_BROADCAST:
2716 	case BONDING_MODE_8023AD: {
2717 		unsigned int slave_ok = 0;
2718 
2719 		for (i = 0; i < internals->slave_count; i++) {
2720 			uint16_t port_id = internals->slaves[i].port_id;
2721 
2722 			if (internals->mode == BONDING_MODE_8023AD &&
2723 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2724 					BOND_8023AD_FORCED_ALLMULTI)
2725 				continue;
2726 
2727 			ret = rte_eth_allmulticast_disable(port_id);
2728 			if (ret != 0)
2729 				RTE_BOND_LOG(ERR,
2730 					"Failed to disable allmulti mode for port %u: %s",
2731 					port_id, rte_strerror(-ret));
2732 			else
2733 				slave_ok++;
2734 		}
2735 		/*
2736 		 * Report success if the operation succeeded on at least
2737 		 * one slave. Otherwise return the last error code.
2738 		 */
2739 		if (slave_ok > 0)
2740 			ret = 0;
2741 		break;
2742 	}
2743 	/* allmulti mode is propagated only to primary slave */
2744 	case BONDING_MODE_ACTIVE_BACKUP:
2745 	case BONDING_MODE_TLB:
2746 	case BONDING_MODE_ALB:
2747 	default:
2748 		/* Do not touch allmulti when there cannot be primary ports */
2749 		if (internals->slave_count == 0)
2750 			break;
2751 		port_id = internals->current_primary_port;
2752 		ret = rte_eth_allmulticast_disable(port_id);
2753 		if (ret != 0)
2754 			RTE_BOND_LOG(ERR,
2755 				"Failed to disable allmulti mode for port %u: %s",
2756 				port_id, rte_strerror(-ret));
2757 	}
2758 
2759 	return ret;
2760 }
2761 
2762 static void
2763 bond_ethdev_delayed_lsc_propagation(void *arg)
2764 {
2765 	if (arg == NULL)
2766 		return;
2767 
2768 	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2769 			RTE_ETH_EVENT_INTR_LSC, NULL);
2770 }
2771 
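/*
 * Link status change callback for slave ports: on link up the slave is
 * activated (and may become the primary/first active slave), on link down it
 * is deactivated and a new primary is chosen. The bonded device's own link
 * properties are then refreshed and, if its status changed, an LSC event is
 * propagated, optionally after the configured up/down delay.
 */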
2772 int
2773 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2774 		void *param, void *ret_param __rte_unused)
2775 {
2776 	struct rte_eth_dev *bonded_eth_dev;
2777 	struct bond_dev_private *internals;
2778 	struct rte_eth_link link;
2779 	int rc = -1;
2780 	int ret;
2781 
2782 	uint8_t lsc_flag = 0;
2783 	int valid_slave = 0;
2784 	uint16_t active_pos;
2785 	uint16_t i;
2786 
2787 	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2788 		return rc;
2789 
2790 	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2791 
2792 	if (check_for_bonded_ethdev(bonded_eth_dev))
2793 		return rc;
2794 
2795 	internals = bonded_eth_dev->data->dev_private;
2796 
2797 	/* If the device isn't started don't handle interrupts */
2798 	if (!bonded_eth_dev->data->dev_started)
2799 		return rc;
2800 
2801 	/* verify that port_id is a valid slave of bonded port */
2802 	for (i = 0; i < internals->slave_count; i++) {
2803 		if (internals->slaves[i].port_id == port_id) {
2804 			valid_slave = 1;
2805 			break;
2806 		}
2807 	}
2808 
2809 	if (!valid_slave)
2810 		return rc;
2811 
2812 	/* Synchronize lsc callback parallel calls either by real link event
2813 	 * from the slave PMDs or by the bonding PMD itself.
2814 	 */
2815 	rte_spinlock_lock(&internals->lsc_lock);
2816 
2817 	/* Search for port in active port list */
2818 	active_pos = find_slave_by_id(internals->active_slaves,
2819 			internals->active_slave_count, port_id);
2820 
2821 	ret = rte_eth_link_get_nowait(port_id, &link);
2822 	if (ret < 0)
2823 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2824 
2825 	if (ret == 0 && link.link_status) {
2826 		if (active_pos < internals->active_slave_count)
2827 			goto link_update;
2828 
2829 		/* check link state properties if bonded link is up */
2830 		if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2831 			if (link_properties_valid(bonded_eth_dev, &link) != 0)
2832 				RTE_BOND_LOG(ERR, "Invalid link properties "
2833 					     "for slave %d in bonding mode %d",
2834 					     port_id, internals->mode);
2835 		} else {
2836 			/* inherit slave link properties */
2837 			link_properties_set(bonded_eth_dev, &link);
2838 		}
2839 
2840 		/* If no active slave ports then set this port to be
2841 		 * the primary port.
2842 		 */
2843 		if (internals->active_slave_count < 1) {
2844 			/* If first active slave, then change link status */
2845 			bonded_eth_dev->data->dev_link.link_status =
2846 								ETH_LINK_UP;
2847 			internals->current_primary_port = port_id;
2848 			lsc_flag = 1;
2849 
2850 			mac_address_slaves_update(bonded_eth_dev);
2851 		}
2852 
2853 		activate_slave(bonded_eth_dev, port_id);
2854 
2855 		/* If the user has defined the primary port then default to
2856 		 * using it.
2857 		 */
2858 		if (internals->user_defined_primary_port &&
2859 				internals->primary_port == port_id)
2860 			bond_ethdev_primary_set(internals, port_id);
2861 	} else {
2862 		if (active_pos == internals->active_slave_count)
2863 			goto link_update;
2864 
2865 		/* Remove from active slave list */
2866 		deactivate_slave(bonded_eth_dev, port_id);
2867 
2868 		if (internals->active_slave_count < 1)
2869 			lsc_flag = 1;
2870 
2871 		/* Update primary id: take the first active slave from the list, or
2872 		 * fall back to the configured primary port if none are available */
2873 		if (port_id == internals->current_primary_port) {
2874 			if (internals->active_slave_count > 0)
2875 				bond_ethdev_primary_set(internals,
2876 						internals->active_slaves[0]);
2877 			else
2878 				internals->current_primary_port = internals->primary_port;
2879 			mac_address_slaves_update(bonded_eth_dev);
2880 		}
2881 	}
2882 
2883 link_update:
2884 	/**
2885 	 * Update bonded device link properties after any change to active
2886 	 * slaves
2887 	 */
2888 	bond_ethdev_link_update(bonded_eth_dev, 0);
2889 
2890 	if (lsc_flag) {
2891 		/* Cancel any possible outstanding interrupts if delays are enabled */
2892 		if (internals->link_up_delay_ms > 0 ||
2893 			internals->link_down_delay_ms > 0)
2894 			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2895 					bonded_eth_dev);
2896 
2897 		if (bonded_eth_dev->data->dev_link.link_status) {
2898 			if (internals->link_up_delay_ms > 0)
2899 				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2900 						bond_ethdev_delayed_lsc_propagation,
2901 						(void *)bonded_eth_dev);
2902 			else
2903 				_rte_eth_dev_callback_process(bonded_eth_dev,
2904 						RTE_ETH_EVENT_INTR_LSC,
2905 						NULL);
2906 
2907 		} else {
2908 			if (internals->link_down_delay_ms > 0)
2909 				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2910 						bond_ethdev_delayed_lsc_propagation,
2911 						(void *)bonded_eth_dev);
2912 			else
2913 				_rte_eth_dev_callback_process(bonded_eth_dev,
2914 						RTE_ETH_EVENT_INTR_LSC,
2915 						NULL);
2916 		}
2917 	}
2918 
2919 	rte_spinlock_unlock(&internals->lsc_lock);
2920 
2921 	return rc;
2922 }
2923 
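/*
 * Update the RSS redirection table: the new entries are stored in the bonded
 * device's RETA configuration (replicated to fill the whole table) and then
 * propagated to every slave using each slave's own RETA size.
 */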
2924 static int
2925 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2926 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2927 {
2928 	unsigned i, j;
2929 	int result = 0;
2930 	int slave_reta_size;
2931 	unsigned reta_count;
2932 	struct bond_dev_private *internals = dev->data->dev_private;
2933 
2934 	if (reta_size != internals->reta_size)
2935 		return -EINVAL;
2936 
2937 	 /* Copy RETA table */
2938 	reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) /
2939 			RTE_RETA_GROUP_SIZE;
2940 
2941 	for (i = 0; i < reta_count; i++) {
2942 		internals->reta_conf[i].mask = reta_conf[i].mask;
2943 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2944 			if ((reta_conf[i].mask >> j) & 0x01)
2945 				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2946 	}
2947 
2948 	/* Fill rest of array */
2949 	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2950 		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2951 				sizeof(internals->reta_conf[0]) * reta_count);
2952 
2953 	/* Propagate RETA over slaves */
2954 	for (i = 0; i < internals->slave_count; i++) {
2955 		slave_reta_size = internals->slaves[i].reta_size;
2956 		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2957 				&internals->reta_conf[0], slave_reta_size);
2958 		if (result < 0)
2959 			return result;
2960 	}
2961 
2962 	return 0;
2963 }
2964 
2965 static int
2966 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2967 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2968 {
2969 	int i, j;
2970 	struct bond_dev_private *internals = dev->data->dev_private;
2971 
2972 	if (reta_size != internals->reta_size)
2973 		return -EINVAL;
2974 
2975 	 /* Copy RETA table */
2976 	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2977 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2978 			if ((reta_conf[i].mask >> j) & 0x01)
2979 				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2980 
2981 	return 0;
2982 }
2983 
2984 static int
2985 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2986 		struct rte_eth_rss_conf *rss_conf)
2987 {
2988 	int i, result = 0;
2989 	struct bond_dev_private *internals = dev->data->dev_private;
2990 	struct rte_eth_rss_conf bond_rss_conf;
2991 
2992 	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2993 
2994 	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2995 
2996 	if (bond_rss_conf.rss_hf != 0)
2997 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2998 
2999 	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
3000 			sizeof(internals->rss_key)) {
3001 		if (bond_rss_conf.rss_key_len == 0)
3002 			bond_rss_conf.rss_key_len = 40;
3003 		internals->rss_key_len = bond_rss_conf.rss_key_len;
3004 		memcpy(internals->rss_key, bond_rss_conf.rss_key,
3005 				internals->rss_key_len);
3006 	}
3007 
3008 	for (i = 0; i < internals->slave_count; i++) {
3009 		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3010 				&bond_rss_conf);
3011 		if (result < 0)
3012 			return result;
3013 	}
3014 
3015 	return 0;
3016 }
3017 
3018 static int
3019 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3020 		struct rte_eth_rss_conf *rss_conf)
3021 {
3022 	struct bond_dev_private *internals = dev->data->dev_private;
3023 
3024 	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3025 	rss_conf->rss_key_len = internals->rss_key_len;
3026 	if (rss_conf->rss_key)
3027 		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3028 
3029 	return 0;
3030 }
3031 
3032 static int
3033 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3034 {
3035 	struct rte_eth_dev *slave_eth_dev;
3036 	struct bond_dev_private *internals = dev->data->dev_private;
3037 	int ret, i;
3038 
3039 	rte_spinlock_lock(&internals->lock);
3040 
3041 	for (i = 0; i < internals->slave_count; i++) {
3042 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3043 		if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3044 			rte_spinlock_unlock(&internals->lock);
3045 			return -ENOTSUP;
3046 		}
3047 	}
3048 	for (i = 0; i < internals->slave_count; i++) {
3049 		ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3050 		if (ret < 0) {
3051 			rte_spinlock_unlock(&internals->lock);
3052 			return ret;
3053 		}
3054 	}
3055 
3056 	rte_spinlock_unlock(&internals->lock);
3057 	return 0;
3058 }
3059 
3060 static int
3061 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3062 			struct rte_ether_addr *addr)
3063 {
3064 	if (mac_address_set(dev, addr)) {
3065 		RTE_BOND_LOG(ERR, "Failed to update MAC address");
3066 		return -EINVAL;
3067 	}
3068 
3069 	return 0;
3070 }
3071 
3072 static int
3073 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
3074 		 enum rte_filter_type type, enum rte_filter_op op, void *arg)
3075 {
3076 	if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
3077 		*(const void **)arg = &bond_flow_ops;
3078 		return 0;
3079 	}
3080 	return -ENOTSUP;
3081 }
3082 
3083 static int
3084 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3085 			struct rte_ether_addr *mac_addr,
3086 			__rte_unused uint32_t index, uint32_t vmdq)
3087 {
3088 	struct rte_eth_dev *slave_eth_dev;
3089 	struct bond_dev_private *internals = dev->data->dev_private;
3090 	int ret, i;
3091 
3092 	rte_spinlock_lock(&internals->lock);
3093 
3094 	for (i = 0; i < internals->slave_count; i++) {
3095 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3096 		if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3097 			 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3098 			ret = -ENOTSUP;
3099 			goto end;
3100 		}
3101 	}
3102 
3103 	for (i = 0; i < internals->slave_count; i++) {
3104 		ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3105 				mac_addr, vmdq);
3106 		if (ret < 0) {
3107 			/* rollback */
3108 			for (i--; i >= 0; i--)
3109 				rte_eth_dev_mac_addr_remove(
3110 					internals->slaves[i].port_id, mac_addr);
3111 			goto end;
3112 		}
3113 	}
3114 
3115 	ret = 0;
3116 end:
3117 	rte_spinlock_unlock(&internals->lock);
3118 	return ret;
3119 }
3120 
3121 static void
3122 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3123 {
3124 	struct rte_eth_dev *slave_eth_dev;
3125 	struct bond_dev_private *internals = dev->data->dev_private;
3126 	int i;
3127 
3128 	rte_spinlock_lock(&internals->lock);
3129 
3130 	for (i = 0; i < internals->slave_count; i++) {
3131 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3132 		if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3133 			goto end;
3134 	}
3135 
3136 	struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3137 
3138 	for (i = 0; i < internals->slave_count; i++)
3139 		rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3140 				mac_addr);
3141 
3142 end:
3143 	rte_spinlock_unlock(&internals->lock);
3144 }
3145 
3146 const struct eth_dev_ops default_dev_ops = {
3147 	.dev_start            = bond_ethdev_start,
3148 	.dev_stop             = bond_ethdev_stop,
3149 	.dev_close            = bond_ethdev_close,
3150 	.dev_configure        = bond_ethdev_configure,
3151 	.dev_infos_get        = bond_ethdev_info,
3152 	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
3153 	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
3154 	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
3155 	.rx_queue_release     = bond_ethdev_rx_queue_release,
3156 	.tx_queue_release     = bond_ethdev_tx_queue_release,
3157 	.link_update          = bond_ethdev_link_update,
3158 	.stats_get            = bond_ethdev_stats_get,
3159 	.stats_reset          = bond_ethdev_stats_reset,
3160 	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
3161 	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
3162 	.allmulticast_enable  = bond_ethdev_allmulticast_enable,
3163 	.allmulticast_disable = bond_ethdev_allmulticast_disable,
3164 	.reta_update          = bond_ethdev_rss_reta_update,
3165 	.reta_query           = bond_ethdev_rss_reta_query,
3166 	.rss_hash_update      = bond_ethdev_rss_hash_update,
3167 	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3168 	.mtu_set              = bond_ethdev_mtu_set,
3169 	.mac_addr_set         = bond_ethdev_mac_address_set,
3170 	.mac_addr_add         = bond_ethdev_mac_addr_add,
3171 	.mac_addr_remove      = bond_ethdev_mac_addr_remove,
3172 	.filter_ctrl          = bond_filter_ctrl
3173 };
3174 
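/*
 * Allocate and initialize a new bonded ethdev: reserve the ethdev entry and
 * MAC address storage, set default private data (polling interval, xmit
 * policy, mode 4 defaults), apply the requested bonding mode and create the
 * VLAN filter bitmap.
 */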
3175 static int
3176 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3177 {
3178 	const char *name = rte_vdev_device_name(dev);
3179 	uint8_t socket_id = dev->device.numa_node;
3180 	struct bond_dev_private *internals = NULL;
3181 	struct rte_eth_dev *eth_dev = NULL;
3182 	uint32_t vlan_filter_bmp_size;
3183 
3184 	/* now do all data allocation - for eth_dev structure, dummy pci driver
3185 	 * and internal (private) data
3186 	 */
3187 
3188 	/* reserve an ethdev entry */
3189 	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3190 	if (eth_dev == NULL) {
3191 		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3192 		goto err;
3193 	}
3194 
3195 	internals = eth_dev->data->dev_private;
3196 	eth_dev->data->nb_rx_queues = (uint16_t)1;
3197 	eth_dev->data->nb_tx_queues = (uint16_t)1;
3198 
3199 	/* Allocate memory for storing MAC addresses */
3200 	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3201 			BOND_MAX_MAC_ADDRS, 0, socket_id);
3202 	if (eth_dev->data->mac_addrs == NULL) {
3203 		RTE_BOND_LOG(ERR,
3204 			     "Failed to allocate %u bytes needed to store MAC addresses",
3205 			     RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3206 		goto err;
3207 	}
3208 
3209 	eth_dev->dev_ops = &default_dev_ops;
3210 	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3211 
3212 	rte_spinlock_init(&internals->lock);
3213 	rte_spinlock_init(&internals->lsc_lock);
3214 
3215 	internals->port_id = eth_dev->data->port_id;
3216 	internals->mode = BONDING_MODE_INVALID;
3217 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3218 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3219 	internals->burst_xmit_hash = burst_xmit_l2_hash;
3220 	internals->user_defined_mac = 0;
3221 
3222 	internals->link_status_polling_enabled = 0;
3223 
3224 	internals->link_status_polling_interval_ms =
3225 		DEFAULT_POLLING_INTERVAL_10_MS;
3226 	internals->link_down_delay_ms = 0;
3227 	internals->link_up_delay_ms = 0;
3228 
3229 	internals->slave_count = 0;
3230 	internals->active_slave_count = 0;
3231 	internals->rx_offload_capa = 0;
3232 	internals->tx_offload_capa = 0;
3233 	internals->rx_queue_offload_capa = 0;
3234 	internals->tx_queue_offload_capa = 0;
3235 	internals->candidate_max_rx_pktlen = 0;
3236 	internals->max_rx_pktlen = 0;
3237 
3238 	/* Initially allow to choose any offload type */
3239 	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3240 
3241 	memset(&internals->default_rxconf, 0,
3242 	       sizeof(internals->default_rxconf));
3243 	memset(&internals->default_txconf, 0,
3244 	       sizeof(internals->default_txconf));
3245 
3246 	memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3247 	memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3248 
3249 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3250 	memset(internals->slaves, 0, sizeof(internals->slaves));
3251 
3252 	TAILQ_INIT(&internals->flow_list);
3253 	internals->flow_isolated_valid = 0;
3254 
3255 	/* Set mode 4 default configuration */
3256 	bond_mode_8023ad_setup(eth_dev, NULL);
3257 	if (bond_ethdev_mode_set(eth_dev, mode)) {
3258 		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3259 				 eth_dev->data->port_id, mode);
3260 		goto err;
3261 	}
3262 
3263 	vlan_filter_bmp_size =
3264 		rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3265 	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3266 						   RTE_CACHE_LINE_SIZE);
3267 	if (internals->vlan_filter_bmpmem == NULL) {
3268 		RTE_BOND_LOG(ERR,
3269 			     "Failed to allocate vlan bitmap for bonded device %u",
3270 			     eth_dev->data->port_id);
3271 		goto err;
3272 	}
3273 
3274 	internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3275 			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3276 	if (internals->vlan_filter_bmp == NULL) {
3277 		RTE_BOND_LOG(ERR,
3278 			     "Failed to init vlan bitmap for bonded device %u",
3279 			     eth_dev->data->port_id);
3280 		rte_free(internals->vlan_filter_bmpmem);
3281 		goto err;
3282 	}
3283 
3284 	return eth_dev->data->port_id;
3285 
3286 err:
3287 	rte_free(internals);
3288 	if (eth_dev != NULL)
3289 		eth_dev->data->dev_private = NULL;
3290 	rte_eth_dev_release_port(eth_dev);
3291 	return -1;
3292 }
3293 
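/*
 * Probe entry point for the bonding vdev driver: parse the device arguments,
 * create the bonded ethdev and apply the 802.3ad aggregator selection mode.
 * A bonded device is typically created from EAL devargs, for example
 * (illustrative only; the accepted keys are those defined by the
 * PMD_BOND_*_KVARG constants):
 *   --vdev 'net_bonding0,mode=1,slave=0000:02:00.0,slave=0000:02:00.1'
 */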
3294 static int
3295 bond_probe(struct rte_vdev_device *dev)
3296 {
3297 	const char *name;
3298 	struct bond_dev_private *internals;
3299 	struct rte_kvargs *kvlist;
3300 	uint8_t bonding_mode, socket_id/*, agg_mode*/;
3301 	int  arg_count, port_id;
3302 	uint8_t agg_mode;
3303 	struct rte_eth_dev *eth_dev;
3304 
3305 	if (!dev)
3306 		return -EINVAL;
3307 
3308 	name = rte_vdev_device_name(dev);
3309 	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3310 
3311 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3312 		eth_dev = rte_eth_dev_attach_secondary(name);
3313 		if (!eth_dev) {
3314 			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3315 			return -1;
3316 		}
3317 		/* TODO: request info from primary to set up Rx and Tx */
3318 		eth_dev->dev_ops = &default_dev_ops;
3319 		eth_dev->device = &dev->device;
3320 		rte_eth_dev_probing_finish(eth_dev);
3321 		return 0;
3322 	}
3323 
3324 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3325 		pmd_bond_init_valid_arguments);
3326 	if (kvlist == NULL)
3327 		return -1;
3328 
3329 	/* Parse link bonding mode */
3330 	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3331 		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3332 				&bond_ethdev_parse_slave_mode_kvarg,
3333 				&bonding_mode) != 0) {
3334 			RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3335 					name);
3336 			goto parse_error;
3337 		}
3338 	} else {
3339 		RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3340 				"device %s", name);
3341 		goto parse_error;
3342 	}
3343 
3344 	/* Parse socket id to create bonding device on */
3345 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3346 	if (arg_count == 1) {
3347 		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3348 				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
3349 				!= 0) {
3350 			RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3351 					"bonded device %s", name);
3352 			goto parse_error;
3353 		}
3354 	} else if (arg_count > 1) {
3355 		RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3356 				"bonded device %s", name);
3357 		goto parse_error;
3358 	} else {
3359 		socket_id = rte_socket_id();
3360 	}
3361 
3362 	dev->device.numa_node = socket_id;
3363 
3364 	/* Create link bonding eth device */
3365 	port_id = bond_alloc(dev, bonding_mode);
3366 	if (port_id < 0) {
3367 		RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3368 				"socket %u.", name, bonding_mode, socket_id);
3369 		goto parse_error;
3370 	}
3371 	internals = rte_eth_devices[port_id].data->dev_private;
3372 	internals->kvlist = kvlist;
3373 
3374 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3375 		if (rte_kvargs_process(kvlist,
3376 				PMD_BOND_AGG_MODE_KVARG,
3377 				&bond_ethdev_parse_slave_agg_mode_kvarg,
3378 				&agg_mode) != 0) {
3379 			RTE_BOND_LOG(ERR,
3380 					"Failed to parse agg selection mode for bonded device %s",
3381 					name);
3382 			goto parse_error;
3383 		}
3384 
3385 		if (internals->mode == BONDING_MODE_8023AD)
3386 			internals->mode4.agg_selection = agg_mode;
3387 	} else {
3388 		internals->mode4.agg_selection = AGG_STABLE;
3389 	}
3390 
3391 	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3392 	RTE_BOND_LOG(INFO, "Created bonded device %s on port %d in mode %u on "
3393 			"socket %u.", name, port_id, bonding_mode, socket_id);
3394 	return 0;
3395 
3396 parse_error:
3397 	rte_kvargs_free(kvlist);
3398 
3399 	return -1;
3400 }
3401 
3402 static int
3403 bond_remove(struct rte_vdev_device *dev)
3404 {
3405 	struct rte_eth_dev *eth_dev;
3406 	struct bond_dev_private *internals;
3407 	const char *name;
3408 
3409 	if (!dev)
3410 		return -EINVAL;
3411 
3412 	name = rte_vdev_device_name(dev);
3413 	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3414 
3415 	/* now free all data allocation - for eth_dev structure,
3416 	 * dummy pci driver and internal (private) data
3417 	 */
3418 
3419 	/* find an ethdev entry */
3420 	eth_dev = rte_eth_dev_allocated(name);
3421 	if (eth_dev == NULL)
3422 		return -ENODEV;
3423 
3424 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3425 		return rte_eth_dev_release_port(eth_dev);
3426 
3427 	RTE_ASSERT(eth_dev->device == &dev->device);
3428 
3429 	internals = eth_dev->data->dev_private;
3430 	if (internals->slave_count != 0)
3431 		return -EBUSY;
3432 
3433 	if (eth_dev->data->dev_started == 1) {
3434 		bond_ethdev_stop(eth_dev);
3435 		bond_ethdev_close(eth_dev);
3436 	}
3437 
3438 	eth_dev->dev_ops = NULL;
3439 	eth_dev->rx_pkt_burst = NULL;
3440 	eth_dev->tx_pkt_burst = NULL;
3441 
3442 	internals = eth_dev->data->dev_private;
3443 	/* Try to release the mempool used in mode 6. If the bonded
3444 	 * device is not in mode 6, freeing a NULL pointer is not a problem.
3445 	 */
3446 	rte_mempool_free(internals->mode6.mempool);
3447 	rte_bitmap_free(internals->vlan_filter_bmp);
3448 	rte_free(internals->vlan_filter_bmpmem);
3449 
3450 	rte_eth_dev_release_port(eth_dev);
3451 
3452 	return 0;
3453 }
3454 
3455 /* this part resolves the slave port ids after all the other pdevs and vdevs
3456  * have been allocated */
3457 static int
3458 bond_ethdev_configure(struct rte_eth_dev *dev)
3459 {
3460 	const char *name = dev->device->name;
3461 	struct bond_dev_private *internals = dev->data->dev_private;
3462 	struct rte_kvargs *kvlist = internals->kvlist;
3463 	int arg_count;
3464 	uint16_t port_id = dev - rte_eth_devices;
3465 	uint8_t agg_mode;
3466 
3467 	static const uint8_t default_rss_key[40] = {
3468 		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3469 		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3470 		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3471 		0xBE, 0xAC, 0x01, 0xFA
3472 	};
3473 
3474 	unsigned i, j;
3475 
3476 	/*
3477 	 * If RSS is enabled, fill table with default values and
3478 	 * set the key to the value specified in the port RSS configuration.
3479 	 * Fall back to the default RSS key if no key is specified.
3480 	 */
3481 	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3482 		if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3483 			internals->rss_key_len =
3484 				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3485 			memcpy(internals->rss_key,
3486 			       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3487 			       internals->rss_key_len);
3488 		} else {
3489 			internals->rss_key_len = sizeof(default_rss_key);
3490 			memcpy(internals->rss_key, default_rss_key,
3491 			       internals->rss_key_len);
3492 		}
3493 
3494 		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3495 			internals->reta_conf[i].mask = ~0LL;
3496 			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3497 				internals->reta_conf[i].reta[j] =
3498 						(i * RTE_RETA_GROUP_SIZE + j) %
3499 						dev->data->nb_rx_queues;
3500 		}
3501 	}
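
	/*
	 * Illustrative note (hypothetical values): with nb_rx_queues == 4 the
	 * default RETA filled above maps indirection entries to queues
	 * round-robin, i.e. 0, 1, 2, 3, 0, 1, 2, 3, ... across each group.
	 */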
3502 
3503 	/* set the max_rx_pktlen */
3504 	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3505 
3506 	/*
3507 	 * If there is no kvlist, this bonded device was created through the
3508 	 * bonding API rather than from devargs, so there is nothing to parse.
3509 	 */
3510 	if (!kvlist)
3511 		return 0;
3512 
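	/*
	 * Illustrative sketch (hypothetical names, not part of this driver):
	 * a bonded device without a kvlist is typically created
	 * programmatically, roughly as follows, in which case all of the
	 * kvarg parsing below is skipped:
	 *
	 *	int bond_port = rte_eth_bond_create("net_bonding0",
	 *			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
	 *	if (bond_port >= 0)
	 *		rte_eth_bond_slave_add(bond_port, slave_port_id);
	 */
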
3513 	/* Parse MAC address for bonded device */
3514 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3515 	if (arg_count == 1) {
3516 		struct rte_ether_addr bond_mac;
3517 
3518 		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3519 				       &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3520 			RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3521 				     name);
3522 			return -1;
3523 		}
3524 
3525 		/* Set MAC address */
3526 		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3527 			RTE_BOND_LOG(ERR,
3528 				     "Failed to set mac address on bonded device %s",
3529 				     name);
3530 			return -1;
3531 		}
3532 	} else if (arg_count > 1) {
3533 		RTE_BOND_LOG(ERR,
3534 			     "MAC address can be specified only once for bonded device %s",
3535 			     name);
3536 		return -1;
3537 	}
3538 
3539 	/* Parse/set balance mode transmit policy */
3540 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3541 	if (arg_count == 1) {
3542 		uint8_t xmit_policy;
3543 
3544 		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3545 				       &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3546 		    0) {
3547 			RTE_BOND_LOG(INFO,
3548 				     "Invalid xmit policy specified for bonded device %s",
3549 				     name);
3550 			return -1;
3551 		}
3552 
3553 		/* Set balance mode transmit policy */
3554 		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3555 			RTE_BOND_LOG(ERR,
3556 				     "Failed to set balance xmit policy on bonded device %s",
3557 				     name);
3558 			return -1;
3559 		}
3560 	} else if (arg_count > 1) {
3561 		RTE_BOND_LOG(ERR,
3562 			     "Transmit policy can be specified only once for bonded device %s",
3563 			     name);
3564 		return -1;
3565 	}
3566 
3567 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3568 		if (rte_kvargs_process(kvlist,
3569 				       PMD_BOND_AGG_MODE_KVARG,
3570 				       &bond_ethdev_parse_slave_agg_mode_kvarg,
3571 				       &agg_mode) != 0) {
3572 			RTE_BOND_LOG(ERR,
3573 				     "Failed to parse agg selection mode for bonded device %s", name);
3574 			return -1;
3575 		}
3576 		if (internals->mode == BONDING_MODE_8023AD) {
3577 			int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3578 					agg_mode);
3579 			if (ret < 0) {
3580 				RTE_BOND_LOG(ERR,
3581 					"Invalid args for agg selection set for bonded device %s",
3582 					name);
3583 				return -1;
3584 			}
3585 		}
3586 	}
3587 
3588 	/* Parse/add slave ports to bonded device */
3589 	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3590 		struct bond_ethdev_slave_ports slave_ports;
3591 		unsigned i;
3592 
3593 		memset(&slave_ports, 0, sizeof(slave_ports));
3594 
3595 		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3596 				       &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3597 			RTE_BOND_LOG(ERR,
3598 				     "Failed to parse slave ports for bonded device %s",
3599 				     name);
3600 			return -1;
3601 		}
3602 
3603 		for (i = 0; i < slave_ports.slave_count; i++) {
3604 			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3605 				RTE_BOND_LOG(ERR,
3606 					     "Failed to add port %d as slave to bonded device %s",
3607 					     slave_ports.slaves[i], name);
3608 			}
3609 		}
3610 
3611 	} else {
3612 		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3613 		return -1;
3614 	}
3615 
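	/*
	 * Note: a failure to add an individual slave above is only logged and
	 * does not abort configuration, so the bonded device may start with
	 * fewer slaves than requested; further slaves can still be attached
	 * later with rte_eth_bond_slave_add().
	 */
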
3616 	/* Parse/set primary slave port id */
3617 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3618 	if (arg_count == 1) {
3619 		uint16_t primary_slave_port_id;
3620 
3621 		if (rte_kvargs_process(kvlist,
3622 				       PMD_BOND_PRIMARY_SLAVE_KVARG,
3623 				       &bond_ethdev_parse_primary_slave_port_id_kvarg,
3624 				       &primary_slave_port_id) < 0) {
3625 			RTE_BOND_LOG(INFO,
3626 				     "Invalid primary slave port id specified for bonded device %s",
3627 				     name);
3628 			return -1;
3629 		}
3630 
3631 		/* Set the primary slave port id */
3632 		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3633 		    != 0) {
3634 			RTE_BOND_LOG(ERR,
3635 				     "Failed to set primary slave port %d on bonded device %s",
3636 				     primary_slave_port_id, name);
3637 			return -1;
3638 		}
3639 	} else if (arg_count > 1) {
3640 		RTE_BOND_LOG(INFO,
3641 			     "Primary slave can be specified only once for bonded device %s",
3642 			     name);
3643 		return -1;
3644 	}
3645 
3646 	/* Parse link status monitor polling interval */
3647 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3648 	if (arg_count == 1) {
3649 		uint32_t lsc_poll_interval_ms;
3650 
3651 		if (rte_kvargs_process(kvlist,
3652 				       PMD_BOND_LSC_POLL_PERIOD_KVARG,
3653 				       &bond_ethdev_parse_time_ms_kvarg,
3654 				       &lsc_poll_interval_ms) < 0) {
3655 			RTE_BOND_LOG(INFO,
3656 				     "Invalid lsc polling interval value specified for bonded"
3657 				     " device %s", name);
3658 			return -1;
3659 		}
3660 
3661 		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3662 		    != 0) {
3663 			RTE_BOND_LOG(ERR,
3664 				     "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3665 				     lsc_poll_interval_ms, name);
3666 			return -1;
3667 		}
3668 	} else if (arg_count > 1) {
3669 		RTE_BOND_LOG(INFO,
3670 			     "LSC polling interval can be specified only once for bonded"
3671 			     " device %s", name);
3672 		return -1;
3673 	}
3674 
3675 	/* Parse link up interrupt propagation delay */
3676 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3677 	if (arg_count == 1) {
3678 		uint32_t link_up_delay_ms;
3679 
3680 		if (rte_kvargs_process(kvlist,
3681 				       PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3682 				       &bond_ethdev_parse_time_ms_kvarg,
3683 				       &link_up_delay_ms) < 0) {
3684 			RTE_BOND_LOG(INFO,
3685 				     "Invalid link up propagation delay value specified for"
3686 				     " bonded device %s", name);
3687 			return -1;
3688 		}
3689 
3690 		/* Set link up propagation delay */
3691 		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3692 		    != 0) {
3693 			RTE_BOND_LOG(ERR,
3694 				     "Failed to set link up propagation delay (%u ms) on bonded"
3695 				     " device %s", link_up_delay_ms, name);
3696 			return -1;
3697 		}
3698 	} else if (arg_count > 1) {
3699 		RTE_BOND_LOG(INFO,
3700 			     "Link up propagation delay can be specified only once for"
3701 			     " bonded device %s", name);
3702 		return -1;
3703 	}
3704 
3705 	/* Parse link down interrupt propagation delay */
3706 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3707 	if (arg_count == 1) {
3708 		uint32_t link_down_delay_ms;
3709 
3710 		if (rte_kvargs_process(kvlist,
3711 				       PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3712 				       &bond_ethdev_parse_time_ms_kvarg,
3713 				       &link_down_delay_ms) < 0) {
3714 			RTE_BOND_LOG(INFO,
3715 				     "Invalid link down propagation delay value specified for"
3716 				     " bonded device %s", name);
3717 			return -1;
3718 		}
3719 
3720 		/* Set link down propagation delay */
3721 		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3722 		    != 0) {
3723 			RTE_BOND_LOG(ERR,
3724 				     "Failed to set link down propagation delay (%u ms) on bonded device %s",
3725 				     link_down_delay_ms, name);
3726 			return -1;
3727 		}
3728 	} else if (arg_count > 1) {
3729 		RTE_BOND_LOG(INFO,
3730 			     "Link down propagation delay can be specified only once for bonded device %s",
3731 			     name);
3732 		return -1;
3733 	}
3734 
3735 	return 0;
3736 }
3737 
3738 struct rte_vdev_driver pmd_bond_drv = {
3739 	.probe = bond_probe,
3740 	.remove = bond_remove,
3741 };
3742 
3743 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3744 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3745 
3746 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3747 	"slave=<ifc> "
3748 	"primary=<ifc> "
3749 	"mode=[0-6] "
3750 	"xmit_policy=[l2 | l23 | l34] "
3751 	"agg_mode=[count | stable | bandwidth] "
3752 	"socket_id=<int> "
3753 	"mac=<mac addr> "
3754 	"lsc_poll_period_ms=<int> "
3755 	"up_delay=<int> "
3756 	"down_delay=<int>");
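
/*
 * Illustrative EAL usage of the parameters registered above (the device
 * name and PCI addresses are hypothetical examples):
 *
 *   --vdev 'net_bonding0,mode=2,slave=0000:02:00.0,slave=0000:02:00.1,xmit_policy=l34'
 */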
3757 
3758 RTE_LOG_REGISTER(bond_logtype, pmd.net.bond, NOTICE);
3759