xref: /dpdk/drivers/net/bonding/rte_eth_bond_pmd.c (revision c9902a15bd005b6d4fe072cf7b60fe4ee679155f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <stdbool.h>
6 #include <netinet/in.h>
7 
8 #include <rte_mbuf.h>
9 #include <rte_malloc.h>
10 #include <ethdev_driver.h>
11 #include <ethdev_vdev.h>
12 #include <rte_tcp.h>
13 #include <rte_udp.h>
14 #include <rte_ip.h>
15 #include <rte_ip_frag.h>
16 #include <rte_devargs.h>
17 #include <rte_kvargs.h>
18 #include <rte_bus_vdev.h>
19 #include <rte_alarm.h>
20 #include <rte_cycles.h>
21 #include <rte_string_fns.h>
22 
23 #include "rte_eth_bond.h"
24 #include "eth_bond_private.h"
25 #include "eth_bond_8023ad_private.h"
26 
27 #define REORDER_PERIOD_MS 10
28 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
29 #define BOND_MAX_MAC_ADDRS 16
30 
31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
32 
33 /* Table for statistics in mode 5 TLB */
34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
35 
36 static inline size_t
37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
38 {
39 	size_t vlan_offset = 0;
40 
41 	if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 		rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 		struct rte_vlan_hdr *vlan_hdr =
44 			(struct rte_vlan_hdr *)(eth_hdr + 1);
45 
46 		vlan_offset = sizeof(struct rte_vlan_hdr);
47 		*proto = vlan_hdr->eth_proto;
48 
49 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 			vlan_hdr = vlan_hdr + 1;
51 			*proto = vlan_hdr->eth_proto;
52 			vlan_offset += sizeof(struct rte_vlan_hdr);
53 		}
54 	}
55 	return vlan_offset;
56 }
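/*
 * A minimal usage sketch (values assumed for illustration): for a frame laid
 * out as Ether(0x8100)/VLAN/IPv4, get_vlan_offset() updates *proto to the
 * inner EtherType and returns sizeof(struct rte_vlan_hdr); for QinQ
 * (0x88A8 outer, 0x8100 inner) it returns twice that.  Callers then locate
 * the L3 header with:
 *
 *   uint16_t proto = eth_hdr->ether_type;
 *   size_t off = get_vlan_offset(eth_hdr, &proto);
 *   struct rte_ipv4_hdr *ip4 = (struct rte_ipv4_hdr *)
 *       ((char *)(eth_hdr + 1) + off);
 */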
57 
58 static uint16_t
59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
60 {
61 	struct bond_dev_private *internals;
62 
63 	uint16_t num_rx_total = 0;
64 	uint16_t slave_count;
65 	uint16_t active_slave;
66 	int i;
67 
68 	/* Cast to structure containing the bonded device's port id and queue id */
69 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
70 	internals = bd_rx_q->dev_private;
71 	slave_count = internals->active_slave_count;
72 	active_slave = bd_rx_q->active_slave;
73 
74 	for (i = 0; i < slave_count && nb_pkts; i++) {
75 		uint16_t num_rx_slave;
76 
77 		/* Offset of pointer to *bufs increases as packets are received
78 		 * from other slaves */
79 		num_rx_slave =
80 			rte_eth_rx_burst(internals->active_slaves[active_slave],
81 					 bd_rx_q->queue_id,
82 					 bufs + num_rx_total, nb_pkts);
83 		num_rx_total += num_rx_slave;
84 		nb_pkts -= num_rx_slave;
85 		if (++active_slave == slave_count)
86 			active_slave = 0;
87 	}
88 
89 	if (++bd_rx_q->active_slave >= slave_count)
90 		bd_rx_q->active_slave = 0;
91 	return num_rx_total;
92 }
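/*
 * Round-robin (mode 0) Rx: bd_rx_q->active_slave records where the previous
 * burst started and is advanced by one on every call, so successive bursts
 * start polling from a different active slave and receive load is spread
 * even when individual bursts are short.
 */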
93 
94 static uint16_t
95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
96 		uint16_t nb_pkts)
97 {
98 	struct bond_dev_private *internals;
99 
100 	/* Cast to structure containing the bonded device's port id and queue id */
101 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
102 
103 	internals = bd_rx_q->dev_private;
104 
105 	return rte_eth_rx_burst(internals->current_primary_port,
106 			bd_rx_q->queue_id, bufs, nb_pkts);
107 }
108 
109 static inline uint8_t
110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
111 {
112 	const uint16_t ether_type_slow_be =
113 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
114 
115 	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 		(ethertype == ether_type_slow_be &&
117 		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
118 }
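/*
 * Note: RTE_ETHER_TYPE_SLOW is the IEEE 802.3 slow-protocols EtherType
 * (0x8809); the subtype byte distinguishes LACPDUs from marker PDUs.
 * Slow-protocol frames are never VLAN tagged, hence the vlan_tci check.
 */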
119 
120 /*****************************************************************************
121  * Flow director's setup for mode 4 optimization
122  */
123 
124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
125 	.dst.addr_bytes = { 0 },
126 	.src.addr_bytes = { 0 },
127 	.type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
128 };
129 
130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
131 	.dst.addr_bytes = { 0 },
132 	.src.addr_bytes = { 0 },
133 	.type = 0xFFFF,
134 };
135 
136 static struct rte_flow_item flow_item_8023ad[] = {
137 	{
138 		.type = RTE_FLOW_ITEM_TYPE_ETH,
139 		.spec = &flow_item_eth_type_8023ad,
140 		.last = NULL,
141 		.mask = &flow_item_eth_mask_type_8023ad,
142 	},
143 	{
144 		.type = RTE_FLOW_ITEM_TYPE_END,
145 		.spec = NULL,
146 		.last = NULL,
147 		.mask = NULL,
148 	}
149 };
150 
151 const struct rte_flow_attr flow_attr_8023ad = {
152 	.group = 0,
153 	.priority = 0,
154 	.ingress = 1,
155 	.egress = 0,
156 	.reserved = 0,
157 };
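/*
 * Taken together, flow_attr_8023ad and flow_item_8023ad describe an ingress
 * rule matching EtherType 0x8809 (slow protocols).  A roughly equivalent
 * rule in testpmd flow syntax, assuming the dedicated Rx queue is queue 1,
 * would be:
 *
 *   flow create <slave_port> ingress pattern eth type is 0x8809 / end
 *        actions queue index 1 / end
 */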
158 
159 int
160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
161 		uint16_t slave_port) {
162 	struct rte_eth_dev_info slave_info;
163 	struct rte_flow_error error;
164 	struct bond_dev_private *internals = bond_dev->data->dev_private;
165 
166 	const struct rte_flow_action_queue lacp_queue_conf = {
167 		.index = 0,
168 	};
169 
170 	const struct rte_flow_action actions[] = {
171 		{
172 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
173 			.conf = &lacp_queue_conf
174 		},
175 		{
176 			.type = RTE_FLOW_ACTION_TYPE_END,
177 		}
178 	};
179 
180 	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
181 			flow_item_8023ad, actions, &error);
182 	if (ret < 0) {
183 		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
184 				__func__, error.message, slave_port,
185 				internals->mode4.dedicated_queues.rx_qid);
186 		return -1;
187 	}
188 
189 	ret = rte_eth_dev_info_get(slave_port, &slave_info);
190 	if (ret != 0) {
191 		RTE_BOND_LOG(ERR,
192 			"%s: Error during getting device (port %u) info: %s\n",
193 			__func__, slave_port, strerror(-ret));
194 
195 		return ret;
196 	}
197 
198 	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
199 			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
200 		RTE_BOND_LOG(ERR,
201 			"%s: Slave %d capabilities doesn't allow to allocate additional queues",
202 			__func__, slave_port);
203 		return -1;
204 	}
205 
206 	return 0;
207 }
208 
209 int
210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
211 	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
212 	struct bond_dev_private *internals = bond_dev->data->dev_private;
213 	struct rte_eth_dev_info bond_info;
214 	uint16_t idx;
215 	int ret;
216 
217 	/* Verify that all slaves in the bonding device support flow director */
218 	if (internals->slave_count > 0) {
219 		ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
220 		if (ret != 0) {
221 			RTE_BOND_LOG(ERR,
222 				"%s: Error during getting device (port %u) info: %s\n",
223 				__func__, bond_dev->data->port_id,
224 				strerror(-ret));
225 
226 			return ret;
227 		}
228 
229 		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
230 		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
231 
232 		for (idx = 0; idx < internals->slave_count; idx++) {
233 			if (bond_ethdev_8023ad_flow_verify(bond_dev,
234 					internals->slaves[idx].port_id) != 0)
235 				return -1;
236 		}
237 	}
238 
239 	return 0;
240 }
241 
242 int
243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
244 
245 	struct rte_flow_error error;
246 	struct bond_dev_private *internals = bond_dev->data->dev_private;
247 	struct rte_flow_action_queue lacp_queue_conf = {
248 		.index = internals->mode4.dedicated_queues.rx_qid,
249 	};
250 
251 	const struct rte_flow_action actions[] = {
252 		{
253 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
254 			.conf = &lacp_queue_conf
255 		},
256 		{
257 			.type = RTE_FLOW_ACTION_TYPE_END,
258 		}
259 	};
260 
261 	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
262 			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
263 	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
264 		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
265 				"(slave_port=%d queue_id=%d)",
266 				error.message, slave_port,
267 				internals->mode4.dedicated_queues.rx_qid);
268 		return -1;
269 	}
270 
271 	return 0;
272 }
273 
274 static inline uint16_t
275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
276 		bool dedicated_rxq)
277 {
278 	/* Cast to structure containing the bonded device's port id and queue id */
279 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
280 	struct bond_dev_private *internals = bd_rx_q->dev_private;
281 	struct rte_eth_dev *bonded_eth_dev =
282 					&rte_eth_devices[internals->port_id];
283 	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
284 	struct rte_ether_hdr *hdr;
285 
286 	const uint16_t ether_type_slow_be =
287 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
288 	uint16_t num_rx_total = 0;	/* Total number of received packets */
289 	uint16_t slaves[RTE_MAX_ETHPORTS];
290 	uint16_t slave_count, idx;
291 
292 	uint8_t collecting;  /* current slave collecting status */
293 	const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
294 	const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
295 	uint8_t subtype;
296 	uint16_t i;
297 	uint16_t j;
298 	uint16_t k;
299 
300 	/* Copy slave list to protect against slave up/down changes during rx
301 	 * bursting */
302 	slave_count = internals->active_slave_count;
303 	memcpy(slaves, internals->active_slaves,
304 			sizeof(internals->active_slaves[0]) * slave_count);
305 
306 	idx = bd_rx_q->active_slave;
307 	if (idx >= slave_count) {
308 		bd_rx_q->active_slave = 0;
309 		idx = 0;
310 	}
311 	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
312 		j = num_rx_total;
313 		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
314 					 COLLECTING);
315 
316 		/* Read packets from this slave */
317 		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
318 				&bufs[num_rx_total], nb_pkts - num_rx_total);
319 
320 		for (k = j; k < 2 && k < num_rx_total; k++)
321 			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
322 
323 		/* Handle slow protocol packets. */
324 		while (j < num_rx_total) {
325 			if (j + 3 < num_rx_total)
326 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
327 
328 			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
329 			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
330 
331 			/* Remove the packet from the array if:
332 			 * - it is a slow packet but no dedicated rxq is present,
333 			 * - the slave is not in collecting state,
334 			 * - the bonding interface is not in promiscuous mode:
335 			 *   - the packet is unicast and the address does not match,
336 			 *   - the packet is multicast and the bonding interface
337 			 *     is not in allmulti.
338 			 */
339 			if (unlikely(
340 				(!dedicated_rxq &&
341 				 is_lacp_packets(hdr->ether_type, subtype,
342 						 bufs[j])) ||
343 				!collecting ||
344 				(!promisc &&
345 				 ((rte_is_unicast_ether_addr(&hdr->d_addr) &&
346 				   !rte_is_same_ether_addr(bond_mac,
347 						       &hdr->d_addr)) ||
348 				  (!allmulti &&
349 				   rte_is_multicast_ether_addr(&hdr->d_addr)))))) {
350 
351 				if (hdr->ether_type == ether_type_slow_be) {
352 					bond_mode_8023ad_handle_slow_pkt(
353 					    internals, slaves[idx], bufs[j]);
354 				} else
355 					rte_pktmbuf_free(bufs[j]);
356 
357 				/* Packet is managed by mode 4 or dropped, shift the array */
358 				num_rx_total--;
359 				if (j < num_rx_total) {
360 					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
361 						(num_rx_total - j));
362 				}
363 			} else
364 				j++;
365 		}
366 		if (unlikely(++idx == slave_count))
367 			idx = 0;
368 	}
369 
370 	if (++bd_rx_q->active_slave >= slave_count)
371 		bd_rx_q->active_slave = 0;
372 
373 	return num_rx_total;
374 }
375 
376 static uint16_t
377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
378 		uint16_t nb_pkts)
379 {
380 	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
381 }
382 
383 static uint16_t
384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
385 		uint16_t nb_pkts)
386 {
387 	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
388 }
389 
390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
391 uint32_t burstnumberRX;
392 uint32_t burstnumberTX;
393 
394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
395 
396 static void
397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
398 {
399 	switch (arp_op) {
400 	case RTE_ARP_OP_REQUEST:
401 		strlcpy(buf, "ARP Request", buf_len);
402 		return;
403 	case RTE_ARP_OP_REPLY:
404 		strlcpy(buf, "ARP Reply", buf_len);
405 		return;
406 	case RTE_ARP_OP_REVREQUEST:
407 		strlcpy(buf, "Reverse ARP Request", buf_len);
408 		return;
409 	case RTE_ARP_OP_REVREPLY:
410 		strlcpy(buf, "Reverse ARP Reply", buf_len);
411 		return;
412 	case RTE_ARP_OP_INVREQUEST:
413 		strlcpy(buf, "Peer Identify Request", buf_len);
414 		return;
415 	case RTE_ARP_OP_INVREPLY:
416 		strlcpy(buf, "Peer Identify Reply", buf_len);
417 		return;
418 	default:
419 		break;
420 	}
421 	strlcpy(buf, "Unknown", buf_len);
422 	return;
423 }
424 #endif
425 #define MaxIPv4String	16
426 static void
427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
428 {
429 	uint32_t ipv4_addr;
430 
431 	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
432 	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
433 		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
434 		ipv4_addr & 0xFF);
435 }
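/*
 * Example (illustrative): for an address stored in network byte order as
 * 192.168.0.1 the function prints "192.168.0.1"; MaxIPv4String (16) is
 * exactly enough for the worst case "255.255.255.255" plus the NUL byte.
 */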
436 
437 #define MAX_CLIENTS_NUMBER	128
438 uint8_t active_clients;
439 struct client_stats_t {
440 	uint16_t port;
441 	uint32_t ipv4_addr;
442 	uint32_t ipv4_rx_packets;
443 	uint32_t ipv4_tx_packets;
444 };
445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
446 
447 static void
448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
449 {
450 	int i = 0;
451 
452 	for (; i < MAX_CLIENTS_NUMBER; i++)	{
453 		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
454 			/* Existing client: update its RX or TX packet count */
455 			if (TXorRXindicator == &burstnumberRX)
456 				client_stats[i].ipv4_rx_packets++;
457 			else
458 				client_stats[i].ipv4_tx_packets++;
459 			return;
460 		}
461 	}
462 	/* We have a new client. Insert it into the table and update its stats */
463 	if (TXorRXindicator == &burstnumberRX)
464 		client_stats[active_clients].ipv4_rx_packets++;
465 	else
466 		client_stats[active_clients].ipv4_tx_packets++;
467 	client_stats[active_clients].ipv4_addr = addr;
468 	client_stats[active_clients].port = port;
469 	active_clients++;
470 
471 }
472 
473 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
474 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
475 	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
476 		"%s port:%d SrcMAC:" RTE_ETHER_ADDR_PRT_FMT " SrcIP:%s " \
477 		"DstMAC:" RTE_ETHER_ADDR_PRT_FMT " DstIP:%s %s %d\n", \
478 		info,							\
479 		port,							\
480 		RTE_ETHER_ADDR_BYTES(&eth_h->s_addr),                  \
481 		src_ip,							\
482 		RTE_ETHER_ADDR_BYTES(&eth_h->d_addr),                  \
483 		dst_ip,							\
484 		arp_op, ++burstnumber)
485 #endif
486 
487 static void
488 mode6_debug(const char __rte_unused *info,
489 	struct rte_ether_hdr *eth_h, uint16_t port,
490 	uint32_t __rte_unused *burstnumber)
491 {
492 	struct rte_ipv4_hdr *ipv4_h;
493 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
494 	struct rte_arp_hdr *arp_h;
495 	char dst_ip[16];
496 	char ArpOp[24];
497 	char buf[16];
498 #endif
499 	char src_ip[16];
500 
501 	uint16_t ether_type = eth_h->ether_type;
502 	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
503 
504 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
505 	strlcpy(buf, info, 16);
506 #endif
507 
508 	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
509 		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
510 		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
511 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
512 		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
513 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
514 #endif
515 		update_client_stats(ipv4_h->src_addr, port, burstnumber);
516 	}
517 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
518 	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
519 		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
520 		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
521 		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
522 		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
523 				ArpOp, sizeof(ArpOp));
524 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
525 	}
526 #endif
527 }
528 #endif
529 
530 static uint16_t
531 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
532 {
533 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
534 	struct bond_dev_private *internals = bd_rx_q->dev_private;
535 	struct rte_ether_hdr *eth_h;
536 	uint16_t ether_type, offset;
537 	uint16_t nb_recv_pkts;
538 	int i;
539 
540 	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
541 
542 	for (i = 0; i < nb_recv_pkts; i++) {
543 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
544 		ether_type = eth_h->ether_type;
545 		offset = get_vlan_offset(eth_h, &ether_type);
546 
547 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
548 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
549 			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
550 #endif
551 			bond_mode_alb_arp_recv(eth_h, offset, internals);
552 		}
553 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
554 		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
555 			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
556 #endif
557 	}
558 
559 	return nb_recv_pkts;
560 }
561 
562 static uint16_t
563 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
564 		uint16_t nb_pkts)
565 {
566 	struct bond_dev_private *internals;
567 	struct bond_tx_queue *bd_tx_q;
568 
569 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
570 	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
571 
572 	uint16_t num_of_slaves;
573 	uint16_t slaves[RTE_MAX_ETHPORTS];
574 
575 	uint16_t num_tx_total = 0, num_tx_slave;
576 
577 	static int slave_idx = 0;
578 	int i, cslave_idx = 0, tx_fail_total = 0;
579 
580 	bd_tx_q = (struct bond_tx_queue *)queue;
581 	internals = bd_tx_q->dev_private;
582 
583 	/* Copy slave list to protect against slave up/down changes during tx
584 	 * bursting */
585 	num_of_slaves = internals->active_slave_count;
586 	memcpy(slaves, internals->active_slaves,
587 			sizeof(internals->active_slaves[0]) * num_of_slaves);
588 
589 	if (num_of_slaves < 1)
590 		return num_tx_total;
591 
592 	/* Populate each slave's mbuf array with the packets to be sent on it */
593 	for (i = 0; i < nb_pkts; i++) {
594 		cslave_idx = (slave_idx + i) % num_of_slaves;
595 		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
596 	}
597 
598 	/* increment current slave index so the next call to tx burst starts on the
599 	 * next slave */
600 	slave_idx = ++cslave_idx;
601 
602 	/* Send packet burst on each slave device */
603 	for (i = 0; i < num_of_slaves; i++) {
604 		if (slave_nb_pkts[i] > 0) {
605 			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
606 					slave_bufs[i], slave_nb_pkts[i]);
607 
608 			/* if tx burst fails move packets to end of bufs */
609 			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
610 				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
611 
612 				tx_fail_total += tx_fail_slave;
613 
614 				memcpy(&bufs[nb_pkts - tx_fail_total],
615 				       &slave_bufs[i][num_tx_slave],
616 				       tx_fail_slave * sizeof(bufs[0]));
617 			}
618 			num_tx_total += num_tx_slave;
619 		}
620 	}
621 
622 	return num_tx_total;
623 }
624 
625 static uint16_t
626 bond_ethdev_tx_burst_active_backup(void *queue,
627 		struct rte_mbuf **bufs, uint16_t nb_pkts)
628 {
629 	struct bond_dev_private *internals;
630 	struct bond_tx_queue *bd_tx_q;
631 
632 	bd_tx_q = (struct bond_tx_queue *)queue;
633 	internals = bd_tx_q->dev_private;
634 
635 	if (internals->active_slave_count < 1)
636 		return 0;
637 
638 	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
639 			bufs, nb_pkts);
640 }
641 
642 static inline uint16_t
643 ether_hash(struct rte_ether_hdr *eth_hdr)
644 {
645 	unaligned_uint16_t *word_src_addr =
646 		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
647 	unaligned_uint16_t *word_dst_addr =
648 		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
649 
650 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
651 			(word_src_addr[1] ^ word_dst_addr[1]) ^
652 			(word_src_addr[2] ^ word_dst_addr[2]);
653 }
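/*
 * ether_hash() XOR-folds the 6-byte source and destination MAC addresses as
 * three 16-bit words each, so frames differing only in their MAC pair can
 * land on different slaves while a given flow stays on one slave.
 */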
654 
655 static inline uint32_t
656 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
657 {
658 	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
659 }
660 
661 static inline uint32_t
662 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
663 {
664 	unaligned_uint32_t *word_src_addr =
665 		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
666 	unaligned_uint32_t *word_dst_addr =
667 		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
668 
669 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
670 			(word_src_addr[1] ^ word_dst_addr[1]) ^
671 			(word_src_addr[2] ^ word_dst_addr[2]) ^
672 			(word_src_addr[3] ^ word_dst_addr[3]);
673 }
674 
675 
676 void
677 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
678 		uint16_t slave_count, uint16_t *slaves)
679 {
680 	struct rte_ether_hdr *eth_hdr;
681 	uint32_t hash;
682 	int i;
683 
684 	for (i = 0; i < nb_pkts; i++) {
685 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
686 
687 		hash = ether_hash(eth_hdr);
688 
689 		slaves[i] = (hash ^= hash >> 8) % slave_count;
690 	}
691 }
692 
693 void
694 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
695 		uint16_t slave_count, uint16_t *slaves)
696 {
697 	uint16_t i;
698 	struct rte_ether_hdr *eth_hdr;
699 	uint16_t proto;
700 	size_t vlan_offset;
701 	uint32_t hash, l3hash;
702 
703 	for (i = 0; i < nb_pkts; i++) {
704 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
705 		l3hash = 0;
706 
707 		proto = eth_hdr->ether_type;
708 		hash = ether_hash(eth_hdr);
709 
710 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
711 
712 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
713 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
714 					((char *)(eth_hdr + 1) + vlan_offset);
715 			l3hash = ipv4_hash(ipv4_hdr);
716 
717 		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
718 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
719 					((char *)(eth_hdr + 1) + vlan_offset);
720 			l3hash = ipv6_hash(ipv6_hdr);
721 		}
722 
723 		hash = hash ^ l3hash;
724 		hash ^= hash >> 16;
725 		hash ^= hash >> 8;
726 
727 		slaves[i] = hash % slave_count;
728 	}
729 }
730 
731 void
732 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
733 		uint16_t slave_count, uint16_t *slaves)
734 {
735 	struct rte_ether_hdr *eth_hdr;
736 	uint16_t proto;
737 	size_t vlan_offset;
738 	int i;
739 
740 	struct rte_udp_hdr *udp_hdr;
741 	struct rte_tcp_hdr *tcp_hdr;
742 	uint32_t hash, l3hash, l4hash;
743 
744 	for (i = 0; i < nb_pkts; i++) {
745 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
746 		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
747 		proto = eth_hdr->ether_type;
748 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
749 		l3hash = 0;
750 		l4hash = 0;
751 
752 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
753 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
754 					((char *)(eth_hdr + 1) + vlan_offset);
755 			size_t ip_hdr_offset;
756 
757 			l3hash = ipv4_hash(ipv4_hdr);
758 
759 			/* there is no L4 header in a fragmented packet */
760 			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
761 								== 0)) {
762 				ip_hdr_offset = (ipv4_hdr->version_ihl
763 					& RTE_IPV4_HDR_IHL_MASK) *
764 					RTE_IPV4_IHL_MULTIPLIER;
765 
766 				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
767 					tcp_hdr = (struct rte_tcp_hdr *)
768 						((char *)ipv4_hdr +
769 							ip_hdr_offset);
770 					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
771 							< pkt_end)
772 						l4hash = HASH_L4_PORTS(tcp_hdr);
773 				} else if (ipv4_hdr->next_proto_id ==
774 								IPPROTO_UDP) {
775 					udp_hdr = (struct rte_udp_hdr *)
776 						((char *)ipv4_hdr +
777 							ip_hdr_offset);
778 					if ((size_t)udp_hdr + sizeof(*udp_hdr)
779 							< pkt_end)
780 						l4hash = HASH_L4_PORTS(udp_hdr);
781 				}
782 			}
783 		} else if  (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
784 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
785 					((char *)(eth_hdr + 1) + vlan_offset);
786 			l3hash = ipv6_hash(ipv6_hdr);
787 
788 			if (ipv6_hdr->proto == IPPROTO_TCP) {
789 				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
790 				l4hash = HASH_L4_PORTS(tcp_hdr);
791 			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
792 				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
793 				l4hash = HASH_L4_PORTS(udp_hdr);
794 			}
795 		}
796 
797 		hash = l3hash ^ l4hash;
798 		hash ^= hash >> 16;
799 		hash ^= hash >> 8;
800 
801 		slaves[i] = hash % slave_count;
802 	}
803 }
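/*
 * Which of the three burst_xmit_*_hash callbacks above is used is chosen per
 * bonded device through the public API (a minimal sketch, names from
 * rte_eth_bond.h):
 *
 *   rte_eth_bond_xmit_policy_set(bonded_port_id, BALANCE_XMIT_POLICY_LAYER34);
 *
 * LAYER2 selects burst_xmit_l2_hash, LAYER23 burst_xmit_l23_hash and
 * LAYER34 burst_xmit_l34_hash.
 */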
804 
805 struct bwg_slave {
806 	uint64_t bwg_left_int;
807 	uint64_t bwg_left_remainder;
808 	uint16_t slave;
809 };
810 
811 void
812 bond_tlb_activate_slave(struct bond_dev_private *internals) {
813 	int i;
814 
815 	for (i = 0; i < internals->active_slave_count; i++) {
816 		tlb_last_obytets[internals->active_slaves[i]] = 0;
817 	}
818 }
819 
820 static int
821 bandwidth_cmp(const void *a, const void *b)
822 {
823 	const struct bwg_slave *bwg_a = a;
824 	const struct bwg_slave *bwg_b = b;
825 	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
826 	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
827 			(int64_t)bwg_a->bwg_left_remainder;
828 	if (diff > 0)
829 		return 1;
830 	else if (diff < 0)
831 		return -1;
832 	else if (diff2 > 0)
833 		return 1;
834 	else if (diff2 < 0)
835 		return -1;
836 	else
837 		return 0;
838 }
839 
840 static void
841 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
842 		struct bwg_slave *bwg_slave)
843 {
844 	struct rte_eth_link link_status;
845 	int ret;
846 
847 	ret = rte_eth_link_get_nowait(port_id, &link_status);
848 	if (ret < 0) {
849 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
850 			     port_id, rte_strerror(-ret));
851 		return;
852 	}
853 	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
854 	if (link_bwg == 0)
855 		return;
856 	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
857 	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
858 	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
859 }
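/*
 * Worked example (illustrative): for a 10G slave, link_status.link_speed is
 * 10000 (Mbps), so link_bwg starts at 10000 * 1000000 / 8 = 1.25e9 bytes/s
 * before being scaled by (update_idx + 1) * REORDER_PERIOD_MS; "load" is the
 * byte count transmitted since the last stats snapshot.
 */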
860 
861 static void
862 bond_ethdev_update_tlb_slave_cb(void *arg)
863 {
864 	struct bond_dev_private *internals = arg;
865 	struct rte_eth_stats slave_stats;
866 	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
867 	uint16_t slave_count;
868 	uint64_t tx_bytes;
869 
870 	uint8_t update_stats = 0;
871 	uint16_t slave_id;
872 	uint16_t i;
873 
874 	internals->slave_update_idx++;
875 
876 
877 	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
878 		update_stats = 1;
879 
880 	for (i = 0; i < internals->active_slave_count; i++) {
881 		slave_id = internals->active_slaves[i];
882 		rte_eth_stats_get(slave_id, &slave_stats);
883 		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
884 		bandwidth_left(slave_id, tx_bytes,
885 				internals->slave_update_idx, &bwg_array[i]);
886 		bwg_array[i].slave = slave_id;
887 
888 		if (update_stats) {
889 			tlb_last_obytets[slave_id] = slave_stats.obytes;
890 		}
891 	}
892 
893 	if (update_stats == 1)
894 		internals->slave_update_idx = 0;
895 
896 	slave_count = i;
897 	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
898 	for (i = 0; i < slave_count; i++)
899 		internals->tlb_slaves_order[i] = bwg_array[i].slave;
900 
901 	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
902 			(struct bond_dev_private *)internals);
903 }
904 
905 static uint16_t
906 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
907 {
908 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
909 	struct bond_dev_private *internals = bd_tx_q->dev_private;
910 
911 	struct rte_eth_dev *primary_port =
912 			&rte_eth_devices[internals->primary_port];
913 	uint16_t num_tx_total = 0;
914 	uint16_t i, j;
915 
916 	uint16_t num_of_slaves = internals->active_slave_count;
917 	uint16_t slaves[RTE_MAX_ETHPORTS];
918 
919 	struct rte_ether_hdr *ether_hdr;
920 	struct rte_ether_addr primary_slave_addr;
921 	struct rte_ether_addr active_slave_addr;
922 
923 	if (num_of_slaves < 1)
924 		return num_tx_total;
925 
926 	memcpy(slaves, internals->tlb_slaves_order,
927 				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
928 
929 
930 	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
931 
932 	if (nb_pkts > 3) {
933 		for (i = 0; i < 3; i++)
934 			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
935 	}
936 
937 	for (i = 0; i < num_of_slaves; i++) {
938 		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
939 		for (j = num_tx_total; j < nb_pkts; j++) {
940 			if (j + 3 < nb_pkts)
941 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
942 
943 			ether_hdr = rte_pktmbuf_mtod(bufs[j],
944 						struct rte_ether_hdr *);
945 			if (rte_is_same_ether_addr(&ether_hdr->s_addr,
946 							&primary_slave_addr))
947 				rte_ether_addr_copy(&active_slave_addr,
948 						&ether_hdr->s_addr);
949 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
950 					mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
951 #endif
952 		}
953 
954 		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
955 				bufs + num_tx_total, nb_pkts - num_tx_total);
956 
957 		if (num_tx_total == nb_pkts)
958 			break;
959 	}
960 
961 	return num_tx_total;
962 }
963 
964 void
965 bond_tlb_disable(struct bond_dev_private *internals)
966 {
967 	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
968 }
969 
970 void
971 bond_tlb_enable(struct bond_dev_private *internals)
972 {
973 	bond_ethdev_update_tlb_slave_cb(internals);
974 }
975 
976 static uint16_t
977 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
978 {
979 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
980 	struct bond_dev_private *internals = bd_tx_q->dev_private;
981 
982 	struct rte_ether_hdr *eth_h;
983 	uint16_t ether_type, offset;
984 
985 	struct client_data *client_info;
986 
987 	/*
988 	 * We create transmit buffers for every slave and one additional buffer to
989 	 * send through tlb. In the worst case every packet will be sent on one port.
990 	 */
991 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
992 	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
993 
994 	/*
995 	 * We create separate transmit buffers for update packets as they won't
996 	 * be counted in num_tx_total.
997 	 */
998 	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
999 	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1000 
1001 	struct rte_mbuf *upd_pkt;
1002 	size_t pkt_size;
1003 
1004 	uint16_t num_send, num_not_send = 0;
1005 	uint16_t num_tx_total = 0;
1006 	uint16_t slave_idx;
1007 
1008 	int i, j;
1009 
1010 	/* Search tx buffer for ARP packets and forward them to alb */
1011 	for (i = 0; i < nb_pkts; i++) {
1012 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1013 		ether_type = eth_h->ether_type;
1014 		offset = get_vlan_offset(eth_h, &ether_type);
1015 
1016 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1017 			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1018 
1019 			/* Change src mac in eth header */
1020 			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1021 
1022 			/* Add packet to slave tx buffer */
1023 			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1024 			slave_bufs_pkts[slave_idx]++;
1025 		} else {
1026 			/* If packet is not ARP, send it with TLB policy */
1027 			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1028 					bufs[i];
1029 			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1030 		}
1031 	}
1032 
1033 	/* Update connected client ARP tables */
1034 	if (internals->mode6.ntt) {
1035 		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1036 			client_info = &internals->mode6.client_table[i];
1037 
1038 			if (client_info->in_use) {
1039 				/* Allocate new packet to send ARP update on current slave */
1040 				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1041 				if (upd_pkt == NULL) {
1042 					RTE_BOND_LOG(ERR,
1043 						     "Failed to allocate ARP packet from pool");
1044 					continue;
1045 				}
1046 				pkt_size = sizeof(struct rte_ether_hdr) +
1047 					sizeof(struct rte_arp_hdr) +
1048 					client_info->vlan_count *
1049 					sizeof(struct rte_vlan_hdr);
1050 				upd_pkt->data_len = pkt_size;
1051 				upd_pkt->pkt_len = pkt_size;
1052 
1053 				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1054 						internals);
1055 
1056 				/* Add packet to update tx buffer */
1057 				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1058 				update_bufs_pkts[slave_idx]++;
1059 			}
1060 		}
1061 		internals->mode6.ntt = 0;
1062 	}
1063 
1064 	/* Send ARP packets on proper slaves */
1065 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1066 		if (slave_bufs_pkts[i] > 0) {
1067 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1068 					slave_bufs[i], slave_bufs_pkts[i]);
1069 			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1070 				bufs[nb_pkts - 1 - num_not_send - j] =
1071 						slave_bufs[i][nb_pkts - 1 - j];
1072 			}
1073 
1074 			num_tx_total += num_send;
1075 			num_not_send += slave_bufs_pkts[i] - num_send;
1076 
1077 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1078 	/* Print TX stats including update packets */
1079 			for (j = 0; j < slave_bufs_pkts[i]; j++) {
1080 				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1081 							struct rte_ether_hdr *);
1082 				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1083 			}
1084 #endif
1085 		}
1086 	}
1087 
1088 	/* Send update packets on proper slaves */
1089 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1090 		if (update_bufs_pkts[i] > 0) {
1091 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1092 					update_bufs_pkts[i]);
1093 			for (j = num_send; j < update_bufs_pkts[i]; j++) {
1094 				rte_pktmbuf_free(update_bufs[i][j]);
1095 			}
1096 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1097 			for (j = 0; j < update_bufs_pkts[i]; j++) {
1098 				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1099 							struct rte_ether_hdr *);
1100 				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1101 			}
1102 #endif
1103 		}
1104 	}
1105 
1106 	/* Send non-ARP packets using tlb policy */
1107 	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1108 		num_send = bond_ethdev_tx_burst_tlb(queue,
1109 				slave_bufs[RTE_MAX_ETHPORTS],
1110 				slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1111 
1112 		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1113 			bufs[nb_pkts - 1 - num_not_send - j] =
1114 					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1115 		}
1116 
1117 		num_tx_total += num_send;
1118 	}
1119 
1120 	return num_tx_total;
1121 }
1122 
1123 static inline uint16_t
1124 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1125 		 uint16_t *slave_port_ids, uint16_t slave_count)
1126 {
1127 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1128 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1129 
1130 	/* Array to sort mbufs for transmission on each slave into */
1131 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1132 	/* Number of mbufs for transmission on each slave */
1133 	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1134 	/* Mapping array generated by hash function to map mbufs to slaves */
1135 	uint16_t bufs_slave_port_idxs[nb_bufs];
1136 
1137 	uint16_t slave_tx_count;
1138 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1139 
1140 	uint16_t i;
1141 
1142 	/*
1143 	 * Populate each slave's mbuf array with the packets to be sent on it,
1144 	 * selecting the output slave with a hash based on the xmit policy
1145 	 */
1146 	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1147 			bufs_slave_port_idxs);
1148 
1149 	for (i = 0; i < nb_bufs; i++) {
1150 		/* Populate slave mbuf arrays with mbufs for that slave. */
1151 		uint16_t slave_idx = bufs_slave_port_idxs[i];
1152 
1153 		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1154 	}
1155 
1156 	/* Send packet burst on each slave device */
1157 	for (i = 0; i < slave_count; i++) {
1158 		if (slave_nb_bufs[i] == 0)
1159 			continue;
1160 
1161 		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1162 				bd_tx_q->queue_id, slave_bufs[i],
1163 				slave_nb_bufs[i]);
1164 
1165 		total_tx_count += slave_tx_count;
1166 
1167 		/* If tx burst fails move packets to end of bufs */
1168 		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1169 			int slave_tx_fail_count = slave_nb_bufs[i] -
1170 					slave_tx_count;
1171 			total_tx_fail_count += slave_tx_fail_count;
1172 			memcpy(&bufs[nb_bufs - total_tx_fail_count],
1173 			       &slave_bufs[i][slave_tx_count],
1174 			       slave_tx_fail_count * sizeof(bufs[0]));
1175 		}
1176 	}
1177 
1178 	return total_tx_count;
1179 }
1180 
1181 static uint16_t
1182 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1183 		uint16_t nb_bufs)
1184 {
1185 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1186 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1187 
1188 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1189 	uint16_t slave_count;
1190 
1191 	if (unlikely(nb_bufs == 0))
1192 		return 0;
1193 
1194 	/* Copy slave list to protect against slave up/down changes during tx
1195 	 * bursting
1196 	 */
1197 	slave_count = internals->active_slave_count;
1198 	if (unlikely(slave_count < 1))
1199 		return 0;
1200 
1201 	memcpy(slave_port_ids, internals->active_slaves,
1202 			sizeof(slave_port_ids[0]) * slave_count);
1203 	return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1204 				slave_count);
1205 }
1206 
1207 static inline uint16_t
1208 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1209 		bool dedicated_txq)
1210 {
1211 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1212 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1213 
1214 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1215 	uint16_t slave_count;
1216 
1217 	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1218 	uint16_t dist_slave_count;
1219 
1220 	uint16_t slave_tx_count;
1221 
1222 	uint16_t i;
1223 
1224 	/* Copy slave list to protect against slave up/down changes during tx
1225 	 * bursting */
1226 	slave_count = internals->active_slave_count;
1227 	if (unlikely(slave_count < 1))
1228 		return 0;
1229 
1230 	memcpy(slave_port_ids, internals->active_slaves,
1231 			sizeof(slave_port_ids[0]) * slave_count);
1232 
1233 	if (dedicated_txq)
1234 		goto skip_tx_ring;
1235 
1236 	/* Check for LACP control packets and send if available */
1237 	for (i = 0; i < slave_count; i++) {
1238 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1239 		struct rte_mbuf *ctrl_pkt = NULL;
1240 
1241 		if (likely(rte_ring_empty(port->tx_ring)))
1242 			continue;
1243 
1244 		if (rte_ring_dequeue(port->tx_ring,
1245 				     (void **)&ctrl_pkt) != -ENOENT) {
1246 			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1247 					bd_tx_q->queue_id, &ctrl_pkt, 1);
1248 			/*
1249 			 * re-enqueue LAG control plane packets to buffering
1250 			 * ring if transmission fails so the packet isn't lost.
1251 			 */
1252 			if (slave_tx_count != 1)
1253 				rte_ring_enqueue(port->tx_ring,	ctrl_pkt);
1254 		}
1255 	}
1256 
1257 skip_tx_ring:
1258 	if (unlikely(nb_bufs == 0))
1259 		return 0;
1260 
1261 	dist_slave_count = 0;
1262 	for (i = 0; i < slave_count; i++) {
1263 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1264 
1265 		if (ACTOR_STATE(port, DISTRIBUTING))
1266 			dist_slave_port_ids[dist_slave_count++] =
1267 					slave_port_ids[i];
1268 	}
1269 
1270 	if (unlikely(dist_slave_count < 1))
1271 		return 0;
1272 
1273 	return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1274 				dist_slave_count);
1275 }
1276 
1277 static uint16_t
1278 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1279 		uint16_t nb_bufs)
1280 {
1281 	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1282 }
1283 
1284 static uint16_t
1285 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1286 		uint16_t nb_bufs)
1287 {
1288 	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
1289 }
1290 
1291 static uint16_t
1292 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1293 		uint16_t nb_pkts)
1294 {
1295 	struct bond_dev_private *internals;
1296 	struct bond_tx_queue *bd_tx_q;
1297 
1298 	uint16_t slaves[RTE_MAX_ETHPORTS];
1299 	uint8_t tx_failed_flag = 0;
1300 	uint16_t num_of_slaves;
1301 
1302 	uint16_t max_nb_of_tx_pkts = 0;
1303 
1304 	int slave_tx_total[RTE_MAX_ETHPORTS];
1305 	int i, most_successful_tx_slave = -1;
1306 
1307 	bd_tx_q = (struct bond_tx_queue *)queue;
1308 	internals = bd_tx_q->dev_private;
1309 
1310 	/* Copy slave list to protect against slave up/down changes during tx
1311 	 * bursting */
1312 	num_of_slaves = internals->active_slave_count;
1313 	memcpy(slaves, internals->active_slaves,
1314 			sizeof(internals->active_slaves[0]) * num_of_slaves);
1315 
1316 	if (num_of_slaves < 1)
1317 		return 0;
1318 
1319 	/* Increment reference count on mbufs */
1320 	for (i = 0; i < nb_pkts; i++)
1321 		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1322 
1323 	/* Transmit burst on each active slave */
1324 	for (i = 0; i < num_of_slaves; i++) {
1325 		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1326 					bufs, nb_pkts);
1327 
1328 		if (unlikely(slave_tx_total[i] < nb_pkts))
1329 			tx_failed_flag = 1;
1330 
1331 		/* record the value and slave index for the slave which transmits the
1332 		 * maximum number of packets */
1333 		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1334 			max_nb_of_tx_pkts = slave_tx_total[i];
1335 			most_successful_tx_slave = i;
1336 		}
1337 	}
1338 
1339 	/* if slaves fail to transmit packets from burst, the calling application
1340 	 * is not expected to know about multiple references to packets so we must
1341 	 * handle failures of all packets except those of the most successful slave
1342 	 */
1343 	if (unlikely(tx_failed_flag))
1344 		for (i = 0; i < num_of_slaves; i++)
1345 			if (i != most_successful_tx_slave)
1346 				while (slave_tx_total[i] < nb_pkts)
1347 					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1348 
1349 	return max_nb_of_tx_pkts;
1350 }
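/*
 * Note on the refcount handling above: each mbuf's reference count is raised
 * by (num_of_slaves - 1) so that every slave's tx burst consumes exactly one
 * reference; on partial failure, for every slave except the most successful
 * one, the references for the packets that slave failed to send are released
 * here, and the most successful slave's count is what the caller sees as
 * transmitted.
 */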
1351 
1352 static void
1353 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1354 {
1355 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1356 
1357 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1358 		/**
1359 		 * If in mode 4 then save the link properties of the first
1360 		 * slave; all subsequent slaves must match these properties
1361 		 */
1362 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1363 
1364 		bond_link->link_autoneg = slave_link->link_autoneg;
1365 		bond_link->link_duplex = slave_link->link_duplex;
1366 		bond_link->link_speed = slave_link->link_speed;
1367 	} else {
1368 		/**
1369 		 * In any other mode the link properties are set to default
1370 		 * values of AUTONEG/DUPLEX
1371 		 */
1372 		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1373 		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1374 	}
1375 }
1376 
1377 static int
1378 link_properties_valid(struct rte_eth_dev *ethdev,
1379 		struct rte_eth_link *slave_link)
1380 {
1381 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1382 
1383 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1384 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1385 
1386 		if (bond_link->link_duplex != slave_link->link_duplex ||
1387 			bond_link->link_autoneg != slave_link->link_autoneg ||
1388 			bond_link->link_speed != slave_link->link_speed)
1389 			return -1;
1390 	}
1391 
1392 	return 0;
1393 }
1394 
1395 int
1396 mac_address_get(struct rte_eth_dev *eth_dev,
1397 		struct rte_ether_addr *dst_mac_addr)
1398 {
1399 	struct rte_ether_addr *mac_addr;
1400 
1401 	if (eth_dev == NULL) {
1402 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1403 		return -1;
1404 	}
1405 
1406 	if (dst_mac_addr == NULL) {
1407 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1408 		return -1;
1409 	}
1410 
1411 	mac_addr = eth_dev->data->mac_addrs;
1412 
1413 	rte_ether_addr_copy(mac_addr, dst_mac_addr);
1414 	return 0;
1415 }
1416 
1417 int
1418 mac_address_set(struct rte_eth_dev *eth_dev,
1419 		struct rte_ether_addr *new_mac_addr)
1420 {
1421 	struct rte_ether_addr *mac_addr;
1422 
1423 	if (eth_dev == NULL) {
1424 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1425 		return -1;
1426 	}
1427 
1428 	if (new_mac_addr == NULL) {
1429 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1430 		return -1;
1431 	}
1432 
1433 	mac_addr = eth_dev->data->mac_addrs;
1434 
1435 	/* If the new MAC is different from the current MAC then update */
1436 	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1437 		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1438 
1439 	return 0;
1440 }
1441 
1442 static const struct rte_ether_addr null_mac_addr;
1443 
1444 /*
1445  * Add additional MAC addresses to the slave
1446  */
1447 int
1448 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1449 		uint16_t slave_port_id)
1450 {
1451 	int i, ret;
1452 	struct rte_ether_addr *mac_addr;
1453 
1454 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1455 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1456 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1457 			break;
1458 
1459 		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1460 		if (ret < 0) {
1461 			/* rollback */
1462 			for (i--; i > 0; i--)
1463 				rte_eth_dev_mac_addr_remove(slave_port_id,
1464 					&bonded_eth_dev->data->mac_addrs[i]);
1465 			return ret;
1466 		}
1467 	}
1468 
1469 	return 0;
1470 }
1471 
1472 /*
1473  * Remove additional MAC addresses from the slave
1474  */
1475 int
1476 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1477 		uint16_t slave_port_id)
1478 {
1479 	int i, rc, ret;
1480 	struct rte_ether_addr *mac_addr;
1481 
1482 	rc = 0;
1483 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1484 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1485 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1486 			break;
1487 
1488 		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1489 		/* save only the first error */
1490 		if (ret < 0 && rc == 0)
1491 			rc = ret;
1492 	}
1493 
1494 	return rc;
1495 }
1496 
1497 int
1498 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1499 {
1500 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1501 	bool set;
1502 	int i;
1503 
1504 	/* Update slave devices MAC addresses */
1505 	if (internals->slave_count < 1)
1506 		return -1;
1507 
1508 	switch (internals->mode) {
1509 	case BONDING_MODE_ROUND_ROBIN:
1510 	case BONDING_MODE_BALANCE:
1511 	case BONDING_MODE_BROADCAST:
1512 		for (i = 0; i < internals->slave_count; i++) {
1513 			if (rte_eth_dev_default_mac_addr_set(
1514 					internals->slaves[i].port_id,
1515 					bonded_eth_dev->data->mac_addrs)) {
1516 				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1517 						internals->slaves[i].port_id);
1518 				return -1;
1519 			}
1520 		}
1521 		break;
1522 	case BONDING_MODE_8023AD:
1523 		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1524 		break;
1525 	case BONDING_MODE_ACTIVE_BACKUP:
1526 	case BONDING_MODE_TLB:
1527 	case BONDING_MODE_ALB:
1528 	default:
1529 		set = true;
1530 		for (i = 0; i < internals->slave_count; i++) {
1531 			if (internals->slaves[i].port_id ==
1532 					internals->current_primary_port) {
1533 				if (rte_eth_dev_default_mac_addr_set(
1534 						internals->current_primary_port,
1535 						bonded_eth_dev->data->mac_addrs)) {
1536 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1537 							internals->current_primary_port);
1538 					set = false;
1539 				}
1540 			} else {
1541 				if (rte_eth_dev_default_mac_addr_set(
1542 						internals->slaves[i].port_id,
1543 						&internals->slaves[i].persisted_mac_addr)) {
1544 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1545 							internals->slaves[i].port_id);
1546 				}
1547 			}
1548 		}
1549 		if (!set)
1550 			return -1;
1551 	}
1552 
1553 	return 0;
1554 }
1555 
1556 int
1557 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1558 {
1559 	struct bond_dev_private *internals;
1560 
1561 	internals = eth_dev->data->dev_private;
1562 
1563 	switch (mode) {
1564 	case BONDING_MODE_ROUND_ROBIN:
1565 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1566 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1567 		break;
1568 	case BONDING_MODE_ACTIVE_BACKUP:
1569 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1570 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1571 		break;
1572 	case BONDING_MODE_BALANCE:
1573 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1574 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1575 		break;
1576 	case BONDING_MODE_BROADCAST:
1577 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1578 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579 		break;
1580 	case BONDING_MODE_8023AD:
1581 		if (bond_mode_8023ad_enable(eth_dev) != 0)
1582 			return -1;
1583 
1584 		if (internals->mode4.dedicated_queues.enabled == 0) {
1585 			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1586 			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1587 			RTE_BOND_LOG(WARNING,
1588 				"Using mode 4, it is necessary to do TX burst "
1589 				"and RX burst at least every 100ms.");
1590 		} else {
1591 			/* Use flow director's optimization */
1592 			eth_dev->rx_pkt_burst =
1593 					bond_ethdev_rx_burst_8023ad_fast_queue;
1594 			eth_dev->tx_pkt_burst =
1595 					bond_ethdev_tx_burst_8023ad_fast_queue;
1596 		}
1597 		break;
1598 	case BONDING_MODE_TLB:
1599 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1600 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1601 		break;
1602 	case BONDING_MODE_ALB:
1603 		if (bond_mode_alb_enable(eth_dev) != 0)
1604 			return -1;
1605 
1606 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1607 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1608 		break;
1609 	default:
1610 		return -1;
1611 	}
1612 
1613 	internals->mode = mode;
1614 
1615 	return 0;
1616 }
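/*
 * A minimal control-path sketch that ends up in this function (public API,
 * names and values assumed for illustration):
 *
 *   int port_id = rte_eth_bond_create("net_bonding0", BONDING_MODE_8023AD, 0);
 *   rte_eth_bond_slave_add(port_id, slave0);
 *   rte_eth_bond_slave_add(port_id, slave1);
 *
 * Both rte_eth_bond_create() and rte_eth_bond_mode_set() call
 * bond_ethdev_mode_set() to install the rx/tx burst handlers matching the
 * selected mode.
 */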
1617 
1618 
1619 static int
1620 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1621 		struct rte_eth_dev *slave_eth_dev)
1622 {
1623 	int errval = 0;
1624 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1625 	struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1626 
1627 	if (port->slow_pool == NULL) {
1628 		char mem_name[256];
1629 		int slave_id = slave_eth_dev->data->port_id;
1630 
1631 		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1632 				slave_id);
1633 		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1634 			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1635 			slave_eth_dev->data->numa_node);
1636 
1637 		/* Any memory allocation failure in initialization is critical because
1638 		 * resources can't be freed, so reinitialization is impossible. */
1639 		if (port->slow_pool == NULL) {
1640 			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1641 				slave_id, mem_name, rte_strerror(rte_errno));
1642 		}
1643 	}
1644 
1645 	if (internals->mode4.dedicated_queues.enabled == 1) {
1646 		/* Configure slow Rx queue */
1647 
1648 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1649 				internals->mode4.dedicated_queues.rx_qid, 128,
1650 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1651 				NULL, port->slow_pool);
1652 		if (errval != 0) {
1653 			RTE_BOND_LOG(ERR,
1654 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1655 					slave_eth_dev->data->port_id,
1656 					internals->mode4.dedicated_queues.rx_qid,
1657 					errval);
1658 			return errval;
1659 		}
1660 
1661 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1662 				internals->mode4.dedicated_queues.tx_qid, 512,
1663 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1664 				NULL);
1665 		if (errval != 0) {
1666 			RTE_BOND_LOG(ERR,
1667 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1668 				slave_eth_dev->data->port_id,
1669 				internals->mode4.dedicated_queues.tx_qid,
1670 				errval);
1671 			return errval;
1672 		}
1673 	}
1674 	return 0;
1675 }
1676 
1677 int
1678 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1679 		struct rte_eth_dev *slave_eth_dev)
1680 {
1681 	struct bond_rx_queue *bd_rx_q;
1682 	struct bond_tx_queue *bd_tx_q;
1683 	uint16_t nb_rx_queues;
1684 	uint16_t nb_tx_queues;
1685 
1686 	int errval;
1687 	uint16_t q_id;
1688 	struct rte_flow_error flow_error;
1689 
1690 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1691 
1692 	/* Stop slave */
1693 	errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1694 	if (errval != 0)
1695 		RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1696 			     slave_eth_dev->data->port_id, errval);
1697 
1698 	/* Enable interrupts on slave device if supported */
1699 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1700 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1701 
1702 	/* If RSS is enabled for bonding, try to enable it for slaves  */
1703 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1704 		if (internals->rss_key_len != 0) {
1705 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1706 					internals->rss_key_len;
1707 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1708 					internals->rss_key;
1709 		} else {
1710 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1711 		}
1712 
1713 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1714 				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1715 		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1716 				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1717 	}
1718 
1719 	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1720 			DEV_RX_OFFLOAD_VLAN_FILTER)
1721 		slave_eth_dev->data->dev_conf.rxmode.offloads |=
1722 				DEV_RX_OFFLOAD_VLAN_FILTER;
1723 	else
1724 		slave_eth_dev->data->dev_conf.rxmode.offloads &=
1725 				~DEV_RX_OFFLOAD_VLAN_FILTER;
1726 
1727 	slave_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1728 			bonded_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1729 
1730 	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1731 			DEV_RX_OFFLOAD_JUMBO_FRAME)
1732 		slave_eth_dev->data->dev_conf.rxmode.offloads |=
1733 				DEV_RX_OFFLOAD_JUMBO_FRAME;
1734 	else
1735 		slave_eth_dev->data->dev_conf.rxmode.offloads &=
1736 				~DEV_RX_OFFLOAD_JUMBO_FRAME;
1737 
1738 	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1739 	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1740 
1741 	if (internals->mode == BONDING_MODE_8023AD) {
1742 		if (internals->mode4.dedicated_queues.enabled == 1) {
1743 			nb_rx_queues++;
1744 			nb_tx_queues++;
1745 		}
1746 	}
1747 
1748 	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1749 				     bonded_eth_dev->data->mtu);
1750 	if (errval != 0 && errval != -ENOTSUP) {
1751 		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1752 				slave_eth_dev->data->port_id, errval);
1753 		return errval;
1754 	}
1755 
1756 	/* Configure device */
1757 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1758 			nb_rx_queues, nb_tx_queues,
1759 			&(slave_eth_dev->data->dev_conf));
1760 	if (errval != 0) {
1761 		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1762 				slave_eth_dev->data->port_id, errval);
1763 		return errval;
1764 	}
1765 
1766 	/* Setup Rx Queues */
1767 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1768 		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1769 
1770 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1771 				bd_rx_q->nb_rx_desc,
1772 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1773 				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1774 		if (errval != 0) {
1775 			RTE_BOND_LOG(ERR,
1776 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1777 					slave_eth_dev->data->port_id, q_id, errval);
1778 			return errval;
1779 		}
1780 	}
1781 
1782 	/* Setup Tx Queues */
1783 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1784 		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1785 
1786 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1787 				bd_tx_q->nb_tx_desc,
1788 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1789 				&bd_tx_q->tx_conf);
1790 		if (errval != 0) {
1791 			RTE_BOND_LOG(ERR,
1792 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1793 				slave_eth_dev->data->port_id, q_id, errval);
1794 			return errval;
1795 		}
1796 	}
1797 
1798 	if (internals->mode == BONDING_MODE_8023AD &&
1799 			internals->mode4.dedicated_queues.enabled == 1) {
1800 		errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1801 		if (errval != 0)
1802 			return errval;
1803 
1804 		errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1805 				slave_eth_dev->data->port_id);
1806 		if (errval != 0) {
1807 			RTE_BOND_LOG(ERR,
1808 				"bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1809 				slave_eth_dev->data->port_id, errval);
1810 			return errval;
1811 		}
1812 
1813 		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1814 			rte_flow_destroy(slave_eth_dev->data->port_id,
1815 					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1816 					&flow_error);
1817 
1818 		errval = bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1819 				slave_eth_dev->data->port_id);
1820 		if (errval != 0) {
1821 			RTE_BOND_LOG(ERR,
1822 				"bond_ethdev_8023ad_flow_set: port=%d, err (%d)",
1823 				slave_eth_dev->data->port_id, errval);
1824 			return errval;
1825 		}
1826 	}
1827 
1828 	/* Start device */
1829 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1830 	if (errval != 0) {
1831 		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1832 				slave_eth_dev->data->port_id, errval);
1833 		return -1;
1834 	}
1835 
1836 	/* If RSS is enabled for bonding, synchronize RETA */
1837 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1838 		int i;
1839 		struct bond_dev_private *internals;
1840 
1841 		internals = bonded_eth_dev->data->dev_private;
1842 
1843 		for (i = 0; i < internals->slave_count; i++) {
1844 			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1845 				errval = rte_eth_dev_rss_reta_update(
1846 						slave_eth_dev->data->port_id,
1847 						&internals->reta_conf[0],
1848 						internals->slaves[i].reta_size);
1849 				if (errval != 0) {
1850 					RTE_BOND_LOG(WARNING,
1851 						     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1852 						     " RSS Configuration for bonding may be inconsistent.",
1853 						     slave_eth_dev->data->port_id, errval);
1854 				}
1855 				break;
1856 			}
1857 		}
1858 	}
1859 
1860 	/* If lsc interrupt is set, check initial slave's link status */
1861 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1862 		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1863 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1864 			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1865 			NULL);
1866 	}
1867 
1868 	return 0;
1869 }
1870 
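/*
 * Remove a slave from the bonded device's slave table: the slave's slot is
 * compacted out of internals->slaves[] and out of every flow's per-slave
 * flow array, the slave count is decremented and the slave ethdev is reset
 * so it must be reconfigured before reuse.
 */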
1871 void
1872 slave_remove(struct bond_dev_private *internals,
1873 		struct rte_eth_dev *slave_eth_dev)
1874 {
1875 	uint16_t i;
1876 
1877 	for (i = 0; i < internals->slave_count; i++)
1878 		if (internals->slaves[i].port_id ==
1879 				slave_eth_dev->data->port_id)
1880 			break;
1881 
1882 	if (i < (internals->slave_count - 1)) {
1883 		struct rte_flow *flow;
1884 
1885 		memmove(&internals->slaves[i], &internals->slaves[i + 1],
1886 				sizeof(internals->slaves[0]) *
1887 				(internals->slave_count - i - 1));
1888 		TAILQ_FOREACH(flow, &internals->flow_list, next) {
1889 			memmove(&flow->flows[i], &flow->flows[i + 1],
1890 				sizeof(flow->flows[0]) *
1891 				(internals->slave_count - i - 1));
1892 			flow->flows[internals->slave_count - 1] = NULL;
1893 		}
1894 	}
1895 
1896 	internals->slave_count--;
1897 
1898 	/* force reconfiguration of slave interfaces */
1899 	rte_eth_dev_internal_reset(slave_eth_dev);
1900 }
1901 
1902 static void
1903 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1904 
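/*
 * Record a new slave in internals->slaves[]: store its port id, flag it for
 * link-status polling when it does not support LSC interrupts, and keep a
 * copy of its original MAC address.
 */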
1905 void
1906 slave_add(struct bond_dev_private *internals,
1907 		struct rte_eth_dev *slave_eth_dev)
1908 {
1909 	struct bond_slave_details *slave_details =
1910 			&internals->slaves[internals->slave_count];
1911 
1912 	slave_details->port_id = slave_eth_dev->data->port_id;
1913 	slave_details->last_link_status = 0;
1914 
1915 	/* Mark slave devices that don't support interrupts so we can
1916 	 * compensate when we start the bond
1917 	 */
1918 	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1919 		slave_details->link_status_poll_enabled = 1;
1920 	}
1921 
1922 	slave_details->link_status_wait_to_complete = 0;
1923 	/* save the slave's original MAC address so it can be restored later */
1924 	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1925 			sizeof(struct rte_ether_addr));
1926 }
1927 
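/*
 * Select the primary slave port. The proposed port becomes the current
 * primary immediately when there are no active slaves; otherwise it is only
 * promoted if it is found in the active slave list.
 */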
1928 void
1929 bond_ethdev_primary_set(struct bond_dev_private *internals,
1930 		uint16_t slave_port_id)
1931 {
1932 	int i;
1933 
1934 	if (internals->active_slave_count < 1)
1935 		internals->current_primary_port = slave_port_id;
1936 	else
1937 		/* Search bonded device slave ports for new proposed primary port */
1938 		for (i = 0; i < internals->active_slave_count; i++) {
1939 			if (internals->active_slaves[i] == slave_port_id)
1940 				internals->current_primary_port = slave_port_id;
1941 		}
1942 }
1943 
1944 static int
1945 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1946 
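/*
 * dev_start handler for the bonded device. Derives the bond MAC address from
 * the primary slave (unless a MAC was user defined), reserves the dedicated
 * 802.3AD queue ids when enabled, reconfigures and restarts every slave via
 * slave_configure(), arms the link-status polling alarm if any slave lacks
 * LSC interrupts, and starts the mode-specific machinery (802.3AD state
 * machines, TLB/ALB callbacks).
 */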
1947 static int
1948 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1949 {
1950 	struct bond_dev_private *internals;
1951 	int i;
1952 
1953 	/* slave eth dev will be started by bonded device */
1954 	if (check_for_bonded_ethdev(eth_dev)) {
1955 		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1956 				eth_dev->data->port_id);
1957 		return -1;
1958 	}
1959 
1960 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1961 	eth_dev->data->dev_started = 1;
1962 
1963 	internals = eth_dev->data->dev_private;
1964 
1965 	if (internals->slave_count == 0) {
1966 		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1967 		goto out_err;
1968 	}
1969 
1970 	if (internals->user_defined_mac == 0) {
1971 		struct rte_ether_addr *new_mac_addr = NULL;
1972 
1973 		for (i = 0; i < internals->slave_count; i++)
1974 			if (internals->slaves[i].port_id == internals->primary_port)
1975 				new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1976 
1977 		if (new_mac_addr == NULL)
1978 			goto out_err;
1979 
1980 		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1981 			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1982 					eth_dev->data->port_id);
1983 			goto out_err;
1984 		}
1985 	}
1986 
1987 	if (internals->mode == BONDING_MODE_8023AD) {
1988 		if (internals->mode4.dedicated_queues.enabled == 1) {
1989 			internals->mode4.dedicated_queues.rx_qid =
1990 					eth_dev->data->nb_rx_queues;
1991 			internals->mode4.dedicated_queues.tx_qid =
1992 					eth_dev->data->nb_tx_queues;
1993 		}
1994 	}
1995 
1996 
1997 	/* Reconfigure each slave device if starting bonded device */
1998 	for (i = 0; i < internals->slave_count; i++) {
1999 		struct rte_eth_dev *slave_ethdev =
2000 				&(rte_eth_devices[internals->slaves[i].port_id]);
2001 		if (slave_configure(eth_dev, slave_ethdev) != 0) {
2002 			RTE_BOND_LOG(ERR,
2003 				"bonded port (%d) failed to reconfigure slave device (%d)",
2004 				eth_dev->data->port_id,
2005 				internals->slaves[i].port_id);
2006 			goto out_err;
2007 		}
2008 		/* We will need to poll for link status if any slave doesn't
2009 		 * support interrupts
2010 		 */
2011 		if (internals->slaves[i].link_status_poll_enabled)
2012 			internals->link_status_polling_enabled = 1;
2013 	}
2014 
2015 	/* start polling if needed */
2016 	if (internals->link_status_polling_enabled) {
2017 		rte_eal_alarm_set(
2018 			internals->link_status_polling_interval_ms * 1000,
2019 			bond_ethdev_slave_link_status_change_monitor,
2020 			(void *)&rte_eth_devices[internals->port_id]);
2021 	}
2022 
2023 	/* Update all slave devices' MACs */
2024 	if (mac_address_slaves_update(eth_dev) != 0)
2025 		goto out_err;
2026 
2027 	if (internals->user_defined_primary_port)
2028 		bond_ethdev_primary_set(internals, internals->primary_port);
2029 
2030 	if (internals->mode == BONDING_MODE_8023AD)
2031 		bond_mode_8023ad_start(eth_dev);
2032 
2033 	if (internals->mode == BONDING_MODE_TLB ||
2034 			internals->mode == BONDING_MODE_ALB)
2035 		bond_tlb_enable(internals);
2036 
2037 	return 0;
2038 
2039 out_err:
2040 	eth_dev->data->dev_started = 0;
2041 	return -1;
2042 }
2043 
2044 static void
2045 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2046 {
2047 	uint16_t i;
2048 
2049 	if (dev->data->rx_queues != NULL) {
2050 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
2051 			rte_free(dev->data->rx_queues[i]);
2052 			dev->data->rx_queues[i] = NULL;
2053 		}
2054 		dev->data->nb_rx_queues = 0;
2055 	}
2056 
2057 	if (dev->data->tx_queues != NULL) {
2058 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
2059 			rte_free(dev->data->tx_queues[i]);
2060 			dev->data->tx_queues[i] = NULL;
2061 		}
2062 		dev->data->nb_tx_queues = 0;
2063 	}
2064 }
2065 
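/*
 * dev_stop handler. Halts mode-specific processing (draining the 802.3AD
 * slow-protocol rings, disabling TLB/ALB), marks the bonded link down,
 * disables link-status polling, and stops and deactivates every slave that
 * is currently in the active slave list.
 */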
2066 int
2067 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2068 {
2069 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2070 	uint16_t i;
2071 	int ret;
2072 
2073 	if (internals->mode == BONDING_MODE_8023AD) {
2074 		struct port *port;
2075 		void *pkt = NULL;
2076 
2077 		bond_mode_8023ad_stop(eth_dev);
2078 
2079 		/* Discard all messages to/from mode 4 state machines */
2080 		for (i = 0; i < internals->active_slave_count; i++) {
2081 			port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2082 
2083 			RTE_ASSERT(port->rx_ring != NULL);
2084 			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2085 				rte_pktmbuf_free(pkt);
2086 
2087 			RTE_ASSERT(port->tx_ring != NULL);
2088 			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2089 				rte_pktmbuf_free(pkt);
2090 		}
2091 	}
2092 
2093 	if (internals->mode == BONDING_MODE_TLB ||
2094 			internals->mode == BONDING_MODE_ALB) {
2095 		bond_tlb_disable(internals);
2096 		for (i = 0; i < internals->active_slave_count; i++)
2097 			tlb_last_obytets[internals->active_slaves[i]] = 0;
2098 	}
2099 
2100 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2101 	eth_dev->data->dev_started = 0;
2102 
2103 	internals->link_status_polling_enabled = 0;
2104 	for (i = 0; i < internals->slave_count; i++) {
2105 		uint16_t slave_id = internals->slaves[i].port_id;
2106 		if (find_slave_by_id(internals->active_slaves,
2107 				internals->active_slave_count, slave_id) !=
2108 						internals->active_slave_count) {
2109 			internals->slaves[i].last_link_status = 0;
2110 			ret = rte_eth_dev_stop(slave_id);
2111 			if (ret != 0) {
2112 				RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2113 					     slave_id);
2114 				return ret;
2115 			}
2116 			deactivate_slave(eth_dev, slave_id);
2117 		}
2118 	}
2119 
2120 	return 0;
2121 }
2122 
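/*
 * dev_close handler (primary process only). Stops and removes every
 * remaining slave from the bond, flushes the bonded device's flow rules and
 * frees the Rx/Tx queue structures, the VLAN filter bitmap and the mode 6
 * mempool.
 */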
2123 int
2124 bond_ethdev_close(struct rte_eth_dev *dev)
2125 {
2126 	struct bond_dev_private *internals = dev->data->dev_private;
2127 	uint16_t bond_port_id = internals->port_id;
2128 	int skipped = 0;
2129 	struct rte_flow_error ferror;
2130 
2131 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2132 		return 0;
2133 
2134 	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2135 	while (internals->slave_count != skipped) {
2136 		uint16_t port_id = internals->slaves[skipped].port_id;
2137 
2138 		if (rte_eth_dev_stop(port_id) != 0) {
2139 			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2140 				     port_id);
2141 			skipped++;
2142 		}
2143 
2144 		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2145 			RTE_BOND_LOG(ERR,
2146 				     "Failed to remove port %d from bonded device %s",
2147 				     port_id, dev->device->name);
2148 			skipped++;
2149 		}
2150 	}
2151 	bond_flow_ops.flush(dev, &ferror);
2152 	bond_ethdev_free_queues(dev);
2153 	rte_bitmap_reset(internals->vlan_filter_bmp);
2154 	rte_bitmap_free(internals->vlan_filter_bmp);
2155 	rte_free(internals->vlan_filter_bmpmem);
2156 
2157 	/* Try to release the mempool used in mode 6. If the bonded
2158 	 * device is not in mode 6, freeing the NULL pointer is not a problem.
2159 	 */
2160 	rte_mempool_free(internals->mode6.mempool);
2161 
2162 	return 0;
2163 }
2164 
2165 /* forward declaration */
2166 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2167 
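/*
 * dev_infos_get handler. Reports capabilities as the intersection of the
 * slaves' capabilities: queue counts and descriptor limits are the minimum
 * across all slaves, and one Rx/Tx queue is reserved when the dedicated
 * 802.3AD control queues are enabled.
 */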
2168 static int
2169 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2170 {
2171 	struct bond_dev_private *internals = dev->data->dev_private;
2172 	struct bond_slave_details slave;
2173 	int ret;
2174 
2175 	uint16_t max_nb_rx_queues = UINT16_MAX;
2176 	uint16_t max_nb_tx_queues = UINT16_MAX;
2177 	uint16_t max_rx_desc_lim = UINT16_MAX;
2178 	uint16_t max_tx_desc_lim = UINT16_MAX;
2179 
2180 	dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2181 
2182 	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2183 			internals->candidate_max_rx_pktlen :
2184 			RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2185 
2186 	/* Max number of tx/rx queues that the bonded device can support is the
2187 	 * minimum values of the bonded slaves, as all slaves must be capable
2188 	 * of supporting the same number of tx/rx queues.
2189 	 */
2190 	if (internals->slave_count > 0) {
2191 		struct rte_eth_dev_info slave_info;
2192 		uint16_t idx;
2193 
2194 		for (idx = 0; idx < internals->slave_count; idx++) {
2195 			slave = internals->slaves[idx];
2196 			ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2197 			if (ret != 0) {
2198 				RTE_BOND_LOG(ERR,
2199 					"%s: Error during getting device (port %u) info: %s\n",
2200 					__func__,
2201 					slave.port_id,
2202 					strerror(-ret));
2203 
2204 				return ret;
2205 			}
2206 
2207 			if (slave_info.max_rx_queues < max_nb_rx_queues)
2208 				max_nb_rx_queues = slave_info.max_rx_queues;
2209 
2210 			if (slave_info.max_tx_queues < max_nb_tx_queues)
2211 				max_nb_tx_queues = slave_info.max_tx_queues;
2212 
2213 			if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2214 				max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2215 
2216 			if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2217 				max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2218 		}
2219 	}
2220 
2221 	dev_info->max_rx_queues = max_nb_rx_queues;
2222 	dev_info->max_tx_queues = max_nb_tx_queues;
2223 
2224 	memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2225 	       sizeof(dev_info->default_rxconf));
2226 	memcpy(&dev_info->default_txconf, &internals->default_txconf,
2227 	       sizeof(dev_info->default_txconf));
2228 
2229 	dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2230 	dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2231 
2232 	/**
2233 	 * If dedicated hw queues enabled for link bonding device in LACP mode
2234 	 * then we need to reduce the maximum number of data path queues by 1.
2235 	 */
2236 	if (internals->mode == BONDING_MODE_8023AD &&
2237 		internals->mode4.dedicated_queues.enabled == 1) {
2238 		dev_info->max_rx_queues--;
2239 		dev_info->max_tx_queues--;
2240 	}
2241 
2242 	dev_info->min_rx_bufsize = 0;
2243 
2244 	dev_info->rx_offload_capa = internals->rx_offload_capa;
2245 	dev_info->tx_offload_capa = internals->tx_offload_capa;
2246 	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2247 	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2248 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2249 
2250 	dev_info->reta_size = internals->reta_size;
2251 
2252 	return 0;
2253 }
2254 
2255 static int
2256 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2257 {
2258 	int res;
2259 	uint16_t i;
2260 	struct bond_dev_private *internals = dev->data->dev_private;
2261 
2262 	/* don't do this while a slave is being added */
2263 	rte_spinlock_lock(&internals->lock);
2264 
2265 	if (on)
2266 		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2267 	else
2268 		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2269 
2270 	for (i = 0; i < internals->slave_count; i++) {
2271 		uint16_t port_id = internals->slaves[i].port_id;
2272 
2273 		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2274 		if (res == -ENOTSUP)
2275 			RTE_BOND_LOG(WARNING,
2276 				     "Setting VLAN filter on slave port %u not supported.",
2277 				     port_id);
2278 	}
2279 
2280 	rte_spinlock_unlock(&internals->lock);
2281 	return 0;
2282 }
2283 
2284 static int
2285 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2286 		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2287 		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2288 {
2289 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2290 			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2291 					0, dev->data->numa_node);
2292 	if (bd_rx_q == NULL)
2293 		return -1;
2294 
2295 	bd_rx_q->queue_id = rx_queue_id;
2296 	bd_rx_q->dev_private = dev->data->dev_private;
2297 
2298 	bd_rx_q->nb_rx_desc = nb_rx_desc;
2299 
2300 	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2301 	bd_rx_q->mb_pool = mb_pool;
2302 
2303 	dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2304 
2305 	return 0;
2306 }
2307 
2308 static int
2309 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2310 		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2311 		const struct rte_eth_txconf *tx_conf)
2312 {
2313 	struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2314 			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2315 					0, dev->data->numa_node);
2316 
2317 	if (bd_tx_q == NULL)
2318 		return -1;
2319 
2320 	bd_tx_q->queue_id = tx_queue_id;
2321 	bd_tx_q->dev_private = dev->data->dev_private;
2322 
2323 	bd_tx_q->nb_tx_desc = nb_tx_desc;
2324 	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2325 
2326 	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2327 
2328 	return 0;
2329 }
2330 
2331 static void
2332 bond_ethdev_rx_queue_release(void *queue)
2333 {
2334 	if (queue == NULL)
2335 		return;
2336 
2337 	rte_free(queue);
2338 }
2339 
2340 static void
2341 bond_ethdev_tx_queue_release(void *queue)
2342 {
2343 	if (queue == NULL)
2344 		return;
2345 
2346 	rte_free(queue);
2347 }
2348 
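/*
 * Alarm callback that polls the link status of slaves which do not support
 * LSC interrupts. Any status change is forwarded through
 * bond_ethdev_lsc_event_callback(), and the alarm re-arms itself as long as
 * polling is still required.
 */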
2349 static void
2350 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2351 {
2352 	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2353 	struct bond_dev_private *internals;
2354 
2355 	/* Default value for polling slave found is true as we don't want to
2356 	 * disable the polling thread if we cannot get the lock */
2357 	int i, polling_slave_found = 1;
2358 
2359 	if (cb_arg == NULL)
2360 		return;
2361 
2362 	bonded_ethdev = cb_arg;
2363 	internals = bonded_ethdev->data->dev_private;
2364 
2365 	if (!bonded_ethdev->data->dev_started ||
2366 		!internals->link_status_polling_enabled)
2367 		return;
2368 
2369 	/* If the device is currently being configured then don't check the
2370 	 * slaves' link status; wait until the next period */
2371 	if (rte_spinlock_trylock(&internals->lock)) {
2372 		if (internals->slave_count > 0)
2373 			polling_slave_found = 0;
2374 
2375 		for (i = 0; i < internals->slave_count; i++) {
2376 			if (!internals->slaves[i].link_status_poll_enabled)
2377 				continue;
2378 
2379 			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2380 			polling_slave_found = 1;
2381 
2382 			/* Update slave link status */
2383 			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2384 					internals->slaves[i].link_status_wait_to_complete);
2385 
2386 			/* if link status has changed since last checked then call lsc
2387 			 * event callback */
2388 			if (slave_ethdev->data->dev_link.link_status !=
2389 					internals->slaves[i].last_link_status) {
2390 				internals->slaves[i].last_link_status =
2391 						slave_ethdev->data->dev_link.link_status;
2392 
2393 				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2394 						RTE_ETH_EVENT_INTR_LSC,
2395 						&bonded_ethdev->data->port_id,
2396 						NULL);
2397 			}
2398 		}
2399 		rte_spinlock_unlock(&internals->lock);
2400 	}
2401 
2402 	if (polling_slave_found)
2403 		/* Set alarm to continue monitoring link status of slave ethdevs */
2404 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2405 				bond_ethdev_slave_link_status_change_monitor, cb_arg);
2406 }
2407 
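/*
 * link_update handler. Computes the bonded link speed from the active slaves
 * according to the mode: the minimum slave speed for broadcast, the primary
 * slave's speed for active-backup, and the sum of the active slaves' speeds
 * for the load-balancing modes.
 */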
2408 static int
2409 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2410 {
2411 	int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2412 
2413 	struct bond_dev_private *bond_ctx;
2414 	struct rte_eth_link slave_link;
2415 
2416 	bool one_link_update_succeeded;
2417 	uint32_t idx;
2418 	int ret;
2419 
2420 	bond_ctx = ethdev->data->dev_private;
2421 
2422 	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2423 
2424 	if (ethdev->data->dev_started == 0 ||
2425 			bond_ctx->active_slave_count == 0) {
2426 		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2427 		return 0;
2428 	}
2429 
2430 	ethdev->data->dev_link.link_status = ETH_LINK_UP;
2431 
2432 	if (wait_to_complete)
2433 		link_update = rte_eth_link_get;
2434 	else
2435 		link_update = rte_eth_link_get_nowait;
2436 
2437 	switch (bond_ctx->mode) {
2438 	case BONDING_MODE_BROADCAST:
2439 		/**
2440 		 * Setting link speed to UINT32_MAX to ensure we pick up the
2441 		 * value of the first active slave
2442 		 */
2443 		ethdev->data->dev_link.link_speed = UINT32_MAX;
2444 
2445 		/**
2446 		 * The link speed is the minimum of all the slaves' link speeds,
2447 		 * as packet loss will occur on a slave if transmission is
2448 		 * attempted at a rate greater than it supports.
2449 		 */
2450 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2451 			ret = link_update(bond_ctx->active_slaves[idx],
2452 					  &slave_link);
2453 			if (ret < 0) {
2454 				ethdev->data->dev_link.link_speed =
2455 					ETH_SPEED_NUM_NONE;
2456 				RTE_BOND_LOG(ERR,
2457 					"Slave (port %u) link get failed: %s",
2458 					bond_ctx->active_slaves[idx],
2459 					rte_strerror(-ret));
2460 				return 0;
2461 			}
2462 
2463 			if (slave_link.link_speed <
2464 					ethdev->data->dev_link.link_speed)
2465 				ethdev->data->dev_link.link_speed =
2466 						slave_link.link_speed;
2467 		}
2468 		break;
2469 	case BONDING_MODE_ACTIVE_BACKUP:
2470 		/* Current primary slave */
2471 		ret = link_update(bond_ctx->current_primary_port, &slave_link);
2472 		if (ret < 0) {
2473 			RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2474 				bond_ctx->current_primary_port,
2475 				rte_strerror(-ret));
2476 			return 0;
2477 		}
2478 
2479 		ethdev->data->dev_link.link_speed = slave_link.link_speed;
2480 		break;
2481 	case BONDING_MODE_8023AD:
2482 		ethdev->data->dev_link.link_autoneg =
2483 				bond_ctx->mode4.slave_link.link_autoneg;
2484 		ethdev->data->dev_link.link_duplex =
2485 				bond_ctx->mode4.slave_link.link_duplex;
2486 		/* fall through */
2487 		/* to update link speed */
2488 	case BONDING_MODE_ROUND_ROBIN:
2489 	case BONDING_MODE_BALANCE:
2490 	case BONDING_MODE_TLB:
2491 	case BONDING_MODE_ALB:
2492 	default:
2493 		/**
2494 		 * In these modes the maximum theoretical link speed is the sum
2495 		 * of all the slaves' link speeds.
2496 		 */
2497 		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2498 		one_link_update_succeeded = false;
2499 
2500 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2501 			ret = link_update(bond_ctx->active_slaves[idx],
2502 					&slave_link);
2503 			if (ret < 0) {
2504 				RTE_BOND_LOG(ERR,
2505 					"Slave (port %u) link get failed: %s",
2506 					bond_ctx->active_slaves[idx],
2507 					rte_strerror(-ret));
2508 				continue;
2509 			}
2510 
2511 			one_link_update_succeeded = true;
2512 			ethdev->data->dev_link.link_speed +=
2513 					slave_link.link_speed;
2514 		}
2515 
2516 		if (!one_link_update_succeeded) {
2517 			RTE_BOND_LOG(ERR, "All slaves link get failed");
2518 			return 0;
2519 		}
2520 	}
2521 
2522 
2523 	return 0;
2524 }
2525 
2526 
2527 static int
2528 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2529 {
2530 	struct bond_dev_private *internals = dev->data->dev_private;
2531 	struct rte_eth_stats slave_stats;
2532 	int i, j;
2533 
2534 	for (i = 0; i < internals->slave_count; i++) {
2535 		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2536 
2537 		stats->ipackets += slave_stats.ipackets;
2538 		stats->opackets += slave_stats.opackets;
2539 		stats->ibytes += slave_stats.ibytes;
2540 		stats->obytes += slave_stats.obytes;
2541 		stats->imissed += slave_stats.imissed;
2542 		stats->ierrors += slave_stats.ierrors;
2543 		stats->oerrors += slave_stats.oerrors;
2544 		stats->rx_nombuf += slave_stats.rx_nombuf;
2545 
2546 		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2547 			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2548 			stats->q_opackets[j] += slave_stats.q_opackets[j];
2549 			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2550 			stats->q_obytes[j] += slave_stats.q_obytes[j];
2551 			stats->q_errors[j] += slave_stats.q_errors[j];
2552 		}
2553 
2554 	}
2555 
2556 	return 0;
2557 }
2558 
2559 static int
2560 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2561 {
2562 	struct bond_dev_private *internals = dev->data->dev_private;
2563 	int i;
2564 	int err;
2565 	int ret;
2566 
2567 	for (i = 0, err = 0; i < internals->slave_count; i++) {
2568 		ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2569 		if (ret != 0)
2570 			err = ret;
2571 	}
2572 
2573 	return err;
2574 }
2575 
2576 static int
2577 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2578 {
2579 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2580 	int i;
2581 	int ret = 0;
2582 	uint16_t port_id;
2583 
2584 	switch (internals->mode) {
2585 	/* Promiscuous mode is propagated to all slaves */
2586 	case BONDING_MODE_ROUND_ROBIN:
2587 	case BONDING_MODE_BALANCE:
2588 	case BONDING_MODE_BROADCAST:
2589 	case BONDING_MODE_8023AD: {
2590 		unsigned int slave_ok = 0;
2591 
2592 		for (i = 0; i < internals->slave_count; i++) {
2593 			port_id = internals->slaves[i].port_id;
2594 
2595 			ret = rte_eth_promiscuous_enable(port_id);
2596 			if (ret != 0)
2597 				RTE_BOND_LOG(ERR,
2598 					"Failed to enable promiscuous mode for port %u: %s",
2599 					port_id, rte_strerror(-ret));
2600 			else
2601 				slave_ok++;
2602 		}
2603 		/*
2604 		 * Report success if the operation succeeded on at least
2605 		 * one slave. Otherwise return the last error code.
2606 		 */
2607 		if (slave_ok > 0)
2608 			ret = 0;
2609 		break;
2610 	}
2611 	/* Promiscuous mode is propagated only to primary slave */
2612 	case BONDING_MODE_ACTIVE_BACKUP:
2613 	case BONDING_MODE_TLB:
2614 	case BONDING_MODE_ALB:
2615 	default:
2616 		/* Do not touch promisc when there cannot be primary ports */
2617 		if (internals->slave_count == 0)
2618 			break;
2619 		port_id = internals->current_primary_port;
2620 		ret = rte_eth_promiscuous_enable(port_id);
2621 		if (ret != 0)
2622 			RTE_BOND_LOG(ERR,
2623 				"Failed to enable promiscuous mode for port %u: %s",
2624 				port_id, rte_strerror(-ret));
2625 	}
2626 
2627 	return ret;
2628 }
2629 
2630 static int
2631 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2632 {
2633 	struct bond_dev_private *internals = dev->data->dev_private;
2634 	int i;
2635 	int ret = 0;
2636 	uint16_t port_id;
2637 
2638 	switch (internals->mode) {
2639 	/* Promiscuous mode is propagated to all slaves */
2640 	case BONDING_MODE_ROUND_ROBIN:
2641 	case BONDING_MODE_BALANCE:
2642 	case BONDING_MODE_BROADCAST:
2643 	case BONDING_MODE_8023AD: {
2644 		unsigned int slave_ok = 0;
2645 
2646 		for (i = 0; i < internals->slave_count; i++) {
2647 			port_id = internals->slaves[i].port_id;
2648 
2649 			if (internals->mode == BONDING_MODE_8023AD &&
2650 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2651 					BOND_8023AD_FORCED_PROMISC) {
2652 				slave_ok++;
2653 				continue;
2654 			}
2655 			ret = rte_eth_promiscuous_disable(port_id);
2656 			if (ret != 0)
2657 				RTE_BOND_LOG(ERR,
2658 					"Failed to disable promiscuous mode for port %u: %s",
2659 					port_id, rte_strerror(-ret));
2660 			else
2661 				slave_ok++;
2662 		}
2663 		/*
2664 		 * Report success if the operation succeeded on at least
2665 		 * one slave. Otherwise return the last error code.
2666 		 */
2667 		if (slave_ok > 0)
2668 			ret = 0;
2669 		break;
2670 	}
2671 	/* Promiscuous mode is propagated only to primary slave */
2672 	case BONDING_MODE_ACTIVE_BACKUP:
2673 	case BONDING_MODE_TLB:
2674 	case BONDING_MODE_ALB:
2675 	default:
2676 		/* Do not touch promisc when there cannot be primary ports */
2677 		if (internals->slave_count == 0)
2678 			break;
2679 		port_id = internals->current_primary_port;
2680 		ret = rte_eth_promiscuous_disable(port_id);
2681 		if (ret != 0)
2682 			RTE_BOND_LOG(ERR,
2683 				"Failed to disable promiscuous mode for port %u: %s",
2684 				port_id, rte_strerror(-ret));
2685 	}
2686 
2687 	return ret;
2688 }
2689 
2690 static int
2691 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2692 {
2693 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2694 	int i;
2695 	int ret = 0;
2696 	uint16_t port_id;
2697 
2698 	switch (internals->mode) {
2699 	/* allmulti mode is propagated to all slaves */
2700 	case BONDING_MODE_ROUND_ROBIN:
2701 	case BONDING_MODE_BALANCE:
2702 	case BONDING_MODE_BROADCAST:
2703 	case BONDING_MODE_8023AD: {
2704 		unsigned int slave_ok = 0;
2705 
2706 		for (i = 0; i < internals->slave_count; i++) {
2707 			port_id = internals->slaves[i].port_id;
2708 
2709 			ret = rte_eth_allmulticast_enable(port_id);
2710 			if (ret != 0)
2711 				RTE_BOND_LOG(ERR,
2712 					"Failed to enable allmulti mode for port %u: %s",
2713 					port_id, rte_strerror(-ret));
2714 			else
2715 				slave_ok++;
2716 		}
2717 		/*
2718 		 * Report success if the operation succeeded on at least
2719 		 * one slave. Otherwise return the last error code.
2720 		 */
2721 		if (slave_ok > 0)
2722 			ret = 0;
2723 		break;
2724 	}
2725 	/* allmulti mode is propagated only to primary slave */
2726 	case BONDING_MODE_ACTIVE_BACKUP:
2727 	case BONDING_MODE_TLB:
2728 	case BONDING_MODE_ALB:
2729 	default:
2730 		/* Do not touch allmulti when there cannot be primary ports */
2731 		if (internals->slave_count == 0)
2732 			break;
2733 		port_id = internals->current_primary_port;
2734 		ret = rte_eth_allmulticast_enable(port_id);
2735 		if (ret != 0)
2736 			RTE_BOND_LOG(ERR,
2737 				"Failed to enable allmulti mode for port %u: %s",
2738 				port_id, rte_strerror(-ret));
2739 	}
2740 
2741 	return ret;
2742 }
2743 
2744 static int
2745 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2746 {
2747 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2748 	int i;
2749 	int ret = 0;
2750 	uint16_t port_id;
2751 
2752 	switch (internals->mode) {
2753 	/* allmulti mode is propagated to all slaves */
2754 	case BONDING_MODE_ROUND_ROBIN:
2755 	case BONDING_MODE_BALANCE:
2756 	case BONDING_MODE_BROADCAST:
2757 	case BONDING_MODE_8023AD: {
2758 		unsigned int slave_ok = 0;
2759 
2760 		for (i = 0; i < internals->slave_count; i++) {
2761 			uint16_t port_id = internals->slaves[i].port_id;
2762 
2763 			if (internals->mode == BONDING_MODE_8023AD &&
2764 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2765 					BOND_8023AD_FORCED_ALLMULTI)
2766 				continue;
2767 
2768 			ret = rte_eth_allmulticast_disable(port_id);
2769 			if (ret != 0)
2770 				RTE_BOND_LOG(ERR,
2771 					"Failed to disable allmulti mode for port %u: %s",
2772 					port_id, rte_strerror(-ret));
2773 			else
2774 				slave_ok++;
2775 		}
2776 		/*
2777 		 * Report success if the operation succeeded on at least
2778 		 * one slave. Otherwise return the last error code.
2779 		 */
2780 		if (slave_ok > 0)
2781 			ret = 0;
2782 		break;
2783 	}
2784 	/* allmulti mode is propagated only to primary slave */
2785 	case BONDING_MODE_ACTIVE_BACKUP:
2786 	case BONDING_MODE_TLB:
2787 	case BONDING_MODE_ALB:
2788 	default:
2789 		/* Do not touch allmulti when there cannot be primary ports */
2790 		if (internals->slave_count == 0)
2791 			break;
2792 		port_id = internals->current_primary_port;
2793 		ret = rte_eth_allmulticast_disable(port_id);
2794 		if (ret != 0)
2795 			RTE_BOND_LOG(ERR,
2796 				"Failed to disable allmulti mode for port %u: %s",
2797 				port_id, rte_strerror(-ret));
2798 	}
2799 
2800 	return ret;
2801 }
2802 
2803 static void
2804 bond_ethdev_delayed_lsc_propagation(void *arg)
2805 {
2806 	if (arg == NULL)
2807 		return;
2808 
2809 	rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2810 			RTE_ETH_EVENT_INTR_LSC, NULL);
2811 }
2812 
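/*
 * LSC event callback invoked on slave link-state changes. Activates or
 * deactivates the slave, elects a new primary and updates slave MACs when
 * necessary, recomputes the bonded link properties and propagates the LSC
 * event to the application, optionally delayed by the configured link
 * up/down delays.
 */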
2813 int
2814 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2815 		void *param, void *ret_param __rte_unused)
2816 {
2817 	struct rte_eth_dev *bonded_eth_dev;
2818 	struct bond_dev_private *internals;
2819 	struct rte_eth_link link;
2820 	int rc = -1;
2821 	int ret;
2822 
2823 	uint8_t lsc_flag = 0;
2824 	int valid_slave = 0;
2825 	uint16_t active_pos;
2826 	uint16_t i;
2827 
2828 	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2829 		return rc;
2830 
2831 	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2832 
2833 	if (check_for_bonded_ethdev(bonded_eth_dev))
2834 		return rc;
2835 
2836 	internals = bonded_eth_dev->data->dev_private;
2837 
2838 	/* If the device isn't started don't handle interrupts */
2839 	if (!bonded_eth_dev->data->dev_started)
2840 		return rc;
2841 
2842 	/* verify that port_id is a valid slave of bonded port */
2843 	for (i = 0; i < internals->slave_count; i++) {
2844 		if (internals->slaves[i].port_id == port_id) {
2845 			valid_slave = 1;
2846 			break;
2847 		}
2848 	}
2849 
2850 	if (!valid_slave)
2851 		return rc;
2852 
2853 	/* Synchronize lsc callback parallel calls either by real link event
2854 	 * from the slaves PMDs or by the bonding PMD itself.
2855 	 */
2856 	rte_spinlock_lock(&internals->lsc_lock);
2857 
2858 	/* Search for port in active port list */
2859 	active_pos = find_slave_by_id(internals->active_slaves,
2860 			internals->active_slave_count, port_id);
2861 
2862 	ret = rte_eth_link_get_nowait(port_id, &link);
2863 	if (ret < 0)
2864 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2865 
2866 	if (ret == 0 && link.link_status) {
2867 		if (active_pos < internals->active_slave_count)
2868 			goto link_update;
2869 
2870 		/* check link state properties if bonded link is up */
2871 		if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2872 			if (link_properties_valid(bonded_eth_dev, &link) != 0)
2873 				RTE_BOND_LOG(ERR, "Invalid link properties "
2874 					     "for slave %d in bonding mode %d",
2875 					     port_id, internals->mode);
2876 		} else {
2877 			/* inherit slave link properties */
2878 			link_properties_set(bonded_eth_dev, &link);
2879 		}
2880 
2881 		/* If no active slave ports then set this port to be
2882 		 * the primary port.
2883 		 */
2884 		if (internals->active_slave_count < 1) {
2885 			/* If first active slave, then change link status */
2886 			bonded_eth_dev->data->dev_link.link_status =
2887 								ETH_LINK_UP;
2888 			internals->current_primary_port = port_id;
2889 			lsc_flag = 1;
2890 
2891 			mac_address_slaves_update(bonded_eth_dev);
2892 		}
2893 
2894 		activate_slave(bonded_eth_dev, port_id);
2895 
2896 		/* If the user has defined the primary port then default to
2897 		 * using it.
2898 		 */
2899 		if (internals->user_defined_primary_port &&
2900 				internals->primary_port == port_id)
2901 			bond_ethdev_primary_set(internals, port_id);
2902 	} else {
2903 		if (active_pos == internals->active_slave_count)
2904 			goto link_update;
2905 
2906 		/* Remove from active slave list */
2907 		deactivate_slave(bonded_eth_dev, port_id);
2908 
2909 		if (internals->active_slave_count < 1)
2910 			lsc_flag = 1;
2911 
2912 		/* Update primary id: take the first active slave from the list or,
2913 		 * if none is available, fall back to the configured primary port */
2914 		if (port_id == internals->current_primary_port) {
2915 			if (internals->active_slave_count > 0)
2916 				bond_ethdev_primary_set(internals,
2917 						internals->active_slaves[0]);
2918 			else
2919 				internals->current_primary_port = internals->primary_port;
2920 			mac_address_slaves_update(bonded_eth_dev);
2921 		}
2922 	}
2923 
2924 link_update:
2925 	/**
2926 	 * Update bonded device link properties after any change to active
2927 	 * slaves
2928 	 */
2929 	bond_ethdev_link_update(bonded_eth_dev, 0);
2930 
2931 	if (lsc_flag) {
2932 		/* Cancel any possible outstanding interrupts if delays are enabled */
2933 		if (internals->link_up_delay_ms > 0 ||
2934 			internals->link_down_delay_ms > 0)
2935 			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2936 					bonded_eth_dev);
2937 
2938 		if (bonded_eth_dev->data->dev_link.link_status) {
2939 			if (internals->link_up_delay_ms > 0)
2940 				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2941 						bond_ethdev_delayed_lsc_propagation,
2942 						(void *)bonded_eth_dev);
2943 			else
2944 				rte_eth_dev_callback_process(bonded_eth_dev,
2945 						RTE_ETH_EVENT_INTR_LSC,
2946 						NULL);
2947 
2948 		} else {
2949 			if (internals->link_down_delay_ms > 0)
2950 				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2951 						bond_ethdev_delayed_lsc_propagation,
2952 						(void *)bonded_eth_dev);
2953 			else
2954 				rte_eth_dev_callback_process(bonded_eth_dev,
2955 						RTE_ETH_EVENT_INTR_LSC,
2956 						NULL);
2957 		}
2958 	}
2959 
2960 	rte_spinlock_unlock(&internals->lsc_lock);
2961 
2962 	return rc;
2963 }
2964 
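/*
 * reta_update handler. Stores the new redirection table in the bonded
 * device's private data (replicating it to fill the full table) and then
 * pushes it to every slave, using each slave's own RETA size.
 */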
2965 static int
2966 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2967 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2968 {
2969 	unsigned i, j;
2970 	int result = 0;
2971 	int slave_reta_size;
2972 	unsigned reta_count;
2973 	struct bond_dev_private *internals = dev->data->dev_private;
2974 
2975 	if (reta_size != internals->reta_size)
2976 		return -EINVAL;
2977 
2978 	 /* Copy RETA table */
2979 	reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) /
2980 			RTE_RETA_GROUP_SIZE;
2981 
2982 	for (i = 0; i < reta_count; i++) {
2983 		internals->reta_conf[i].mask = reta_conf[i].mask;
2984 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2985 			if ((reta_conf[i].mask >> j) & 0x01)
2986 				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2987 	}
2988 
2989 	/* Fill rest of array */
2990 	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2991 		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2992 				sizeof(internals->reta_conf[0]) * reta_count);
2993 
2994 	/* Propagate RETA over slaves */
2995 	for (i = 0; i < internals->slave_count; i++) {
2996 		slave_reta_size = internals->slaves[i].reta_size;
2997 		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2998 				&internals->reta_conf[0], slave_reta_size);
2999 		if (result < 0)
3000 			return result;
3001 	}
3002 
3003 	return 0;
3004 }
3005 
3006 static int
3007 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
3008 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3009 {
3010 	int i, j;
3011 	struct bond_dev_private *internals = dev->data->dev_private;
3012 
3013 	if (reta_size != internals->reta_size)
3014 		return -EINVAL;
3015 
3016 	 /* Copy RETA table */
3017 	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
3018 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3019 			if ((reta_conf[i].mask >> j) & 0x01)
3020 				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3021 
3022 	return 0;
3023 }
3024 
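/*
 * rss_hash_update handler. Masks the requested hash functions with what the
 * bond advertises, caches the RSS key (when one is provided and it fits the
 * internal buffer), and applies the resulting configuration to every slave.
 */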
3025 static int
3026 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3027 		struct rte_eth_rss_conf *rss_conf)
3028 {
3029 	int i, result = 0;
3030 	struct bond_dev_private *internals = dev->data->dev_private;
3031 	struct rte_eth_rss_conf bond_rss_conf;
3032 
3033 	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3034 
3035 	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3036 
3037 	if (bond_rss_conf.rss_hf != 0)
3038 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3039 
3040 	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
3041 			sizeof(internals->rss_key)) {
3042 		if (bond_rss_conf.rss_key_len == 0)
3043 			bond_rss_conf.rss_key_len = 40;
3044 		internals->rss_key_len = bond_rss_conf.rss_key_len;
3045 		memcpy(internals->rss_key, bond_rss_conf.rss_key,
3046 				internals->rss_key_len);
3047 	}
3048 
3049 	for (i = 0; i < internals->slave_count; i++) {
3050 		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3051 				&bond_rss_conf);
3052 		if (result < 0)
3053 			return result;
3054 	}
3055 
3056 	return 0;
3057 }
3058 
3059 static int
3060 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3061 		struct rte_eth_rss_conf *rss_conf)
3062 {
3063 	struct bond_dev_private *internals = dev->data->dev_private;
3064 
3065 	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3066 	rss_conf->rss_key_len = internals->rss_key_len;
3067 	if (rss_conf->rss_key)
3068 		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3069 
3070 	return 0;
3071 }
3072 
3073 static int
3074 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3075 {
3076 	struct rte_eth_dev *slave_eth_dev;
3077 	struct bond_dev_private *internals = dev->data->dev_private;
3078 	int ret, i;
3079 
3080 	rte_spinlock_lock(&internals->lock);
3081 
3082 	for (i = 0; i < internals->slave_count; i++) {
3083 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3084 		if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3085 			rte_spinlock_unlock(&internals->lock);
3086 			return -ENOTSUP;
3087 		}
3088 	}
3089 	for (i = 0; i < internals->slave_count; i++) {
3090 		ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3091 		if (ret < 0) {
3092 			rte_spinlock_unlock(&internals->lock);
3093 			return ret;
3094 		}
3095 	}
3096 
3097 	rte_spinlock_unlock(&internals->lock);
3098 	return 0;
3099 }
3100 
3101 static int
3102 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3103 			struct rte_ether_addr *addr)
3104 {
3105 	if (mac_address_set(dev, addr)) {
3106 		RTE_BOND_LOG(ERR, "Failed to update MAC address");
3107 		return -EINVAL;
3108 	}
3109 
3110 	return 0;
3111 }
3112 
3113 static int
3114 bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
3115 		  const struct rte_flow_ops **ops)
3116 {
3117 	*ops = &bond_flow_ops;
3118 	return 0;
3119 }
3120 
3121 static int
3122 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3123 			struct rte_ether_addr *mac_addr,
3124 			__rte_unused uint32_t index, uint32_t vmdq)
3125 {
3126 	struct rte_eth_dev *slave_eth_dev;
3127 	struct bond_dev_private *internals = dev->data->dev_private;
3128 	int ret, i;
3129 
3130 	rte_spinlock_lock(&internals->lock);
3131 
3132 	for (i = 0; i < internals->slave_count; i++) {
3133 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3134 		if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3135 			 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3136 			ret = -ENOTSUP;
3137 			goto end;
3138 		}
3139 	}
3140 
3141 	for (i = 0; i < internals->slave_count; i++) {
3142 		ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3143 				mac_addr, vmdq);
3144 		if (ret < 0) {
3145 			/* rollback */
3146 			for (i--; i >= 0; i--)
3147 				rte_eth_dev_mac_addr_remove(
3148 					internals->slaves[i].port_id, mac_addr);
3149 			goto end;
3150 		}
3151 	}
3152 
3153 	ret = 0;
3154 end:
3155 	rte_spinlock_unlock(&internals->lock);
3156 	return ret;
3157 }
3158 
3159 static void
3160 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3161 {
3162 	struct rte_eth_dev *slave_eth_dev;
3163 	struct bond_dev_private *internals = dev->data->dev_private;
3164 	int i;
3165 
3166 	rte_spinlock_lock(&internals->lock);
3167 
3168 	for (i = 0; i < internals->slave_count; i++) {
3169 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3170 		if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3171 			goto end;
3172 	}
3173 
3174 	struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3175 
3176 	for (i = 0; i < internals->slave_count; i++)
3177 		rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3178 				mac_addr);
3179 
3180 end:
3181 	rte_spinlock_unlock(&internals->lock);
3182 }
3183 
3184 const struct eth_dev_ops default_dev_ops = {
3185 	.dev_start            = bond_ethdev_start,
3186 	.dev_stop             = bond_ethdev_stop,
3187 	.dev_close            = bond_ethdev_close,
3188 	.dev_configure        = bond_ethdev_configure,
3189 	.dev_infos_get        = bond_ethdev_info,
3190 	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
3191 	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
3192 	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
3193 	.rx_queue_release     = bond_ethdev_rx_queue_release,
3194 	.tx_queue_release     = bond_ethdev_tx_queue_release,
3195 	.link_update          = bond_ethdev_link_update,
3196 	.stats_get            = bond_ethdev_stats_get,
3197 	.stats_reset          = bond_ethdev_stats_reset,
3198 	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
3199 	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
3200 	.allmulticast_enable  = bond_ethdev_allmulticast_enable,
3201 	.allmulticast_disable = bond_ethdev_allmulticast_disable,
3202 	.reta_update          = bond_ethdev_rss_reta_update,
3203 	.reta_query           = bond_ethdev_rss_reta_query,
3204 	.rss_hash_update      = bond_ethdev_rss_hash_update,
3205 	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3206 	.mtu_set              = bond_ethdev_mtu_set,
3207 	.mac_addr_set         = bond_ethdev_mac_address_set,
3208 	.mac_addr_add         = bond_ethdev_mac_addr_add,
3209 	.mac_addr_remove      = bond_ethdev_mac_addr_remove,
3210 	.flow_ops_get         = bond_flow_ops_get
3211 };
3212 
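/*
 * Allocate and initialise the bonded ethdev for a vdev: reserve the ethdev
 * entry and the MAC address table, set default private data (polling
 * interval, xmit policy, offload capabilities), apply the requested bonding
 * mode and create the VLAN filter bitmap. Returns the new port id on
 * success, -1 on failure.
 */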
3213 static int
3214 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3215 {
3216 	const char *name = rte_vdev_device_name(dev);
3217 	uint8_t socket_id = dev->device.numa_node;
3218 	struct bond_dev_private *internals = NULL;
3219 	struct rte_eth_dev *eth_dev = NULL;
3220 	uint32_t vlan_filter_bmp_size;
3221 
3222 	/* now do all data allocation - for the eth_dev structure
3223 	 * and internal (private) data
3224 	 */
3225 
3226 	/* reserve an ethdev entry */
3227 	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3228 	if (eth_dev == NULL) {
3229 		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3230 		goto err;
3231 	}
3232 
3233 	internals = eth_dev->data->dev_private;
3234 	eth_dev->data->nb_rx_queues = (uint16_t)1;
3235 	eth_dev->data->nb_tx_queues = (uint16_t)1;
3236 
3237 	/* Allocate memory for storing MAC addresses */
3238 	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3239 			BOND_MAX_MAC_ADDRS, 0, socket_id);
3240 	if (eth_dev->data->mac_addrs == NULL) {
3241 		RTE_BOND_LOG(ERR,
3242 			     "Failed to allocate %u bytes needed to store MAC addresses",
3243 			     RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3244 		goto err;
3245 	}
3246 
3247 	eth_dev->dev_ops = &default_dev_ops;
3248 	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3249 					RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3250 
3251 	rte_spinlock_init(&internals->lock);
3252 	rte_spinlock_init(&internals->lsc_lock);
3253 
3254 	internals->port_id = eth_dev->data->port_id;
3255 	internals->mode = BONDING_MODE_INVALID;
3256 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3257 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3258 	internals->burst_xmit_hash = burst_xmit_l2_hash;
3259 	internals->user_defined_mac = 0;
3260 
3261 	internals->link_status_polling_enabled = 0;
3262 
3263 	internals->link_status_polling_interval_ms =
3264 		DEFAULT_POLLING_INTERVAL_10_MS;
3265 	internals->link_down_delay_ms = 0;
3266 	internals->link_up_delay_ms = 0;
3267 
3268 	internals->slave_count = 0;
3269 	internals->active_slave_count = 0;
3270 	internals->rx_offload_capa = 0;
3271 	internals->tx_offload_capa = 0;
3272 	internals->rx_queue_offload_capa = 0;
3273 	internals->tx_queue_offload_capa = 0;
3274 	internals->candidate_max_rx_pktlen = 0;
3275 	internals->max_rx_pktlen = 0;
3276 
3277 	/* Initially allow to choose any offload type */
3278 	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3279 
3280 	memset(&internals->default_rxconf, 0,
3281 	       sizeof(internals->default_rxconf));
3282 	memset(&internals->default_txconf, 0,
3283 	       sizeof(internals->default_txconf));
3284 
3285 	memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3286 	memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3287 
3288 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3289 	memset(internals->slaves, 0, sizeof(internals->slaves));
3290 
3291 	TAILQ_INIT(&internals->flow_list);
3292 	internals->flow_isolated_valid = 0;
3293 
3294 	/* Set mode 4 default configuration */
3295 	bond_mode_8023ad_setup(eth_dev, NULL);
3296 	if (bond_ethdev_mode_set(eth_dev, mode)) {
3297 		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3298 				 eth_dev->data->port_id, mode);
3299 		goto err;
3300 	}
3301 
3302 	vlan_filter_bmp_size =
3303 		rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3304 	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3305 						   RTE_CACHE_LINE_SIZE);
3306 	if (internals->vlan_filter_bmpmem == NULL) {
3307 		RTE_BOND_LOG(ERR,
3308 			     "Failed to allocate vlan bitmap for bonded device %u",
3309 			     eth_dev->data->port_id);
3310 		goto err;
3311 	}
3312 
3313 	internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3314 			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3315 	if (internals->vlan_filter_bmp == NULL) {
3316 		RTE_BOND_LOG(ERR,
3317 			     "Failed to init vlan bitmap for bonded device %u",
3318 			     eth_dev->data->port_id);
3319 		rte_free(internals->vlan_filter_bmpmem);
3320 		goto err;
3321 	}
3322 
3323 	return eth_dev->data->port_id;
3324 
3325 err:
3326 	rte_free(internals);
3327 	if (eth_dev != NULL)
3328 		eth_dev->data->dev_private = NULL;
3329 	rte_eth_dev_release_port(eth_dev);
3330 	return -1;
3331 }
3332 
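/*
 * vdev probe entry point. Parses the device arguments (bonding mode, socket
 * id, aggregator mode), creates the bonded ethdev via bond_alloc() and
 * finishes probing. A bonded vdev is typically created from EAL arguments,
 * for example (illustrative only):
 *
 *     --vdev 'net_bonding0,mode=4,slave=0000:01:00.0,slave=0000:01:00.1'
 *
 * The remaining kvargs are resolved later in bond_ethdev_configure().
 */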
3333 static int
3334 bond_probe(struct rte_vdev_device *dev)
3335 {
3336 	const char *name;
3337 	struct bond_dev_private *internals;
3338 	struct rte_kvargs *kvlist;
3339 	uint8_t bonding_mode;
3340 	int arg_count, port_id;
3341 	int socket_id;
3342 	uint8_t agg_mode;
3343 	struct rte_eth_dev *eth_dev;
3344 
3345 	if (!dev)
3346 		return -EINVAL;
3347 
3348 	name = rte_vdev_device_name(dev);
3349 	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3350 
3351 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3352 		eth_dev = rte_eth_dev_attach_secondary(name);
3353 		if (!eth_dev) {
3354 			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3355 			return -1;
3356 		}
3357 		/* TODO: request info from primary to set up Rx and Tx */
3358 		eth_dev->dev_ops = &default_dev_ops;
3359 		eth_dev->device = &dev->device;
3360 		rte_eth_dev_probing_finish(eth_dev);
3361 		return 0;
3362 	}
3363 
3364 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3365 		pmd_bond_init_valid_arguments);
3366 	if (kvlist == NULL)
3367 		return -1;
3368 
3369 	/* Parse link bonding mode */
3370 	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3371 		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3372 				&bond_ethdev_parse_slave_mode_kvarg,
3373 				&bonding_mode) != 0) {
3374 			RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3375 					name);
3376 			goto parse_error;
3377 		}
3378 	} else {
3379 		RTE_BOND_LOG(ERR, "Mode must be specified exactly once for bonded "
3380 				"device %s", name);
3381 		goto parse_error;
3382 	}
3383 
3384 	/* Parse socket id to create bonding device on */
3385 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3386 	if (arg_count == 1) {
3387 		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3388 				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
3389 				!= 0) {
3390 			RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3391 					"bonded device %s", name);
3392 			goto parse_error;
3393 		}
3394 	} else if (arg_count > 1) {
3395 		RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3396 				"bonded device %s", name);
3397 		goto parse_error;
3398 	} else {
3399 		socket_id = rte_socket_id();
3400 	}
3401 
3402 	dev->device.numa_node = socket_id;
3403 
3404 	/* Create link bonding eth device */
3405 	port_id = bond_alloc(dev, bonding_mode);
3406 	if (port_id < 0) {
3407 		RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3408 				"socket %u.", name, bonding_mode, socket_id);
3409 		goto parse_error;
3410 	}
3411 	internals = rte_eth_devices[port_id].data->dev_private;
3412 	internals->kvlist = kvlist;
3413 
3414 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3415 		if (rte_kvargs_process(kvlist,
3416 				PMD_BOND_AGG_MODE_KVARG,
3417 				&bond_ethdev_parse_slave_agg_mode_kvarg,
3418 				&agg_mode) != 0) {
3419 			RTE_BOND_LOG(ERR,
3420 					"Failed to parse agg selection mode for bonded device %s",
3421 					name);
3422 			goto parse_error;
3423 		}
3424 
3425 		if (internals->mode == BONDING_MODE_8023AD)
3426 			internals->mode4.agg_selection = agg_mode;
3427 	} else {
3428 		internals->mode4.agg_selection = AGG_STABLE;
3429 	}
3430 
3431 	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3432 	RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3433 			"socket %u.",	name, port_id, bonding_mode, socket_id);
3434 	return 0;
3435 
3436 parse_error:
3437 	rte_kvargs_free(kvlist);
3438 
3439 	return -1;
3440 }
3441 
3442 static int
3443 bond_remove(struct rte_vdev_device *dev)
3444 {
3445 	struct rte_eth_dev *eth_dev;
3446 	struct bond_dev_private *internals;
3447 	const char *name;
3448 	int ret = 0;
3449 
3450 	if (!dev)
3451 		return -EINVAL;
3452 
3453 	name = rte_vdev_device_name(dev);
3454 	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3455 
3456 	/* find an ethdev entry */
3457 	eth_dev = rte_eth_dev_allocated(name);
3458 	if (eth_dev == NULL)
3459 		return 0; /* port already released */
3460 
3461 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3462 		return rte_eth_dev_release_port(eth_dev);
3463 
3464 	RTE_ASSERT(eth_dev->device == &dev->device);
3465 
3466 	internals = eth_dev->data->dev_private;
3467 	if (internals->slave_count != 0)
3468 		return -EBUSY;
3469 
3470 	if (eth_dev->data->dev_started == 1) {
3471 		ret = bond_ethdev_stop(eth_dev);
3472 		bond_ethdev_close(eth_dev);
3473 	}
3474 	if (internals->kvlist != NULL)
3475 		rte_kvargs_free(internals->kvlist);
3476 	rte_eth_dev_release_port(eth_dev);
3477 
3478 	return ret;
3479 }
3480 
3481 /* this part resolves the slave port ids after all the other pdevs and vdevs
3482  * have been allocated */
3483 static int
3484 bond_ethdev_configure(struct rte_eth_dev *dev)
3485 {
3486 	const char *name = dev->device->name;
3487 	struct bond_dev_private *internals = dev->data->dev_private;
3488 	struct rte_kvargs *kvlist = internals->kvlist;
3489 	int arg_count;
3490 	uint16_t port_id = dev - rte_eth_devices;
3491 	uint8_t agg_mode;
3492 
3493 	static const uint8_t default_rss_key[40] = {
3494 		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3495 		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3496 		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3497 		0xBE, 0xAC, 0x01, 0xFA
3498 	};
3499 
3500 	unsigned i, j;
3501 
3502 	/*
3503 	 * If RSS is enabled, fill table with default values and
3504 	 * set the key to the value specified in the port RSS configuration.
3505 	 * Fall back to the default RSS key if no key is specified.
3506 	 */
3507 	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3508 		if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3509 			internals->rss_key_len =
3510 				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3511 			memcpy(internals->rss_key,
3512 			       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3513 			       internals->rss_key_len);
3514 		} else {
3515 			internals->rss_key_len = sizeof(default_rss_key);
3516 			memcpy(internals->rss_key, default_rss_key,
3517 			       internals->rss_key_len);
3518 		}
3519 
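		/* Spread the RETA entries round-robin across the configured Rx queues. */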
3520 		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3521 			internals->reta_conf[i].mask = ~0LL;
3522 			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3523 				internals->reta_conf[i].reta[j] =
3524 						(i * RTE_RETA_GROUP_SIZE + j) %
3525 						dev->data->nb_rx_queues;
3526 		}
3527 	}
3528 
3529 	/* set the max_rx_pktlen */
3530 	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3531 
3532 	/*
3533 	 * if no kvlist, it means that this bonded device has been created
3534 	 * through the bonding api.
3535 	 */
3536 	if (!kvlist)
3537 		return 0;
3538 
3539 	/* Parse MAC address for bonded device */
3540 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3541 	if (arg_count == 1) {
3542 		struct rte_ether_addr bond_mac;
3543 
3544 		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3545 				       &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3546 			RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3547 				     name);
3548 			return -1;
3549 		}
3550 
3551 		/* Set MAC address */
3552 		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3553 			RTE_BOND_LOG(ERR,
3554 				     "Failed to set mac address on bonded device %s",
3555 				     name);
3556 			return -1;
3557 		}
3558 	} else if (arg_count > 1) {
3559 		RTE_BOND_LOG(ERR,
3560 			     "MAC address can be specified only once for bonded device %s",
3561 			     name);
3562 		return -1;
3563 	}
3564 
3565 	/* Parse/set balance mode transmit policy */
3566 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3567 	if (arg_count == 1) {
3568 		uint8_t xmit_policy;
3569 
3570 		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3571 				       &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3572 		    0) {
3573 			RTE_BOND_LOG(INFO,
3574 				     "Invalid xmit policy specified for bonded device %s",
3575 				     name);
3576 			return -1;
3577 		}
3578 
3579 		/* Set balance mode transmit policy */
3580 		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3581 			RTE_BOND_LOG(ERR,
3582 				     "Failed to set balance xmit policy on bonded device %s",
3583 				     name);
3584 			return -1;
3585 		}
3586 	} else if (arg_count > 1) {
3587 		RTE_BOND_LOG(ERR,
3588 			     "Transmit policy can be specified only once for bonded device %s",
3589 			     name);
3590 		return -1;
3591 	}
3592 
3593 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3594 		if (rte_kvargs_process(kvlist,
3595 				       PMD_BOND_AGG_MODE_KVARG,
3596 				       &bond_ethdev_parse_slave_agg_mode_kvarg,
3597 				       &agg_mode) != 0) {
3598 			RTE_BOND_LOG(ERR,
3599 				     "Failed to parse agg selection mode for bonded device %s",
3600 				     name);
			return -1;
3601 		}
3602 		if (internals->mode == BONDING_MODE_8023AD) {
3603 			int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3604 					agg_mode);
3605 			if (ret < 0) {
3606 				RTE_BOND_LOG(ERR,
3607 					"Invalid args for agg selection set for bonded device %s",
3608 					name);
3609 				return -1;
3610 			}
3611 		}
3612 	}
3613 
3614 	/* Parse/add slave ports to bonded device */
3615 	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3616 		struct bond_ethdev_slave_ports slave_ports;
3617 		unsigned i;
3618 
3619 		memset(&slave_ports, 0, sizeof(slave_ports));
3620 
3621 		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3622 				       &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3623 			RTE_BOND_LOG(ERR,
3624 				     "Failed to parse slave ports for bonded device %s",
3625 				     name);
3626 			return -1;
3627 		}
3628 
3629 		for (i = 0; i < slave_ports.slave_count; i++) {
3630 			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3631 				RTE_BOND_LOG(ERR,
3632 					     "Failed to add port %d as slave to bonded device %s",
3633 					     slave_ports.slaves[i], name);
3634 			}
3635 		}
3636 
3637 	} else {
3638 		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3639 		return -1;
3640 	}
3641 
3642 	/* Parse/set primary slave port id */
3643 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3644 	if (arg_count == 1) {
3645 		uint16_t primary_slave_port_id;
3646 
3647 		if (rte_kvargs_process(kvlist,
3648 				       PMD_BOND_PRIMARY_SLAVE_KVARG,
3649 				       &bond_ethdev_parse_primary_slave_port_id_kvarg,
3650 				       &primary_slave_port_id) < 0) {
3651 			RTE_BOND_LOG(INFO,
3652 				     "Invalid primary slave port id specified for bonded device %s",
3653 				     name);
3654 			return -1;
3655 		}
3656 
3657 		/* Set the primary slave port */
3658 		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3659 		    != 0) {
3660 			RTE_BOND_LOG(ERR,
3661 				     "Failed to set primary slave port %d on bonded device %s",
3662 				     primary_slave_port_id, name);
3663 			return -1;
3664 		}
3665 	} else if (arg_count > 1) {
3666 		RTE_BOND_LOG(INFO,
3667 			     "Primary slave can be specified only once for bonded device %s",
3668 			     name);
3669 		return -1;
3670 	}
3671 
3672 	/* Parse link status monitor polling interval */
3673 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3674 	if (arg_count == 1) {
3675 		uint32_t lsc_poll_interval_ms;
3676 
3677 		if (rte_kvargs_process(kvlist,
3678 				       PMD_BOND_LSC_POLL_PERIOD_KVARG,
3679 				       &bond_ethdev_parse_time_ms_kvarg,
3680 				       &lsc_poll_interval_ms) < 0) {
3681 			RTE_BOND_LOG(INFO,
3682 				     "Invalid lsc polling interval value specified for bonded"
3683 				     " device %s", name);
3684 			return -1;
3685 		}
3686 
3687 		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3688 		    != 0) {
3689 			RTE_BOND_LOG(ERR,
3690 				     "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3691 				     lsc_poll_interval_ms, name);
3692 			return -1;
3693 		}
3694 	} else if (arg_count > 1) {
3695 		RTE_BOND_LOG(INFO,
3696 			     "LSC polling interval can be specified only once for bonded"
3697 			     " device %s", name);
3698 		return -1;
3699 	}
3700 
3701 	/* Parse link up interrupt propagation delay */
3702 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3703 	if (arg_count == 1) {
3704 		uint32_t link_up_delay_ms;
3705 
3706 		if (rte_kvargs_process(kvlist,
3707 				       PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3708 				       &bond_ethdev_parse_time_ms_kvarg,
3709 				       &link_up_delay_ms) < 0) {
3710 			RTE_BOND_LOG(INFO,
3711 				     "Invalid link up propagation delay value specified for"
3712 				     " bonded device %s", name);
3713 			return -1;
3714 		}
3715 
3716 		/* Set link up propagation delay */
3717 		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3718 		    != 0) {
3719 			RTE_BOND_LOG(ERR,
3720 				     "Failed to set link up propagation delay (%u ms) on bonded"
3721 				     " device %s", link_up_delay_ms, name);
3722 			return -1;
3723 		}
3724 	} else if (arg_count > 1) {
3725 		RTE_BOND_LOG(INFO,
3726 			     "Link up propagation delay can be specified only once for"
3727 			     " bonded device %s", name);
3728 		return -1;
3729 	}
3730 
3731 	/* Parse link down interrupt propagation delay */
3732 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3733 	if (arg_count == 1) {
3734 		uint32_t link_down_delay_ms;
3735 
3736 		if (rte_kvargs_process(kvlist,
3737 				       PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3738 				       &bond_ethdev_parse_time_ms_kvarg,
3739 				       &link_down_delay_ms) < 0) {
3740 			RTE_BOND_LOG(INFO,
3741 				     "Invalid link down propagation delay value specified for"
3742 				     " bonded device %s", name);
3743 			return -1;
3744 		}
3745 
3746 		/* Set link down propagation delay */
3747 		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3748 		    != 0) {
3749 			RTE_BOND_LOG(ERR,
3750 				     "Failed to set link down propagation delay (%u ms) on bonded device %s",
3751 				     link_down_delay_ms, name);
3752 			return -1;
3753 		}
3754 	} else if (arg_count > 1) {
3755 		RTE_BOND_LOG(INFO,
3756 			     "Link down propagation delay can be specified only once for bonded device %s",
3757 			     name);
3758 		return -1;
3759 	}
3760 
3761 	return 0;
3762 }
3763 
3764 struct rte_vdev_driver pmd_bond_drv = {
3765 	.probe = bond_probe,
3766 	.remove = bond_remove,
3767 };
3768 
3769 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3770 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3771 
3772 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3773 	"slave=<ifc> "
3774 	"primary=<ifc> "
3775 	"mode=[0-6] "
3776 	"xmit_policy=[l2 | l23 | l34] "
3777 	"agg_mode=[count | stable | bandwidth] "
3778 	"socket_id=<int> "
3779 	"mac=<mac addr> "
3780 	"lsc_poll_period_ms=<int> "
3781 	"up_delay=<int> "
3782 	"down_delay=<int>");
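
/*
 * Example EAL argument (hypothetical device name and PCI addresses)
 * combining the parameters registered above:
 *
 *   --vdev 'net_bonding0,mode=2,slave=0000:02:00.0,slave=0000:03:00.0,xmit_policy=l34,socket_id=0'
 *
 * The same device arguments can also be supplied at run time via
 * rte_vdev_init("net_bonding0", "mode=2,slave=0000:02:00.0,...").
 */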
3783 
3784 /* We can't use RTE_LOG_REGISTER_DEFAULT because of the forced name for
3785  * this library, see meson.build.
3786  */
3787 RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);
3788