xref: /dpdk/drivers/net/bonding/rte_eth_bond_pmd.c (revision 68a03efeed657e6e05f281479b33b51102797e15)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <stdbool.h>
6 #include <netinet/in.h>
7 
8 #include <rte_mbuf.h>
9 #include <rte_malloc.h>
10 #include <ethdev_driver.h>
11 #include <ethdev_vdev.h>
12 #include <rte_tcp.h>
13 #include <rte_udp.h>
14 #include <rte_ip.h>
15 #include <rte_ip_frag.h>
16 #include <rte_devargs.h>
17 #include <rte_kvargs.h>
18 #include <rte_bus_vdev.h>
19 #include <rte_alarm.h>
20 #include <rte_cycles.h>
21 #include <rte_string_fns.h>
22 
23 #include "rte_eth_bond.h"
24 #include "eth_bond_private.h"
25 #include "eth_bond_8023ad_private.h"
26 
27 #define REORDER_PERIOD_MS 10
28 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
29 #define BOND_MAX_MAC_ADDRS 16
30 
31 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
32 
33 /* Table for statistics in mode 5 TLB */
34 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
35 
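/*
 * Return the combined size of any VLAN headers (a single tag or a QinQ pair)
 * following the Ethernet header, and update *proto to the encapsulated
 * EtherType.
 */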
36 static inline size_t
37 get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto)
38 {
39 	size_t vlan_offset = 0;
40 
41 	if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto ||
42 		rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) {
43 		struct rte_vlan_hdr *vlan_hdr =
44 			(struct rte_vlan_hdr *)(eth_hdr + 1);
45 
46 		vlan_offset = sizeof(struct rte_vlan_hdr);
47 		*proto = vlan_hdr->eth_proto;
48 
49 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) {
50 			vlan_hdr = vlan_hdr + 1;
51 			*proto = vlan_hdr->eth_proto;
52 			vlan_offset += sizeof(struct rte_vlan_hdr);
53 		}
54 	}
55 	return vlan_offset;
56 }
57 
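/*
 * Rx burst used by the round robin, balance and broadcast modes: poll each
 * active slave in turn, starting from the queue's saved slave index, until
 * nb_pkts packets are gathered or every slave has been polled once.
 */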
58 static uint16_t
59 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
60 {
61 	struct bond_dev_private *internals;
62 
63 	uint16_t num_rx_total = 0;
64 	uint16_t slave_count;
65 	uint16_t active_slave;
66 	int i;
67 
68 	/* Cast to structure containing the bonded device's port id and queue id */
69 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
70 	internals = bd_rx_q->dev_private;
71 	slave_count = internals->active_slave_count;
72 	active_slave = bd_rx_q->active_slave;
73 
74 	for (i = 0; i < slave_count && nb_pkts; i++) {
75 		uint16_t num_rx_slave;
76 
77 		/* Offset of pointer to *bufs increases as packets are received
78 		 * from other slaves */
79 		num_rx_slave =
80 			rte_eth_rx_burst(internals->active_slaves[active_slave],
81 					 bd_rx_q->queue_id,
82 					 bufs + num_rx_total, nb_pkts);
83 		num_rx_total += num_rx_slave;
84 		nb_pkts -= num_rx_slave;
85 		if (++active_slave == slave_count)
86 			active_slave = 0;
87 	}
88 
89 	if (++bd_rx_q->active_slave >= slave_count)
90 		bd_rx_q->active_slave = 0;
91 	return num_rx_total;
92 }
93 
94 static uint16_t
95 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
96 		uint16_t nb_pkts)
97 {
98 	struct bond_dev_private *internals;
99 
100 	/* Cast to structure containing the bonded device's port id and queue id */
101 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
102 
103 	internals = bd_rx_q->dev_private;
104 
105 	return rte_eth_rx_burst(internals->current_primary_port,
106 			bd_rx_q->queue_id, bufs, nb_pkts);
107 }
108 
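/*
 * Return non-zero if the frame carries no VLAN tag and is a slow protocol
 * (LACP or marker) frame.
 */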
109 static inline uint8_t
110 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
111 {
112 	const uint16_t ether_type_slow_be =
113 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
114 
115 	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
116 		(ethertype == ether_type_slow_be &&
117 		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
118 }
119 
120 /*****************************************************************************
121  * Flow director's setup for mode 4 optimization
122  */
123 
124 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
125 	.dst.addr_bytes = { 0 },
126 	.src.addr_bytes = { 0 },
127 	.type = RTE_BE16(RTE_ETHER_TYPE_SLOW),
128 };
129 
130 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
131 	.dst.addr_bytes = { 0 },
132 	.src.addr_bytes = { 0 },
133 	.type = 0xFFFF,
134 };
135 
136 static struct rte_flow_item flow_item_8023ad[] = {
137 	{
138 		.type = RTE_FLOW_ITEM_TYPE_ETH,
139 		.spec = &flow_item_eth_type_8023ad,
140 		.last = NULL,
141 		.mask = &flow_item_eth_mask_type_8023ad,
142 	},
143 	{
144 		.type = RTE_FLOW_ITEM_TYPE_END,
145 		.spec = NULL,
146 		.last = NULL,
147 		.mask = NULL,
148 	}
149 };
150 
151 const struct rte_flow_attr flow_attr_8023ad = {
152 	.group = 0,
153 	.priority = 0,
154 	.ingress = 1,
155 	.egress = 0,
156 	.reserved = 0,
157 };
158 
159 int
160 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
161 		uint16_t slave_port) {
162 	struct rte_eth_dev_info slave_info;
163 	struct rte_flow_error error;
164 	struct bond_dev_private *internals = bond_dev->data->dev_private;
165 
166 	const struct rte_flow_action_queue lacp_queue_conf = {
167 		.index = 0,
168 	};
169 
170 	const struct rte_flow_action actions[] = {
171 		{
172 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
173 			.conf = &lacp_queue_conf
174 		},
175 		{
176 			.type = RTE_FLOW_ACTION_TYPE_END,
177 		}
178 	};
179 
180 	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
181 			flow_item_8023ad, actions, &error);
182 	if (ret < 0) {
183 		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
184 				__func__, error.message, slave_port,
185 				internals->mode4.dedicated_queues.rx_qid);
186 		return -1;
187 	}
188 
189 	ret = rte_eth_dev_info_get(slave_port, &slave_info);
190 	if (ret != 0) {
191 		RTE_BOND_LOG(ERR,
192 			"%s: Error during getting device (port %u) info: %s\n",
193 			__func__, slave_port, strerror(-ret));
194 
195 		return ret;
196 	}
197 
198 	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
199 			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
200 		RTE_BOND_LOG(ERR,
201 			"%s: Slave %d capabilities do not allow allocating additional queues",
202 			__func__, slave_port);
203 		return -1;
204 	}
205 
206 	return 0;
207 }
208 
209 int
210 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
211 	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
212 	struct bond_dev_private *internals = bond_dev->data->dev_private;
213 	struct rte_eth_dev_info bond_info;
214 	uint16_t idx;
215 	int ret;
216 
217 	/* Verify that all slaves in the bonding support flow director */
218 	if (internals->slave_count > 0) {
219 		ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
220 		if (ret != 0) {
221 			RTE_BOND_LOG(ERR,
222 				"%s: Error during getting device (port %u) info: %s\n",
223 				__func__, bond_dev->data->port_id,
224 				strerror(-ret));
225 
226 			return ret;
227 		}
228 
229 		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
230 		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
231 
232 		for (idx = 0; idx < internals->slave_count; idx++) {
233 			if (bond_ethdev_8023ad_flow_verify(bond_dev,
234 					internals->slaves[idx].port_id) != 0)
235 				return -1;
236 		}
237 	}
238 
239 	return 0;
240 }
241 
242 int
243 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
244 
245 	struct rte_flow_error error;
246 	struct bond_dev_private *internals = bond_dev->data->dev_private;
247 	struct rte_flow_action_queue lacp_queue_conf = {
248 		.index = internals->mode4.dedicated_queues.rx_qid,
249 	};
250 
251 	const struct rte_flow_action actions[] = {
252 		{
253 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
254 			.conf = &lacp_queue_conf
255 		},
256 		{
257 			.type = RTE_FLOW_ACTION_TYPE_END,
258 		}
259 	};
260 
261 	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
262 			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
263 	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
264 		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
265 				"(slave_port=%d queue_id=%d)",
266 				error.message, slave_port,
267 				internals->mode4.dedicated_queues.rx_qid);
268 		return -1;
269 	}
270 
271 	return 0;
272 }
273 
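/*
 * Common Rx path for mode 4 (802.3ad): receive from each active slave in turn
 * and strip out frames that must not reach the application: slow protocol
 * frames (handed to the mode 4 state machine when no dedicated Rx queue is
 * used), frames from slaves that are not collecting, and frames whose
 * destination MAC does not match when not in promiscuous/allmulti mode.
 */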
274 static inline uint16_t
275 rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts,
276 		bool dedicated_rxq)
277 {
278 	/* Cast to structure containing the bonded device's port id and queue id */
279 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
280 	struct bond_dev_private *internals = bd_rx_q->dev_private;
281 	struct rte_eth_dev *bonded_eth_dev =
282 					&rte_eth_devices[internals->port_id];
283 	struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
284 	struct rte_ether_hdr *hdr;
285 
286 	const uint16_t ether_type_slow_be =
287 		rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW);
288 	uint16_t num_rx_total = 0;	/* Total number of received packets */
289 	uint16_t slaves[RTE_MAX_ETHPORTS];
290 	uint16_t slave_count, idx;
291 
292 	uint8_t collecting;  /* current slave collecting status */
293 	const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id);
294 	const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id);
295 	uint8_t subtype;
296 	uint16_t i;
297 	uint16_t j;
298 	uint16_t k;
299 
300 	/* Copy slave list to protect against slave up/down changes during rx
301 	 * bursting */
302 	slave_count = internals->active_slave_count;
303 	memcpy(slaves, internals->active_slaves,
304 			sizeof(internals->active_slaves[0]) * slave_count);
305 
306 	idx = bd_rx_q->active_slave;
307 	if (idx >= slave_count) {
308 		bd_rx_q->active_slave = 0;
309 		idx = 0;
310 	}
311 	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
312 		j = num_rx_total;
313 		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
314 					 COLLECTING);
315 
316 		/* Read packets from this slave */
317 		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
318 				&bufs[num_rx_total], nb_pkts - num_rx_total);
319 
320 		for (k = j; k < 2 && k < num_rx_total; k++)
321 			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
322 
323 		/* Handle slow protocol packets. */
324 		while (j < num_rx_total) {
325 			if (j + 3 < num_rx_total)
326 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
327 
328 			hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *);
329 			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
330 
331 			/* Remove the packet from the array if:
332 			 * - it is a slow packet and no dedicated rxq is present,
333 			 * - the slave is not in collecting state,
334 			 * - the bonding interface is not in promiscuous mode and:
335 			 *   - the packet is unicast and the address does not match,
336 			 *   - the packet is multicast and the bonding interface
337 			 *     is not in allmulti mode.
338 			 */
339 			if (unlikely(
340 				(!dedicated_rxq &&
341 				 is_lacp_packets(hdr->ether_type, subtype,
342 						 bufs[j])) ||
343 				!collecting ||
344 				(!promisc &&
345 				 ((rte_is_unicast_ether_addr(&hdr->d_addr) &&
346 				   !rte_is_same_ether_addr(bond_mac,
347 						       &hdr->d_addr)) ||
348 				  (!allmulti &&
349 				   rte_is_multicast_ether_addr(&hdr->d_addr)))))) {
350 
351 				if (hdr->ether_type == ether_type_slow_be) {
352 					bond_mode_8023ad_handle_slow_pkt(
353 					    internals, slaves[idx], bufs[j]);
354 				} else
355 					rte_pktmbuf_free(bufs[j]);
356 
357 				/* Packet is managed by mode 4 or dropped, shift the array */
358 				num_rx_total--;
359 				if (j < num_rx_total) {
360 					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
361 						(num_rx_total - j));
362 				}
363 			} else
364 				j++;
365 		}
366 		if (unlikely(++idx == slave_count))
367 			idx = 0;
368 	}
369 
370 	if (++bd_rx_q->active_slave >= slave_count)
371 		bd_rx_q->active_slave = 0;
372 
373 	return num_rx_total;
374 }
375 
376 static uint16_t
377 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
378 		uint16_t nb_pkts)
379 {
380 	return rx_burst_8023ad(queue, bufs, nb_pkts, false);
381 }
382 
383 static uint16_t
384 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
385 		uint16_t nb_pkts)
386 {
387 	return rx_burst_8023ad(queue, bufs, nb_pkts, true);
388 }
389 
390 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
391 uint32_t burstnumberRX;
392 uint32_t burstnumberTX;
393 
394 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
395 
396 static void
397 arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
398 {
399 	switch (arp_op) {
400 	case RTE_ARP_OP_REQUEST:
401 		strlcpy(buf, "ARP Request", buf_len);
402 		return;
403 	case RTE_ARP_OP_REPLY:
404 		strlcpy(buf, "ARP Reply", buf_len);
405 		return;
406 	case RTE_ARP_OP_REVREQUEST:
407 		strlcpy(buf, "Reverse ARP Request", buf_len);
408 		return;
409 	case RTE_ARP_OP_REVREPLY:
410 		strlcpy(buf, "Reverse ARP Reply", buf_len);
411 		return;
412 	case RTE_ARP_OP_INVREQUEST:
413 		strlcpy(buf, "Peer Identify Request", buf_len);
414 		return;
415 	case RTE_ARP_OP_INVREPLY:
416 		strlcpy(buf, "Peer Identify Reply", buf_len);
417 		return;
418 	default:
419 		break;
420 	}
421 	strlcpy(buf, "Unknown", buf_len);
422 	return;
423 }
424 #endif
425 #define MaxIPv4String	16
426 static void
427 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
428 {
429 	uint32_t ipv4_addr;
430 
431 	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
432 	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
433 		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
434 		ipv4_addr & 0xFF);
435 }
436 
437 #define MAX_CLIENTS_NUMBER	128
438 uint8_t active_clients;
439 struct client_stats_t {
440 	uint16_t port;
441 	uint32_t ipv4_addr;
442 	uint32_t ipv4_rx_packets;
443 	uint32_t ipv4_tx_packets;
444 };
445 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
446 
447 static void
448 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
449 {
450 	int i = 0;
451 
452 	for (; i < MAX_CLIENTS_NUMBER; i++)	{
453 		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
454 			/* Update the RX or TX packet count for this existing client */
455 			if (TXorRXindicator == &burstnumberRX)
456 				client_stats[i].ipv4_rx_packets++;
457 			else
458 				client_stats[i].ipv4_tx_packets++;
459 			return;
460 		}
461 	}
462 	/* We have a new client. Insert it into the table and update its stats */
463 	if (TXorRXindicator == &burstnumberRX)
464 		client_stats[active_clients].ipv4_rx_packets++;
465 	else
466 		client_stats[active_clients].ipv4_tx_packets++;
467 	client_stats[active_clients].ipv4_addr = addr;
468 	client_stats[active_clients].port = port;
469 	active_clients++;
470 
471 }
472 
473 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
474 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
475 	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
476 		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
477 		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
478 		info,							\
479 		port,							\
480 		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
481 		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
482 		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
483 		src_ip,							\
484 		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
485 		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
486 		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
487 		dst_ip,							\
488 		arp_op, ++burstnumber)
489 #endif
490 
491 static void
492 mode6_debug(const char __rte_unused *info,
493 	struct rte_ether_hdr *eth_h, uint16_t port,
494 	uint32_t __rte_unused *burstnumber)
495 {
496 	struct rte_ipv4_hdr *ipv4_h;
497 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
498 	struct rte_arp_hdr *arp_h;
499 	char dst_ip[16];
500 	char ArpOp[24];
501 	char buf[16];
502 #endif
503 	char src_ip[16];
504 
505 	uint16_t ether_type = eth_h->ether_type;
506 	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
507 
508 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
509 	strlcpy(buf, info, 16);
510 #endif
511 
512 	if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
513 		ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset);
514 		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
515 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
516 		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
517 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
518 #endif
519 		update_client_stats(ipv4_h->src_addr, port, burstnumber);
520 	}
521 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
522 	else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
523 		arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset);
524 		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
525 		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
526 		arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode),
527 				ArpOp, sizeof(ArpOp));
528 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
529 	}
530 #endif
531 }
532 #endif
533 
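/*
 * ALB (mode 6) Rx: receive as in round robin mode, then pass any ARP packets
 * to the ALB handler so the client table stays up to date.
 */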
534 static uint16_t
535 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
536 {
537 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
538 	struct bond_dev_private *internals = bd_rx_q->dev_private;
539 	struct rte_ether_hdr *eth_h;
540 	uint16_t ether_type, offset;
541 	uint16_t nb_recv_pkts;
542 	int i;
543 
544 	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
545 
546 	for (i = 0; i < nb_recv_pkts; i++) {
547 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
548 		ether_type = eth_h->ether_type;
549 		offset = get_vlan_offset(eth_h, &ether_type);
550 
551 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
552 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
553 			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
554 #endif
555 			bond_mode_alb_arp_recv(eth_h, offset, internals);
556 		}
557 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
558 		else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
559 			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
560 #endif
561 	}
562 
563 	return nb_recv_pkts;
564 }
565 
566 static uint16_t
567 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
568 		uint16_t nb_pkts)
569 {
570 	struct bond_dev_private *internals;
571 	struct bond_tx_queue *bd_tx_q;
572 
573 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
574 	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
575 
576 	uint16_t num_of_slaves;
577 	uint16_t slaves[RTE_MAX_ETHPORTS];
578 
579 	uint16_t num_tx_total = 0, num_tx_slave;
580 
581 	static int slave_idx = 0;
582 	int i, cslave_idx = 0, tx_fail_total = 0;
583 
584 	bd_tx_q = (struct bond_tx_queue *)queue;
585 	internals = bd_tx_q->dev_private;
586 
587 	/* Copy slave list to protect against slave up/down changes during tx
588 	 * bursting */
589 	num_of_slaves = internals->active_slave_count;
590 	memcpy(slaves, internals->active_slaves,
591 			sizeof(internals->active_slaves[0]) * num_of_slaves);
592 
593 	if (num_of_slaves < 1)
594 		return num_tx_total;
595 
596 	/* Populate each slave's mbuf array with the packets to be sent on it */
597 	for (i = 0; i < nb_pkts; i++) {
598 		cslave_idx = (slave_idx + i) % num_of_slaves;
599 		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
600 	}
601 
602 	/* increment current slave index so the next call to tx burst starts on the
603 	 * next slave */
604 	slave_idx = ++cslave_idx;
605 
606 	/* Send packet burst on each slave device */
607 	for (i = 0; i < num_of_slaves; i++) {
608 		if (slave_nb_pkts[i] > 0) {
609 			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
610 					slave_bufs[i], slave_nb_pkts[i]);
611 
612 			/* if tx burst fails move packets to end of bufs */
613 			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
614 				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
615 
616 				tx_fail_total += tx_fail_slave;
617 
618 				memcpy(&bufs[nb_pkts - tx_fail_total],
619 				       &slave_bufs[i][num_tx_slave],
620 				       tx_fail_slave * sizeof(bufs[0]));
621 			}
622 			num_tx_total += num_tx_slave;
623 		}
624 	}
625 
626 	return num_tx_total;
627 }
628 
629 static uint16_t
630 bond_ethdev_tx_burst_active_backup(void *queue,
631 		struct rte_mbuf **bufs, uint16_t nb_pkts)
632 {
633 	struct bond_dev_private *internals;
634 	struct bond_tx_queue *bd_tx_q;
635 
636 	bd_tx_q = (struct bond_tx_queue *)queue;
637 	internals = bd_tx_q->dev_private;
638 
639 	if (internals->active_slave_count < 1)
640 		return 0;
641 
642 	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
643 			bufs, nb_pkts);
644 }
645 
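/* XOR-fold the source and destination MAC addresses into a 16-bit value. */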
646 static inline uint16_t
647 ether_hash(struct rte_ether_hdr *eth_hdr)
648 {
649 	unaligned_uint16_t *word_src_addr =
650 		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
651 	unaligned_uint16_t *word_dst_addr =
652 		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
653 
654 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
655 			(word_src_addr[1] ^ word_dst_addr[1]) ^
656 			(word_src_addr[2] ^ word_dst_addr[2]);
657 }
658 
659 static inline uint32_t
660 ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr)
661 {
662 	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
663 }
664 
665 static inline uint32_t
666 ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr)
667 {
668 	unaligned_uint32_t *word_src_addr =
669 		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
670 	unaligned_uint32_t *word_dst_addr =
671 		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
672 
673 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
674 			(word_src_addr[1] ^ word_dst_addr[1]) ^
675 			(word_src_addr[2] ^ word_dst_addr[2]) ^
676 			(word_src_addr[3] ^ word_dst_addr[3]);
677 }
678 
679 
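/*
 * Layer 2 xmit policy: map each packet to a slave index using a hash of its
 * source and destination MAC addresses.
 */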
680 void
681 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
682 		uint16_t slave_count, uint16_t *slaves)
683 {
684 	struct rte_ether_hdr *eth_hdr;
685 	uint32_t hash;
686 	int i;
687 
688 	for (i = 0; i < nb_pkts; i++) {
689 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
690 
691 		hash = ether_hash(eth_hdr);
692 
693 		slaves[i] = (hash ^= hash >> 8) % slave_count;
694 	}
695 }
696 
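/*
 * Layer 2+3 xmit policy: combine the MAC address hash with an IPv4/IPv6
 * address hash before folding it into a slave index.
 */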
697 void
698 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
699 		uint16_t slave_count, uint16_t *slaves)
700 {
701 	uint16_t i;
702 	struct rte_ether_hdr *eth_hdr;
703 	uint16_t proto;
704 	size_t vlan_offset;
705 	uint32_t hash, l3hash;
706 
707 	for (i = 0; i < nb_pkts; i++) {
708 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
709 		l3hash = 0;
710 
711 		proto = eth_hdr->ether_type;
712 		hash = ether_hash(eth_hdr);
713 
714 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
715 
716 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
717 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
718 					((char *)(eth_hdr + 1) + vlan_offset);
719 			l3hash = ipv4_hash(ipv4_hdr);
720 
721 		} else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
722 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
723 					((char *)(eth_hdr + 1) + vlan_offset);
724 			l3hash = ipv6_hash(ipv6_hdr);
725 		}
726 
727 		hash = hash ^ l3hash;
728 		hash ^= hash >> 16;
729 		hash ^= hash >> 8;
730 
731 		slaves[i] = hash % slave_count;
732 	}
733 }
734 
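/*
 * Layer 3+4 xmit policy: hash the IPv4/IPv6 addresses and the TCP/UDP ports
 * when an L4 header is present, then fold the result into a slave index.
 */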
735 void
736 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
737 		uint16_t slave_count, uint16_t *slaves)
738 {
739 	struct rte_ether_hdr *eth_hdr;
740 	uint16_t proto;
741 	size_t vlan_offset;
742 	int i;
743 
744 	struct rte_udp_hdr *udp_hdr;
745 	struct rte_tcp_hdr *tcp_hdr;
746 	uint32_t hash, l3hash, l4hash;
747 
748 	for (i = 0; i < nb_pkts; i++) {
749 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *);
750 		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
751 		proto = eth_hdr->ether_type;
752 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
753 		l3hash = 0;
754 		l4hash = 0;
755 
756 		if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) {
757 			struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *)
758 					((char *)(eth_hdr + 1) + vlan_offset);
759 			size_t ip_hdr_offset;
760 
761 			l3hash = ipv4_hash(ipv4_hdr);
762 
763 			/* there is no L4 header in a fragmented packet */
764 			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
765 								== 0)) {
766 				ip_hdr_offset = (ipv4_hdr->version_ihl
767 					& RTE_IPV4_HDR_IHL_MASK) *
768 					RTE_IPV4_IHL_MULTIPLIER;
769 
770 				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
771 					tcp_hdr = (struct rte_tcp_hdr *)
772 						((char *)ipv4_hdr +
773 							ip_hdr_offset);
774 					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
775 							< pkt_end)
776 						l4hash = HASH_L4_PORTS(tcp_hdr);
777 				} else if (ipv4_hdr->next_proto_id ==
778 								IPPROTO_UDP) {
779 					udp_hdr = (struct rte_udp_hdr *)
780 						((char *)ipv4_hdr +
781 							ip_hdr_offset);
782 					if ((size_t)udp_hdr + sizeof(*udp_hdr)
783 							< pkt_end)
784 						l4hash = HASH_L4_PORTS(udp_hdr);
785 				}
786 			}
787 		} else if  (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) {
788 			struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *)
789 					((char *)(eth_hdr + 1) + vlan_offset);
790 			l3hash = ipv6_hash(ipv6_hdr);
791 
792 			if (ipv6_hdr->proto == IPPROTO_TCP) {
793 				tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1);
794 				l4hash = HASH_L4_PORTS(tcp_hdr);
795 			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
796 				udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1);
797 				l4hash = HASH_L4_PORTS(udp_hdr);
798 			}
799 		}
800 
801 		hash = l3hash ^ l4hash;
802 		hash ^= hash >> 16;
803 		hash ^= hash >> 8;
804 
805 		slaves[i] = hash % slave_count;
806 	}
807 }
808 
809 struct bwg_slave {
810 	uint64_t bwg_left_int;
811 	uint64_t bwg_left_remainder;
812 	uint16_t slave;
813 };
814 
815 void
816 bond_tlb_activate_slave(struct bond_dev_private *internals) {
817 	int i;
818 
819 	for (i = 0; i < internals->active_slave_count; i++) {
820 		tlb_last_obytets[internals->active_slaves[i]] = 0;
821 	}
822 }
823 
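/* qsort comparator: order slaves by descending remaining bandwidth. */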
824 static int
825 bandwidth_cmp(const void *a, const void *b)
826 {
827 	const struct bwg_slave *bwg_a = a;
828 	const struct bwg_slave *bwg_b = b;
829 	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
830 	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
831 			(int64_t)bwg_a->bwg_left_remainder;
832 	if (diff > 0)
833 		return 1;
834 	else if (diff < 0)
835 		return -1;
836 	else if (diff2 > 0)
837 		return 1;
838 	else if (diff2 < 0)
839 		return -1;
840 	else
841 		return 0;
842 }
843 
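/*
 * Estimate the bandwidth still unused on a slave during the current TLB
 * update window, from its link speed and the bytes sent since the last sample.
 */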
844 static void
845 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
846 		struct bwg_slave *bwg_slave)
847 {
848 	struct rte_eth_link link_status;
849 	int ret;
850 
851 	ret = rte_eth_link_get_nowait(port_id, &link_status);
852 	if (ret < 0) {
853 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
854 			     port_id, rte_strerror(-ret));
855 		return;
856 	}
857 	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
858 	if (link_bwg == 0)
859 		return;
860 	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
861 	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
862 	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
863 }
864 
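/*
 * Periodic alarm callback for TLB mode: sample each active slave's Tx byte
 * count, estimate its remaining bandwidth and re-sort tlb_slaves_order so
 * transmission prefers the least loaded slaves.
 */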
865 static void
866 bond_ethdev_update_tlb_slave_cb(void *arg)
867 {
868 	struct bond_dev_private *internals = arg;
869 	struct rte_eth_stats slave_stats;
870 	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
871 	uint16_t slave_count;
872 	uint64_t tx_bytes;
873 
874 	uint8_t update_stats = 0;
875 	uint16_t slave_id;
876 	uint16_t i;
877 
878 	internals->slave_update_idx++;
879 
880 
881 	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
882 		update_stats = 1;
883 
884 	for (i = 0; i < internals->active_slave_count; i++) {
885 		slave_id = internals->active_slaves[i];
886 		rte_eth_stats_get(slave_id, &slave_stats);
887 		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
888 		bandwidth_left(slave_id, tx_bytes,
889 				internals->slave_update_idx, &bwg_array[i]);
890 		bwg_array[i].slave = slave_id;
891 
892 		if (update_stats) {
893 			tlb_last_obytets[slave_id] = slave_stats.obytes;
894 		}
895 	}
896 
897 	if (update_stats == 1)
898 		internals->slave_update_idx = 0;
899 
900 	slave_count = i;
901 	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
902 	for (i = 0; i < slave_count; i++)
903 		internals->tlb_slaves_order[i] = bwg_array[i].slave;
904 
905 	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
906 			(struct bond_dev_private *)internals);
907 }
908 
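/*
 * TLB Tx: walk the slaves in remaining-bandwidth order; before sending on a
 * slave, rewrite the source MAC of packets carrying the primary's address to
 * that slave's own address, and push any leftover packets to the next slave.
 */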
909 static uint16_t
910 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
911 {
912 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
913 	struct bond_dev_private *internals = bd_tx_q->dev_private;
914 
915 	struct rte_eth_dev *primary_port =
916 			&rte_eth_devices[internals->primary_port];
917 	uint16_t num_tx_total = 0;
918 	uint16_t i, j;
919 
920 	uint16_t num_of_slaves = internals->active_slave_count;
921 	uint16_t slaves[RTE_MAX_ETHPORTS];
922 
923 	struct rte_ether_hdr *ether_hdr;
924 	struct rte_ether_addr primary_slave_addr;
925 	struct rte_ether_addr active_slave_addr;
926 
927 	if (num_of_slaves < 1)
928 		return num_tx_total;
929 
930 	memcpy(slaves, internals->tlb_slaves_order,
931 				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
932 
933 
934 	rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
935 
936 	if (nb_pkts > 3) {
937 		for (i = 0; i < 3; i++)
938 			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
939 	}
940 
941 	for (i = 0; i < num_of_slaves; i++) {
942 		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
943 		for (j = num_tx_total; j < nb_pkts; j++) {
944 			if (j + 3 < nb_pkts)
945 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
946 
947 			ether_hdr = rte_pktmbuf_mtod(bufs[j],
948 						struct rte_ether_hdr *);
949 			if (rte_is_same_ether_addr(&ether_hdr->s_addr,
950 							&primary_slave_addr))
951 				rte_ether_addr_copy(&active_slave_addr,
952 						&ether_hdr->s_addr);
953 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
954 					mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
955 #endif
956 		}
957 
958 		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
959 				bufs + num_tx_total, nb_pkts - num_tx_total);
960 
961 		if (num_tx_total == nb_pkts)
962 			break;
963 	}
964 
965 	return num_tx_total;
966 }
967 
968 void
969 bond_tlb_disable(struct bond_dev_private *internals)
970 {
971 	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
972 }
973 
974 void
975 bond_tlb_enable(struct bond_dev_private *internals)
976 {
977 	bond_ethdev_update_tlb_slave_cb(internals);
978 }
979 
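/*
 * ALB (mode 6) Tx: ARP packets are assigned a slave by the ALB table and get
 * that slave's source MAC, periodic ARP updates are generated for known
 * clients, and all remaining traffic is sent with the TLB policy.
 */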
980 static uint16_t
981 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
982 {
983 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
984 	struct bond_dev_private *internals = bd_tx_q->dev_private;
985 
986 	struct rte_ether_hdr *eth_h;
987 	uint16_t ether_type, offset;
988 
989 	struct client_data *client_info;
990 
991 	/*
992 	 * We create transmit buffers for every slave and one additional to send
993 	 * through TLB. In the worst case every packet will be sent on one port.
994 	 */
995 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
996 	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
997 
998 	/*
999 	 * We create separate transmit buffers for update packets as they won't
1000 	 * be counted in num_tx_total.
1001 	 */
1002 	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1003 	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1004 
1005 	struct rte_mbuf *upd_pkt;
1006 	size_t pkt_size;
1007 
1008 	uint16_t num_send, num_not_send = 0;
1009 	uint16_t num_tx_total = 0;
1010 	uint16_t slave_idx;
1011 
1012 	int i, j;
1013 
1014 	/* Search tx buffer for ARP packets and forward them to alb */
1015 	for (i = 0; i < nb_pkts; i++) {
1016 		eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *);
1017 		ether_type = eth_h->ether_type;
1018 		offset = get_vlan_offset(eth_h, &ether_type);
1019 
1020 		if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
1021 			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1022 
1023 			/* Change src mac in eth header */
1024 			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1025 
1026 			/* Add packet to slave tx buffer */
1027 			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1028 			slave_bufs_pkts[slave_idx]++;
1029 		} else {
1030 			/* If packet is not ARP, send it with TLB policy */
1031 			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1032 					bufs[i];
1033 			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1034 		}
1035 	}
1036 
1037 	/* Update connected client ARP tables */
1038 	if (internals->mode6.ntt) {
1039 		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1040 			client_info = &internals->mode6.client_table[i];
1041 
1042 			if (client_info->in_use) {
1043 				/* Allocate new packet to send ARP update on current slave */
1044 				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1045 				if (upd_pkt == NULL) {
1046 					RTE_BOND_LOG(ERR,
1047 						     "Failed to allocate ARP packet from pool");
1048 					continue;
1049 				}
1050 				pkt_size = sizeof(struct rte_ether_hdr) +
1051 					sizeof(struct rte_arp_hdr) +
1052 					client_info->vlan_count *
1053 					sizeof(struct rte_vlan_hdr);
1054 				upd_pkt->data_len = pkt_size;
1055 				upd_pkt->pkt_len = pkt_size;
1056 
1057 				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1058 						internals);
1059 
1060 				/* Add packet to update tx buffer */
1061 				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1062 				update_bufs_pkts[slave_idx]++;
1063 			}
1064 		}
1065 		internals->mode6.ntt = 0;
1066 	}
1067 
1068 	/* Send ARP packets on proper slaves */
1069 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1070 		if (slave_bufs_pkts[i] > 0) {
1071 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1072 					slave_bufs[i], slave_bufs_pkts[i]);
1073 			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1074 				bufs[nb_pkts - 1 - num_not_send - j] =
1075 						slave_bufs[i][nb_pkts - 1 - j];
1076 			}
1077 
1078 			num_tx_total += num_send;
1079 			num_not_send += slave_bufs_pkts[i] - num_send;
1080 
1081 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1082 	/* Print TX stats including update packets */
1083 			for (j = 0; j < slave_bufs_pkts[i]; j++) {
1084 				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j],
1085 							struct rte_ether_hdr *);
1086 				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1087 			}
1088 #endif
1089 		}
1090 	}
1091 
1092 	/* Send update packets on proper slaves */
1093 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1094 		if (update_bufs_pkts[i] > 0) {
1095 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1096 					update_bufs_pkts[i]);
1097 			for (j = num_send; j < update_bufs_pkts[i]; j++) {
1098 				rte_pktmbuf_free(update_bufs[i][j]);
1099 			}
1100 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1101 			for (j = 0; j < update_bufs_pkts[i]; j++) {
1102 				eth_h = rte_pktmbuf_mtod(update_bufs[i][j],
1103 							struct rte_ether_hdr *);
1104 				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1105 			}
1106 #endif
1107 		}
1108 	}
1109 
1110 	/* Send non-ARP packets using tlb policy */
1111 	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1112 		num_send = bond_ethdev_tx_burst_tlb(queue,
1113 				slave_bufs[RTE_MAX_ETHPORTS],
1114 				slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1115 
1116 		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1117 			bufs[nb_pkts - 1 - num_not_send - j] =
1118 					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1119 		}
1120 
1121 		num_tx_total += num_send;
1122 	}
1123 
1124 	return num_tx_total;
1125 }
1126 
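/*
 * Common Tx path for the balance and 802.3ad modes: hash each packet to a
 * slave with the configured xmit policy, transmit per-slave bursts and move
 * any untransmitted packets to the end of bufs so the caller can retry or
 * free them.
 */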
1127 static inline uint16_t
1128 tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1129 		 uint16_t *slave_port_ids, uint16_t slave_count)
1130 {
1131 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1132 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1133 
1134 	/* Array to sort mbufs for transmission on each slave into */
1135 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1136 	/* Number of mbufs for transmission on each slave */
1137 	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1138 	/* Mapping array generated by hash function to map mbufs to slaves */
1139 	uint16_t bufs_slave_port_idxs[nb_bufs];
1140 
1141 	uint16_t slave_tx_count;
1142 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1143 
1144 	uint16_t i;
1145 
1146 	/*
1147 	 * Populate each slave's mbuf array with the packets to be sent on it,
1148 	 * selecting the output slave with a hash based on the xmit policy
1149 	 */
1150 	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1151 			bufs_slave_port_idxs);
1152 
1153 	for (i = 0; i < nb_bufs; i++) {
1154 		/* Populate slave mbuf arrays with mbufs for that slave. */
1155 		uint16_t slave_idx = bufs_slave_port_idxs[i];
1156 
1157 		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1158 	}
1159 
1160 	/* Send packet burst on each slave device */
1161 	for (i = 0; i < slave_count; i++) {
1162 		if (slave_nb_bufs[i] == 0)
1163 			continue;
1164 
1165 		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1166 				bd_tx_q->queue_id, slave_bufs[i],
1167 				slave_nb_bufs[i]);
1168 
1169 		total_tx_count += slave_tx_count;
1170 
1171 		/* If tx burst fails move packets to end of bufs */
1172 		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1173 			int slave_tx_fail_count = slave_nb_bufs[i] -
1174 					slave_tx_count;
1175 			total_tx_fail_count += slave_tx_fail_count;
1176 			memcpy(&bufs[nb_bufs - total_tx_fail_count],
1177 			       &slave_bufs[i][slave_tx_count],
1178 			       slave_tx_fail_count * sizeof(bufs[0]));
1179 		}
1180 	}
1181 
1182 	return total_tx_count;
1183 }
1184 
1185 static uint16_t
1186 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1187 		uint16_t nb_bufs)
1188 {
1189 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1190 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1191 
1192 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1193 	uint16_t slave_count;
1194 
1195 	if (unlikely(nb_bufs == 0))
1196 		return 0;
1197 
1198 	/* Copy slave list to protect against slave up/down changes during tx
1199 	 * bursting
1200 	 */
1201 	slave_count = internals->active_slave_count;
1202 	if (unlikely(slave_count < 1))
1203 		return 0;
1204 
1205 	memcpy(slave_port_ids, internals->active_slaves,
1206 			sizeof(slave_port_ids[0]) * slave_count);
1207 	return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids,
1208 				slave_count);
1209 }
1210 
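/*
 * Tx path for mode 4: unless dedicated control queues are in use, first send
 * a pending LACP control frame from each slave's tx_ring, then distribute the
 * data packets across the slaves currently in DISTRIBUTING state.
 */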
1211 static inline uint16_t
1212 tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs,
1213 		bool dedicated_txq)
1214 {
1215 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1216 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1217 
1218 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1219 	uint16_t slave_count;
1220 
1221 	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1222 	uint16_t dist_slave_count;
1223 
1224 	uint16_t slave_tx_count;
1225 
1226 	uint16_t i;
1227 
1228 	/* Copy slave list to protect against slave up/down changes during tx
1229 	 * bursting */
1230 	slave_count = internals->active_slave_count;
1231 	if (unlikely(slave_count < 1))
1232 		return 0;
1233 
1234 	memcpy(slave_port_ids, internals->active_slaves,
1235 			sizeof(slave_port_ids[0]) * slave_count);
1236 
1237 	if (dedicated_txq)
1238 		goto skip_tx_ring;
1239 
1240 	/* Check for LACP control packets and send if available */
1241 	for (i = 0; i < slave_count; i++) {
1242 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1243 		struct rte_mbuf *ctrl_pkt = NULL;
1244 
1245 		if (likely(rte_ring_empty(port->tx_ring)))
1246 			continue;
1247 
1248 		if (rte_ring_dequeue(port->tx_ring,
1249 				     (void **)&ctrl_pkt) != -ENOENT) {
1250 			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1251 					bd_tx_q->queue_id, &ctrl_pkt, 1);
1252 			/*
1253 			 * re-enqueue LAG control plane packets to buffering
1254 			 * ring if transmission fails so the packet isn't lost.
1255 			 */
1256 			if (slave_tx_count != 1)
1257 				rte_ring_enqueue(port->tx_ring,	ctrl_pkt);
1258 		}
1259 	}
1260 
1261 skip_tx_ring:
1262 	if (unlikely(nb_bufs == 0))
1263 		return 0;
1264 
1265 	dist_slave_count = 0;
1266 	for (i = 0; i < slave_count; i++) {
1267 		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
1268 
1269 		if (ACTOR_STATE(port, DISTRIBUTING))
1270 			dist_slave_port_ids[dist_slave_count++] =
1271 					slave_port_ids[i];
1272 	}
1273 
1274 	if (unlikely(dist_slave_count < 1))
1275 		return 0;
1276 
1277 	return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids,
1278 				dist_slave_count);
1279 }
1280 
1281 static uint16_t
1282 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1283 		uint16_t nb_bufs)
1284 {
1285 	return tx_burst_8023ad(queue, bufs, nb_bufs, false);
1286 }
1287 
1288 static uint16_t
1289 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
1290 		uint16_t nb_bufs)
1291 {
1292 	return tx_burst_8023ad(queue, bufs, nb_bufs, true);
1293 }
1294 
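/*
 * Broadcast Tx: bump each mbuf's reference count and transmit the whole burst
 * on every active slave; report the count from the most successful slave and
 * free the references that were not consumed on the other slaves.
 */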
1295 static uint16_t
1296 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1297 		uint16_t nb_pkts)
1298 {
1299 	struct bond_dev_private *internals;
1300 	struct bond_tx_queue *bd_tx_q;
1301 
1302 	uint16_t slaves[RTE_MAX_ETHPORTS];
1303 	uint8_t tx_failed_flag = 0;
1304 	uint16_t num_of_slaves;
1305 
1306 	uint16_t max_nb_of_tx_pkts = 0;
1307 
1308 	int slave_tx_total[RTE_MAX_ETHPORTS];
1309 	int i, most_successful_tx_slave = -1;
1310 
1311 	bd_tx_q = (struct bond_tx_queue *)queue;
1312 	internals = bd_tx_q->dev_private;
1313 
1314 	/* Copy slave list to protect against slave up/down changes during tx
1315 	 * bursting */
1316 	num_of_slaves = internals->active_slave_count;
1317 	memcpy(slaves, internals->active_slaves,
1318 			sizeof(internals->active_slaves[0]) * num_of_slaves);
1319 
1320 	if (num_of_slaves < 1)
1321 		return 0;
1322 
1323 	/* Increment reference count on mbufs */
1324 	for (i = 0; i < nb_pkts; i++)
1325 		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1326 
1327 	/* Transmit burst on each active slave */
1328 	for (i = 0; i < num_of_slaves; i++) {
1329 		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1330 					bufs, nb_pkts);
1331 
1332 		if (unlikely(slave_tx_total[i] < nb_pkts))
1333 			tx_failed_flag = 1;
1334 
1335 		/* record the value and slave index for the slave which transmits the
1336 		 * maximum number of packets */
1337 		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1338 			max_nb_of_tx_pkts = slave_tx_total[i];
1339 			most_successful_tx_slave = i;
1340 		}
1341 	}
1342 
1343 	/* If slaves fail to transmit packets from the burst, the calling application
1344 	 * is not expected to know about multiple references to packets, so we must
1345 	 * handle failures of all packets except those of the most successful slave
1346 	 */
1347 	if (unlikely(tx_failed_flag))
1348 		for (i = 0; i < num_of_slaves; i++)
1349 			if (i != most_successful_tx_slave)
1350 				while (slave_tx_total[i] < nb_pkts)
1351 					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1352 
1353 	return max_nb_of_tx_pkts;
1354 }
1355 
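/*
 * Record the link properties the bonded device should expose: in mode 4 the
 * first slave's speed/duplex/autoneg become the reference that later slaves
 * must match; in every other mode default autoneg/duplex values are used.
 */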
1356 static void
1357 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1358 {
1359 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1360 
1361 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1362 		/**
1363 		 * If in mode 4 then save the link properties of the first
1364 		 * slave; all subsequent slaves must match these properties
1365 		 */
1366 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1367 
1368 		bond_link->link_autoneg = slave_link->link_autoneg;
1369 		bond_link->link_duplex = slave_link->link_duplex;
1370 		bond_link->link_speed = slave_link->link_speed;
1371 	} else {
1372 		/**
1373 		 * In any other mode the link properties are set to default
1374 		 * values of AUTONEG/DUPLEX
1375 		 */
1376 		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1377 		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1378 	}
1379 }
1380 
1381 static int
1382 link_properties_valid(struct rte_eth_dev *ethdev,
1383 		struct rte_eth_link *slave_link)
1384 {
1385 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1386 
1387 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1388 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1389 
1390 		if (bond_link->link_duplex != slave_link->link_duplex ||
1391 			bond_link->link_autoneg != slave_link->link_autoneg ||
1392 			bond_link->link_speed != slave_link->link_speed)
1393 			return -1;
1394 	}
1395 
1396 	return 0;
1397 }
1398 
1399 int
1400 mac_address_get(struct rte_eth_dev *eth_dev,
1401 		struct rte_ether_addr *dst_mac_addr)
1402 {
1403 	struct rte_ether_addr *mac_addr;
1404 
1405 	if (eth_dev == NULL) {
1406 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1407 		return -1;
1408 	}
1409 
1410 	if (dst_mac_addr == NULL) {
1411 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1412 		return -1;
1413 	}
1414 
1415 	mac_addr = eth_dev->data->mac_addrs;
1416 
1417 	rte_ether_addr_copy(mac_addr, dst_mac_addr);
1418 	return 0;
1419 }
1420 
1421 int
1422 mac_address_set(struct rte_eth_dev *eth_dev,
1423 		struct rte_ether_addr *new_mac_addr)
1424 {
1425 	struct rte_ether_addr *mac_addr;
1426 
1427 	if (eth_dev == NULL) {
1428 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1429 		return -1;
1430 	}
1431 
1432 	if (new_mac_addr == NULL) {
1433 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1434 		return -1;
1435 	}
1436 
1437 	mac_addr = eth_dev->data->mac_addrs;
1438 
1439 	/* If the new MAC is different from the current MAC then update it */
1440 	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1441 		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1442 
1443 	return 0;
1444 }
1445 
1446 static const struct rte_ether_addr null_mac_addr;
1447 
1448 /*
1449  * Add additional MAC addresses to the slave
1450  */
1451 int
1452 slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1453 		uint16_t slave_port_id)
1454 {
1455 	int i, ret;
1456 	struct rte_ether_addr *mac_addr;
1457 
1458 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1459 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1460 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1461 			break;
1462 
1463 		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
1464 		if (ret < 0) {
1465 			/* rollback */
1466 			for (i--; i > 0; i--)
1467 				rte_eth_dev_mac_addr_remove(slave_port_id,
1468 					&bonded_eth_dev->data->mac_addrs[i]);
1469 			return ret;
1470 		}
1471 	}
1472 
1473 	return 0;
1474 }
1475 
1476 /*
1477  * Remove additional MAC addresses from the slave
1478  */
1479 int
1480 slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
1481 		uint16_t slave_port_id)
1482 {
1483 	int i, rc, ret;
1484 	struct rte_ether_addr *mac_addr;
1485 
1486 	rc = 0;
1487 	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
1488 		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
1489 		if (rte_is_same_ether_addr(mac_addr, &null_mac_addr))
1490 			break;
1491 
1492 		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
1493 		/* save only the first error */
1494 		if (ret < 0 && rc == 0)
1495 			rc = ret;
1496 	}
1497 
1498 	return rc;
1499 }
1500 
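/*
 * Propagate MAC addresses to the slaves according to the bonding mode: every
 * slave takes the bonded MAC in round robin, balance and broadcast modes,
 * mode 4 has its own handling, and in active backup, TLB and ALB only the
 * primary takes the bonded MAC while the others keep their persisted address.
 */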
1501 int
1502 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1503 {
1504 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1505 	bool set;
1506 	int i;
1507 
1508 	/* Update slave devices MAC addresses */
1509 	if (internals->slave_count < 1)
1510 		return -1;
1511 
1512 	switch (internals->mode) {
1513 	case BONDING_MODE_ROUND_ROBIN:
1514 	case BONDING_MODE_BALANCE:
1515 	case BONDING_MODE_BROADCAST:
1516 		for (i = 0; i < internals->slave_count; i++) {
1517 			if (rte_eth_dev_default_mac_addr_set(
1518 					internals->slaves[i].port_id,
1519 					bonded_eth_dev->data->mac_addrs)) {
1520 				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1521 						internals->slaves[i].port_id);
1522 				return -1;
1523 			}
1524 		}
1525 		break;
1526 	case BONDING_MODE_8023AD:
1527 		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1528 		break;
1529 	case BONDING_MODE_ACTIVE_BACKUP:
1530 	case BONDING_MODE_TLB:
1531 	case BONDING_MODE_ALB:
1532 	default:
1533 		set = true;
1534 		for (i = 0; i < internals->slave_count; i++) {
1535 			if (internals->slaves[i].port_id ==
1536 					internals->current_primary_port) {
1537 				if (rte_eth_dev_default_mac_addr_set(
1538 						internals->current_primary_port,
1539 						bonded_eth_dev->data->mac_addrs)) {
1540 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1541 							internals->current_primary_port);
1542 					set = false;
1543 				}
1544 			} else {
1545 				if (rte_eth_dev_default_mac_addr_set(
1546 						internals->slaves[i].port_id,
1547 						&internals->slaves[i].persisted_mac_addr)) {
1548 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1549 							internals->slaves[i].port_id);
1550 				}
1551 			}
1552 		}
1553 		if (!set)
1554 			return -1;
1555 	}
1556 
1557 	return 0;
1558 }
1559 
1560 int
1561 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1562 {
1563 	struct bond_dev_private *internals;
1564 
1565 	internals = eth_dev->data->dev_private;
1566 
1567 	switch (mode) {
1568 	case BONDING_MODE_ROUND_ROBIN:
1569 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1570 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1571 		break;
1572 	case BONDING_MODE_ACTIVE_BACKUP:
1573 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1574 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1575 		break;
1576 	case BONDING_MODE_BALANCE:
1577 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1578 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1579 		break;
1580 	case BONDING_MODE_BROADCAST:
1581 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1582 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1583 		break;
1584 	case BONDING_MODE_8023AD:
1585 		if (bond_mode_8023ad_enable(eth_dev) != 0)
1586 			return -1;
1587 
1588 		if (internals->mode4.dedicated_queues.enabled == 0) {
1589 			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1590 			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1591 			RTE_BOND_LOG(WARNING,
1592 				"Using mode 4, it is necessary to do TX burst "
1593 				"and RX burst at least every 100ms.");
1594 		} else {
1595 			/* Use flow director's optimization */
1596 			eth_dev->rx_pkt_burst =
1597 					bond_ethdev_rx_burst_8023ad_fast_queue;
1598 			eth_dev->tx_pkt_burst =
1599 					bond_ethdev_tx_burst_8023ad_fast_queue;
1600 		}
1601 		break;
1602 	case BONDING_MODE_TLB:
1603 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1604 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1605 		break;
1606 	case BONDING_MODE_ALB:
1607 		if (bond_mode_alb_enable(eth_dev) != 0)
1608 			return -1;
1609 
1610 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1611 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1612 		break;
1613 	default:
1614 		return -1;
1615 	}
1616 
1617 	internals->mode = mode;
1618 
1619 	return 0;
1620 }
1621 
1622 
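/*
 * Create the slave's slow-path mbuf pool on first use and, when mode 4
 * dedicated queues are enabled, set up the extra slow (LACP) Rx/Tx queues
 * on the slave.
 */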
1623 static int
1624 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1625 		struct rte_eth_dev *slave_eth_dev)
1626 {
1627 	int errval = 0;
1628 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1629 	struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
1630 
1631 	if (port->slow_pool == NULL) {
1632 		char mem_name[256];
1633 		int slave_id = slave_eth_dev->data->port_id;
1634 
1635 		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1636 				slave_id);
1637 		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1638 			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1639 			slave_eth_dev->data->numa_node);
1640 
1641 		/* Any memory allocation failure in initialization is critical because
1642 		 * resources can't be freed, so reinitialization is impossible. */
1643 		if (port->slow_pool == NULL) {
1644 			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1645 				slave_id, mem_name, rte_strerror(rte_errno));
1646 		}
1647 	}
1648 
1649 	if (internals->mode4.dedicated_queues.enabled == 1) {
1650 		/* Configure slow Rx queue */
1651 
1652 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1653 				internals->mode4.dedicated_queues.rx_qid, 128,
1654 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1655 				NULL, port->slow_pool);
1656 		if (errval != 0) {
1657 			RTE_BOND_LOG(ERR,
1658 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1659 					slave_eth_dev->data->port_id,
1660 					internals->mode4.dedicated_queues.rx_qid,
1661 					errval);
1662 			return errval;
1663 		}
1664 
1665 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1666 				internals->mode4.dedicated_queues.tx_qid, 512,
1667 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1668 				NULL);
1669 		if (errval != 0) {
1670 			RTE_BOND_LOG(ERR,
1671 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1672 				slave_eth_dev->data->port_id,
1673 				internals->mode4.dedicated_queues.tx_qid,
1674 				errval);
1675 			return errval;
1676 		}
1677 	}
1678 	return 0;
1679 }
1680 
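/*
 * Configure and start a slave port so that it mirrors the bonded device's
 * configuration: RSS, VLAN filtering, MTU, Rx/Tx queue setup and, in mode 4
 * with dedicated queues, the slow queue and its flow rule.
 */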
1681 int
1682 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1683 		struct rte_eth_dev *slave_eth_dev)
1684 {
1685 	struct bond_rx_queue *bd_rx_q;
1686 	struct bond_tx_queue *bd_tx_q;
1687 	uint16_t nb_rx_queues;
1688 	uint16_t nb_tx_queues;
1689 
1690 	int errval;
1691 	uint16_t q_id;
1692 	struct rte_flow_error flow_error;
1693 
1694 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1695 
1696 	/* Stop slave */
1697 	errval = rte_eth_dev_stop(slave_eth_dev->data->port_id);
1698 	if (errval != 0)
1699 		RTE_BOND_LOG(ERR, "rte_eth_dev_stop: port %u, err (%d)",
1700 			     slave_eth_dev->data->port_id, errval);
1701 
1702 	/* Enable interrupts on slave device if supported */
1703 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1704 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1705 
1706 	/* If RSS is enabled for bonding, try to enable it for slaves  */
1707 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1708 		if (internals->rss_key_len != 0) {
1709 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1710 					internals->rss_key_len;
1711 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1712 					internals->rss_key;
1713 		} else {
1714 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1715 		}
1716 
1717 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1718 				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1719 		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1720 				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1721 	}
1722 
1723 	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1724 			DEV_RX_OFFLOAD_VLAN_FILTER)
1725 		slave_eth_dev->data->dev_conf.rxmode.offloads |=
1726 				DEV_RX_OFFLOAD_VLAN_FILTER;
1727 	else
1728 		slave_eth_dev->data->dev_conf.rxmode.offloads &=
1729 				~DEV_RX_OFFLOAD_VLAN_FILTER;
1730 
1731 	slave_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1732 			bonded_eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1733 
1734 	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1735 			DEV_RX_OFFLOAD_JUMBO_FRAME)
1736 		slave_eth_dev->data->dev_conf.rxmode.offloads |=
1737 				DEV_RX_OFFLOAD_JUMBO_FRAME;
1738 	else
1739 		slave_eth_dev->data->dev_conf.rxmode.offloads &=
1740 				~DEV_RX_OFFLOAD_JUMBO_FRAME;
1741 
1742 	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1743 	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1744 
1745 	if (internals->mode == BONDING_MODE_8023AD) {
1746 		if (internals->mode4.dedicated_queues.enabled == 1) {
1747 			nb_rx_queues++;
1748 			nb_tx_queues++;
1749 		}
1750 	}
1751 
1752 	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1753 				     bonded_eth_dev->data->mtu);
1754 	if (errval != 0 && errval != -ENOTSUP) {
1755 		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1756 				slave_eth_dev->data->port_id, errval);
1757 		return errval;
1758 	}
1759 
1760 	/* Configure device */
1761 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1762 			nb_rx_queues, nb_tx_queues,
1763 			&(slave_eth_dev->data->dev_conf));
1764 	if (errval != 0) {
1765 		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1766 				slave_eth_dev->data->port_id, errval);
1767 		return errval;
1768 	}
1769 
1770 	/* Setup Rx Queues */
1771 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1772 		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1773 
1774 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1775 				bd_rx_q->nb_rx_desc,
1776 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1777 				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1778 		if (errval != 0) {
1779 			RTE_BOND_LOG(ERR,
1780 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1781 					slave_eth_dev->data->port_id, q_id, errval);
1782 			return errval;
1783 		}
1784 	}
1785 
1786 	/* Setup Tx Queues */
1787 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1788 		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1789 
1790 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1791 				bd_tx_q->nb_tx_desc,
1792 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1793 				&bd_tx_q->tx_conf);
1794 		if (errval != 0) {
1795 			RTE_BOND_LOG(ERR,
1796 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1797 				slave_eth_dev->data->port_id, q_id, errval);
1798 			return errval;
1799 		}
1800 	}
1801 
1802 	if (internals->mode == BONDING_MODE_8023AD &&
1803 			internals->mode4.dedicated_queues.enabled == 1) {
		errval = slave_configure_slow_queue(bonded_eth_dev,
				slave_eth_dev);
		if (errval != 0)
			return errval;
1807 
1808 		if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1809 				slave_eth_dev->data->port_id) != 0) {
			RTE_BOND_LOG(ERR,
				"bond_ethdev_8023ad_flow_verify: port=%d failed",
				slave_eth_dev->data->port_id);
1813 			return -1;
1814 		}
1815 
1816 		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1817 			rte_flow_destroy(slave_eth_dev->data->port_id,
1818 					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1819 					&flow_error);
1820 
1821 		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1822 				slave_eth_dev->data->port_id);
1823 	}
1824 
1825 	/* Start device */
1826 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1827 	if (errval != 0) {
1828 		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1829 				slave_eth_dev->data->port_id, errval);
1830 		return -1;
1831 	}
1832 
1833 	/* If RSS is enabled for bonding, synchronize RETA */
1834 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1835 		int i;
1836 		struct bond_dev_private *internals;
1837 
1838 		internals = bonded_eth_dev->data->dev_private;
1839 
1840 		for (i = 0; i < internals->slave_count; i++) {
1841 			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1842 				errval = rte_eth_dev_rss_reta_update(
1843 						slave_eth_dev->data->port_id,
1844 						&internals->reta_conf[0],
1845 						internals->slaves[i].reta_size);
1846 				if (errval != 0) {
1847 					RTE_BOND_LOG(WARNING,
1848 						     "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1849 						     " RSS Configuration for bonding may be inconsistent.",
1850 						     slave_eth_dev->data->port_id, errval);
1851 				}
1852 				break;
1853 			}
1854 		}
1855 	}
1856 
	/* If lsc interrupt is set, check the slave's initial link status */
1858 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1859 		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1860 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1861 			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1862 			NULL);
1863 	}
1864 
1865 	return 0;
1866 }
1867 
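/* Remove a slave from the bonded device's slave array (and from each flow's
 * per-slave flow table), then force a reconfiguration of the slave port.
 */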
1868 void
1869 slave_remove(struct bond_dev_private *internals,
1870 		struct rte_eth_dev *slave_eth_dev)
1871 {
1872 	uint16_t i;
1873 
1874 	for (i = 0; i < internals->slave_count; i++)
1875 		if (internals->slaves[i].port_id ==
1876 				slave_eth_dev->data->port_id)
1877 			break;
1878 
1879 	if (i < (internals->slave_count - 1)) {
1880 		struct rte_flow *flow;
1881 
1882 		memmove(&internals->slaves[i], &internals->slaves[i + 1],
1883 				sizeof(internals->slaves[0]) *
1884 				(internals->slave_count - i - 1));
1885 		TAILQ_FOREACH(flow, &internals->flow_list, next) {
1886 			memmove(&flow->flows[i], &flow->flows[i + 1],
1887 				sizeof(flow->flows[0]) *
1888 				(internals->slave_count - i - 1));
1889 			flow->flows[internals->slave_count - 1] = NULL;
1890 		}
1891 	}
1892 
1893 	internals->slave_count--;
1894 
1895 	/* force reconfiguration of slave interfaces */
1896 	rte_eth_dev_internal_reset(slave_eth_dev);
1897 }
1898 
1899 static void
1900 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1901 
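/* Record a new slave in the bonded device's slave table: store its port id,
 * mark it for link status polling if it lacks LSC interrupt support and
 * persist its current MAC address.
 */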
1902 void
1903 slave_add(struct bond_dev_private *internals,
1904 		struct rte_eth_dev *slave_eth_dev)
1905 {
1906 	struct bond_slave_details *slave_details =
1907 			&internals->slaves[internals->slave_count];
1908 
1909 	slave_details->port_id = slave_eth_dev->data->port_id;
1910 	slave_details->last_link_status = 0;
1911 
1912 	/* Mark slave devices that don't support interrupts so we can
1913 	 * compensate when we start the bond
1914 	 */
1915 	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1916 		slave_details->link_status_poll_enabled = 1;
1917 	}
1918 
1919 	slave_details->link_status_wait_to_complete = 0;
	/* Persist the slave's current MAC address so it can be restored later */
1921 	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1922 			sizeof(struct rte_ether_addr));
1923 }
1924 
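/* Set the current primary port. If there are no active slaves the proposed
 * port is accepted directly, otherwise it is only accepted if it is found in
 * the active slave list.
 */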
1925 void
1926 bond_ethdev_primary_set(struct bond_dev_private *internals,
1927 		uint16_t slave_port_id)
1928 {
1929 	int i;
1930 
1931 	if (internals->active_slave_count < 1)
1932 		internals->current_primary_port = slave_port_id;
1933 	else
1934 		/* Search bonded device slave ports for new proposed primary port */
1935 		for (i = 0; i < internals->active_slave_count; i++) {
1936 			if (internals->active_slaves[i] == slave_port_id)
1937 				internals->current_primary_port = slave_port_id;
1938 		}
1939 }
1940 
1941 static int
1942 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1943 
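/* Start the bonded device: derive its MAC address from the primary slave if
 * the user did not define one, reconfigure and start every slave, start link
 * status polling when needed and enable the mode specific machinery
 * (802.3ad state machines, TLB/ALB balancing).
 */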
1944 static int
1945 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1946 {
1947 	struct bond_dev_private *internals;
1948 	int i;
1949 
1950 	/* slave eth dev will be started by bonded device */
1951 	if (check_for_bonded_ethdev(eth_dev)) {
1952 		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1953 				eth_dev->data->port_id);
1954 		return -1;
1955 	}
1956 
1957 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1958 	eth_dev->data->dev_started = 1;
1959 
1960 	internals = eth_dev->data->dev_private;
1961 
1962 	if (internals->slave_count == 0) {
1963 		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1964 		goto out_err;
1965 	}
1966 
1967 	if (internals->user_defined_mac == 0) {
1968 		struct rte_ether_addr *new_mac_addr = NULL;
1969 
1970 		for (i = 0; i < internals->slave_count; i++)
1971 			if (internals->slaves[i].port_id == internals->primary_port)
1972 				new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1973 
1974 		if (new_mac_addr == NULL)
1975 			goto out_err;
1976 
1977 		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1978 			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1979 					eth_dev->data->port_id);
1980 			goto out_err;
1981 		}
1982 	}
1983 
1984 	if (internals->mode == BONDING_MODE_8023AD) {
1985 		if (internals->mode4.dedicated_queues.enabled == 1) {
1986 			internals->mode4.dedicated_queues.rx_qid =
1987 					eth_dev->data->nb_rx_queues;
1988 			internals->mode4.dedicated_queues.tx_qid =
1989 					eth_dev->data->nb_tx_queues;
1990 		}
1991 	}
1992 
1993 
1994 	/* Reconfigure each slave device if starting bonded device */
1995 	for (i = 0; i < internals->slave_count; i++) {
1996 		struct rte_eth_dev *slave_ethdev =
1997 				&(rte_eth_devices[internals->slaves[i].port_id]);
1998 		if (slave_configure(eth_dev, slave_ethdev) != 0) {
1999 			RTE_BOND_LOG(ERR,
2000 				"bonded port (%d) failed to reconfigure slave device (%d)",
2001 				eth_dev->data->port_id,
2002 				internals->slaves[i].port_id);
2003 			goto out_err;
2004 		}
2005 		/* We will need to poll for link status if any slave doesn't
2006 		 * support interrupts
2007 		 */
2008 		if (internals->slaves[i].link_status_poll_enabled)
2009 			internals->link_status_polling_enabled = 1;
2010 	}
2011 
2012 	/* start polling if needed */
2013 	if (internals->link_status_polling_enabled) {
2014 		rte_eal_alarm_set(
2015 			internals->link_status_polling_interval_ms * 1000,
2016 			bond_ethdev_slave_link_status_change_monitor,
2017 			(void *)&rte_eth_devices[internals->port_id]);
2018 	}
2019 
	/* Update all slave devices' MACs */
2021 	if (mac_address_slaves_update(eth_dev) != 0)
2022 		goto out_err;
2023 
2024 	if (internals->user_defined_primary_port)
2025 		bond_ethdev_primary_set(internals, internals->primary_port);
2026 
2027 	if (internals->mode == BONDING_MODE_8023AD)
2028 		bond_mode_8023ad_start(eth_dev);
2029 
2030 	if (internals->mode == BONDING_MODE_TLB ||
2031 			internals->mode == BONDING_MODE_ALB)
2032 		bond_tlb_enable(internals);
2033 
2034 	return 0;
2035 
2036 out_err:
2037 	eth_dev->data->dev_started = 0;
2038 	return -1;
2039 }
2040 
2041 static void
2042 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2043 {
2044 	uint16_t i;
2045 
2046 	if (dev->data->rx_queues != NULL) {
2047 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
2048 			rte_free(dev->data->rx_queues[i]);
2049 			dev->data->rx_queues[i] = NULL;
2050 		}
2051 		dev->data->nb_rx_queues = 0;
2052 	}
2053 
2054 	if (dev->data->tx_queues != NULL) {
2055 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
2056 			rte_free(dev->data->tx_queues[i]);
2057 			dev->data->tx_queues[i] = NULL;
2058 		}
2059 		dev->data->nb_tx_queues = 0;
2060 	}
2061 }
2062 
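/* Stop the bonded device: stop the mode specific machinery, drain the
 * 802.3ad control rings, clear TLB statistics, then stop and deactivate all
 * remaining active slaves.
 */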
2063 int
2064 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2065 {
2066 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2067 	uint16_t i;
2068 	int ret;
2069 
2070 	if (internals->mode == BONDING_MODE_8023AD) {
2071 		struct port *port;
2072 		void *pkt = NULL;
2073 
2074 		bond_mode_8023ad_stop(eth_dev);
2075 
2076 		/* Discard all messages to/from mode 4 state machines */
2077 		for (i = 0; i < internals->active_slave_count; i++) {
2078 			port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2079 
2080 			RTE_ASSERT(port->rx_ring != NULL);
2081 			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2082 				rte_pktmbuf_free(pkt);
2083 
2084 			RTE_ASSERT(port->tx_ring != NULL);
2085 			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2086 				rte_pktmbuf_free(pkt);
2087 		}
2088 	}
2089 
2090 	if (internals->mode == BONDING_MODE_TLB ||
2091 			internals->mode == BONDING_MODE_ALB) {
2092 		bond_tlb_disable(internals);
2093 		for (i = 0; i < internals->active_slave_count; i++)
2094 			tlb_last_obytets[internals->active_slaves[i]] = 0;
2095 	}
2096 
2097 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2098 	eth_dev->data->dev_started = 0;
2099 
2100 	internals->link_status_polling_enabled = 0;
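	/* Stop and deactivate every slave that is still on the active list */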
2101 	for (i = 0; i < internals->slave_count; i++) {
2102 		uint16_t slave_id = internals->slaves[i].port_id;
2103 		if (find_slave_by_id(internals->active_slaves,
2104 				internals->active_slave_count, slave_id) !=
2105 						internals->active_slave_count) {
2106 			internals->slaves[i].last_link_status = 0;
2107 			ret = rte_eth_dev_stop(slave_id);
2108 			if (ret != 0) {
2109 				RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2110 					     slave_id);
2111 				return ret;
2112 			}
2113 			deactivate_slave(eth_dev, slave_id);
2114 		}
2115 	}
2116 
2117 	return 0;
2118 }
2119 
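/* Close the bonded device: stop and remove every slave, flush flow rules,
 * free the queues, the VLAN filter bitmap and the mode 6 mempool.
 */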
2120 int
2121 bond_ethdev_close(struct rte_eth_dev *dev)
2122 {
2123 	struct bond_dev_private *internals = dev->data->dev_private;
2124 	uint16_t bond_port_id = internals->port_id;
2125 	int skipped = 0;
2126 	struct rte_flow_error ferror;
2127 
2128 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2129 		return 0;
2130 
2131 	RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2132 	while (internals->slave_count != skipped) {
2133 		uint16_t port_id = internals->slaves[skipped].port_id;
2134 
2135 		if (rte_eth_dev_stop(port_id) != 0) {
2136 			RTE_BOND_LOG(ERR, "Failed to stop device on port %u",
2137 				     port_id);
2138 			skipped++;
2139 		}
2140 
2141 		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2142 			RTE_BOND_LOG(ERR,
2143 				     "Failed to remove port %d from bonded device %s",
2144 				     port_id, dev->device->name);
2145 			skipped++;
2146 		}
2147 	}
2148 	bond_flow_ops.flush(dev, &ferror);
2149 	bond_ethdev_free_queues(dev);
2150 	rte_bitmap_reset(internals->vlan_filter_bmp);
2151 	rte_bitmap_free(internals->vlan_filter_bmp);
2152 	rte_free(internals->vlan_filter_bmpmem);
2153 
	/* Try to release the mempool used in mode 6. If the bonded
	 * device is not in mode 6, freeing a NULL pointer is not a problem.
	 */
2157 	rte_mempool_free(internals->mode6.mempool);
2158 
2159 	return 0;
2160 }
2161 
2162 /* forward declaration */
2163 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2164 
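/* Report device capabilities for the bonded port. Queue and descriptor
 * limits are the minimum of the corresponding limits of all slaves, with the
 * queue counts reduced by one when dedicated 802.3ad control queues are
 * enabled.
 */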
2165 static int
2166 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2167 {
2168 	struct bond_dev_private *internals = dev->data->dev_private;
2169 	struct bond_slave_details slave;
2170 	int ret;
2171 
2172 	uint16_t max_nb_rx_queues = UINT16_MAX;
2173 	uint16_t max_nb_tx_queues = UINT16_MAX;
2174 	uint16_t max_rx_desc_lim = UINT16_MAX;
2175 	uint16_t max_tx_desc_lim = UINT16_MAX;
2176 
2177 	dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2178 
2179 	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2180 			internals->candidate_max_rx_pktlen :
2181 			RTE_ETHER_MAX_JUMBO_FRAME_LEN;
2182 
	/* Max number of tx/rx queues that the bonded device can support is the
	 * minimum across the bonded slaves, as all slaves must be capable
2185 	 * of supporting the same number of tx/rx queues.
2186 	 */
2187 	if (internals->slave_count > 0) {
2188 		struct rte_eth_dev_info slave_info;
2189 		uint16_t idx;
2190 
2191 		for (idx = 0; idx < internals->slave_count; idx++) {
2192 			slave = internals->slaves[idx];
2193 			ret = rte_eth_dev_info_get(slave.port_id, &slave_info);
2194 			if (ret != 0) {
2195 				RTE_BOND_LOG(ERR,
2196 					"%s: Error during getting device (port %u) info: %s\n",
2197 					__func__,
2198 					slave.port_id,
2199 					strerror(-ret));
2200 
2201 				return ret;
2202 			}
2203 
2204 			if (slave_info.max_rx_queues < max_nb_rx_queues)
2205 				max_nb_rx_queues = slave_info.max_rx_queues;
2206 
2207 			if (slave_info.max_tx_queues < max_nb_tx_queues)
2208 				max_nb_tx_queues = slave_info.max_tx_queues;
2209 
2210 			if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2211 				max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2212 
2213 			if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2214 				max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2215 		}
2216 	}
2217 
2218 	dev_info->max_rx_queues = max_nb_rx_queues;
2219 	dev_info->max_tx_queues = max_nb_tx_queues;
2220 
2221 	memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2222 	       sizeof(dev_info->default_rxconf));
2223 	memcpy(&dev_info->default_txconf, &internals->default_txconf,
2224 	       sizeof(dev_info->default_txconf));
2225 
2226 	dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2227 	dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2228 
2229 	/**
	 * If dedicated hw queues are enabled for the link bonding device in LACP
	 * mode, then we need to reduce the maximum number of data path queues by 1.
2232 	 */
2233 	if (internals->mode == BONDING_MODE_8023AD &&
2234 		internals->mode4.dedicated_queues.enabled == 1) {
2235 		dev_info->max_rx_queues--;
2236 		dev_info->max_tx_queues--;
2237 	}
2238 
2239 	dev_info->min_rx_bufsize = 0;
2240 
2241 	dev_info->rx_offload_capa = internals->rx_offload_capa;
2242 	dev_info->tx_offload_capa = internals->tx_offload_capa;
2243 	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2244 	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2245 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2246 
2247 	dev_info->reta_size = internals->reta_size;
2248 
2249 	return 0;
2250 }
2251 
2252 static int
2253 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2254 {
2255 	int res;
2256 	uint16_t i;
2257 	struct bond_dev_private *internals = dev->data->dev_private;
2258 
2259 	/* don't do this while a slave is being added */
2260 	rte_spinlock_lock(&internals->lock);
2261 
2262 	if (on)
2263 		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2264 	else
2265 		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2266 
2267 	for (i = 0; i < internals->slave_count; i++) {
2268 		uint16_t port_id = internals->slaves[i].port_id;
2269 
2270 		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
		if (res == -ENOTSUP)
2272 			RTE_BOND_LOG(WARNING,
2273 				     "Setting VLAN filter on slave port %u not supported.",
2274 				     port_id);
2275 	}
2276 
2277 	rte_spinlock_unlock(&internals->lock);
2278 	return 0;
2279 }
2280 
2281 static int
2282 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2283 		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2284 		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2285 {
2286 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2287 			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2288 					0, dev->data->numa_node);
2289 	if (bd_rx_q == NULL)
2290 		return -1;
2291 
2292 	bd_rx_q->queue_id = rx_queue_id;
2293 	bd_rx_q->dev_private = dev->data->dev_private;
2294 
2295 	bd_rx_q->nb_rx_desc = nb_rx_desc;
2296 
2297 	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2298 	bd_rx_q->mb_pool = mb_pool;
2299 
2300 	dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2301 
2302 	return 0;
2303 }
2304 
2305 static int
2306 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2307 		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2308 		const struct rte_eth_txconf *tx_conf)
2309 {
2310 	struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2311 			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2312 					0, dev->data->numa_node);
2313 
2314 	if (bd_tx_q == NULL)
2315 		return -1;
2316 
2317 	bd_tx_q->queue_id = tx_queue_id;
2318 	bd_tx_q->dev_private = dev->data->dev_private;
2319 
2320 	bd_tx_q->nb_tx_desc = nb_tx_desc;
2321 	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2322 
2323 	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2324 
2325 	return 0;
2326 }
2327 
2328 static void
2329 bond_ethdev_rx_queue_release(void *queue)
2330 {
2331 	if (queue == NULL)
2332 		return;
2333 
2334 	rte_free(queue);
2335 }
2336 
2337 static void
2338 bond_ethdev_tx_queue_release(void *queue)
2339 {
2340 	if (queue == NULL)
2341 		return;
2342 
2343 	rte_free(queue);
2344 }
2345 
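/* Alarm callback that polls the link status of slaves without LSC interrupt
 * support and raises the bonding LSC callback when a change is detected.
 * Re-arms itself while polling is still required (or while the device lock
 * could not be taken).
 */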
2346 static void
2347 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2348 {
2349 	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2350 	struct bond_dev_private *internals;
2351 
2352 	/* Default value for polling slave found is true as we don't want to
2353 	 * disable the polling thread if we cannot get the lock */
2354 	int i, polling_slave_found = 1;
2355 
2356 	if (cb_arg == NULL)
2357 		return;
2358 
2359 	bonded_ethdev = cb_arg;
2360 	internals = bonded_ethdev->data->dev_private;
2361 
2362 	if (!bonded_ethdev->data->dev_started ||
2363 		!internals->link_status_polling_enabled)
2364 		return;
2365 
2366 	/* If device is currently being configured then don't check slaves link
2367 	 * status, wait until next period */
2368 	if (rte_spinlock_trylock(&internals->lock)) {
2369 		if (internals->slave_count > 0)
2370 			polling_slave_found = 0;
2371 
2372 		for (i = 0; i < internals->slave_count; i++) {
2373 			if (!internals->slaves[i].link_status_poll_enabled)
2374 				continue;
2375 
2376 			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2377 			polling_slave_found = 1;
2378 
2379 			/* Update slave link status */
2380 			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2381 					internals->slaves[i].link_status_wait_to_complete);
2382 
2383 			/* if link status has changed since last checked then call lsc
2384 			 * event callback */
2385 			if (slave_ethdev->data->dev_link.link_status !=
2386 					internals->slaves[i].last_link_status) {
2387 				internals->slaves[i].last_link_status =
2388 						slave_ethdev->data->dev_link.link_status;
2389 
2390 				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2391 						RTE_ETH_EVENT_INTR_LSC,
2392 						&bonded_ethdev->data->port_id,
2393 						NULL);
2394 			}
2395 		}
2396 		rte_spinlock_unlock(&internals->lock);
2397 	}
2398 
2399 	if (polling_slave_found)
		/* Set alarm to continue monitoring link status of slave ethdevs */
2401 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2402 				bond_ethdev_slave_link_status_change_monitor, cb_arg);
2403 }
2404 
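/* Update the bonded device link. The reported speed depends on the mode:
 * the minimum slave speed for broadcast, the primary slave speed for
 * active-backup, and the sum of the active slave speeds otherwise.
 */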
2405 static int
2406 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2407 {
2408 	int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2409 
2410 	struct bond_dev_private *bond_ctx;
2411 	struct rte_eth_link slave_link;
2412 
2413 	bool one_link_update_succeeded;
2414 	uint32_t idx;
2415 	int ret;
2416 
2417 	bond_ctx = ethdev->data->dev_private;
2418 
2419 	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2420 
2421 	if (ethdev->data->dev_started == 0 ||
2422 			bond_ctx->active_slave_count == 0) {
2423 		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2424 		return 0;
2425 	}
2426 
2427 	ethdev->data->dev_link.link_status = ETH_LINK_UP;
2428 
2429 	if (wait_to_complete)
2430 		link_update = rte_eth_link_get;
2431 	else
2432 		link_update = rte_eth_link_get_nowait;
2433 
2434 	switch (bond_ctx->mode) {
2435 	case BONDING_MODE_BROADCAST:
2436 		/**
2437 		 * Setting link speed to UINT32_MAX to ensure we pick up the
2438 		 * value of the first active slave
2439 		 */
2440 		ethdev->data->dev_link.link_speed = UINT32_MAX;
2441 
2442 		/**
		 * link speed is the minimum of all the slaves' link speeds, as
		 * packet loss will occur on a slave if transmission at a rate
		 * greater than its link speed is attempted
2446 		 */
2447 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2448 			ret = link_update(bond_ctx->active_slaves[idx],
2449 					  &slave_link);
2450 			if (ret < 0) {
2451 				ethdev->data->dev_link.link_speed =
2452 					ETH_SPEED_NUM_NONE;
2453 				RTE_BOND_LOG(ERR,
2454 					"Slave (port %u) link get failed: %s",
2455 					bond_ctx->active_slaves[idx],
2456 					rte_strerror(-ret));
2457 				return 0;
2458 			}
2459 
2460 			if (slave_link.link_speed <
2461 					ethdev->data->dev_link.link_speed)
2462 				ethdev->data->dev_link.link_speed =
2463 						slave_link.link_speed;
2464 		}
2465 		break;
2466 	case BONDING_MODE_ACTIVE_BACKUP:
2467 		/* Current primary slave */
2468 		ret = link_update(bond_ctx->current_primary_port, &slave_link);
2469 		if (ret < 0) {
2470 			RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s",
2471 				bond_ctx->current_primary_port,
2472 				rte_strerror(-ret));
2473 			return 0;
2474 		}
2475 
2476 		ethdev->data->dev_link.link_speed = slave_link.link_speed;
2477 		break;
2478 	case BONDING_MODE_8023AD:
2479 		ethdev->data->dev_link.link_autoneg =
2480 				bond_ctx->mode4.slave_link.link_autoneg;
2481 		ethdev->data->dev_link.link_duplex =
2482 				bond_ctx->mode4.slave_link.link_duplex;
2483 		/* fall through */
2484 		/* to update link speed */
2485 	case BONDING_MODE_ROUND_ROBIN:
2486 	case BONDING_MODE_BALANCE:
2487 	case BONDING_MODE_TLB:
2488 	case BONDING_MODE_ALB:
2489 	default:
2490 		/**
		 * In these modes the maximum theoretical link speed is the sum
		 * of all the slaves' link speeds
2493 		 */
2494 		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2495 		one_link_update_succeeded = false;
2496 
2497 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2498 			ret = link_update(bond_ctx->active_slaves[idx],
2499 					&slave_link);
2500 			if (ret < 0) {
2501 				RTE_BOND_LOG(ERR,
2502 					"Slave (port %u) link get failed: %s",
2503 					bond_ctx->active_slaves[idx],
2504 					rte_strerror(-ret));
2505 				continue;
2506 			}
2507 
2508 			one_link_update_succeeded = true;
2509 			ethdev->data->dev_link.link_speed +=
2510 					slave_link.link_speed;
2511 		}
2512 
2513 		if (!one_link_update_succeeded) {
2514 			RTE_BOND_LOG(ERR, "All slaves link get failed");
2515 			return 0;
2516 		}
2517 	}
2518 
2519 
2520 	return 0;
2521 }
2522 
2523 
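/* Aggregate statistics from all slaves (including per-queue counters) into
 * the bonded device's counters.
 */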
2524 static int
2525 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2526 {
2527 	struct bond_dev_private *internals = dev->data->dev_private;
2528 	struct rte_eth_stats slave_stats;
2529 	int i, j;
2530 
2531 	for (i = 0; i < internals->slave_count; i++) {
2532 		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2533 
2534 		stats->ipackets += slave_stats.ipackets;
2535 		stats->opackets += slave_stats.opackets;
2536 		stats->ibytes += slave_stats.ibytes;
2537 		stats->obytes += slave_stats.obytes;
2538 		stats->imissed += slave_stats.imissed;
2539 		stats->ierrors += slave_stats.ierrors;
2540 		stats->oerrors += slave_stats.oerrors;
2541 		stats->rx_nombuf += slave_stats.rx_nombuf;
2542 
2543 		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2544 			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2545 			stats->q_opackets[j] += slave_stats.q_opackets[j];
2546 			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2547 			stats->q_obytes[j] += slave_stats.q_obytes[j];
2548 			stats->q_errors[j] += slave_stats.q_errors[j];
2549 		}
2550 
2551 	}
2552 
2553 	return 0;
2554 }
2555 
2556 static int
2557 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2558 {
2559 	struct bond_dev_private *internals = dev->data->dev_private;
2560 	int i;
2561 	int err;
2562 	int ret;
2563 
2564 	for (i = 0, err = 0; i < internals->slave_count; i++) {
2565 		ret = rte_eth_stats_reset(internals->slaves[i].port_id);
2566 		if (ret != 0)
2567 			err = ret;
2568 	}
2569 
2570 	return err;
2571 }
2572 
2573 static int
2574 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2575 {
2576 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2577 	int i;
2578 	int ret = 0;
2579 	uint16_t port_id;
2580 
2581 	switch (internals->mode) {
2582 	/* Promiscuous mode is propagated to all slaves */
2583 	case BONDING_MODE_ROUND_ROBIN:
2584 	case BONDING_MODE_BALANCE:
2585 	case BONDING_MODE_BROADCAST:
2586 	case BONDING_MODE_8023AD: {
2587 		unsigned int slave_ok = 0;
2588 
2589 		for (i = 0; i < internals->slave_count; i++) {
2590 			port_id = internals->slaves[i].port_id;
2591 
2592 			ret = rte_eth_promiscuous_enable(port_id);
2593 			if (ret != 0)
2594 				RTE_BOND_LOG(ERR,
2595 					"Failed to enable promiscuous mode for port %u: %s",
2596 					port_id, rte_strerror(-ret));
2597 			else
2598 				slave_ok++;
2599 		}
2600 		/*
		 * Report success if the operation succeeded on at least
		 * one slave. Otherwise return the last error code.
2603 		 */
2604 		if (slave_ok > 0)
2605 			ret = 0;
2606 		break;
2607 	}
2608 	/* Promiscuous mode is propagated only to primary slave */
2609 	case BONDING_MODE_ACTIVE_BACKUP:
2610 	case BONDING_MODE_TLB:
2611 	case BONDING_MODE_ALB:
2612 	default:
2613 		/* Do not touch promisc when there cannot be primary ports */
2614 		if (internals->slave_count == 0)
2615 			break;
2616 		port_id = internals->current_primary_port;
2617 		ret = rte_eth_promiscuous_enable(port_id);
2618 		if (ret != 0)
2619 			RTE_BOND_LOG(ERR,
2620 				"Failed to enable promiscuous mode for port %u: %s",
2621 				port_id, rte_strerror(-ret));
2622 	}
2623 
2624 	return ret;
2625 }
2626 
2627 static int
2628 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2629 {
2630 	struct bond_dev_private *internals = dev->data->dev_private;
2631 	int i;
2632 	int ret = 0;
2633 	uint16_t port_id;
2634 
2635 	switch (internals->mode) {
2636 	/* Promiscuous mode is propagated to all slaves */
2637 	case BONDING_MODE_ROUND_ROBIN:
2638 	case BONDING_MODE_BALANCE:
2639 	case BONDING_MODE_BROADCAST:
2640 	case BONDING_MODE_8023AD: {
2641 		unsigned int slave_ok = 0;
2642 
2643 		for (i = 0; i < internals->slave_count; i++) {
2644 			port_id = internals->slaves[i].port_id;
2645 
2646 			if (internals->mode == BONDING_MODE_8023AD &&
2647 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2648 					BOND_8023AD_FORCED_PROMISC) {
2649 				slave_ok++;
2650 				continue;
2651 			}
2652 			ret = rte_eth_promiscuous_disable(port_id);
2653 			if (ret != 0)
2654 				RTE_BOND_LOG(ERR,
2655 					"Failed to disable promiscuous mode for port %u: %s",
2656 					port_id, rte_strerror(-ret));
2657 			else
2658 				slave_ok++;
2659 		}
2660 		/*
		 * Report success if the operation succeeded on at least
		 * one slave. Otherwise return the last error code.
2663 		 */
2664 		if (slave_ok > 0)
2665 			ret = 0;
2666 		break;
2667 	}
2668 	/* Promiscuous mode is propagated only to primary slave */
2669 	case BONDING_MODE_ACTIVE_BACKUP:
2670 	case BONDING_MODE_TLB:
2671 	case BONDING_MODE_ALB:
2672 	default:
2673 		/* Do not touch promisc when there cannot be primary ports */
2674 		if (internals->slave_count == 0)
2675 			break;
2676 		port_id = internals->current_primary_port;
2677 		ret = rte_eth_promiscuous_disable(port_id);
2678 		if (ret != 0)
2679 			RTE_BOND_LOG(ERR,
2680 				"Failed to disable promiscuous mode for port %u: %s",
2681 				port_id, rte_strerror(-ret));
2682 	}
2683 
2684 	return ret;
2685 }
2686 
2687 static int
2688 bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev)
2689 {
2690 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2691 	int i;
2692 	int ret = 0;
2693 	uint16_t port_id;
2694 
2695 	switch (internals->mode) {
2696 	/* allmulti mode is propagated to all slaves */
2697 	case BONDING_MODE_ROUND_ROBIN:
2698 	case BONDING_MODE_BALANCE:
2699 	case BONDING_MODE_BROADCAST:
2700 	case BONDING_MODE_8023AD: {
2701 		unsigned int slave_ok = 0;
2702 
2703 		for (i = 0; i < internals->slave_count; i++) {
2704 			port_id = internals->slaves[i].port_id;
2705 
2706 			ret = rte_eth_allmulticast_enable(port_id);
2707 			if (ret != 0)
2708 				RTE_BOND_LOG(ERR,
2709 					"Failed to enable allmulti mode for port %u: %s",
2710 					port_id, rte_strerror(-ret));
2711 			else
2712 				slave_ok++;
2713 		}
2714 		/*
		 * Report success if the operation succeeded on at least
		 * one slave. Otherwise return the last error code.
2717 		 */
2718 		if (slave_ok > 0)
2719 			ret = 0;
2720 		break;
2721 	}
2722 	/* allmulti mode is propagated only to primary slave */
2723 	case BONDING_MODE_ACTIVE_BACKUP:
2724 	case BONDING_MODE_TLB:
2725 	case BONDING_MODE_ALB:
2726 	default:
2727 		/* Do not touch allmulti when there cannot be primary ports */
2728 		if (internals->slave_count == 0)
2729 			break;
2730 		port_id = internals->current_primary_port;
2731 		ret = rte_eth_allmulticast_enable(port_id);
2732 		if (ret != 0)
2733 			RTE_BOND_LOG(ERR,
2734 				"Failed to enable allmulti mode for port %u: %s",
2735 				port_id, rte_strerror(-ret));
2736 	}
2737 
2738 	return ret;
2739 }
2740 
2741 static int
2742 bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev)
2743 {
2744 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2745 	int i;
2746 	int ret = 0;
2747 	uint16_t port_id;
2748 
2749 	switch (internals->mode) {
2750 	/* allmulti mode is propagated to all slaves */
2751 	case BONDING_MODE_ROUND_ROBIN:
2752 	case BONDING_MODE_BALANCE:
2753 	case BONDING_MODE_BROADCAST:
2754 	case BONDING_MODE_8023AD: {
2755 		unsigned int slave_ok = 0;
2756 
2757 		for (i = 0; i < internals->slave_count; i++) {
2758 			uint16_t port_id = internals->slaves[i].port_id;
2759 
2760 			if (internals->mode == BONDING_MODE_8023AD &&
2761 			    bond_mode_8023ad_ports[port_id].forced_rx_flags ==
2762 					BOND_8023AD_FORCED_ALLMULTI)
2763 				continue;
2764 
2765 			ret = rte_eth_allmulticast_disable(port_id);
2766 			if (ret != 0)
2767 				RTE_BOND_LOG(ERR,
2768 					"Failed to disable allmulti mode for port %u: %s",
2769 					port_id, rte_strerror(-ret));
2770 			else
2771 				slave_ok++;
2772 		}
2773 		/*
		 * Report success if the operation succeeded on at least
		 * one slave. Otherwise return the last error code.
2776 		 */
2777 		if (slave_ok > 0)
2778 			ret = 0;
2779 		break;
2780 	}
2781 	/* allmulti mode is propagated only to primary slave */
2782 	case BONDING_MODE_ACTIVE_BACKUP:
2783 	case BONDING_MODE_TLB:
2784 	case BONDING_MODE_ALB:
2785 	default:
2786 		/* Do not touch allmulti when there cannot be primary ports */
2787 		if (internals->slave_count == 0)
2788 			break;
2789 		port_id = internals->current_primary_port;
2790 		ret = rte_eth_allmulticast_disable(port_id);
2791 		if (ret != 0)
2792 			RTE_BOND_LOG(ERR,
2793 				"Failed to disable allmulti mode for port %u: %s",
2794 				port_id, rte_strerror(-ret));
2795 	}
2796 
2797 	return ret;
2798 }
2799 
2800 static void
2801 bond_ethdev_delayed_lsc_propagation(void *arg)
2802 {
2803 	if (arg == NULL)
2804 		return;
2805 
2806 	rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2807 			RTE_ETH_EVENT_INTR_LSC, NULL);
2808 }
2809 
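/* LSC event handler invoked when a slave's link changes. Activates or
 * deactivates the slave, updates the primary port and bonded link
 * properties, and propagates (optionally delayed) LSC events to the
 * application.
 */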
2810 int
2811 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2812 		void *param, void *ret_param __rte_unused)
2813 {
2814 	struct rte_eth_dev *bonded_eth_dev;
2815 	struct bond_dev_private *internals;
2816 	struct rte_eth_link link;
2817 	int rc = -1;
2818 	int ret;
2819 
2820 	uint8_t lsc_flag = 0;
2821 	int valid_slave = 0;
2822 	uint16_t active_pos;
2823 	uint16_t i;
2824 
2825 	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2826 		return rc;
2827 
2828 	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2829 
2830 	if (check_for_bonded_ethdev(bonded_eth_dev))
2831 		return rc;
2832 
2833 	internals = bonded_eth_dev->data->dev_private;
2834 
2835 	/* If the device isn't started don't handle interrupts */
2836 	if (!bonded_eth_dev->data->dev_started)
2837 		return rc;
2838 
2839 	/* verify that port_id is a valid slave of bonded port */
2840 	for (i = 0; i < internals->slave_count; i++) {
2841 		if (internals->slaves[i].port_id == port_id) {
2842 			valid_slave = 1;
2843 			break;
2844 		}
2845 	}
2846 
2847 	if (!valid_slave)
2848 		return rc;
2849 
2850 	/* Synchronize lsc callback parallel calls either by real link event
2851 	 * from the slaves PMDs or by the bonding PMD itself.
2852 	 */
2853 	rte_spinlock_lock(&internals->lsc_lock);
2854 
2855 	/* Search for port in active port list */
2856 	active_pos = find_slave_by_id(internals->active_slaves,
2857 			internals->active_slave_count, port_id);
2858 
2859 	ret = rte_eth_link_get_nowait(port_id, &link);
2860 	if (ret < 0)
2861 		RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id);
2862 
2863 	if (ret == 0 && link.link_status) {
2864 		if (active_pos < internals->active_slave_count)
2865 			goto link_update;
2866 
2867 		/* check link state properties if bonded link is up*/
2868 		if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2869 			if (link_properties_valid(bonded_eth_dev, &link) != 0)
2870 				RTE_BOND_LOG(ERR, "Invalid link properties "
2871 					     "for slave %d in bonding mode %d",
2872 					     port_id, internals->mode);
2873 		} else {
2874 			/* inherit slave link properties */
2875 			link_properties_set(bonded_eth_dev, &link);
2876 		}
2877 
2878 		/* If no active slave ports then set this port to be
2879 		 * the primary port.
2880 		 */
2881 		if (internals->active_slave_count < 1) {
2882 			/* If first active slave, then change link status */
2883 			bonded_eth_dev->data->dev_link.link_status =
2884 								ETH_LINK_UP;
2885 			internals->current_primary_port = port_id;
2886 			lsc_flag = 1;
2887 
2888 			mac_address_slaves_update(bonded_eth_dev);
2889 		}
2890 
2891 		activate_slave(bonded_eth_dev, port_id);
2892 
2893 		/* If the user has defined the primary port then default to
2894 		 * using it.
2895 		 */
2896 		if (internals->user_defined_primary_port &&
2897 				internals->primary_port == port_id)
2898 			bond_ethdev_primary_set(internals, port_id);
2899 	} else {
2900 		if (active_pos == internals->active_slave_count)
2901 			goto link_update;
2902 
2903 		/* Remove from active slave list */
2904 		deactivate_slave(bonded_eth_dev, port_id);
2905 
2906 		if (internals->active_slave_count < 1)
2907 			lsc_flag = 1;
2908 
		/* Update primary id: take the first active slave from the list
		 * or, if none are available, fall back to the configured
		 * primary port */
2911 		if (port_id == internals->current_primary_port) {
2912 			if (internals->active_slave_count > 0)
2913 				bond_ethdev_primary_set(internals,
2914 						internals->active_slaves[0]);
2915 			else
2916 				internals->current_primary_port = internals->primary_port;
2917 			mac_address_slaves_update(bonded_eth_dev);
2918 		}
2919 	}
2920 
2921 link_update:
2922 	/**
2923 	 * Update bonded device link properties after any change to active
2924 	 * slaves
2925 	 */
2926 	bond_ethdev_link_update(bonded_eth_dev, 0);
2927 
2928 	if (lsc_flag) {
2929 		/* Cancel any possible outstanding interrupts if delays are enabled */
2930 		if (internals->link_up_delay_ms > 0 ||
2931 			internals->link_down_delay_ms > 0)
2932 			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2933 					bonded_eth_dev);
2934 
2935 		if (bonded_eth_dev->data->dev_link.link_status) {
2936 			if (internals->link_up_delay_ms > 0)
2937 				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2938 						bond_ethdev_delayed_lsc_propagation,
2939 						(void *)bonded_eth_dev);
2940 			else
2941 				rte_eth_dev_callback_process(bonded_eth_dev,
2942 						RTE_ETH_EVENT_INTR_LSC,
2943 						NULL);
2944 
2945 		} else {
2946 			if (internals->link_down_delay_ms > 0)
2947 				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2948 						bond_ethdev_delayed_lsc_propagation,
2949 						(void *)bonded_eth_dev);
2950 			else
2951 				rte_eth_dev_callback_process(bonded_eth_dev,
2952 						RTE_ETH_EVENT_INTR_LSC,
2953 						NULL);
2954 		}
2955 	}
2956 
2957 	rte_spinlock_unlock(&internals->lsc_lock);
2958 
2959 	return rc;
2960 }
2961 
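/* Store the new RETA in the bonded device context and propagate it to every
 * slave, replicating the table to fill each slave's RETA size.
 */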
2962 static int
2963 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2964 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2965 {
2966 	unsigned i, j;
2967 	int result = 0;
2968 	int slave_reta_size;
2969 	unsigned reta_count;
2970 	struct bond_dev_private *internals = dev->data->dev_private;
2971 
2972 	if (reta_size != internals->reta_size)
2973 		return -EINVAL;
2974 
2975 	 /* Copy RETA table */
2976 	reta_count = (reta_size + RTE_RETA_GROUP_SIZE - 1) /
2977 			RTE_RETA_GROUP_SIZE;
2978 
2979 	for (i = 0; i < reta_count; i++) {
2980 		internals->reta_conf[i].mask = reta_conf[i].mask;
2981 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2982 			if ((reta_conf[i].mask >> j) & 0x01)
2983 				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2984 	}
2985 
2986 	/* Fill rest of array */
2987 	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2988 		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2989 				sizeof(internals->reta_conf[0]) * reta_count);
2990 
2991 	/* Propagate RETA over slaves */
2992 	for (i = 0; i < internals->slave_count; i++) {
2993 		slave_reta_size = internals->slaves[i].reta_size;
2994 		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2995 				&internals->reta_conf[0], slave_reta_size);
2996 		if (result < 0)
2997 			return result;
2998 	}
2999 
3000 	return 0;
3001 }
3002 
3003 static int
3004 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
3005 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
3006 {
3007 	int i, j;
3008 	struct bond_dev_private *internals = dev->data->dev_private;
3009 
3010 	if (reta_size != internals->reta_size)
3011 		return -EINVAL;
3012 
3013 	 /* Copy RETA table */
3014 	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
3015 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3016 			if ((reta_conf[i].mask >> j) & 0x01)
3017 				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
3018 
3019 	return 0;
3020 }
3021 
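/* Update the RSS hash functions and key for the bonded device, masked by the
 * bonded device's supported RSS offload types, and propagate the new
 * configuration to every slave.
 */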
3022 static int
3023 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
3024 		struct rte_eth_rss_conf *rss_conf)
3025 {
3026 	int i, result = 0;
3027 	struct bond_dev_private *internals = dev->data->dev_private;
3028 	struct rte_eth_rss_conf bond_rss_conf;
3029 
3030 	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
3031 
3032 	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
3033 
3034 	if (bond_rss_conf.rss_hf != 0)
3035 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
3036 
3037 	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
3038 			sizeof(internals->rss_key)) {
3039 		if (bond_rss_conf.rss_key_len == 0)
3040 			bond_rss_conf.rss_key_len = 40;
3041 		internals->rss_key_len = bond_rss_conf.rss_key_len;
3042 		memcpy(internals->rss_key, bond_rss_conf.rss_key,
3043 				internals->rss_key_len);
3044 	}
3045 
3046 	for (i = 0; i < internals->slave_count; i++) {
3047 		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
3048 				&bond_rss_conf);
3049 		if (result < 0)
3050 			return result;
3051 	}
3052 
3053 	return 0;
3054 }
3055 
3056 static int
3057 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
3058 		struct rte_eth_rss_conf *rss_conf)
3059 {
3060 	struct bond_dev_private *internals = dev->data->dev_private;
3061 
3062 	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
3063 	rss_conf->rss_key_len = internals->rss_key_len;
3064 	if (rss_conf->rss_key)
3065 		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
3066 
3067 	return 0;
3068 }
3069 
3070 static int
3071 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
3072 {
3073 	struct rte_eth_dev *slave_eth_dev;
3074 	struct bond_dev_private *internals = dev->data->dev_private;
3075 	int ret, i;
3076 
3077 	rte_spinlock_lock(&internals->lock);
3078 
3079 	for (i = 0; i < internals->slave_count; i++) {
3080 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3081 		if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
3082 			rte_spinlock_unlock(&internals->lock);
3083 			return -ENOTSUP;
3084 		}
3085 	}
3086 	for (i = 0; i < internals->slave_count; i++) {
3087 		ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
3088 		if (ret < 0) {
3089 			rte_spinlock_unlock(&internals->lock);
3090 			return ret;
3091 		}
3092 	}
3093 
3094 	rte_spinlock_unlock(&internals->lock);
3095 	return 0;
3096 }
3097 
3098 static int
3099 bond_ethdev_mac_address_set(struct rte_eth_dev *dev,
3100 			struct rte_ether_addr *addr)
3101 {
3102 	if (mac_address_set(dev, addr)) {
3103 		RTE_BOND_LOG(ERR, "Failed to update MAC address");
3104 		return -EINVAL;
3105 	}
3106 
3107 	return 0;
3108 }
3109 
3110 static int
3111 bond_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
3112 		  const struct rte_flow_ops **ops)
3113 {
3114 	*ops = &bond_flow_ops;
3115 	return 0;
3116 }
3117 
3118 static int
3119 bond_ethdev_mac_addr_add(struct rte_eth_dev *dev,
3120 			struct rte_ether_addr *mac_addr,
3121 			__rte_unused uint32_t index, uint32_t vmdq)
3122 {
3123 	struct rte_eth_dev *slave_eth_dev;
3124 	struct bond_dev_private *internals = dev->data->dev_private;
3125 	int ret, i;
3126 
3127 	rte_spinlock_lock(&internals->lock);
3128 
3129 	for (i = 0; i < internals->slave_count; i++) {
3130 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3131 		if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
3132 			 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
3133 			ret = -ENOTSUP;
3134 			goto end;
3135 		}
3136 	}
3137 
3138 	for (i = 0; i < internals->slave_count; i++) {
3139 		ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
3140 				mac_addr, vmdq);
3141 		if (ret < 0) {
3142 			/* rollback */
3143 			for (i--; i >= 0; i--)
3144 				rte_eth_dev_mac_addr_remove(
3145 					internals->slaves[i].port_id, mac_addr);
3146 			goto end;
3147 		}
3148 	}
3149 
3150 	ret = 0;
3151 end:
3152 	rte_spinlock_unlock(&internals->lock);
3153 	return ret;
3154 }
3155 
3156 static void
3157 bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
3158 {
3159 	struct rte_eth_dev *slave_eth_dev;
3160 	struct bond_dev_private *internals = dev->data->dev_private;
3161 	int i;
3162 
3163 	rte_spinlock_lock(&internals->lock);
3164 
3165 	for (i = 0; i < internals->slave_count; i++) {
3166 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3167 		if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3168 			goto end;
3169 	}
3170 
3171 	struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index];
3172 
3173 	for (i = 0; i < internals->slave_count; i++)
3174 		rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3175 				mac_addr);
3176 
3177 end:
3178 	rte_spinlock_unlock(&internals->lock);
3179 }
3180 
3181 const struct eth_dev_ops default_dev_ops = {
3182 	.dev_start            = bond_ethdev_start,
3183 	.dev_stop             = bond_ethdev_stop,
3184 	.dev_close            = bond_ethdev_close,
3185 	.dev_configure        = bond_ethdev_configure,
3186 	.dev_infos_get        = bond_ethdev_info,
3187 	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
3188 	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
3189 	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
3190 	.rx_queue_release     = bond_ethdev_rx_queue_release,
3191 	.tx_queue_release     = bond_ethdev_tx_queue_release,
3192 	.link_update          = bond_ethdev_link_update,
3193 	.stats_get            = bond_ethdev_stats_get,
3194 	.stats_reset          = bond_ethdev_stats_reset,
3195 	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
3196 	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
3197 	.allmulticast_enable  = bond_ethdev_allmulticast_enable,
3198 	.allmulticast_disable = bond_ethdev_allmulticast_disable,
3199 	.reta_update          = bond_ethdev_rss_reta_update,
3200 	.reta_query           = bond_ethdev_rss_reta_query,
3201 	.rss_hash_update      = bond_ethdev_rss_hash_update,
3202 	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
3203 	.mtu_set              = bond_ethdev_mtu_set,
3204 	.mac_addr_set         = bond_ethdev_mac_address_set,
3205 	.mac_addr_add         = bond_ethdev_mac_addr_add,
3206 	.mac_addr_remove      = bond_ethdev_mac_addr_remove,
3207 	.flow_ops_get         = bond_flow_ops_get
3208 };
3209 
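/* Allocate and initialize a bonded ethdev: reserve the ethdev entry, set up
 * MAC address storage, default private data and the VLAN filter bitmap, and
 * apply the requested bonding mode.
 */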
3210 static int
3211 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3212 {
3213 	const char *name = rte_vdev_device_name(dev);
3214 	uint8_t socket_id = dev->device.numa_node;
3215 	struct bond_dev_private *internals = NULL;
3216 	struct rte_eth_dev *eth_dev = NULL;
3217 	uint32_t vlan_filter_bmp_size;
3218 
3219 	/* now do all data allocation - for eth_dev structure, dummy pci driver
3220 	 * and internal (private) data
3221 	 */
3222 
3223 	/* reserve an ethdev entry */
3224 	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3225 	if (eth_dev == NULL) {
3226 		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3227 		goto err;
3228 	}
3229 
3230 	internals = eth_dev->data->dev_private;
3231 	eth_dev->data->nb_rx_queues = (uint16_t)1;
3232 	eth_dev->data->nb_tx_queues = (uint16_t)1;
3233 
3234 	/* Allocate memory for storing MAC addresses */
3235 	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN *
3236 			BOND_MAX_MAC_ADDRS, 0, socket_id);
3237 	if (eth_dev->data->mac_addrs == NULL) {
3238 		RTE_BOND_LOG(ERR,
3239 			     "Failed to allocate %u bytes needed to store MAC addresses",
3240 			     RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3241 		goto err;
3242 	}
3243 
3244 	eth_dev->dev_ops = &default_dev_ops;
3245 	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC |
3246 					RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
3247 
3248 	rte_spinlock_init(&internals->lock);
3249 	rte_spinlock_init(&internals->lsc_lock);
3250 
3251 	internals->port_id = eth_dev->data->port_id;
3252 	internals->mode = BONDING_MODE_INVALID;
3253 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3254 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3255 	internals->burst_xmit_hash = burst_xmit_l2_hash;
3256 	internals->user_defined_mac = 0;
3257 
3258 	internals->link_status_polling_enabled = 0;
3259 
3260 	internals->link_status_polling_interval_ms =
3261 		DEFAULT_POLLING_INTERVAL_10_MS;
3262 	internals->link_down_delay_ms = 0;
3263 	internals->link_up_delay_ms = 0;
3264 
3265 	internals->slave_count = 0;
3266 	internals->active_slave_count = 0;
3267 	internals->rx_offload_capa = 0;
3268 	internals->tx_offload_capa = 0;
3269 	internals->rx_queue_offload_capa = 0;
3270 	internals->tx_queue_offload_capa = 0;
3271 	internals->candidate_max_rx_pktlen = 0;
3272 	internals->max_rx_pktlen = 0;
3273 
	/* Initially allow any RSS offload type to be chosen */
3275 	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3276 
3277 	memset(&internals->default_rxconf, 0,
3278 	       sizeof(internals->default_rxconf));
3279 	memset(&internals->default_txconf, 0,
3280 	       sizeof(internals->default_txconf));
3281 
3282 	memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3283 	memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3284 
3285 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3286 	memset(internals->slaves, 0, sizeof(internals->slaves));
3287 
3288 	TAILQ_INIT(&internals->flow_list);
3289 	internals->flow_isolated_valid = 0;
3290 
3291 	/* Set mode 4 default configuration */
3292 	bond_mode_8023ad_setup(eth_dev, NULL);
3293 	if (bond_ethdev_mode_set(eth_dev, mode)) {
3294 		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3295 				 eth_dev->data->port_id, mode);
3296 		goto err;
3297 	}
3298 
3299 	vlan_filter_bmp_size =
3300 		rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1);
3301 	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3302 						   RTE_CACHE_LINE_SIZE);
3303 	if (internals->vlan_filter_bmpmem == NULL) {
3304 		RTE_BOND_LOG(ERR,
3305 			     "Failed to allocate vlan bitmap for bonded device %u",
3306 			     eth_dev->data->port_id);
3307 		goto err;
3308 	}
3309 
3310 	internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1,
3311 			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3312 	if (internals->vlan_filter_bmp == NULL) {
3313 		RTE_BOND_LOG(ERR,
3314 			     "Failed to init vlan bitmap for bonded device %u",
3315 			     eth_dev->data->port_id);
3316 		rte_free(internals->vlan_filter_bmpmem);
3317 		goto err;
3318 	}
3319 
3320 	return eth_dev->data->port_id;
3321 
3322 err:
3323 	rte_free(internals);
3324 	if (eth_dev != NULL)
3325 		eth_dev->data->dev_private = NULL;
3326 	rte_eth_dev_release_port(eth_dev);
3327 	return -1;
3328 }
3329 
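/* vdev probe entry point: parse the mode, socket id and aggregator selection
 * kvargs and create the bonded ethdev. In a secondary process it only
 * attaches to the already created device.
 */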
3330 static int
3331 bond_probe(struct rte_vdev_device *dev)
3332 {
3333 	const char *name;
3334 	struct bond_dev_private *internals;
3335 	struct rte_kvargs *kvlist;
3336 	uint8_t bonding_mode, socket_id/*, agg_mode*/;
3337 	int  arg_count, port_id;
3338 	uint8_t agg_mode;
3339 	struct rte_eth_dev *eth_dev;
3340 
3341 	if (!dev)
3342 		return -EINVAL;
3343 
3344 	name = rte_vdev_device_name(dev);
3345 	RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3346 
3347 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3348 		eth_dev = rte_eth_dev_attach_secondary(name);
3349 		if (!eth_dev) {
3350 			RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3351 			return -1;
3352 		}
3353 		/* TODO: request info from primary to set up Rx and Tx */
3354 		eth_dev->dev_ops = &default_dev_ops;
3355 		eth_dev->device = &dev->device;
3356 		rte_eth_dev_probing_finish(eth_dev);
3357 		return 0;
3358 	}
3359 
3360 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3361 		pmd_bond_init_valid_arguments);
3362 	if (kvlist == NULL)
3363 		return -1;
3364 
3365 	/* Parse link bonding mode */
3366 	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3367 		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3368 				&bond_ethdev_parse_slave_mode_kvarg,
3369 				&bonding_mode) != 0) {
3370 			RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3371 					name);
3372 			goto parse_error;
3373 		}
3374 	} else {
3375 		RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3376 				"device %s", name);
3377 		goto parse_error;
3378 	}
3379 
3380 	/* Parse socket id to create bonding device on */
3381 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3382 	if (arg_count == 1) {
3383 		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3384 				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
3385 				!= 0) {
3386 			RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3387 					"bonded device %s", name);
3388 			goto parse_error;
3389 		}
3390 	} else if (arg_count > 1) {
3391 		RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3392 				"bonded device %s", name);
3393 		goto parse_error;
3394 	} else {
3395 		socket_id = rte_socket_id();
3396 	}
3397 
3398 	dev->device.numa_node = socket_id;
3399 
3400 	/* Create link bonding eth device */
3401 	port_id = bond_alloc(dev, bonding_mode);
3402 	if (port_id < 0) {
3403 		RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on "
3404 				"socket %u.",	name, bonding_mode, socket_id);
3405 		goto parse_error;
3406 	}
3407 	internals = rte_eth_devices[port_id].data->dev_private;
3408 	internals->kvlist = kvlist;
3409 
3410 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3411 		if (rte_kvargs_process(kvlist,
3412 				PMD_BOND_AGG_MODE_KVARG,
3413 				&bond_ethdev_parse_slave_agg_mode_kvarg,
3414 				&agg_mode) != 0) {
3415 			RTE_BOND_LOG(ERR,
3416 					"Failed to parse agg selection mode for bonded device %s",
3417 					name);
3418 			goto parse_error;
3419 		}
3420 
3421 		if (internals->mode == BONDING_MODE_8023AD)
3422 			internals->mode4.agg_selection = agg_mode;
3423 	} else {
3424 		internals->mode4.agg_selection = AGG_STABLE;
3425 	}
3426 
3427 	rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3428 	RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3429 			"socket %u.",	name, port_id, bonding_mode, socket_id);
3430 	return 0;
3431 
3432 parse_error:
3433 	rte_kvargs_free(kvlist);
3434 
3435 	return -1;
3436 }
3437 
3438 static int
3439 bond_remove(struct rte_vdev_device *dev)
3440 {
3441 	struct rte_eth_dev *eth_dev;
3442 	struct bond_dev_private *internals;
3443 	const char *name;
3444 	int ret = 0;
3445 
3446 	if (!dev)
3447 		return -EINVAL;
3448 
3449 	name = rte_vdev_device_name(dev);
3450 	RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3451 
3452 	/* find an ethdev entry */
3453 	eth_dev = rte_eth_dev_allocated(name);
3454 	if (eth_dev == NULL)
3455 		return 0; /* port already released */
3456 
3457 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3458 		return rte_eth_dev_release_port(eth_dev);
3459 
3460 	RTE_ASSERT(eth_dev->device == &dev->device);
3461 
3462 	internals = eth_dev->data->dev_private;
3463 	if (internals->slave_count != 0)
3464 		return -EBUSY;
3465 
3466 	if (eth_dev->data->dev_started == 1) {
3467 		ret = bond_ethdev_stop(eth_dev);
3468 		bond_ethdev_close(eth_dev);
3469 	}
3470 	rte_eth_dev_release_port(eth_dev);
3471 
3472 	return ret;
3473 }
3474 
/* This part will resolve the slave port ids after all the other pdevs and
 * vdevs have been allocated */
3477 static int
3478 bond_ethdev_configure(struct rte_eth_dev *dev)
3479 {
3480 	const char *name = dev->device->name;
3481 	struct bond_dev_private *internals = dev->data->dev_private;
3482 	struct rte_kvargs *kvlist = internals->kvlist;
3483 	int arg_count;
3484 	uint16_t port_id = dev - rte_eth_devices;
3485 	uint8_t agg_mode;
3486 
3487 	static const uint8_t default_rss_key[40] = {
3488 		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3489 		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3490 		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3491 		0xBE, 0xAC, 0x01, 0xFA
3492 	};
3493 
3494 	unsigned i, j;
3495 
3496 	/*
3497 	 * If RSS is enabled, fill table with default values and
	 * set the key to the value specified in the port RSS configuration.
3499 	 * Fall back to default RSS key if the key is not specified
3500 	 */
3501 	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3502 		if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3503 			internals->rss_key_len =
3504 				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3505 			memcpy(internals->rss_key,
3506 			       dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3507 			       internals->rss_key_len);
3508 		} else {
3509 			internals->rss_key_len = sizeof(default_rss_key);
3510 			memcpy(internals->rss_key, default_rss_key,
3511 			       internals->rss_key_len);
3512 		}
3513 
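		/*
		 * Fill the redirection table round-robin across the Rx queues,
		 * e.g. with four queues each RETA group holds the repeating
		 * pattern 0, 1, 2, 3, ...
		 */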
3514 		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3515 			internals->reta_conf[i].mask = ~0LL;
3516 			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3517 				internals->reta_conf[i].reta[j] =
3518 						(i * RTE_RETA_GROUP_SIZE + j) %
3519 						dev->data->nb_rx_queues;
3520 		}
3521 	}
3522 
3523 	/* set the max_rx_pktlen */
3524 	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3525 
3526 	/*
3527 	 * If no kvlist was supplied, this bonded device was created through
3528 	 * the bonding API and there are no devargs left to parse.
3529 	 */
3530 	if (!kvlist)
3531 		return 0;
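	/* (Such devices are configured through the rte_eth_bond_* API instead,
	 * e.g. rte_eth_bond_create() and rte_eth_bond_slave_add().) */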
3532 
3533 	/* Parse MAC address for bonded device */
3534 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3535 	if (arg_count == 1) {
3536 		struct rte_ether_addr bond_mac;
3537 
3538 		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3539 				       &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3540 			RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3541 				     name);
3542 			return -1;
3543 		}
3544 
3545 		/* Set MAC address */
3546 		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3547 			RTE_BOND_LOG(ERR,
3548 				     "Failed to set mac address on bonded device %s",
3549 				     name);
3550 			return -1;
3551 		}
3552 	} else if (arg_count > 1) {
3553 		RTE_BOND_LOG(ERR,
3554 			     "MAC address can be specified only once for bonded device %s",
3555 			     name);
3556 		return -1;
3557 	}
3558 
3559 	/* Parse/set balance mode transmit policy */
3560 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3561 	if (arg_count == 1) {
3562 		uint8_t xmit_policy;
3563 
3564 		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3565 				       &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3566 		    0) {
3567 			RTE_BOND_LOG(INFO,
3568 				     "Invalid xmit policy specified for bonded device %s",
3569 				     name);
3570 			return -1;
3571 		}
3572 
3573 		/* Set balance mode transmit policy */
3574 		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3575 			RTE_BOND_LOG(ERR,
3576 				     "Failed to set balance xmit policy on bonded device %s",
3577 				     name);
3578 			return -1;
3579 		}
3580 	} else if (arg_count > 1) {
3581 		RTE_BOND_LOG(ERR,
3582 			     "Transmit policy can be specified only once for bonded device %s",
3583 			     name);
3584 		return -1;
3585 	}
3586 
3587 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3588 		if (rte_kvargs_process(kvlist,
3589 				       PMD_BOND_AGG_MODE_KVARG,
3590 				       &bond_ethdev_parse_slave_agg_mode_kvarg,
3591 				       &agg_mode) != 0) {
3592 			RTE_BOND_LOG(ERR,
3593 				     "Failed to parse agg selection mode for bonded device %s",
3594 				     name);
			return -1;
3595 		}
3596 		if (internals->mode == BONDING_MODE_8023AD) {
3597 			int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3598 					agg_mode);
3599 			if (ret < 0) {
3600 				RTE_BOND_LOG(ERR,
3601 					"Invalid args for agg selection set for bonded device %s",
3602 					name);
3603 				return -1;
3604 			}
3605 		}
3606 	}
3607 
3608 	/* Parse/add slave ports to bonded device */
3609 	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3610 		struct bond_ethdev_slave_ports slave_ports;
3611 		unsigned i;
3612 
3613 		memset(&slave_ports, 0, sizeof(slave_ports));
3614 
3615 		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3616 				       &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3617 			RTE_BOND_LOG(ERR,
3618 				     "Failed to parse slave ports for bonded device %s",
3619 				     name);
3620 			return -1;
3621 		}
3622 
3623 		for (i = 0; i < slave_ports.slave_count; i++) {
3624 			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3625 				RTE_BOND_LOG(ERR,
3626 					     "Failed to add port %d as slave to bonded device %s",
3627 					     slave_ports.slaves[i], name);
3628 			}
3629 		}
3630 
3631 	} else {
3632 		RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3633 		return -1;
3634 	}
3635 
3636 	/* Parse/set primary slave port id */
3637 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3638 	if (arg_count == 1) {
3639 		uint16_t primary_slave_port_id;
3640 
3641 		if (rte_kvargs_process(kvlist,
3642 				       PMD_BOND_PRIMARY_SLAVE_KVARG,
3643 				       &bond_ethdev_parse_primary_slave_port_id_kvarg,
3644 				       &primary_slave_port_id) < 0) {
3645 			RTE_BOND_LOG(INFO,
3646 				     "Invalid primary slave port id specified for bonded device %s",
3647 				     name);
3648 			return -1;
3649 		}
3650 
3651 		/* Set primary slave port id */
3652 		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3653 		    != 0) {
3654 			RTE_BOND_LOG(ERR,
3655 				     "Failed to set primary slave port %d on bonded device %s",
3656 				     primary_slave_port_id, name);
3657 			return -1;
3658 		}
3659 	} else if (arg_count > 1) {
3660 		RTE_BOND_LOG(INFO,
3661 			     "Primary slave can be specified only once for bonded device %s",
3662 			     name);
3663 		return -1;
3664 	}
3665 
3666 	/* Parse link status monitor polling interval */
3667 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3668 	if (arg_count == 1) {
3669 		uint32_t lsc_poll_interval_ms;
3670 
3671 		if (rte_kvargs_process(kvlist,
3672 				       PMD_BOND_LSC_POLL_PERIOD_KVARG,
3673 				       &bond_ethdev_parse_time_ms_kvarg,
3674 				       &lsc_poll_interval_ms) < 0) {
3675 			RTE_BOND_LOG(INFO,
3676 				     "Invalid lsc polling interval value specified for bonded"
3677 				     " device %s", name);
3678 			return -1;
3679 		}
3680 
3681 		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3682 		    != 0) {
3683 			RTE_BOND_LOG(ERR,
3684 				     "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3685 				     lsc_poll_interval_ms, name);
3686 			return -1;
3687 		}
3688 	} else if (arg_count > 1) {
3689 		RTE_BOND_LOG(INFO,
3690 			     "LSC polling interval can be specified only once for bonded"
3691 			     " device %s", name);
3692 		return -1;
3693 	}
3694 
3695 	/* Parse link up interrupt propagation delay */
3696 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3697 	if (arg_count == 1) {
3698 		uint32_t link_up_delay_ms;
3699 
3700 		if (rte_kvargs_process(kvlist,
3701 				       PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3702 				       &bond_ethdev_parse_time_ms_kvarg,
3703 				       &link_up_delay_ms) < 0) {
3704 			RTE_BOND_LOG(INFO,
3705 				     "Invalid link up propagation delay value specified for"
3706 				     " bonded device %s", name);
3707 			return -1;
3708 		}
3709 
3710 		/* Set link up propagation delay */
3711 		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3712 		    != 0) {
3713 			RTE_BOND_LOG(ERR,
3714 				     "Failed to set link up propagation delay (%u ms) on bonded"
3715 				     " device %s", link_up_delay_ms, name);
3716 			return -1;
3717 		}
3718 	} else if (arg_count > 1) {
3719 		RTE_BOND_LOG(INFO,
3720 			     "Link up propagation delay can be specified only once for"
3721 			     " bonded device %s", name);
3722 		return -1;
3723 	}
3724 
3725 	/* Parse link down interrupt propagation delay */
3726 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3727 	if (arg_count == 1) {
3728 		uint32_t link_down_delay_ms;
3729 
3730 		if (rte_kvargs_process(kvlist,
3731 				       PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3732 				       &bond_ethdev_parse_time_ms_kvarg,
3733 				       &link_down_delay_ms) < 0) {
3734 			RTE_BOND_LOG(INFO,
3735 				     "Invalid link down propagation delay value specified for"
3736 				     " bonded device %s", name);
3737 			return -1;
3738 		}
3739 
3740 		/* Set link down propagation delay */
3741 		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3742 		    != 0) {
3743 			RTE_BOND_LOG(ERR,
3744 				     "Failed to set link down propagation delay (%u ms) on bonded device %s",
3745 				     link_down_delay_ms, name);
3746 			return -1;
3747 		}
3748 	} else if (arg_count > 1) {
3749 		RTE_BOND_LOG(INFO,
3750 			     "Link down propagation delay can be specified only once for bonded device %s",
3751 			     name);
3752 		return -1;
3753 	}
3754 
3755 	return 0;
3756 }
3757 
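/* Virtual device driver entry points registered with the vdev bus below. */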
3758 struct rte_vdev_driver pmd_bond_drv = {
3759 	.probe = bond_probe,
3760 	.remove = bond_remove,
3761 };
3762 
3763 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3764 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3765 
3766 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3767 	"slave=<ifc> "
3768 	"primary=<ifc> "
3769 	"mode=[0-6] "
3770 	"xmit_policy=[l2 | l23 | l34] "
3771 	"agg_mode=[count | stable | bandwidth] "
3772 	"socket_id=<int> "
3773 	"mac=<mac addr> "
3774 	"lsc_poll_period_ms=<int> "
3775 	"up_delay=<int> "
3776 	"down_delay=<int>");
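/*
 * Illustrative devargs usage (the slave PCI addresses are only examples):
 *   --vdev 'net_bonding0,mode=2,slave=0000:01:00.0,slave=0000:01:00.1,xmit_policy=l34'
 */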
3777 
3778 RTE_LOG_REGISTER(bond_logtype, pmd.net.bonding, NOTICE);
3779