xref: /dpdk/drivers/net/bonding/rte_eth_bond_pmd.c (revision a997a33b2a0145ad3e6320ea1fc7df8d51a2fcdf)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <stdlib.h>
34 #include <netinet/in.h>
35 
36 #include <rte_mbuf.h>
37 #include <rte_malloc.h>
38 #include <rte_ethdev.h>
39 #include <rte_tcp.h>
40 #include <rte_udp.h>
41 #include <rte_ip.h>
42 #include <rte_ip_frag.h>
43 #include <rte_devargs.h>
44 #include <rte_kvargs.h>
45 #include <rte_vdev.h>
46 #include <rte_alarm.h>
47 #include <rte_cycles.h>
48 
49 #include "rte_eth_bond.h"
50 #include "rte_eth_bond_private.h"
51 #include "rte_eth_bond_8023ad_private.h"
52 
53 #define REORDER_PERIOD_MS 10
54 
55 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
56 
57 /* Table for statistics in mode 5 TLB */
58 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
59 
60 static inline size_t
61 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
62 {
63 	size_t vlan_offset = 0;
64 
65 	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
66 		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
67 
68 		vlan_offset = sizeof(struct vlan_hdr);
69 		*proto = vlan_hdr->eth_proto;
70 
71 		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
72 			vlan_hdr = vlan_hdr + 1;
73 			*proto = vlan_hdr->eth_proto;
74 			vlan_offset += sizeof(struct vlan_hdr);
75 		}
76 	}
77 	return vlan_offset;
78 }
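
/*
 * Note: get_vlan_offset() above returns the number of bytes occupied by up to
 * two stacked VLAN headers (single tag or QinQ) and advances *proto to the
 * encapsulated EtherType, so callers can locate the L3 header as
 * (char *)(eth_hdr + 1) + vlan_offset.
 */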
79 
80 static uint16_t
81 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
82 {
83 	struct bond_dev_private *internals;
84 
85 	uint16_t num_rx_slave = 0;
86 	uint16_t num_rx_total = 0;
87 
88 	int i;
89 
90 	/* Cast to structure containing the bonded device's port id and queue id */
91 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92 
93 	internals = bd_rx_q->dev_private;
94 
95 
96 	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
97 		/* The offset into *bufs increases as packets are received
98 		 * from successive slaves */
99 		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
100 				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
101 		if (num_rx_slave) {
102 			num_rx_total += num_rx_slave;
103 			nb_pkts -= num_rx_slave;
104 		}
105 	}
106 
107 	return num_rx_total;
108 }
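
/*
 * The basic RX path above simply polls each active slave in turn, appending
 * whatever it returns to bufs until nb_pkts is exhausted; no filtering or
 * reordering is done here (modes that need it use the dedicated bursts below).
 */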
109 
110 static uint16_t
111 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
112 		uint16_t nb_pkts)
113 {
114 	struct bond_dev_private *internals;
115 
116 	/* Cast to structure containing the bonded device's port id and queue id */
117 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
118 
119 	internals = bd_rx_q->dev_private;
120 
121 	return rte_eth_rx_burst(internals->current_primary_port,
122 			bd_rx_q->queue_id, bufs, nb_pkts);
123 }
124 
125 static inline uint8_t
126 is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci)
127 {
128 	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
129 
130 	return !vlan_tci && (ethertype == ether_type_slow_be &&
131 		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
132 }
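
/*
 * is_lacp_packets() is true only for untagged slow-protocol frames
 * (EtherType 0x8809) carrying a LACPDU or marker subtype; those frames must
 * be diverted to the 802.3ad state machine instead of being delivered.
 */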
133 
134 static uint16_t
135 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
136 		uint16_t nb_pkts)
137 {
138 	/* Cast to structure containing the bonded device's port id and queue id */
139 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
140 	struct bond_dev_private *internals = bd_rx_q->dev_private;
141 	struct ether_addr bond_mac;
142 
143 	struct ether_hdr *hdr;
144 
145 	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
146 	uint16_t num_rx_total = 0;	/* Total number of received packets */
147 	uint8_t slaves[RTE_MAX_ETHPORTS];
148 	uint8_t slave_count;
149 
150 	uint8_t collecting;  /* current slave collecting status */
151 	const uint8_t promisc = internals->promiscuous_en;
152 	uint8_t i, j, k;
153 	uint8_t subtype;
154 
155 	rte_eth_macaddr_get(internals->port_id, &bond_mac);
156 	/* Copy slave list to protect against slave up/down changes during rx
157 	 * bursting */
158 	slave_count = internals->active_slave_count;
159 	memcpy(slaves, internals->active_slaves,
160 			sizeof(internals->active_slaves[0]) * slave_count);
161 
162 	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
163 		j = num_rx_total;
164 		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[i]], COLLECTING);
165 
166 		/* Read packets from this slave */
167 		num_rx_total += rte_eth_rx_burst(slaves[i], bd_rx_q->queue_id,
168 				&bufs[num_rx_total], nb_pkts - num_rx_total);
169 
170 		for (k = j; k < 2 && k < num_rx_total; k++)
171 			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
172 
173 		/* Handle slow protocol packets. */
174 		while (j < num_rx_total) {
175 			if (j + 3 < num_rx_total)
176 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
177 
178 			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
179 			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
180 
181 			/* Remove the packet from the array if it is a slow packet, the slave
182 			 * is not in collecting state, or the bonding interface is not in
183 			 * promiscuous mode and the packet's destination address does not match. */
184 			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) ||
185 				!collecting || (!promisc &&
186 					!is_multicast_ether_addr(&hdr->d_addr) &&
187 					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
188 
189 				if (hdr->ether_type == ether_type_slow_be) {
190 					bond_mode_8023ad_handle_slow_pkt(internals, slaves[i],
191 						bufs[j]);
192 				} else
193 					rte_pktmbuf_free(bufs[j]);
194 
195 				/* Packet is managed by mode 4 or dropped, shift the array */
196 				num_rx_total--;
197 				if (j < num_rx_total) {
198 					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
199 						(num_rx_total - j));
200 				}
201 			} else
202 				j++;
203 		}
204 	}
205 
206 	return num_rx_total;
207 }
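
/*
 * Summary of the mode-4 RX filtering above: LACP/marker frames are handed to
 * bond_mode_8023ad_handle_slow_pkt(), and frames from slaves that are not
 * COLLECTING, or unicast frames not addressed to the bond (when not in
 * promiscuous mode), are freed; in both cases the mbuf array is compacted so
 * only deliverable packets are returned.
 */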
208 
209 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
210 uint32_t burstnumberRX;
211 uint32_t burstnumberTX;
212 
213 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
214 
215 static void
216 arp_op_name(uint16_t arp_op, char *buf)
217 {
218 	switch (arp_op) {
219 	case ARP_OP_REQUEST:
220 		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
221 		return;
222 	case ARP_OP_REPLY:
223 		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
224 		return;
225 	case ARP_OP_REVREQUEST:
226 		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
227 				"Reverse ARP Request");
228 		return;
229 	case ARP_OP_REVREPLY:
230 		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
231 				"Reverse ARP Reply");
232 		return;
233 	case ARP_OP_INVREQUEST:
234 		snprintf(buf, sizeof("Peer Identify Request"), "%s",
235 				"Peer Identify Request");
236 		return;
237 	case ARP_OP_INVREPLY:
238 		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
239 				"Peer Identify Reply");
240 		return;
241 	default:
242 		break;
243 	}
244 	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
245 	return;
246 }
247 #endif
248 #define MaxIPv4String	16
249 static void
250 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
251 {
252 	uint32_t ipv4_addr;
253 
254 	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
255 	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
256 		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
257 		ipv4_addr & 0xFF);
258 }
259 
260 #define MAX_CLIENTS_NUMBER	128
261 uint8_t active_clients;
262 struct client_stats_t {
263 	uint8_t port;
264 	uint32_t ipv4_addr;
265 	uint32_t ipv4_rx_packets;
266 	uint32_t ipv4_tx_packets;
267 };
268 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
269 
270 static void
271 update_client_stats(uint32_t addr, uint8_t port, uint32_t *TXorRXindicator)
272 {
273 	int i = 0;
274 
275 	for (; i < MAX_CLIENTS_NUMBER; i++)	{
276 		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
277 			/* Existing client: just update its RX or TX packet count */
278 			if (TXorRXindicator == &burstnumberRX)
279 				client_stats[i].ipv4_rx_packets++;
280 			else
281 				client_stats[i].ipv4_tx_packets++;
282 			return;
283 		}
284 	}
285 	/* New client: insert it into the table and update its stats */
286 	if (TXorRXindicator == &burstnumberRX)
287 		client_stats[active_clients].ipv4_rx_packets++;
288 	else
289 		client_stats[active_clients].ipv4_tx_packets++;
290 	client_stats[active_clients].ipv4_addr = addr;
291 	client_stats[active_clients].port = port;
292 	active_clients++;
293 
294 }
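
/*
 * Debug-only helper: client_stats[] is a linear table keyed by (IPv4 address,
 * port). Unknown clients are appended at index active_clients without checking
 * against MAX_CLIENTS_NUMBER; this code is only compiled when ALB debugging is
 * enabled.
 */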
295 
296 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
297 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
298 		RTE_LOG(DEBUG, PMD, \
299 		"%s " \
300 		"port:%d " \
301 		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
302 		"SrcIP:%s " \
303 		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
304 		"DstIP:%s " \
305 		"%s " \
306 		"%d\n", \
307 		info, \
308 		port, \
309 		eth_h->s_addr.addr_bytes[0], \
310 		eth_h->s_addr.addr_bytes[1], \
311 		eth_h->s_addr.addr_bytes[2], \
312 		eth_h->s_addr.addr_bytes[3], \
313 		eth_h->s_addr.addr_bytes[4], \
314 		eth_h->s_addr.addr_bytes[5], \
315 		src_ip, \
316 		eth_h->d_addr.addr_bytes[0], \
317 		eth_h->d_addr.addr_bytes[1], \
318 		eth_h->d_addr.addr_bytes[2], \
319 		eth_h->d_addr.addr_bytes[3], \
320 		eth_h->d_addr.addr_bytes[4], \
321 		eth_h->d_addr.addr_bytes[5], \
322 		dst_ip, \
323 		arp_op, \
324 		++burstnumber)
325 #endif
326 
327 static void
328 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
329 		uint8_t port, uint32_t __attribute__((unused)) *burstnumber)
330 {
331 	struct ipv4_hdr *ipv4_h;
332 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
333 	struct arp_hdr *arp_h;
334 	char dst_ip[16];
335 	char ArpOp[24];
336 	char buf[16];
337 #endif
338 	char src_ip[16];
339 
340 	uint16_t ether_type = eth_h->ether_type;
341 	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
342 
343 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
344 	snprintf(buf, 16, "%s", info);
345 #endif
346 
347 	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
348 		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
349 		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
350 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
351 		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
352 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
353 #endif
354 		update_client_stats(ipv4_h->src_addr, port, burstnumber);
355 	}
356 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
357 	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
358 		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
359 		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
360 		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
361 		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
362 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
363 	}
364 #endif
365 }
366 #endif
367 
368 static uint16_t
369 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
370 {
371 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
372 	struct bond_dev_private *internals = bd_tx_q->dev_private;
373 	struct ether_hdr *eth_h;
374 	uint16_t ether_type, offset;
375 	uint16_t nb_recv_pkts;
376 	int i;
377 
378 	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
379 
380 	for (i = 0; i < nb_recv_pkts; i++) {
381 		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
382 		ether_type = eth_h->ether_type;
383 		offset = get_vlan_offset(eth_h, &ether_type);
384 
385 		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
386 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
387 			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
388 #endif
389 			bond_mode_alb_arp_recv(eth_h, offset, internals);
390 		}
391 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
392 		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
393 			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
394 #endif
395 	}
396 
397 	return nb_recv_pkts;
398 }
399 
400 static uint16_t
401 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
402 		uint16_t nb_pkts)
403 {
404 	struct bond_dev_private *internals;
405 	struct bond_tx_queue *bd_tx_q;
406 
407 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
408 	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
409 
410 	uint8_t num_of_slaves;
411 	uint8_t slaves[RTE_MAX_ETHPORTS];
412 
413 	uint16_t num_tx_total = 0, num_tx_slave;
414 
415 	static int slave_idx = 0;
416 	int i, cslave_idx = 0, tx_fail_total = 0;
417 
418 	bd_tx_q = (struct bond_tx_queue *)queue;
419 	internals = bd_tx_q->dev_private;
420 
421 	/* Copy slave list to protect against slave up/down changes during tx
422 	 * bursting */
423 	num_of_slaves = internals->active_slave_count;
424 	memcpy(slaves, internals->active_slaves,
425 			sizeof(internals->active_slaves[0]) * num_of_slaves);
426 
427 	if (num_of_slaves < 1)
428 		return num_tx_total;
429 
430 	/* Populate each slave's mbuf array with the packets to be sent on it */
431 	for (i = 0; i < nb_pkts; i++) {
432 		cslave_idx = (slave_idx + i) % num_of_slaves;
433 		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
434 	}
435 
436 	/* increment current slave index so the next call to tx burst starts on the
437 	 * next slave */
438 	slave_idx = ++cslave_idx;
439 
440 	/* Send packet burst on each slave device */
441 	for (i = 0; i < num_of_slaves; i++) {
442 		if (slave_nb_pkts[i] > 0) {
443 			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
444 					slave_bufs[i], slave_nb_pkts[i]);
445 
446 			/* if tx burst fails move packets to end of bufs */
447 			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
448 				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
449 
450 				tx_fail_total += tx_fail_slave;
451 
452 				memcpy(&bufs[nb_pkts - tx_fail_total],
453 						&slave_bufs[i][num_tx_slave],
454 						tx_fail_slave * sizeof(bufs[0]));
455 			}
456 			num_tx_total += num_tx_slave;
457 		}
458 	}
459 
460 	return num_tx_total;
461 }
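
/*
 * Note on the round-robin TX path above: slave_idx is a function-scope static,
 * so the round-robin starting position is shared by every queue and caller
 * rather than being tracked per TX queue. Packets that a slave fails to accept
 * are copied back to the tail of bufs, so the caller can retry the last
 * (nb_pkts - return value) entries.
 */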
462 
463 static uint16_t
464 bond_ethdev_tx_burst_active_backup(void *queue,
465 		struct rte_mbuf **bufs, uint16_t nb_pkts)
466 {
467 	struct bond_dev_private *internals;
468 	struct bond_tx_queue *bd_tx_q;
469 
470 	bd_tx_q = (struct bond_tx_queue *)queue;
471 	internals = bd_tx_q->dev_private;
472 
473 	if (internals->active_slave_count < 1)
474 		return 0;
475 
476 	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
477 			bufs, nb_pkts);
478 }
479 
480 static inline uint16_t
481 ether_hash(struct ether_hdr *eth_hdr)
482 {
483 	unaligned_uint16_t *word_src_addr =
484 		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
485 	unaligned_uint16_t *word_dst_addr =
486 		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
487 
488 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
489 			(word_src_addr[1] ^ word_dst_addr[1]) ^
490 			(word_src_addr[2] ^ word_dst_addr[2]);
491 }
492 
493 static inline uint32_t
494 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
495 {
496 	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
497 }
498 
499 static inline uint32_t
500 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
501 {
502 	unaligned_uint32_t *word_src_addr =
503 		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
504 	unaligned_uint32_t *word_dst_addr =
505 		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
506 
507 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
508 			(word_src_addr[1] ^ word_dst_addr[1]) ^
509 			(word_src_addr[2] ^ word_dst_addr[2]) ^
510 			(word_src_addr[3] ^ word_dst_addr[3]);
511 }
512 
513 uint16_t
514 xmit_l2_hash(const struct rte_mbuf *buf, uint8_t slave_count)
515 {
516 	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
517 
518 	uint32_t hash = ether_hash(eth_hdr);
519 
520 	return (hash ^= hash >> 8) % slave_count;
521 }
522 
523 uint16_t
524 xmit_l23_hash(const struct rte_mbuf *buf, uint8_t slave_count)
525 {
526 	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
527 	uint16_t proto = eth_hdr->ether_type;
528 	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
529 	uint32_t hash, l3hash = 0;
530 
531 	hash = ether_hash(eth_hdr);
532 
533 	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
534 		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
535 				((char *)(eth_hdr + 1) + vlan_offset);
536 		l3hash = ipv4_hash(ipv4_hdr);
537 
538 	} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
539 		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
540 				((char *)(eth_hdr + 1) + vlan_offset);
541 		l3hash = ipv6_hash(ipv6_hdr);
542 	}
543 
544 	hash = hash ^ l3hash;
545 	hash ^= hash >> 16;
546 	hash ^= hash >> 8;
547 
548 	return hash % slave_count;
549 }
550 
551 uint16_t
552 xmit_l34_hash(const struct rte_mbuf *buf, uint8_t slave_count)
553 {
554 	struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(buf, struct ether_hdr *);
555 	uint16_t proto = eth_hdr->ether_type;
556 	size_t vlan_offset = get_vlan_offset(eth_hdr, &proto);
557 
558 	struct udp_hdr *udp_hdr = NULL;
559 	struct tcp_hdr *tcp_hdr = NULL;
560 	uint32_t hash, l3hash = 0, l4hash = 0;
561 
562 	if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
563 		struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
564 				((char *)(eth_hdr + 1) + vlan_offset);
565 		size_t ip_hdr_offset;
566 
567 		l3hash = ipv4_hash(ipv4_hdr);
568 
569 		/* there is no L4 header in a fragmented packet */
570 		if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) == 0)) {
571 			ip_hdr_offset = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
572 					IPV4_IHL_MULTIPLIER;
573 
574 			if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
575 				tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
576 						ip_hdr_offset);
577 				l4hash = HASH_L4_PORTS(tcp_hdr);
578 			} else if (ipv4_hdr->next_proto_id == IPPROTO_UDP) {
579 				udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
580 						ip_hdr_offset);
581 				l4hash = HASH_L4_PORTS(udp_hdr);
582 			}
583 		}
584 	} else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
585 		struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
586 				((char *)(eth_hdr + 1) + vlan_offset);
587 		l3hash = ipv6_hash(ipv6_hdr);
588 
589 		if (ipv6_hdr->proto == IPPROTO_TCP) {
590 			tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
591 			l4hash = HASH_L4_PORTS(tcp_hdr);
592 		} else if (ipv6_hdr->proto == IPPROTO_UDP) {
593 			udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
594 			l4hash = HASH_L4_PORTS(udp_hdr);
595 		}
596 	}
597 
598 	hash = l3hash ^ l4hash;
599 	hash ^= hash >> 16;
600 	hash ^= hash >> 8;
601 
602 	return hash % slave_count;
603 }
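
/*
 * Illustrative sketch (not part of the driver): the balance and 802.3ad TX
 * paths invoke one of the xmit_l*_hash() policies through
 * internals->xmit_hash to map each packet to a slave index, e.g. for a
 * two-slave bond:
 *
 *     uint16_t idx = xmit_l34_hash(pkt, 2);   // 0 or 1, stable per L3/L4 flow
 *
 * All three policies fold the hash down with shifts before taking it modulo
 * slave_count, so the mapping only changes when the active slave count changes.
 */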
604 
605 struct bwg_slave {
606 	uint64_t bwg_left_int;
607 	uint64_t bwg_left_remainder;
608 	uint8_t slave;
609 };
610 
611 void
612 bond_tlb_activate_slave(struct bond_dev_private *internals) {
613 	int i;
614 
615 	for (i = 0; i < internals->active_slave_count; i++) {
616 		tlb_last_obytets[internals->active_slaves[i]] = 0;
617 	}
618 }
619 
620 static int
621 bandwidth_cmp(const void *a, const void *b)
622 {
623 	const struct bwg_slave *bwg_a = a;
624 	const struct bwg_slave *bwg_b = b;
625 	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
626 	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
627 			(int64_t)bwg_a->bwg_left_remainder;
628 	if (diff > 0)
629 		return 1;
630 	else if (diff < 0)
631 		return -1;
632 	else if (diff2 > 0)
633 		return 1;
634 	else if (diff2 < 0)
635 		return -1;
636 	else
637 		return 0;
638 }
639 
640 static void
641 bandwidth_left(uint8_t port_id, uint64_t load, uint8_t update_idx,
642 		struct bwg_slave *bwg_slave)
643 {
644 	struct rte_eth_link link_status;
645 
646 	rte_eth_link_get(port_id, &link_status);
647 	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
648 	if (link_bwg == 0)
649 		return;
650 	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
651 	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
652 	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
653 }
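
/*
 * bandwidth_left() estimates how much of a slave's link capacity remains
 * unused over the current measurement window: link_bwg is the link capacity
 * in bytes scaled by the elapsed REORDER_PERIOD_MS intervals, and the unused
 * share is stored as a quotient/remainder pair so bandwidth_cmp() can order
 * slaves from most to least spare bandwidth without floating point.
 */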
654 
655 static void
656 bond_ethdev_update_tlb_slave_cb(void *arg)
657 {
658 	struct bond_dev_private *internals = arg;
659 	struct rte_eth_stats slave_stats;
660 	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
661 	uint8_t slave_count;
662 	uint64_t tx_bytes;
663 
664 	uint8_t update_stats = 0;
665 	uint8_t i, slave_id;
666 
667 	internals->slave_update_idx++;
668 
669 
670 	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
671 		update_stats = 1;
672 
673 	for (i = 0; i < internals->active_slave_count; i++) {
674 		slave_id = internals->active_slaves[i];
675 		rte_eth_stats_get(slave_id, &slave_stats);
676 		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
677 		bandwidth_left(slave_id, tx_bytes,
678 				internals->slave_update_idx, &bwg_array[i]);
679 		bwg_array[i].slave = slave_id;
680 
681 		if (update_stats) {
682 			tlb_last_obytets[slave_id] = slave_stats.obytes;
683 		}
684 	}
685 
686 	if (update_stats == 1)
687 		internals->slave_update_idx = 0;
688 
689 	slave_count = i;
690 	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
691 	for (i = 0; i < slave_count; i++)
692 		internals->tlb_slaves_order[i] = bwg_array[i].slave;
693 
694 	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
695 			(struct bond_dev_private *)internals);
696 }
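
/*
 * This callback re-arms itself every REORDER_PERIOD_MS milliseconds via
 * rte_eal_alarm_set() and rewrites internals->tlb_slaves_order with the
 * active slaves sorted by spare bandwidth; bond_ethdev_tx_burst_tlb() then
 * walks that order, so the least loaded slave is tried first.
 */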
697 
698 static uint16_t
699 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
700 {
701 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
702 	struct bond_dev_private *internals = bd_tx_q->dev_private;
703 
704 	struct rte_eth_dev *primary_port =
705 			&rte_eth_devices[internals->primary_port];
706 	uint16_t num_tx_total = 0;
707 	uint8_t i, j;
708 
709 	uint8_t num_of_slaves = internals->active_slave_count;
710 	uint8_t slaves[RTE_MAX_ETHPORTS];
711 
712 	struct ether_hdr *ether_hdr;
713 	struct ether_addr primary_slave_addr;
714 	struct ether_addr active_slave_addr;
715 
716 	if (num_of_slaves < 1)
717 		return num_tx_total;
718 
719 	memcpy(slaves, internals->tlb_slaves_order,
720 				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
721 
722 
723 	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
724 
725 	if (nb_pkts > 3) {
726 		for (i = 0; i < 3; i++)
727 			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
728 	}
729 
730 	for (i = 0; i < num_of_slaves; i++) {
731 		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
732 		for (j = num_tx_total; j < nb_pkts; j++) {
733 			if (j + 3 < nb_pkts)
734 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
735 
736 			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
737 			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
738 				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
739 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
740 			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
741 #endif
742 		}
743 
744 		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
745 				bufs + num_tx_total, nb_pkts - num_tx_total);
746 
747 		if (num_tx_total == nb_pkts)
748 			break;
749 	}
750 
751 	return num_tx_total;
752 }
753 
754 void
755 bond_tlb_disable(struct bond_dev_private *internals)
756 {
757 	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
758 }
759 
760 void
761 bond_tlb_enable(struct bond_dev_private *internals)
762 {
763 	bond_ethdev_update_tlb_slave_cb(internals);
764 }
765 
766 static uint16_t
767 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
768 {
769 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
770 	struct bond_dev_private *internals = bd_tx_q->dev_private;
771 
772 	struct ether_hdr *eth_h;
773 	uint16_t ether_type, offset;
774 
775 	struct client_data *client_info;
776 
777 	/*
778 	 * We create transmit buffers for every slave and one additional to send
779 	 * through TLB. In the worst case every packet will be sent on a single port.
780 	 */
781 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
782 	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
783 
784 	/*
785 	 * We create separate transmit buffers for update packets as they won't be
786 	 * counted in num_tx_total.
787 	 */
788 	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
789 	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
790 
791 	struct rte_mbuf *upd_pkt;
792 	size_t pkt_size;
793 
794 	uint16_t num_send, num_not_send = 0;
795 	uint16_t num_tx_total = 0;
796 	uint8_t slave_idx;
797 
798 	int i, j;
799 
800 	/* Search the tx buffer for ARP packets and hand them to the ALB logic */
801 	for (i = 0; i < nb_pkts; i++) {
802 		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
803 		ether_type = eth_h->ether_type;
804 		offset = get_vlan_offset(eth_h, &ether_type);
805 
806 		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
807 			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
808 
809 			/* Change src mac in eth header */
810 			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
811 
812 			/* Add packet to slave tx buffer */
813 			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
814 			slave_bufs_pkts[slave_idx]++;
815 		} else {
816 			/* If packet is not ARP, send it with TLB policy */
817 			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
818 					bufs[i];
819 			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
820 		}
821 	}
822 
823 	/* Update connected client ARP tables */
824 	if (internals->mode6.ntt) {
825 		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
826 			client_info = &internals->mode6.client_table[i];
827 
828 			if (client_info->in_use) {
829 				/* Allocate new packet to send ARP update on current slave */
830 				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
831 				if (upd_pkt == NULL) {
832 					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
833 					continue;
834 				}
835 				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
836 						+ client_info->vlan_count * sizeof(struct vlan_hdr);
837 				upd_pkt->data_len = pkt_size;
838 				upd_pkt->pkt_len = pkt_size;
839 
840 				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
841 						internals);
842 
843 				/* Add packet to update tx buffer */
844 				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
845 				update_bufs_pkts[slave_idx]++;
846 			}
847 		}
848 		internals->mode6.ntt = 0;
849 	}
850 
851 	/* Send ARP packets on proper slaves */
852 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
853 		if (slave_bufs_pkts[i] > 0) {
854 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
855 					slave_bufs[i], slave_bufs_pkts[i]);
856 			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
857 				bufs[nb_pkts - 1 - num_not_send - j] =
858 						slave_bufs[i][nb_pkts - 1 - j];
859 			}
860 
861 			num_tx_total += num_send;
862 			num_not_send += slave_bufs_pkts[i] - num_send;
863 
864 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
865 	/* Print TX stats including update packets */
866 			for (j = 0; j < slave_bufs_pkts[i]; j++) {
867 				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
868 				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
869 			}
870 #endif
871 		}
872 	}
873 
874 	/* Send update packets on proper slaves */
875 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
876 		if (update_bufs_pkts[i] > 0) {
877 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
878 					update_bufs_pkts[i]);
879 			for (j = num_send; j < update_bufs_pkts[i]; j++) {
880 				rte_pktmbuf_free(update_bufs[i][j]);
881 			}
882 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
883 			for (j = 0; j < update_bufs_pkts[i]; j++) {
884 				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
885 				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
886 			}
887 #endif
888 		}
889 	}
890 
891 	/* Send non-ARP packets using tlb policy */
892 	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
893 		num_send = bond_ethdev_tx_burst_tlb(queue,
894 				slave_bufs[RTE_MAX_ETHPORTS],
895 				slave_bufs_pkts[RTE_MAX_ETHPORTS]);
896 
897 		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
898 			bufs[nb_pkts - 1 - num_not_send - j] =
899 					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
900 		}
901 
902 		num_tx_total += num_send;
903 		num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send;
904 	}
905 
906 	return num_tx_total;
907 }
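
/*
 * ALB TX summary: ARP packets are steered by the mode-6 client table and get
 * the chosen slave's MAC written as their source address, additional ARP
 * update packets are generated whenever mode6.ntt is set, and every non-ARP
 * packet falls through to the TLB transmit policy above. Only the caller's
 * own packets are counted in the return value; update packets that cannot be
 * sent are freed here.
 */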
908 
909 static uint16_t
910 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
911 		uint16_t nb_pkts)
912 {
913 	struct bond_dev_private *internals;
914 	struct bond_tx_queue *bd_tx_q;
915 
916 	uint8_t num_of_slaves;
917 	uint8_t slaves[RTE_MAX_ETHPORTS];
918 
919 	uint16_t num_tx_total = 0, num_tx_slave = 0, tx_fail_total = 0;
920 
921 	int i, op_slave_id;
922 
923 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
924 	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
925 
926 	bd_tx_q = (struct bond_tx_queue *)queue;
927 	internals = bd_tx_q->dev_private;
928 
929 	/* Copy slave list to protect against slave up/down changes during tx
930 	 * bursting */
931 	num_of_slaves = internals->active_slave_count;
932 	memcpy(slaves, internals->active_slaves,
933 			sizeof(internals->active_slaves[0]) * num_of_slaves);
934 
935 	if (num_of_slaves < 1)
936 		return num_tx_total;
937 
938 	/* Populate each slave's mbuf array with the packets to be sent on it */
939 	for (i = 0; i < nb_pkts; i++) {
940 		/* Select output slave using hash based on xmit policy */
941 		op_slave_id = internals->xmit_hash(bufs[i], num_of_slaves);
942 
943 		/* Populate slave mbuf arrays with mbufs for that slave */
944 		slave_bufs[op_slave_id][slave_nb_pkts[op_slave_id]++] = bufs[i];
945 	}
946 
947 	/* Send packet burst on each slave device */
948 	for (i = 0; i < num_of_slaves; i++) {
949 		if (slave_nb_pkts[i] > 0) {
950 			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
951 					slave_bufs[i], slave_nb_pkts[i]);
952 
953 			/* if tx burst fails move packets to end of bufs */
954 			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
955 				int slave_tx_fail_count = slave_nb_pkts[i] - num_tx_slave;
956 
957 				tx_fail_total += slave_tx_fail_count;
958 				memcpy(&bufs[nb_pkts - tx_fail_total],
959 						&slave_bufs[i][num_tx_slave],
960 						slave_tx_fail_count * sizeof(bufs[0]));
961 			}
962 
963 			num_tx_total += num_tx_slave;
964 		}
965 	}
966 
967 	return num_tx_total;
968 }
969 
970 static uint16_t
971 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
972 		uint16_t nb_pkts)
973 {
974 	struct bond_dev_private *internals;
975 	struct bond_tx_queue *bd_tx_q;
976 
977 	uint8_t num_of_slaves;
978 	uint8_t slaves[RTE_MAX_ETHPORTS];
979 	/* positions in the slaves array, not port IDs */
980 	uint8_t distributing_offsets[RTE_MAX_ETHPORTS];
981 	uint8_t distributing_count;
982 
983 	uint16_t num_tx_slave, num_tx_total = 0, num_tx_fail_total = 0;
984 	uint16_t i, j, op_slave_idx;
985 	const uint16_t buffs_size = nb_pkts + BOND_MODE_8023AX_SLAVE_TX_PKTS + 1;
986 
987 	/* Allocate extra slots for slow (LACP) packets in 802.3ad mode. */
988 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][buffs_size];
989 	void *slow_pkts[BOND_MODE_8023AX_SLAVE_TX_PKTS] = { NULL };
990 
991 	/* Total amount of packets in slave_bufs */
992 	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
993 	/* Slow packets placed in each slave */
994 	uint8_t slave_slow_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
995 
996 	bd_tx_q = (struct bond_tx_queue *)queue;
997 	internals = bd_tx_q->dev_private;
998 
999 	/* Copy slave list to protect against slave up/down changes during tx
1000 	 * bursting */
1001 	num_of_slaves = internals->active_slave_count;
1002 	if (num_of_slaves < 1)
1003 		return num_tx_total;
1004 
1005 	memcpy(slaves, internals->active_slaves, sizeof(slaves[0]) * num_of_slaves);
1006 
1007 	distributing_count = 0;
1008 	for (i = 0; i < num_of_slaves; i++) {
1009 		struct port *port = &mode_8023ad_ports[slaves[i]];
1010 
1011 		slave_slow_nb_pkts[i] = rte_ring_dequeue_burst(port->tx_ring,
1012 				slow_pkts, BOND_MODE_8023AX_SLAVE_TX_PKTS);
1013 		slave_nb_pkts[i] = slave_slow_nb_pkts[i];
1014 
1015 		for (j = 0; j < slave_slow_nb_pkts[i]; j++)
1016 			slave_bufs[i][j] = slow_pkts[j];
1017 
1018 		if (ACTOR_STATE(port, DISTRIBUTING))
1019 			distributing_offsets[distributing_count++] = i;
1020 	}
1021 
1022 	if (likely(distributing_count > 0)) {
1023 		/* Populate slaves mbuf with the packets which are to be sent on it */
1024 		for (i = 0; i < nb_pkts; i++) {
1025 			/* Select output slave using hash based on xmit policy */
1026 			op_slave_idx = internals->xmit_hash(bufs[i], distributing_count);
1027 
1028 			/* Populate slave mbuf arrays with mbufs for that slave. Use only
1029 			 * slaves that are currently distributing. */
1030 			uint8_t slave_offset = distributing_offsets[op_slave_idx];
1031 			slave_bufs[slave_offset][slave_nb_pkts[slave_offset]] = bufs[i];
1032 			slave_nb_pkts[slave_offset]++;
1033 		}
1034 	}
1035 
1036 	/* Send packet burst on each slave device */
1037 	for (i = 0; i < num_of_slaves; i++) {
1038 		if (slave_nb_pkts[i] == 0)
1039 			continue;
1040 
1041 		num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1042 				slave_bufs[i], slave_nb_pkts[i]);
1043 
1044 		/* If tx burst fails drop slow packets */
1045 		for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
1046 			rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);
1047 
1048 		num_tx_total += num_tx_slave - slave_slow_nb_pkts[i];
1049 		num_tx_fail_total += slave_nb_pkts[i] - num_tx_slave;
1050 
1051 		/* If tx burst fails move packets to end of bufs */
1052 		if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
1053 			uint16_t j = nb_pkts - num_tx_fail_total;
1054 			for ( ; num_tx_slave < slave_nb_pkts[i]; j++, num_tx_slave++)
1055 				bufs[j] = slave_bufs[i][num_tx_slave];
1056 		}
1057 	}
1058 
1059 	return num_tx_total;
1060 }
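
/*
 * Mode-4 TX summary: LACP/marker frames queued by the state machine on each
 * port's tx_ring are always placed at the front of that slave's burst and are
 * excluded from the returned packet count; regular packets are hashed only
 * across slaves currently in the DISTRIBUTING state. If a slave accepts only
 * part of its burst, unsent slow frames are freed and unsent data packets are
 * moved to the tail of bufs.
 */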
1061 
1062 static uint16_t
1063 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1064 		uint16_t nb_pkts)
1065 {
1066 	struct bond_dev_private *internals;
1067 	struct bond_tx_queue *bd_tx_q;
1068 
1069 	uint8_t tx_failed_flag = 0, num_of_slaves;
1070 	uint8_t slaves[RTE_MAX_ETHPORTS];
1071 
1072 	uint16_t max_nb_of_tx_pkts = 0;
1073 
1074 	int slave_tx_total[RTE_MAX_ETHPORTS];
1075 	int i, most_successful_tx_slave = -1;
1076 
1077 	bd_tx_q = (struct bond_tx_queue *)queue;
1078 	internals = bd_tx_q->dev_private;
1079 
1080 	/* Copy slave list to protect against slave up/down changes during tx
1081 	 * bursting */
1082 	num_of_slaves = internals->active_slave_count;
1083 	memcpy(slaves, internals->active_slaves,
1084 			sizeof(internals->active_slaves[0]) * num_of_slaves);
1085 
1086 	if (num_of_slaves < 1)
1087 		return 0;
1088 
1089 	/* Increment reference count on mbufs */
1090 	for (i = 0; i < nb_pkts; i++)
1091 		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1092 
1093 	/* Transmit burst on each active slave */
1094 	for (i = 0; i < num_of_slaves; i++) {
1095 		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1096 					bufs, nb_pkts);
1097 
1098 		if (unlikely(slave_tx_total[i] < nb_pkts))
1099 			tx_failed_flag = 1;
1100 
1101 		/* record the value and slave index for the slave which transmits the
1102 		 * maximum number of packets */
1103 		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1104 			max_nb_of_tx_pkts = slave_tx_total[i];
1105 			most_successful_tx_slave = i;
1106 		}
1107 	}
1108 
1109 	/* if slaves fail to transmit packets from burst, the calling application
1110 	 * is not expected to know about multiple references to packets so we must
1111 	 * handle failures of all packets except those of the most successful slave
1112 	 */
1113 	if (unlikely(tx_failed_flag))
1114 		for (i = 0; i < num_of_slaves; i++)
1115 			if (i != most_successful_tx_slave)
1116 				while (slave_tx_total[i] < nb_pkts)
1117 					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1118 
1119 	return max_nb_of_tx_pkts;
1120 }
1121 
1122 void
1123 link_properties_set(struct rte_eth_dev *bonded_eth_dev,
1124 		struct rte_eth_link *slave_dev_link)
1125 {
1126 	struct rte_eth_link *bonded_dev_link = &bonded_eth_dev->data->dev_link;
1127 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1128 
1129 	if (slave_dev_link->link_status &&
1130 		bonded_eth_dev->data->dev_started) {
1131 		bonded_dev_link->link_duplex = slave_dev_link->link_duplex;
1132 		bonded_dev_link->link_speed = slave_dev_link->link_speed;
1133 
1134 		internals->link_props_set = 1;
1135 	}
1136 }
1137 
1138 void
1139 link_properties_reset(struct rte_eth_dev *bonded_eth_dev)
1140 {
1141 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1142 
1143 	memset(&(bonded_eth_dev->data->dev_link), 0,
1144 			sizeof(bonded_eth_dev->data->dev_link));
1145 
1146 	internals->link_props_set = 0;
1147 }
1148 
1149 int
1150 link_properties_valid(struct rte_eth_link *bonded_dev_link,
1151 		struct rte_eth_link *slave_dev_link)
1152 {
1153 	if (bonded_dev_link->link_duplex != slave_dev_link->link_duplex ||
1154 		bonded_dev_link->link_speed !=  slave_dev_link->link_speed)
1155 		return -1;
1156 
1157 	return 0;
1158 }
1159 
1160 int
1161 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1162 {
1163 	struct ether_addr *mac_addr;
1164 
1165 	if (eth_dev == NULL) {
1166 		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1167 		return -1;
1168 	}
1169 
1170 	if (dst_mac_addr == NULL) {
1171 		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1172 		return -1;
1173 	}
1174 
1175 	mac_addr = eth_dev->data->mac_addrs;
1176 
1177 	ether_addr_copy(mac_addr, dst_mac_addr);
1178 	return 0;
1179 }
1180 
1181 int
1182 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1183 {
1184 	struct ether_addr *mac_addr;
1185 
1186 	if (eth_dev == NULL) {
1187 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1188 		return -1;
1189 	}
1190 
1191 	if (new_mac_addr == NULL) {
1192 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1193 		return -1;
1194 	}
1195 
1196 	mac_addr = eth_dev->data->mac_addrs;
1197 
1198 	/* If new MAC is different to current MAC then update */
1199 	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1200 		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1201 
1202 	return 0;
1203 }
1204 
1205 int
1206 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1207 {
1208 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1209 	int i;
1210 
1211 	/* Update slave devices MAC addresses */
1212 	if (internals->slave_count < 1)
1213 		return -1;
1214 
1215 	switch (internals->mode) {
1216 	case BONDING_MODE_ROUND_ROBIN:
1217 	case BONDING_MODE_BALANCE:
1218 	case BONDING_MODE_BROADCAST:
1219 		for (i = 0; i < internals->slave_count; i++) {
1220 			if (mac_address_set(&rte_eth_devices[internals->slaves[i].port_id],
1221 					bonded_eth_dev->data->mac_addrs)) {
1222 				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1223 						internals->slaves[i].port_id);
1224 				return -1;
1225 			}
1226 		}
1227 		break;
1228 	case BONDING_MODE_8023AD:
1229 		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1230 		break;
1231 	case BONDING_MODE_ACTIVE_BACKUP:
1232 	case BONDING_MODE_TLB:
1233 	case BONDING_MODE_ALB:
1234 	default:
1235 		for (i = 0; i < internals->slave_count; i++) {
1236 			if (internals->slaves[i].port_id ==
1237 					internals->current_primary_port) {
1238 				if (mac_address_set(&rte_eth_devices[internals->primary_port],
1239 						bonded_eth_dev->data->mac_addrs)) {
1240 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1241 							internals->current_primary_port);
1242 					return -1;
1243 				}
1244 			} else {
1245 				if (mac_address_set(
1246 						&rte_eth_devices[internals->slaves[i].port_id],
1247 						&internals->slaves[i].persisted_mac_addr)) {
1248 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1249 							internals->slaves[i].port_id);
1250 					return -1;
1251 				}
1252 			}
1253 		}
1254 	}
1255 
1256 	return 0;
1257 }
1258 
1259 int
1260 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1261 {
1262 	struct bond_dev_private *internals;
1263 
1264 	internals = eth_dev->data->dev_private;
1265 
1266 	switch (mode) {
1267 	case BONDING_MODE_ROUND_ROBIN:
1268 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1269 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1270 		break;
1271 	case BONDING_MODE_ACTIVE_BACKUP:
1272 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1273 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1274 		break;
1275 	case BONDING_MODE_BALANCE:
1276 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1277 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1278 		break;
1279 	case BONDING_MODE_BROADCAST:
1280 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1281 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1282 		break;
1283 	case BONDING_MODE_8023AD:
1284 		if (bond_mode_8023ad_enable(eth_dev) != 0)
1285 			return -1;
1286 
1287 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1288 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1289 		RTE_LOG(WARNING, PMD,
1290 				"Using mode 4, it is necessary to do TX burst and RX burst "
1291 				"at least every 100ms.\n");
1292 		break;
1293 	case BONDING_MODE_TLB:
1294 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1295 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1296 		break;
1297 	case BONDING_MODE_ALB:
1298 		if (bond_mode_alb_enable(eth_dev) != 0)
1299 			return -1;
1300 
1301 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1302 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1303 		break;
1304 	default:
1305 		return -1;
1306 	}
1307 
1308 	internals->mode = mode;
1309 
1310 	return 0;
1311 }
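
/*
 * Illustrative application-side sketch (assumes the public API declared in
 * rte_eth_bond.h; not part of this driver): a bonded device is typically
 * created and wired to these burst handlers roughly as follows.
 *
 *     int port = rte_eth_bond_create("net_bonding0", BONDING_MODE_BALANCE, 0);
 *     if (port < 0)
 *         rte_exit(EXIT_FAILURE, "Failed to create bonded device\n");
 *     rte_eth_bond_slave_add(port, slave0);
 *     rte_eth_bond_slave_add(port, slave1);
 *     // rte_eth_bond_mode_set() ends up in bond_ethdev_mode_set() above,
 *     // which installs the matching rx/tx burst functions.
 *
 * The device name, mode and slave ids above are placeholders.
 */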
1312 
1313 int
1314 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1315 		struct rte_eth_dev *slave_eth_dev)
1316 {
1317 	struct bond_rx_queue *bd_rx_q;
1318 	struct bond_tx_queue *bd_tx_q;
1319 
1320 	int errval;
1321 	uint16_t q_id;
1322 
1323 	/* Stop slave */
1324 	rte_eth_dev_stop(slave_eth_dev->data->port_id);
1325 
1326 	/* Enable interrupts on slave device if supported */
1327 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1328 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1329 
1330 	/* If RSS is enabled for bonding, try to enable it for slaves  */
1331 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1332 		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1333 				!= 0) {
1334 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1335 					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1336 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1337 					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1338 		} else {
1339 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1340 		}
1341 
1342 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1343 				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1344 		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1345 				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1346 	}
1347 
1348 	slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter =
1349 			bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter;
1350 
1351 	/* Configure device */
1352 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1353 			bonded_eth_dev->data->nb_rx_queues,
1354 			bonded_eth_dev->data->nb_tx_queues,
1355 			&(slave_eth_dev->data->dev_conf));
1356 	if (errval != 0) {
1357 		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u , err (%d)",
1358 				slave_eth_dev->data->port_id, errval);
1359 		return errval;
1360 	}
1361 
1362 	/* Setup Rx Queues */
1363 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1364 		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1365 
1366 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1367 				bd_rx_q->nb_rx_desc,
1368 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1369 				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1370 		if (errval != 0) {
1371 			RTE_BOND_LOG(ERR,
1372 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1373 					slave_eth_dev->data->port_id, q_id, errval);
1374 			return errval;
1375 		}
1376 	}
1377 
1378 	/* Setup Tx Queues */
1379 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1380 		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1381 
1382 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1383 				bd_tx_q->nb_tx_desc,
1384 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1385 				&bd_tx_q->tx_conf);
1386 		if (errval != 0) {
1387 			RTE_BOND_LOG(ERR,
1388 					"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1389 					slave_eth_dev->data->port_id, q_id, errval);
1390 			return errval;
1391 		}
1392 	}
1393 
1394 	/* Start device */
1395 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1396 	if (errval != 0) {
1397 		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1398 				slave_eth_dev->data->port_id, errval);
1399 		return -1;
1400 	}
1401 
1402 	/* If RSS is enabled for bonding, synchronize RETA */
1403 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1404 		int i;
1405 		struct bond_dev_private *internals;
1406 
1407 		internals = bonded_eth_dev->data->dev_private;
1408 
1409 		for (i = 0; i < internals->slave_count; i++) {
1410 			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1411 				errval = rte_eth_dev_rss_reta_update(
1412 						slave_eth_dev->data->port_id,
1413 						&internals->reta_conf[0],
1414 						internals->slaves[i].reta_size);
1415 				if (errval != 0) {
1416 					RTE_LOG(WARNING, PMD,
1417 							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1418 							" RSS Configuration for bonding may be inconsistent.\n",
1419 							slave_eth_dev->data->port_id, errval);
1420 				}
1421 				break;
1422 			}
1423 		}
1424 	}
1425 
1426 	/* If lsc interrupt is set, check initial slave's link status */
1427 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1428 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1429 			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id);
1430 
1431 	return 0;
1432 }
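
/*
 * slave_configure() above brings a slave in line with the bonded device: the
 * slave is stopped, inherits the bond's RSS/VLAN-filter and MQ settings, is
 * reconfigured with the bond's rx/tx queue counts and per-queue settings,
 * restarted, has its RSS RETA synchronised when RSS is enabled, and finally
 * gets an initial LSC callback so its link status is taken into account.
 */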
1433 
1434 void
1435 slave_remove(struct bond_dev_private *internals,
1436 		struct rte_eth_dev *slave_eth_dev)
1437 {
1438 	uint8_t i;
1439 
1440 	for (i = 0; i < internals->slave_count; i++)
1441 		if (internals->slaves[i].port_id ==
1442 				slave_eth_dev->data->port_id)
1443 			break;
1444 
1445 	if (i < (internals->slave_count - 1))
1446 		memmove(&internals->slaves[i], &internals->slaves[i + 1],
1447 				sizeof(internals->slaves[0]) *
1448 				(internals->slave_count - i - 1));
1449 
1450 	internals->slave_count--;
1451 
1452 	/* force reconfiguration of slave interfaces */
1453 	_rte_eth_dev_reset(slave_eth_dev);
1454 }
1455 
1456 static void
1457 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1458 
1459 void
1460 slave_add(struct bond_dev_private *internals,
1461 		struct rte_eth_dev *slave_eth_dev)
1462 {
1463 	struct bond_slave_details *slave_details =
1464 			&internals->slaves[internals->slave_count];
1465 
1466 	slave_details->port_id = slave_eth_dev->data->port_id;
1467 	slave_details->last_link_status = 0;
1468 
1469 	/* Mark slave devices that don't support interrupts so we can
1470 	 * compensate when we start the bond
1471 	 */
1472 	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
1473 		slave_details->link_status_poll_enabled = 1;
1474 	}
1475 
1476 	slave_details->link_status_wait_to_complete = 0;
1477 	/* clean tlb_last_obytets when adding a port to the bonding device */
1478 	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
1479 			sizeof(struct ether_addr));
1480 }
1481 
1482 void
1483 bond_ethdev_primary_set(struct bond_dev_private *internals,
1484 		uint8_t slave_port_id)
1485 {
1486 	int i;
1487 
1488 	if (internals->active_slave_count < 1)
1489 		internals->current_primary_port = slave_port_id;
1490 	else
1491 		/* Search bonded device slave ports for new proposed primary port */
1492 		for (i = 0; i < internals->active_slave_count; i++) {
1493 			if (internals->active_slaves[i] == slave_port_id)
1494 				internals->current_primary_port = slave_port_id;
1495 		}
1496 }
1497 
1498 static void
1499 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
1500 
1501 static int
1502 bond_ethdev_start(struct rte_eth_dev *eth_dev)
1503 {
1504 	struct bond_dev_private *internals;
1505 	int i;
1506 
1507 	/* slave eth dev will be started by bonded device */
1508 	if (check_for_bonded_ethdev(eth_dev)) {
1509 		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
1510 				eth_dev->data->port_id);
1511 		return -1;
1512 	}
1513 
1514 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1515 	eth_dev->data->dev_started = 1;
1516 
1517 	internals = eth_dev->data->dev_private;
1518 
1519 	if (internals->slave_count == 0) {
1520 		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
1521 		return -1;
1522 	}
1523 
1524 	if (internals->user_defined_mac == 0) {
1525 		struct ether_addr *new_mac_addr = NULL;
1526 
1527 		for (i = 0; i < internals->slave_count; i++)
1528 			if (internals->slaves[i].port_id == internals->primary_port)
1529 				new_mac_addr = &internals->slaves[i].persisted_mac_addr;
1530 
1531 		if (new_mac_addr == NULL)
1532 			return -1;
1533 
1534 		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
1535 			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
1536 					eth_dev->data->port_id);
1537 			return -1;
1538 		}
1539 	}
1540 
1541 	/* Update all slave devices' MACs */
1542 	if (mac_address_slaves_update(eth_dev) != 0)
1543 		return -1;
1544 
1545 	/* If the bonded device is configured in promiscuous mode then re-apply the config */
1546 	if (internals->promiscuous_en)
1547 		bond_ethdev_promiscuous_enable(eth_dev);
1548 
1549 	/* Reconfigure each slave device if starting bonded device */
1550 	for (i = 0; i < internals->slave_count; i++) {
1551 		if (slave_configure(eth_dev,
1552 				&(rte_eth_devices[internals->slaves[i].port_id])) != 0) {
1553 			RTE_BOND_LOG(ERR,
1554 					"bonded port (%d) failed to reconfigure slave device (%d)",
1555 					eth_dev->data->port_id, internals->slaves[i].port_id);
1556 			return -1;
1557 		}
1558 		/* We will need to poll for link status if any slave doesn't
1559 		 * support interrupts
1560 		 */
1561 		if (internals->slaves[i].link_status_poll_enabled)
1562 			internals->link_status_polling_enabled = 1;
1563 	}
1564 	/* start polling if needed */
1565 	if (internals->link_status_polling_enabled) {
1566 		rte_eal_alarm_set(
1567 			internals->link_status_polling_interval_ms * 1000,
1568 			bond_ethdev_slave_link_status_change_monitor,
1569 			(void *)&rte_eth_devices[internals->port_id]);
1570 	}
1571 
1572 	if (internals->user_defined_primary_port)
1573 		bond_ethdev_primary_set(internals, internals->primary_port);
1574 
1575 	if (internals->mode == BONDING_MODE_8023AD)
1576 		bond_mode_8023ad_start(eth_dev);
1577 
1578 	if (internals->mode == BONDING_MODE_TLB ||
1579 			internals->mode == BONDING_MODE_ALB)
1580 		bond_tlb_enable(internals);
1581 
1582 	return 0;
1583 }
1584 
1585 static void
1586 bond_ethdev_free_queues(struct rte_eth_dev *dev)
1587 {
1588 	uint8_t i;
1589 
1590 	if (dev->data->rx_queues != NULL) {
1591 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
1592 			rte_free(dev->data->rx_queues[i]);
1593 			dev->data->rx_queues[i] = NULL;
1594 		}
1595 		dev->data->nb_rx_queues = 0;
1596 	}
1597 
1598 	if (dev->data->tx_queues != NULL) {
1599 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
1600 			rte_free(dev->data->tx_queues[i]);
1601 			dev->data->tx_queues[i] = NULL;
1602 		}
1603 		dev->data->nb_tx_queues = 0;
1604 	}
1605 }
1606 
1607 void
1608 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
1609 {
1610 	struct bond_dev_private *internals = eth_dev->data->dev_private;
1611 	uint8_t i;
1612 
1613 	if (internals->mode == BONDING_MODE_8023AD) {
1614 		struct port *port;
1615 		void *pkt = NULL;
1616 
1617 		bond_mode_8023ad_stop(eth_dev);
1618 
1619 		/* Discard all messages to/from mode 4 state machines */
1620 		for (i = 0; i < internals->active_slave_count; i++) {
1621 			port = &mode_8023ad_ports[internals->active_slaves[i]];
1622 
1623 			RTE_ASSERT(port->rx_ring != NULL);
1624 			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
1625 				rte_pktmbuf_free(pkt);
1626 
1627 			RTE_ASSERT(port->tx_ring != NULL);
1628 			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
1629 				rte_pktmbuf_free(pkt);
1630 		}
1631 	}
1632 
1633 	if (internals->mode == BONDING_MODE_TLB ||
1634 			internals->mode == BONDING_MODE_ALB) {
1635 		bond_tlb_disable(internals);
1636 		for (i = 0; i < internals->active_slave_count; i++)
1637 			tlb_last_obytets[internals->active_slaves[i]] = 0;
1638 	}
1639 
1640 	internals->active_slave_count = 0;
1641 	internals->link_status_polling_enabled = 0;
1642 	for (i = 0; i < internals->slave_count; i++)
1643 		internals->slaves[i].last_link_status = 0;
1644 
1645 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1646 	eth_dev->data->dev_started = 0;
1647 }
1648 
1649 void
1650 bond_ethdev_close(struct rte_eth_dev *dev)
1651 {
1652 	struct bond_dev_private *internals = dev->data->dev_private;
1653 
1654 	bond_ethdev_free_queues(dev);
1655 	rte_bitmap_reset(internals->vlan_filter_bmp);
1656 }
1657 
1658 /* forward declaration */
1659 static int bond_ethdev_configure(struct rte_eth_dev *dev);
1660 
1661 static void
1662 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
1663 {
1664 	struct bond_dev_private *internals = dev->data->dev_private;
1665 
1666 	dev_info->max_mac_addrs = 1;
1667 
1668 	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
1669 				  internals->candidate_max_rx_pktlen : 2048;
1670 
1671 	dev_info->max_rx_queues = (uint16_t)128;
1672 	dev_info->max_tx_queues = (uint16_t)512;
1673 
1674 	dev_info->min_rx_bufsize = 0;
1675 	dev_info->pci_dev = NULL;
1676 
1677 	dev_info->rx_offload_capa = internals->rx_offload_capa;
1678 	dev_info->tx_offload_capa = internals->tx_offload_capa;
1679 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
1680 
1681 	dev_info->reta_size = internals->reta_size;
1682 }
1683 
1684 static int
1685 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
1686 {
1687 	int res;
1688 	uint8_t i;
1689 	struct bond_dev_private *internals = dev->data->dev_private;
1690 
1691 	/* don't do this while a slave is being added */
1692 	rte_spinlock_lock(&internals->lock);
1693 
1694 	if (on)
1695 		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
1696 	else
1697 		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
1698 
1699 	for (i = 0; i < internals->slave_count; i++) {
1700 		uint8_t port_id = internals->slaves[i].port_id;
1701 
1702 		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
1703 		if (res == -ENOTSUP)
1704 			RTE_LOG(WARNING, PMD,
1705 				"Setting VLAN filter on slave port %u not supported.\n",
1706 				port_id);
1707 	}
1708 
1709 	rte_spinlock_unlock(&internals->lock);
1710 	return 0;
1711 }
1712 
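/* Rx queue setup only allocates the bonded device's own queue bookkeeping
 * structure; the stored rxconf, descriptor count and mempool are reused when
 * the slave queues are configured. */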
1713 static int
1714 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1715 		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
1716 		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
1717 {
1718 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
1719 			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
1720 					0, dev->data->numa_node);
1721 	if (bd_rx_q == NULL)
1722 		return -1;
1723 
1724 	bd_rx_q->queue_id = rx_queue_id;
1725 	bd_rx_q->dev_private = dev->data->dev_private;
1726 
1727 	bd_rx_q->nb_rx_desc = nb_rx_desc;
1728 
1729 	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
1730 	bd_rx_q->mb_pool = mb_pool;
1731 
1732 	dev->data->rx_queues[rx_queue_id] = bd_rx_q;
1733 
1734 	return 0;
1735 }
1736 
1737 static int
1738 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1739 		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
1740 		const struct rte_eth_txconf *tx_conf)
1741 {
1742 	struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
1743 			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
1744 					0, dev->data->numa_node);
1745 
1746 	if (bd_tx_q == NULL)
1747 		return -1;
1748 
1749 	bd_tx_q->queue_id = tx_queue_id;
1750 	bd_tx_q->dev_private = dev->data->dev_private;
1751 
1752 	bd_tx_q->nb_tx_desc = nb_tx_desc;
1753 	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
1754 
1755 	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
1756 
1757 	return 0;
1758 }
1759 
1760 static void
1761 bond_ethdev_rx_queue_release(void *queue)
1762 {
1763 	if (queue == NULL)
1764 		return;
1765 
1766 	rte_free(queue);
1767 }
1768 
1769 static void
1770 bond_ethdev_tx_queue_release(void *queue)
1771 {
1772 	if (queue == NULL)
1773 		return;
1774 
1775 	rte_free(queue);
1776 }
1777 
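/* Periodic alarm callback which polls the link status of slaves that have
 * polling enabled and injects a synthetic LSC event when a change is seen.
 * It re-arms itself as long as at least one polled slave remains. */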
1778 static void
1779 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
1780 {
1781 	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
1782 	struct bond_dev_private *internals;
1783 
1784 	/* Default polling_slave_found to true so that the polling alarm is not
1785 	 * disabled if we fail to take the lock on this pass */
1786 	int i, polling_slave_found = 1;
1787 
1788 	if (cb_arg == NULL)
1789 		return;
1790 
1791 	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
1792 	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
1793 
1794 	if (!bonded_ethdev->data->dev_started ||
1795 		!internals->link_status_polling_enabled)
1796 		return;
1797 
1798 	/* If the device is currently being configured then don't check the slaves'
1799 	 * link status; wait until the next period */
1800 	if (rte_spinlock_trylock(&internals->lock)) {
1801 		if (internals->slave_count > 0)
1802 			polling_slave_found = 0;
1803 
1804 		for (i = 0; i < internals->slave_count; i++) {
1805 			if (!internals->slaves[i].link_status_poll_enabled)
1806 				continue;
1807 
1808 			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
1809 			polling_slave_found = 1;
1810 
1811 			/* Update slave link status */
1812 			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
1813 					internals->slaves[i].link_status_wait_to_complete);
1814 
1815 			/* if the link status has changed since the last check then invoke
1816 			 * the LSC event callback */
1817 			if (slave_ethdev->data->dev_link.link_status !=
1818 					internals->slaves[i].last_link_status) {
1819 				internals->slaves[i].last_link_status =
1820 						slave_ethdev->data->dev_link.link_status;
1821 
1822 				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
1823 						RTE_ETH_EVENT_INTR_LSC,
1824 						&bonded_ethdev->data->port_id);
1825 			}
1826 		}
1827 		rte_spinlock_unlock(&internals->lock);
1828 	}
1829 
1830 	if (polling_slave_found)
1831 		/* Set alarm to continue monitoring link status of slave ethdevs */
1832 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
1833 				bond_ethdev_slave_link_status_change_monitor, cb_arg);
1834 }
1835 
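/* The bonded link is reported up if any active slave reports link up. */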
1836 static int
1837 bond_ethdev_link_update(struct rte_eth_dev *bonded_eth_dev,
1838 		int wait_to_complete)
1839 {
1840 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1841 
1842 	if (!bonded_eth_dev->data->dev_started ||
1843 		internals->active_slave_count == 0) {
1844 		bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
1845 		return 0;
1846 	} else {
1847 		struct rte_eth_dev *slave_eth_dev;
1848 		int i, link_up = 0;
1849 
1850 		for (i = 0; i < internals->active_slave_count; i++) {
1851 			slave_eth_dev = &rte_eth_devices[internals->active_slaves[i]];
1852 
1853 			(*slave_eth_dev->dev_ops->link_update)(slave_eth_dev,
1854 					wait_to_complete);
1855 			if (slave_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
1856 				link_up = 1;
1857 				break;
1858 			}
1859 		}
1860 
1861 		bonded_eth_dev->data->dev_link.link_status = link_up;
1862 	}
1863 
1864 	return 0;
1865 }
1866 
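/* Aggregate statistics across all slaves. Per-queue counters are summed by
 * queue index, so queue N of the bond accumulates queue N of every slave. */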
1867 static void
1868 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1869 {
1870 	struct bond_dev_private *internals = dev->data->dev_private;
1871 	struct rte_eth_stats slave_stats;
1872 	int i, j;
1873 
1874 	for (i = 0; i < internals->slave_count; i++) {
1875 		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
1876 
1877 		stats->ipackets += slave_stats.ipackets;
1878 		stats->opackets += slave_stats.opackets;
1879 		stats->ibytes += slave_stats.ibytes;
1880 		stats->obytes += slave_stats.obytes;
1881 		stats->imissed += slave_stats.imissed;
1882 		stats->ierrors += slave_stats.ierrors;
1883 		stats->oerrors += slave_stats.oerrors;
1884 		stats->rx_nombuf += slave_stats.rx_nombuf;
1885 
1886 		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
1887 			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
1888 			stats->q_opackets[j] += slave_stats.q_opackets[j];
1889 			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
1890 			stats->q_obytes[j] += slave_stats.q_obytes[j];
1891 			stats->q_errors[j] += slave_stats.q_errors[j];
1892 		}
1893 
1894 	}
1895 }
1896 
1897 static void
1898 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
1899 {
1900 	struct bond_dev_private *internals = dev->data->dev_private;
1901 	int i;
1902 
1903 	for (i = 0; i < internals->slave_count; i++)
1904 		rte_eth_stats_reset(internals->slaves[i].port_id);
1905 }
1906 
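/* Promiscuous handling depends on the bonding mode: it is propagated to all
 * slaves in round-robin/balance/broadcast, handled by the mode 4 code when
 * slaves are added or removed, and applied only to the current primary slave
 * otherwise. */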
1907 static void
1908 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
1909 {
1910 	struct bond_dev_private *internals = eth_dev->data->dev_private;
1911 	int i;
1912 
1913 	internals->promiscuous_en = 1;
1914 
1915 	switch (internals->mode) {
1916 	/* Promiscuous mode is propagated to all slaves */
1917 	case BONDING_MODE_ROUND_ROBIN:
1918 	case BONDING_MODE_BALANCE:
1919 	case BONDING_MODE_BROADCAST:
1920 		for (i = 0; i < internals->slave_count; i++)
1921 			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
1922 		break;
1923 	/* In mode 4, promiscuous mode is managed when a slave is added/removed */
1924 	case BONDING_MODE_8023AD:
1925 		break;
1926 	/* Promiscuous mode is propagated only to primary slave */
1927 	case BONDING_MODE_ACTIVE_BACKUP:
1928 	case BONDING_MODE_TLB:
1929 	case BONDING_MODE_ALB:
1930 	default:
1931 		rte_eth_promiscuous_enable(internals->current_primary_port);
1932 	}
1933 }
1934 
1935 static void
1936 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
1937 {
1938 	struct bond_dev_private *internals = dev->data->dev_private;
1939 	int i;
1940 
1941 	internals->promiscuous_en = 0;
1942 
1943 	switch (internals->mode) {
1944 	/* Promiscuous mode is propagated to all slaves */
1945 	case BONDING_MODE_ROUND_ROBIN:
1946 	case BONDING_MODE_BALANCE:
1947 	case BONDING_MODE_BROADCAST:
1948 		for (i = 0; i < internals->slave_count; i++)
1949 			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
1950 		break;
1951 	/* In mode 4, promiscuous mode is managed when a slave is added/removed */
1952 	case BONDING_MODE_8023AD:
1953 		break;
1954 	/* Promiscuous mode is propagated only to primary slave */
1955 	case BONDING_MODE_ACTIVE_BACKUP:
1956 	case BONDING_MODE_TLB:
1957 	case BONDING_MODE_ALB:
1958 	default:
1959 		rte_eth_promiscuous_disable(internals->current_primary_port);
1960 	}
1961 }
1962 
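/* Alarm callback used to defer LSC notification to the application when link
 * up/down propagation delays are configured on the bonded device. */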
1963 static void
1964 bond_ethdev_delayed_lsc_propagation(void *arg)
1965 {
1966 	if (arg == NULL)
1967 		return;
1968 
1969 	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
1970 			RTE_ETH_EVENT_INTR_LSC, NULL);
1971 }
1972 
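/* Link status change handler for slave ports: activates or deactivates the
 * slave, elects a new primary when needed and, when the bonded link itself
 * transitions, propagates the event to the application either immediately or
 * via the delayed alarm above. */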
1973 void
1974 bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
1975 		void *param)
1976 {
1977 	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
1978 	struct bond_dev_private *internals;
1979 	struct rte_eth_link link;
1980 
1981 	int i, valid_slave = 0;
1982 	uint8_t active_pos;
1983 	uint8_t lsc_flag = 0;
1984 
1985 	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
1986 		return;
1987 
1988 	bonded_eth_dev = &rte_eth_devices[*(uint8_t *)param];
1989 	slave_eth_dev = &rte_eth_devices[port_id];
1990 
1991 	if (check_for_bonded_ethdev(bonded_eth_dev))
1992 		return;
1993 
1994 	internals = bonded_eth_dev->data->dev_private;
1995 
1996 	/* If the device isn't started don't handle interrupts */
1997 	if (!bonded_eth_dev->data->dev_started)
1998 		return;
1999 
2000 	/* verify that port_id is a valid slave of bonded port */
2001 	for (i = 0; i < internals->slave_count; i++) {
2002 		if (internals->slaves[i].port_id == port_id) {
2003 			valid_slave = 1;
2004 			break;
2005 		}
2006 	}
2007 
2008 	if (!valid_slave)
2009 		return;
2010 
2011 	/* Search for port in active port list */
2012 	active_pos = find_slave_by_id(internals->active_slaves,
2013 			internals->active_slave_count, port_id);
2014 
2015 	rte_eth_link_get_nowait(port_id, &link);
2016 	if (link.link_status) {
2017 		if (active_pos < internals->active_slave_count)
2018 			return;
2019 
2020 		/* if no active slave ports then set this port to be primary port */
2021 		if (internals->active_slave_count < 1) {
2022 			/* If first active slave, then change link status */
2023 			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2024 			internals->current_primary_port = port_id;
2025 			lsc_flag = 1;
2026 
2027 			mac_address_slaves_update(bonded_eth_dev);
2028 
2029 			/* Inherit eth dev link properties from first active slave */
2030 			link_properties_set(bonded_eth_dev,
2031 					&(slave_eth_dev->data->dev_link));
2032 		} else {
2033 			if (link_properties_valid(
2034 				&bonded_eth_dev->data->dev_link, &link) != 0) {
2035 				slave_eth_dev->data->dev_flags &=
2036 					(~RTE_ETH_DEV_BONDED_SLAVE);
2037 				RTE_LOG(ERR, PMD,
2038 					"port %u invalid speed/duplex\n",
2039 					port_id);
2040 				return;
2041 			}
2042 		}
2043 
2044 		activate_slave(bonded_eth_dev, port_id);
2045 
2046 		/* If user has defined the primary port then default to using it */
2047 		if (internals->user_defined_primary_port &&
2048 				internals->primary_port == port_id)
2049 			bond_ethdev_primary_set(internals, port_id);
2050 	} else {
2051 		if (active_pos == internals->active_slave_count)
2052 			return;
2053 
2054 		/* Remove from active slave list */
2055 		deactivate_slave(bonded_eth_dev, port_id);
2056 
2057 		/* No active slaves, change link status to down and reset other
2058 		 * link properties */
2059 		if (internals->active_slave_count < 1) {
2060 			lsc_flag = 1;
2061 			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2062 
2063 			link_properties_reset(bonded_eth_dev);
2064 		}
2065 
2066 		/* Update the primary port id: take the first active slave from the
2067 		 * list, or fall back to the configured primary port if none remain */
2068 		if (port_id == internals->current_primary_port) {
2069 			if (internals->active_slave_count > 0)
2070 				bond_ethdev_primary_set(internals,
2071 						internals->active_slaves[0]);
2072 			else
2073 				internals->current_primary_port = internals->primary_port;
2074 		}
2075 	}
2076 
2077 	if (lsc_flag) {
2078 		/* Cancel any possible outstanding interrupts if delays are enabled */
2079 		if (internals->link_up_delay_ms > 0 ||
2080 			internals->link_down_delay_ms > 0)
2081 			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2082 					bonded_eth_dev);
2083 
2084 		if (bonded_eth_dev->data->dev_link.link_status) {
2085 			if (internals->link_up_delay_ms > 0)
2086 				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2087 						bond_ethdev_delayed_lsc_propagation,
2088 						(void *)bonded_eth_dev);
2089 			else
2090 				_rte_eth_dev_callback_process(bonded_eth_dev,
2091 						RTE_ETH_EVENT_INTR_LSC, NULL);
2092 
2093 		} else {
2094 			if (internals->link_down_delay_ms > 0)
2095 				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2096 						bond_ethdev_delayed_lsc_propagation,
2097 						(void *)bonded_eth_dev);
2098 			else
2099 				_rte_eth_dev_callback_process(bonded_eth_dev,
2100 						RTE_ETH_EVENT_INTR_LSC, NULL);
2101 		}
2102 	}
2103 }
2104 
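/* Store the new RETA for the bonded device, replicate the updated groups
 * across the rest of the table and push the result to every slave using each
 * slave's own RETA size. */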
2105 static int
2106 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2107 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2108 {
2109 	unsigned i, j;
2110 	int result = 0;
2111 	int slave_reta_size;
2112 	unsigned reta_count;
2113 	struct bond_dev_private *internals = dev->data->dev_private;
2114 
2115 	if (reta_size != internals->reta_size)
2116 		return -EINVAL;
2117 
2118 	 /* Copy RETA table */
2119 	reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2120 
2121 	for (i = 0; i < reta_count; i++) {
2122 		internals->reta_conf[i].mask = reta_conf[i].mask;
2123 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2124 			if ((reta_conf[i].mask >> j) & 0x01)
2125 				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2126 	}
2127 
2128 	/* Fill the rest of the array by repeating the groups copied above */
2129 	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2130 		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2131 				sizeof(internals->reta_conf[0]) * reta_count);
2132 
2133 	/* Propagate RETA over slaves */
2134 	for (i = 0; i < internals->slave_count; i++) {
2135 		slave_reta_size = internals->slaves[i].reta_size;
2136 		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2137 				&internals->reta_conf[0], slave_reta_size);
2138 		if (result < 0)
2139 			return result;
2140 	}
2141 
2142 	return 0;
2143 }
2144 
2145 static int
2146 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2147 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2148 {
2149 	int i, j;
2150 	struct bond_dev_private *internals = dev->data->dev_private;
2151 
2152 	if (reta_size != internals->reta_size)
2153 		return -EINVAL;
2154 
2155 	 /* Copy RETA table */
2156 	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2157 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2158 			if ((reta_conf[i].mask >> j) & 0x01)
2159 				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2160 
2161 	return 0;
2162 }
2163 
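/* Apply an RSS hash configuration: mask the requested hash functions against
 * the bonded device's supported set, optionally take over the RSS key, and
 * propagate the resulting configuration to all slaves. */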
2164 static int
2165 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2166 		struct rte_eth_rss_conf *rss_conf)
2167 {
2168 	int i, result = 0;
2169 	struct bond_dev_private *internals = dev->data->dev_private;
2170 	struct rte_eth_rss_conf bond_rss_conf;
2171 
2172 	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2173 
2174 	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2175 
2176 	if (bond_rss_conf.rss_hf != 0)
2177 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2178 
2179 	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2180 			sizeof(internals->rss_key)) {
2181 		if (bond_rss_conf.rss_key_len == 0)
2182 			bond_rss_conf.rss_key_len = 40;
2183 		internals->rss_key_len = bond_rss_conf.rss_key_len;
2184 		memcpy(internals->rss_key, bond_rss_conf.rss_key,
2185 				internals->rss_key_len);
2186 	}
2187 
2188 	for (i = 0; i < internals->slave_count; i++) {
2189 		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2190 				&bond_rss_conf);
2191 		if (result < 0)
2192 			return result;
2193 	}
2194 
2195 	return 0;
2196 }
2197 
2198 static int
2199 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2200 		struct rte_eth_rss_conf *rss_conf)
2201 {
2202 	struct bond_dev_private *internals = dev->data->dev_private;
2203 
2204 	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2205 	rss_conf->rss_key_len = internals->rss_key_len;
2206 	if (rss_conf->rss_key)
2207 		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2208 
2209 	return 0;
2210 }
2211 
2212 const struct eth_dev_ops default_dev_ops = {
2213 	.dev_start            = bond_ethdev_start,
2214 	.dev_stop             = bond_ethdev_stop,
2215 	.dev_close            = bond_ethdev_close,
2216 	.dev_configure        = bond_ethdev_configure,
2217 	.dev_infos_get        = bond_ethdev_info,
2218 	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
2219 	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
2220 	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
2221 	.rx_queue_release     = bond_ethdev_rx_queue_release,
2222 	.tx_queue_release     = bond_ethdev_tx_queue_release,
2223 	.link_update          = bond_ethdev_link_update,
2224 	.stats_get            = bond_ethdev_stats_get,
2225 	.stats_reset          = bond_ethdev_stats_reset,
2226 	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
2227 	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
2228 	.reta_update          = bond_ethdev_rss_reta_update,
2229 	.reta_query           = bond_ethdev_rss_reta_query,
2230 	.rss_hash_update      = bond_ethdev_rss_hash_update,
2231 	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
2232 };
2233 
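/* Virtual device probe: parse the mandatory bonding mode and optional socket
 * id from the devargs, create the bonded ethdev and keep the remaining
 * kvargs so bond_ethdev_configure() can apply them once all ports exist. */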
2234 static int
2235 bond_probe(const char *name, const char *params)
2236 {
2237 	struct bond_dev_private *internals;
2238 	struct rte_kvargs *kvlist;
2239 	uint8_t bonding_mode, socket_id;
2240 	int  arg_count, port_id;
2241 
2242 	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
2243 
2244 	kvlist = rte_kvargs_parse(params, pmd_bond_init_valid_arguments);
2245 	if (kvlist == NULL)
2246 		return -1;
2247 
2248 	/* Parse link bonding mode */
2249 	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
2250 		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
2251 				&bond_ethdev_parse_slave_mode_kvarg,
2252 				&bonding_mode) != 0) {
2253 			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
2254 					name);
2255 			goto parse_error;
2256 		}
2257 	} else {
2258 		RTE_LOG(ERR, EAL, "Mode must be specified exactly once for bonded "
2259 				"device %s\n", name);
2260 		goto parse_error;
2261 	}
2262 
2263 	/* Parse socket id to create bonding device on */
2264 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
2265 	if (arg_count == 1) {
2266 		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
2267 				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
2268 				!= 0) {
2269 			RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
2270 					"bonded device %s\n", name);
2271 			goto parse_error;
2272 		}
2273 	} else if (arg_count > 1) {
2274 		RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
2275 				"bonded device %s\n", name);
2276 		goto parse_error;
2277 	} else {
2278 		socket_id = rte_socket_id();
2279 	}
2280 
2281 	/* Create link bonding eth device */
2282 	port_id = rte_eth_bond_create(name, bonding_mode, socket_id);
2283 	if (port_id < 0) {
2284 		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
2285 				"socket %u.\n",	name, bonding_mode, socket_id);
2286 		goto parse_error;
2287 	}
2288 	internals = rte_eth_devices[port_id].data->dev_private;
2289 	internals->kvlist = kvlist;
2290 
2291 	RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
2292 			"socket %u.\n",	name, port_id, bonding_mode, socket_id);
2293 	return 0;
2294 
2295 parse_error:
2296 	rte_kvargs_free(kvlist);
2297 
2298 	return -1;
2299 }
2300 
2301 static int
2302 bond_remove(const char *name)
2303 {
2304 	int  ret;
2305 
2306 	if (name == NULL)
2307 		return -EINVAL;
2308 
2309 	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
2310 
2311 	/* free link bonding eth device */
2312 	ret = rte_eth_bond_free(name);
2313 	if (ret < 0)
2314 		RTE_LOG(ERR, EAL, "Failed to free %s\n", name);
2315 
2316 	return ret;
2317 }
2318 
2319 /* This function resolves the slave port ids once all the other physical and
2320  * virtual devices have been allocated */
2321 static int
2322 bond_ethdev_configure(struct rte_eth_dev *dev)
2323 {
2324 	char *name = dev->data->name;
2325 	struct bond_dev_private *internals = dev->data->dev_private;
2326 	struct rte_kvargs *kvlist = internals->kvlist;
2327 	int arg_count;
2328 	uint8_t port_id = dev - rte_eth_devices;
2329 
2330 	static const uint8_t default_rss_key[40] = {
2331 		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
2332 		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
2333 		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
2334 		0xBE, 0xAC, 0x01, 0xFA
2335 	};
2336 
2337 	unsigned i, j;
2338 
2339 	/* If RSS is enabled, fill table and key with default values */
2340 	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
2341 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
2342 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
2343 		memcpy(internals->rss_key, default_rss_key, 40);
2344 
2345 		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
2346 			internals->reta_conf[i].mask = ~0LL;
2347 			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2348 				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
2349 		}
2350 	}
2351 
2352 	/* set the max_rx_pktlen */
2353 	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
2354 
2355 	/*
2356 	 * if no kvlist, it means that this bonded device has been created
2357 	 * through the bonding api.
2358 	 */
2359 	if (!kvlist)
2360 		return 0;
2361 
2362 	/* Parse MAC address for bonded device */
2363 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
2364 	if (arg_count == 1) {
2365 		struct ether_addr bond_mac;
2366 
2367 		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
2368 				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
2369 			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
2370 					name);
2371 			return -1;
2372 		}
2373 
2374 		/* Set MAC address */
2375 		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
2376 			RTE_LOG(ERR, EAL,
2377 					"Failed to set mac address on bonded device %s\n",
2378 					name);
2379 			return -1;
2380 		}
2381 	} else if (arg_count > 1) {
2382 		RTE_LOG(ERR, EAL,
2383 				"MAC address can be specified only once for bonded device %s\n",
2384 				name);
2385 		return -1;
2386 	}
2387 
2388 	/* Parse/set balance mode transmit policy */
2389 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
2390 	if (arg_count == 1) {
2391 		uint8_t xmit_policy;
2392 
2393 		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
2394 				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
2395 						0) {
2396 			RTE_LOG(INFO, EAL,
2397 					"Invalid xmit policy specified for bonded device %s\n",
2398 					name);
2399 			return -1;
2400 		}
2401 
2402 		/* Set balance mode transmit policy */
2403 		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
2404 			RTE_LOG(ERR, EAL,
2405 					"Failed to set balance xmit policy on bonded device %s\n",
2406 					name);
2407 			return -1;
2408 		}
2409 	} else if (arg_count > 1) {
2410 		RTE_LOG(ERR, EAL,
2411 				"Transmit policy can be specified only once for bonded device"
2412 				" %s\n", name);
2413 		return -1;
2414 	}
2415 
2416 	/* Parse/add slave ports to bonded device */
2417 	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
2418 		struct bond_ethdev_slave_ports slave_ports;
2419 		unsigned i;
2420 
2421 		memset(&slave_ports, 0, sizeof(slave_ports));
2422 
2423 		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
2424 				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
2425 			RTE_LOG(ERR, EAL,
2426 					"Failed to parse slave ports for bonded device %s\n",
2427 					name);
2428 			return -1;
2429 		}
2430 
2431 		for (i = 0; i < slave_ports.slave_count; i++) {
2432 			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
2433 				RTE_LOG(ERR, EAL,
2434 						"Failed to add port %d as slave to bonded device %s\n",
2435 						slave_ports.slaves[i], name);
2436 			}
2437 		}
2438 
2439 	} else {
2440 		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
2441 		return -1;
2442 	}
2443 
2444 	/* Parse/set primary slave port id */
2445 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
2446 	if (arg_count == 1) {
2447 		uint8_t primary_slave_port_id;
2448 
2449 		if (rte_kvargs_process(kvlist,
2450 				PMD_BOND_PRIMARY_SLAVE_KVARG,
2451 				&bond_ethdev_parse_primary_slave_port_id_kvarg,
2452 				&primary_slave_port_id) < 0) {
2453 			RTE_LOG(INFO, EAL,
2454 					"Invalid primary slave port id specified for bonded device"
2455 					" %s\n", name);
2456 			return -1;
2457 		}
2458 
2459 		/* Set primary slave port id */
2460 		if (rte_eth_bond_primary_set(port_id, (uint8_t)primary_slave_port_id)
2461 				!= 0) {
2462 			RTE_LOG(ERR, EAL,
2463 					"Failed to set primary slave port %d on bonded device %s\n",
2464 					primary_slave_port_id, name);
2465 			return -1;
2466 		}
2467 	} else if (arg_count > 1) {
2468 		RTE_LOG(INFO, EAL,
2469 				"Primary slave can be specified only once for bonded device"
2470 				" %s\n", name);
2471 		return -1;
2472 	}
2473 
2474 	/* Parse link status monitor polling interval */
2475 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
2476 	if (arg_count == 1) {
2477 		uint32_t lsc_poll_interval_ms;
2478 
2479 		if (rte_kvargs_process(kvlist,
2480 				PMD_BOND_LSC_POLL_PERIOD_KVARG,
2481 				&bond_ethdev_parse_time_ms_kvarg,
2482 				&lsc_poll_interval_ms) < 0) {
2483 			RTE_LOG(INFO, EAL,
2484 					"Invalid lsc polling interval value specified for bonded"
2485 					" device %s\n", name);
2486 			return -1;
2487 		}
2488 
2489 		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
2490 				!= 0) {
2491 			RTE_LOG(ERR, EAL,
2492 					"Failed to set lsc monitor polling interval (%u ms) on"
2493 					" bonded device %s\n", lsc_poll_interval_ms, name);
2494 			return -1;
2495 		}
2496 	} else if (arg_count > 1) {
2497 		RTE_LOG(INFO, EAL,
2498 				"LSC polling interval can be specified only once for bonded"
2499 				" device %s\n", name);
2500 		return -1;
2501 	}
2502 
2503 	/* Parse link up interrupt propagation delay */
2504 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
2505 	if (arg_count == 1) {
2506 		uint32_t link_up_delay_ms;
2507 
2508 		if (rte_kvargs_process(kvlist,
2509 				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
2510 				&bond_ethdev_parse_time_ms_kvarg,
2511 				&link_up_delay_ms) < 0) {
2512 			RTE_LOG(INFO, EAL,
2513 					"Invalid link up propagation delay value specified for"
2514 					" bonded device %s\n", name);
2515 			return -1;
2516 		}
2517 
2518 		/* Set link up propagation delay */
2519 		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
2520 				!= 0) {
2521 			RTE_LOG(ERR, EAL,
2522 					"Failed to set link up propagation delay (%u ms) on bonded"
2523 					" device %s\n", link_up_delay_ms, name);
2524 			return -1;
2525 		}
2526 	} else if (arg_count > 1) {
2527 		RTE_LOG(INFO, EAL,
2528 				"Link up propagation delay can be specified only once for"
2529 				" bonded device %s\n", name);
2530 		return -1;
2531 	}
2532 
2533 	/* Parse link down interrupt propagation delay */
2534 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
2535 	if (arg_count == 1) {
2536 		uint32_t link_down_delay_ms;
2537 
2538 		if (rte_kvargs_process(kvlist,
2539 				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
2540 				&bond_ethdev_parse_time_ms_kvarg,
2541 				&link_down_delay_ms) < 0) {
2542 			RTE_LOG(INFO, EAL,
2543 					"Invalid link down propagation delay value specified for"
2544 					" bonded device %s\n", name);
2545 			return -1;
2546 		}
2547 
2548 		/* Set link down propagation delay */
2549 		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
2550 				!= 0) {
2551 			RTE_LOG(ERR, EAL,
2552 					"Failed to set link down propagation delay (%u ms) on"
2553 					" bonded device %s\n", link_down_delay_ms, name);
2554 			return -1;
2555 		}
2556 	} else if (arg_count > 1) {
2557 		RTE_LOG(INFO, EAL,
2558 				"Link down propagation delay can be specified only once for"
2559 				" bonded device %s\n", name);
2560 		return -1;
2561 	}
2562 
2563 	return 0;
2564 }
2565 
2566 static struct rte_vdev_driver bond_drv = {
2567 	.probe = bond_probe,
2568 	.remove = bond_remove,
2569 };
2570 
2571 RTE_PMD_REGISTER_VDEV(net_bonding, bond_drv);
2572 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
2573 
2574 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
2575 	"slave=<ifc> "
2576 	"primary=<ifc> "
2577 	"mode=[0-6] "
2578 	"xmit_policy=[l2 | l23 | l34] "
2579 	"socket_id=<int> "
2580 	"mac=<mac addr> "
2581 	"lsc_poll_period_ms=<int> "
2582 	"up_delay=<int> "
2583 	"down_delay=<int>");
2584
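/*
 * Illustrative usage of the devargs listed above when creating a bonded
 * device from the EAL command line (a sketch only; the PCI addresses, core
 * mask and application options are examples and depend on the target system):
 *
 *   testpmd -c 0xf -n 4 \
 *       --vdev 'net_bonding0,mode=2,slave=0000:04:00.0,slave=0000:04:00.1' \
 *       -- --port-topology=chained
 */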