xref: /dpdk/drivers/net/bonding/rte_eth_bond_pmd.c (revision 08e0b3440baffc20a9317a5e3dc83c6de5d39c24)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2017 Intel Corporation
3  */
4 #include <stdlib.h>
5 #include <netinet/in.h>
6 
7 #include <rte_mbuf.h>
8 #include <rte_malloc.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_ethdev_vdev.h>
11 #include <rte_tcp.h>
12 #include <rte_udp.h>
13 #include <rte_ip.h>
14 #include <rte_ip_frag.h>
15 #include <rte_devargs.h>
16 #include <rte_kvargs.h>
17 #include <rte_bus_vdev.h>
18 #include <rte_alarm.h>
19 #include <rte_cycles.h>
20 #include <rte_string_fns.h>
21 
22 #include "rte_eth_bond.h"
23 #include "rte_eth_bond_private.h"
24 #include "rte_eth_bond_8023ad_private.h"
25 
26 #define REORDER_PERIOD_MS 10
27 #define DEFAULT_POLLING_INTERVAL_10_MS (10)
28 
29 #define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)
30 
31 /* Table for statistics in mode 5 TLB */
32 static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
33 
34 static inline size_t
35 get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
36 {
37 	size_t vlan_offset = 0;
38 
39 	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
40 		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
41 
42 		vlan_offset = sizeof(struct vlan_hdr);
43 		*proto = vlan_hdr->eth_proto;
44 
45 		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
46 			vlan_hdr = vlan_hdr + 1;
47 			*proto = vlan_hdr->eth_proto;
48 			vlan_offset += sizeof(struct vlan_hdr);
49 		}
50 	}
51 	return vlan_offset;
52 }
53 
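/*
 * Usage sketch (illustrative only; the mbuf "m" is hypothetical): locating
 * the L3 header behind zero, one or two VLAN tags with get_vlan_offset().
 *
 *	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
 *	uint16_t proto = eth->ether_type;
 *	size_t off = get_vlan_offset(eth, &proto);
 *
 *	if (proto == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
 *		struct ipv4_hdr *ip4 = (struct ipv4_hdr *)
 *				((char *)(eth + 1) + off);
 *		// ... hash or inspect the IPv4 header ...
 *	}
 *
 * For an untagged frame off is 0, for a single 802.1Q tag it is
 * sizeof(struct vlan_hdr), and for a QinQ double tag twice that; proto is
 * updated to the inner EtherType in each case.
 */
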
54 static uint16_t
55 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
56 {
57 	struct bond_dev_private *internals;
58 
59 	uint16_t num_rx_slave = 0;
60 	uint16_t num_rx_total = 0;
61 
62 	int i;
63 
64 	/* Cast to structure containing the bonded device's port id and queue id */
65 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
66 
67 	internals = bd_rx_q->dev_private;
68 
69 
70 	for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
71 		/* Offset of pointer to *bufs increases as packets are received
72 		 * from other slaves */
73 		num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
74 				bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
75 		if (num_rx_slave) {
76 			num_rx_total += num_rx_slave;
77 			nb_pkts -= num_rx_slave;
78 		}
79 	}
80 
81 	return num_rx_total;
82 }
83 
84 static uint16_t
85 bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
86 		uint16_t nb_pkts)
87 {
88 	struct bond_dev_private *internals;
89 
90 	/* Cast to structure containing the bonded device's port id and queue id */
91 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
92 
93 	internals = bd_rx_q->dev_private;
94 
95 	return rte_eth_rx_burst(internals->current_primary_port,
96 			bd_rx_q->queue_id, bufs, nb_pkts);
97 }
98 
99 static inline uint8_t
100 is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
101 {
102 	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
103 
104 	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
105 		(ethertype == ether_type_slow_be &&
106 		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
107 }
108 
109 /*****************************************************************************
110  * Flow director's setup for mode 4 optimization
111  */
112 
113 static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
114 	.dst.addr_bytes = { 0 },
115 	.src.addr_bytes = { 0 },
116 	.type = RTE_BE16(ETHER_TYPE_SLOW),
117 };
118 
119 static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
120 	.dst.addr_bytes = { 0 },
121 	.src.addr_bytes = { 0 },
122 	.type = 0xFFFF,
123 };
124 
125 static struct rte_flow_item flow_item_8023ad[] = {
126 	{
127 		.type = RTE_FLOW_ITEM_TYPE_ETH,
128 		.spec = &flow_item_eth_type_8023ad,
129 		.last = NULL,
130 		.mask = &flow_item_eth_mask_type_8023ad,
131 	},
132 	{
133 		.type = RTE_FLOW_ITEM_TYPE_END,
134 		.spec = NULL,
135 		.last = NULL,
136 		.mask = NULL,
137 	}
138 };
139 
140 const struct rte_flow_attr flow_attr_8023ad = {
141 	.group = 0,
142 	.priority = 0,
143 	.ingress = 1,
144 	.egress = 0,
145 	.reserved = 0,
146 };
147 
148 int
149 bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
150 		uint16_t slave_port) {
151 	struct rte_eth_dev_info slave_info;
152 	struct rte_flow_error error;
153 	struct bond_dev_private *internals = (struct bond_dev_private *)
154 			(bond_dev->data->dev_private);
155 
156 	const struct rte_flow_action_queue lacp_queue_conf = {
157 		.index = 0,
158 	};
159 
160 	const struct rte_flow_action actions[] = {
161 		{
162 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
163 			.conf = &lacp_queue_conf
164 		},
165 		{
166 			.type = RTE_FLOW_ACTION_TYPE_END,
167 		}
168 	};
169 
170 	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
171 			flow_item_8023ad, actions, &error);
172 	if (ret < 0) {
173 		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
174 				__func__, error.message, slave_port,
175 				internals->mode4.dedicated_queues.rx_qid);
176 		return -1;
177 	}
178 
179 	rte_eth_dev_info_get(slave_port, &slave_info);
180 	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
181 			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
182 		RTE_BOND_LOG(ERR,
183 			"%s: Slave %d capabilities doesn't allow to allocate additional queues",
184 			__func__, slave_port);
185 		return -1;
186 	}
187 
188 	return 0;
189 }
190 
191 int
192 bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
193 	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
194 	struct bond_dev_private *internals = (struct bond_dev_private *)
195 			(bond_dev->data->dev_private);
196 	struct rte_eth_dev_info bond_info;
197 	uint16_t idx;
198 
199 	/* Verify that all slaves in the bonding device support flow director */
200 	if (internals->slave_count > 0) {
201 		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);
202 
203 		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
204 		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;
205 
206 		for (idx = 0; idx < internals->slave_count; idx++) {
207 			if (bond_ethdev_8023ad_flow_verify(bond_dev,
208 					internals->slaves[idx].port_id) != 0)
209 				return -1;
210 		}
211 	}
212 
213 	return 0;
214 }
215 
216 int
217 bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {
218 
219 	struct rte_flow_error error;
220 	struct bond_dev_private *internals = (struct bond_dev_private *)
221 			(bond_dev->data->dev_private);
222 
223 	struct rte_flow_action_queue lacp_queue_conf = {
224 		.index = internals->mode4.dedicated_queues.rx_qid,
225 	};
226 
227 	const struct rte_flow_action actions[] = {
228 		{
229 			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
230 			.conf = &lacp_queue_conf
231 		},
232 		{
233 			.type = RTE_FLOW_ACTION_TYPE_END,
234 		}
235 	};
236 
237 	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
238 			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
239 	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
240 		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
241 				"(slave_port=%d queue_id=%d)",
242 				error.message, slave_port,
243 				internals->mode4.dedicated_queues.rx_qid);
244 		return -1;
245 	}
246 
247 	return 0;
248 }
249 
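/*
 * Note (illustrative, assuming the rte_eth_bond_8023ad_dedicated_queues_enable()
 * helper declared in rte_eth_bond_8023ad.h): the flow rule above is only
 * installed when the application opts in to dedicated LACP queues while the
 * bonded port is stopped, e.g.:
 *
 *	// bond_port: an existing, stopped mode 4 bonded port (hypothetical)
 *	if (rte_eth_bond_8023ad_dedicated_queues_enable(bond_port) == 0) {
 *		// an extra Rx/Tx queue pair is reserved on every slave and
 *		// bond_ethdev_8023ad_flow_set() steers slow frames into it
 *	}
 *	rte_eth_dev_start(bond_port);
 *
 * Without this, slow frames are filtered in software by
 * bond_ethdev_rx_burst_8023ad() further down.
 */
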
250 static uint16_t
251 bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
252 		uint16_t nb_pkts)
253 {
254 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
255 	struct bond_dev_private *internals = bd_rx_q->dev_private;
256 	uint16_t num_rx_total = 0;	/* Total number of received packets */
257 	uint16_t slaves[RTE_MAX_ETHPORTS];
258 	uint16_t slave_count;
259 
260 	uint16_t i, idx;
261 
262 	/* Copy slave list to protect against slave up/down changes during rx
263 	 * bursting */
264 	slave_count = internals->active_slave_count;
265 	memcpy(slaves, internals->active_slaves,
266 			sizeof(internals->active_slaves[0]) * slave_count);
267 
268 	for (i = 0, idx = internals->active_slave;
269 			i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
270 		idx = idx % slave_count;
271 
272 		/* Read packets from this slave */
273 		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
274 				&bufs[num_rx_total], nb_pkts - num_rx_total);
275 	}
276 
277 	internals->active_slave = idx;
278 
279 	return num_rx_total;
280 }
281 
282 static uint16_t
283 bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
284 		uint16_t nb_bufs)
285 {
286 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
287 	struct bond_dev_private *internals = bd_tx_q->dev_private;
288 
289 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
290 	uint16_t slave_count;
291 
292 	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
293 	uint16_t dist_slave_count;
294 
295 	/* 2-D array to sort mbufs for transmission on each slave into */
296 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
297 	/* Number of mbufs for transmission on each slave */
298 	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
299 	/* Mapping array generated by hash function to map mbufs to slaves */
300 	uint16_t bufs_slave_port_idxs[nb_bufs];
301 
302 	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
303 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
304 
305 	uint16_t i, j;
306 
307 	if (unlikely(nb_bufs == 0))
308 		return 0;
309 
310 	/* Copy slave list to protect against slave up/down changes during tx
311 	 * bursting */
312 	slave_count = internals->active_slave_count;
313 	if (unlikely(slave_count < 1))
314 		return 0;
315 
316 	memcpy(slave_port_ids, internals->active_slaves,
317 			sizeof(slave_port_ids[0]) * slave_count);
318 
319 
320 	dist_slave_count = 0;
321 	for (i = 0; i < slave_count; i++) {
322 		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
323 
324 		if (ACTOR_STATE(port, DISTRIBUTING))
325 			dist_slave_port_ids[dist_slave_count++] =
326 					slave_port_ids[i];
327 	}
328 
329 	if (unlikely(dist_slave_count < 1))
330 		return 0;
331 
332 	/*
333 	 * Populate each slave's mbuf array with the packets to be sent on it,
334 	 * selecting the output slave using a hash based on the xmit policy
335 	 */
336 	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
337 			bufs_slave_port_idxs);
338 
339 	for (i = 0; i < nb_bufs; i++) {
340 		/* Populate slave mbuf arrays with mbufs for that slave. */
341 		uint8_t slave_idx = bufs_slave_port_idxs[i];
342 
343 		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
344 	}
345 
346 
347 	/* Send packet burst on each slave device */
348 	for (i = 0; i < dist_slave_count; i++) {
349 		if (slave_nb_bufs[i] == 0)
350 			continue;
351 
352 		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
353 				bd_tx_q->queue_id, slave_bufs[i],
354 				slave_nb_bufs[i]);
355 
356 		total_tx_count += slave_tx_count;
357 
358 		/* If tx burst fails move packets to end of bufs */
359 		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
360 			slave_tx_fail_count[i] = slave_nb_bufs[i] -
361 					slave_tx_count;
362 			total_tx_fail_count += slave_tx_fail_count[i];
363 
364 			/*
365 			 * Shift bufs to beginning of array to allow reordering
366 			 * later
367 			 */
368 			for (j = 0; j < slave_tx_fail_count[i]; j++) {
369 				slave_bufs[i][j] =
370 					slave_bufs[i][slave_tx_count + j];
371 			}
372 		}
373 	}
374 
375 	/*
376 	 * If there are tx burst failures we move packets to end of bufs to
377 	 * preserve expected PMD behaviour of all failed transmitted being
378 	 * at the end of the input mbuf array
379 	 */
380 	if (unlikely(total_tx_fail_count > 0)) {
381 		int bufs_idx = nb_bufs - total_tx_fail_count;
382 
383 		for (i = 0; i < slave_count; i++) {
384 			if (slave_tx_fail_count[i] > 0) {
385 				for (j = 0; j < slave_tx_fail_count[i]; j++)
386 					bufs[bufs_idx++] = slave_bufs[i][j];
387 			}
388 		}
389 	}
390 
391 	return total_tx_count;
392 }
393 
394 
395 static uint16_t
396 bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
397 		uint16_t nb_pkts)
398 {
399 	/* Cast to structure containing the bonded device's port id and queue id */
400 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
401 	struct bond_dev_private *internals = bd_rx_q->dev_private;
402 	struct ether_addr bond_mac;
403 
404 	struct ether_hdr *hdr;
405 
406 	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
407 	uint16_t num_rx_total = 0;	/* Total number of received packets */
408 	uint16_t slaves[RTE_MAX_ETHPORTS];
409 	uint16_t slave_count, idx;
410 
411 	uint8_t collecting;  /* current slave collecting status */
412 	const uint8_t promisc = internals->promiscuous_en;
413 	uint16_t i, j, k;
414 	uint8_t subtype;
415 
416 	rte_eth_macaddr_get(internals->port_id, &bond_mac);
417 	/* Copy slave list to protect against slave up/down changes during rx
418 	 * bursting */
419 	slave_count = internals->active_slave_count;
420 	memcpy(slaves, internals->active_slaves,
421 			sizeof(internals->active_slaves[0]) * slave_count);
422 
423 	idx = internals->active_slave;
424 	if (idx >= slave_count) {
425 		internals->active_slave = 0;
426 		idx = 0;
427 	}
428 	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
429 		j = num_rx_total;
430 		collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
431 					 COLLECTING);
432 
433 		/* Read packets from this slave */
434 		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
435 				&bufs[num_rx_total], nb_pkts - num_rx_total);
436 
437 		for (k = j; k < 2 && k < num_rx_total; k++)
438 			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));
439 
440 		/* Handle slow protocol packets. */
441 		while (j < num_rx_total) {
442 
443 			/* Known non-pure-L2 packets cannot be slow frames, skip them */
444 			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
445 				j++;
446 				continue;
447 			}
448 
449 			if (j + 3 < num_rx_total)
450 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));
451 
452 			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
453 			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;
454 
455 			/* Remove the packet from the array if it is a slow packet, if the
456 			 * slave is not in collecting state, or if the bonding interface is
457 			 * not in promiscuous mode and the destination address does not match. */
458 			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
459 				!collecting || (!promisc &&
460 					!is_multicast_ether_addr(&hdr->d_addr) &&
461 					!is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
462 
463 				if (hdr->ether_type == ether_type_slow_be) {
464 					bond_mode_8023ad_handle_slow_pkt(
465 					    internals, slaves[idx], bufs[j]);
466 				} else
467 					rte_pktmbuf_free(bufs[j]);
468 
469 				/* Packet is managed by mode 4 or dropped, shift the array */
470 				num_rx_total--;
471 				if (j < num_rx_total) {
472 					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
473 						(num_rx_total - j));
474 				}
475 			} else
476 				j++;
477 		}
478 		if (unlikely(++idx == slave_count))
479 			idx = 0;
480 	}
481 
482 	internals->active_slave = idx;
483 	return num_rx_total;
484 }
485 
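/*
 * Note for callers (sketch; bond_port, pkts, BURST_SZ and force_quit are
 * application-defined): because LACPDUs are extracted inline in the rx path
 * above when dedicated queues are disabled, the application must keep
 * polling the bonded port even when it has nothing to send, e.g.:
 *
 *	while (!force_quit) {
 *		uint16_t nb = rte_eth_rx_burst(bond_port, 0, pkts, BURST_SZ);
 *		// ... process nb packets ...
 *		rte_eth_tx_burst(bond_port, 0, pkts, nb);
 *	}
 *
 * per the warning in bond_ethdev_mode_set(), at least every 100 ms.
 */
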
486 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
487 uint32_t burstnumberRX;
488 uint32_t burstnumberTX;
489 
490 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
491 
492 static void
493 arp_op_name(uint16_t arp_op, char *buf)
494 {
495 	switch (arp_op) {
496 	case ARP_OP_REQUEST:
497 		snprintf(buf, sizeof("ARP Request"), "%s", "ARP Request");
498 		return;
499 	case ARP_OP_REPLY:
500 		snprintf(buf, sizeof("ARP Reply"), "%s", "ARP Reply");
501 		return;
502 	case ARP_OP_REVREQUEST:
503 		snprintf(buf, sizeof("Reverse ARP Request"), "%s",
504 				"Reverse ARP Request");
505 		return;
506 	case ARP_OP_REVREPLY:
507 		snprintf(buf, sizeof("Reverse ARP Reply"), "%s",
508 				"Reverse ARP Reply");
509 		return;
510 	case ARP_OP_INVREQUEST:
511 		snprintf(buf, sizeof("Peer Identify Request"), "%s",
512 				"Peer Identify Request");
513 		return;
514 	case ARP_OP_INVREPLY:
515 		snprintf(buf, sizeof("Peer Identify Reply"), "%s",
516 				"Peer Identify Reply");
517 		return;
518 	default:
519 		break;
520 	}
521 	snprintf(buf, sizeof("Unknown"), "%s", "Unknown");
522 	return;
523 }
524 #endif
525 #define MaxIPv4String	16
526 static void
527 ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
528 {
529 	uint32_t ipv4_addr;
530 
531 	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
532 	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
533 		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
534 		ipv4_addr & 0xFF);
535 }
536 
537 #define MAX_CLIENTS_NUMBER	128
538 uint8_t active_clients;
539 struct client_stats_t {
540 	uint16_t port;
541 	uint32_t ipv4_addr;
542 	uint32_t ipv4_rx_packets;
543 	uint32_t ipv4_tx_packets;
544 };
545 struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];
546 
547 static void
548 update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
549 {
550 	int i = 0;
551 
552 	for (; i < MAX_CLIENTS_NUMBER; i++)	{
553 		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port))	{
554 			/* Existing client - just update its RX or TX packet count */
555 			if (TXorRXindicator == &burstnumberRX)
556 				client_stats[i].ipv4_rx_packets++;
557 			else
558 				client_stats[i].ipv4_tx_packets++;
559 			return;
560 		}
561 	}
562 	/* We have a new client. Insert it into the table and update its stats */
563 	if (TXorRXindicator == &burstnumberRX)
564 		client_stats[active_clients].ipv4_rx_packets++;
565 	else
566 		client_stats[active_clients].ipv4_tx_packets++;
567 	client_stats[active_clients].ipv4_addr = addr;
568 	client_stats[active_clients].port = port;
569 	active_clients++;
570 
571 }
572 
573 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
574 #define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber)	\
575 		RTE_LOG(DEBUG, PMD, \
576 		"%s " \
577 		"port:%d " \
578 		"SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
579 		"SrcIP:%s " \
580 		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X " \
581 		"DstIP:%s " \
582 		"%s " \
583 		"%d\n", \
584 		info, \
585 		port, \
586 		eth_h->s_addr.addr_bytes[0], \
587 		eth_h->s_addr.addr_bytes[1], \
588 		eth_h->s_addr.addr_bytes[2], \
589 		eth_h->s_addr.addr_bytes[3], \
590 		eth_h->s_addr.addr_bytes[4], \
591 		eth_h->s_addr.addr_bytes[5], \
592 		src_ip, \
593 		eth_h->d_addr.addr_bytes[0], \
594 		eth_h->d_addr.addr_bytes[1], \
595 		eth_h->d_addr.addr_bytes[2], \
596 		eth_h->d_addr.addr_bytes[3], \
597 		eth_h->d_addr.addr_bytes[4], \
598 		eth_h->d_addr.addr_bytes[5], \
599 		dst_ip, \
600 		arp_op, \
601 		++burstnumber)
602 #endif
603 
604 static void
605 mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
606 		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
607 {
608 	struct ipv4_hdr *ipv4_h;
609 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
610 	struct arp_hdr *arp_h;
611 	char dst_ip[16];
612 	char ArpOp[24];
613 	char buf[16];
614 #endif
615 	char src_ip[16];
616 
617 	uint16_t ether_type = eth_h->ether_type;
618 	uint16_t offset = get_vlan_offset(eth_h, &ether_type);
619 
620 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
621 	strlcpy(buf, info, 16);
622 #endif
623 
624 	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
625 		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
626 		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
627 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
628 		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
629 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
630 #endif
631 		update_client_stats(ipv4_h->src_addr, port, burstnumber);
632 	}
633 #ifdef RTE_LIBRTE_BOND_DEBUG_ALB
634 	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
635 		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
636 		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
637 		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
638 		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op), ArpOp);
639 		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
640 	}
641 #endif
642 }
643 #endif
644 
645 static uint16_t
646 bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
647 {
648 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
649 	struct bond_dev_private *internals = bd_tx_q->dev_private;
650 	struct ether_hdr *eth_h;
651 	uint16_t ether_type, offset;
652 	uint16_t nb_recv_pkts;
653 	int i;
654 
655 	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);
656 
657 	for (i = 0; i < nb_recv_pkts; i++) {
658 		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
659 		ether_type = eth_h->ether_type;
660 		offset = get_vlan_offset(eth_h, &ether_type);
661 
662 		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
663 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
664 			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
665 #endif
666 			bond_mode_alb_arp_recv(eth_h, offset, internals);
667 		}
668 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
669 		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
670 			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
671 #endif
672 	}
673 
674 	return nb_recv_pkts;
675 }
676 
677 static uint16_t
678 bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
679 		uint16_t nb_pkts)
680 {
681 	struct bond_dev_private *internals;
682 	struct bond_tx_queue *bd_tx_q;
683 
684 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
685 	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };
686 
687 	uint16_t num_of_slaves;
688 	uint16_t slaves[RTE_MAX_ETHPORTS];
689 
690 	uint16_t num_tx_total = 0, num_tx_slave;
691 
692 	static int slave_idx = 0;
693 	int i, cslave_idx = 0, tx_fail_total = 0;
694 
695 	bd_tx_q = (struct bond_tx_queue *)queue;
696 	internals = bd_tx_q->dev_private;
697 
698 	/* Copy slave list to protect against slave up/down changes during tx
699 	 * bursting */
700 	num_of_slaves = internals->active_slave_count;
701 	memcpy(slaves, internals->active_slaves,
702 			sizeof(internals->active_slaves[0]) * num_of_slaves);
703 
704 	if (num_of_slaves < 1)
705 		return num_tx_total;
706 
707 	/* Populate each slave's mbuf array with the packets to be sent on it */
708 	for (i = 0; i < nb_pkts; i++) {
709 		cslave_idx = (slave_idx + i) % num_of_slaves;
710 		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
711 	}
712 
713 	/* increment current slave index so the next call to tx burst starts on the
714 	 * next slave */
715 	slave_idx = ++cslave_idx;
716 
717 	/* Send packet burst on each slave device */
718 	for (i = 0; i < num_of_slaves; i++) {
719 		if (slave_nb_pkts[i] > 0) {
720 			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
721 					slave_bufs[i], slave_nb_pkts[i]);
722 
723 			/* if tx burst fails move packets to end of bufs */
724 			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
725 				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;
726 
727 				tx_fail_total += tx_fail_slave;
728 
729 				memcpy(&bufs[nb_pkts - tx_fail_total],
730 						&slave_bufs[i][num_tx_slave],
731 						tx_fail_slave * sizeof(bufs[0]));
732 			}
733 			num_tx_total += num_tx_slave;
734 		}
735 	}
736 
737 	return num_tx_total;
738 }
739 
740 static uint16_t
741 bond_ethdev_tx_burst_active_backup(void *queue,
742 		struct rte_mbuf **bufs, uint16_t nb_pkts)
743 {
744 	struct bond_dev_private *internals;
745 	struct bond_tx_queue *bd_tx_q;
746 
747 	bd_tx_q = (struct bond_tx_queue *)queue;
748 	internals = bd_tx_q->dev_private;
749 
750 	if (internals->active_slave_count < 1)
751 		return 0;
752 
753 	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
754 			bufs, nb_pkts);
755 }
756 
757 static inline uint16_t
758 ether_hash(struct ether_hdr *eth_hdr)
759 {
760 	unaligned_uint16_t *word_src_addr =
761 		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
762 	unaligned_uint16_t *word_dst_addr =
763 		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;
764 
765 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
766 			(word_src_addr[1] ^ word_dst_addr[1]) ^
767 			(word_src_addr[2] ^ word_dst_addr[2]);
768 }
769 
770 static inline uint32_t
771 ipv4_hash(struct ipv4_hdr *ipv4_hdr)
772 {
773 	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
774 }
775 
776 static inline uint32_t
777 ipv6_hash(struct ipv6_hdr *ipv6_hdr)
778 {
779 	unaligned_uint32_t *word_src_addr =
780 		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
781 	unaligned_uint32_t *word_dst_addr =
782 		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);
783 
784 	return (word_src_addr[0] ^ word_dst_addr[0]) ^
785 			(word_src_addr[1] ^ word_dst_addr[1]) ^
786 			(word_src_addr[2] ^ word_dst_addr[2]) ^
787 			(word_src_addr[3] ^ word_dst_addr[3]);
788 }
789 
790 
791 void
792 burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
793 		uint8_t slave_count, uint16_t *slaves)
794 {
795 	struct ether_hdr *eth_hdr;
796 	uint32_t hash;
797 	int i;
798 
799 	for (i = 0; i < nb_pkts; i++) {
800 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
801 
802 		hash = ether_hash(eth_hdr);
803 
804 		slaves[i] = (hash ^= hash >> 8) % slave_count;
805 	}
806 }
807 
808 void
809 burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
810 		uint8_t slave_count, uint16_t *slaves)
811 {
812 	uint16_t i;
813 	struct ether_hdr *eth_hdr;
814 	uint16_t proto;
815 	size_t vlan_offset;
816 	uint32_t hash, l3hash;
817 
818 	for (i = 0; i < nb_pkts; i++) {
819 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
820 		l3hash = 0;
821 
822 		proto = eth_hdr->ether_type;
823 		hash = ether_hash(eth_hdr);
824 
825 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
826 
827 		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
828 			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
829 					((char *)(eth_hdr + 1) + vlan_offset);
830 			l3hash = ipv4_hash(ipv4_hdr);
831 
832 		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
833 			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
834 					((char *)(eth_hdr + 1) + vlan_offset);
835 			l3hash = ipv6_hash(ipv6_hdr);
836 		}
837 
838 		hash = hash ^ l3hash;
839 		hash ^= hash >> 16;
840 		hash ^= hash >> 8;
841 
842 		slaves[i] = hash % slave_count;
843 	}
844 }
845 
846 void
847 burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
848 		uint8_t slave_count, uint16_t *slaves)
849 {
850 	struct ether_hdr *eth_hdr;
851 	uint16_t proto;
852 	size_t vlan_offset;
853 	int i;
854 
855 	struct udp_hdr *udp_hdr;
856 	struct tcp_hdr *tcp_hdr;
857 	uint32_t hash, l3hash, l4hash;
858 
859 	for (i = 0; i < nb_pkts; i++) {
860 		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
861 		proto = eth_hdr->ether_type;
862 		vlan_offset = get_vlan_offset(eth_hdr, &proto);
863 		l3hash = 0;
864 		l4hash = 0;
865 
866 		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
867 			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
868 					((char *)(eth_hdr + 1) + vlan_offset);
869 			size_t ip_hdr_offset;
870 
871 			l3hash = ipv4_hash(ipv4_hdr);
872 
873 			/* there is no L4 header in fragmented packet */
874 			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
875 								== 0)) {
876 				ip_hdr_offset = (ipv4_hdr->version_ihl
877 					& IPV4_HDR_IHL_MASK) *
878 					IPV4_IHL_MULTIPLIER;
879 
880 				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
881 					tcp_hdr = (struct tcp_hdr *)
882 						((char *)ipv4_hdr +
883 							ip_hdr_offset);
884 					l4hash = HASH_L4_PORTS(tcp_hdr);
885 				} else if (ipv4_hdr->next_proto_id ==
886 								IPPROTO_UDP) {
887 					udp_hdr = (struct udp_hdr *)
888 						((char *)ipv4_hdr +
889 							ip_hdr_offset);
890 					l4hash = HASH_L4_PORTS(udp_hdr);
891 				}
892 			}
893 		} else if  (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
894 			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
895 					((char *)(eth_hdr + 1) + vlan_offset);
896 			l3hash = ipv6_hash(ipv6_hdr);
897 
898 			if (ipv6_hdr->proto == IPPROTO_TCP) {
899 				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
900 				l4hash = HASH_L4_PORTS(tcp_hdr);
901 			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
902 				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
903 				l4hash = HASH_L4_PORTS(udp_hdr);
904 			}
905 		}
906 
907 		hash = l3hash ^ l4hash;
908 		hash ^= hash >> 16;
909 		hash ^= hash >> 8;
910 
911 		slaves[i] = hash % slave_count;
912 	}
913 }
914 
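/*
 * Worked example (illustrative): with the layer 3+4 hash above and two
 * slaves, the IPv4/TCP flows 10.0.0.1:1000 -> 10.0.0.2:80 and
 * 10.0.0.1:1001 -> 10.0.0.2:80 share the same l3hash but differ in
 * HASH_L4_PORTS(), so after the >>16/>>8 folding the low bit of the final
 * hash differs for this particular pair and "hash % slave_count" places the
 * two flows on different slaves.
 */
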
915 struct bwg_slave {
916 	uint64_t bwg_left_int;
917 	uint64_t bwg_left_remainder;
918 	uint8_t slave;
919 };
920 
921 void
922 bond_tlb_activate_slave(struct bond_dev_private *internals) {
923 	int i;
924 
925 	for (i = 0; i < internals->active_slave_count; i++) {
926 		tlb_last_obytets[internals->active_slaves[i]] = 0;
927 	}
928 }
929 
930 static int
931 bandwidth_cmp(const void *a, const void *b)
932 {
933 	const struct bwg_slave *bwg_a = a;
934 	const struct bwg_slave *bwg_b = b;
935 	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
936 	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
937 			(int64_t)bwg_a->bwg_left_remainder;
938 	if (diff > 0)
939 		return 1;
940 	else if (diff < 0)
941 		return -1;
942 	else if (diff2 > 0)
943 		return 1;
944 	else if (diff2 < 0)
945 		return -1;
946 	else
947 		return 0;
948 }
949 
950 static void
951 bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
952 		struct bwg_slave *bwg_slave)
953 {
954 	struct rte_eth_link link_status;
955 
956 	rte_eth_link_get_nowait(port_id, &link_status);
957 	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
958 	if (link_bwg == 0)
959 		return;
960 	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
961 	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
962 	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
963 }
964 
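/*
 * Worked example (illustrative): for a 10G slave (link_speed == 10000 Mbit/s)
 * with update_idx == 0 the scaled budget is
 *
 *	link_bwg = 10000 * 1000000 / 8                   ->  1,250,000,000 bytes/s
 *	link_bwg = link_bwg * (0 + 1) * REORDER_PERIOD_MS -> 12,500,000,000
 *
 * If the slave transmitted load == 1,000,000 bytes since the last stats
 * snapshot, 1000 * load == 1,000,000,000, so bwg_left_int == 0 and
 * bwg_left_remainder == 11,500,000,000.  An idle slave gets bwg_left_int == 1
 * and remainder 0, so bandwidth_cmp() sorts it ahead of any loaded slave and
 * the TLB transmit order favours the least-loaded ports.
 */
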
965 static void
966 bond_ethdev_update_tlb_slave_cb(void *arg)
967 {
968 	struct bond_dev_private *internals = arg;
969 	struct rte_eth_stats slave_stats;
970 	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
971 	uint8_t slave_count;
972 	uint64_t tx_bytes;
973 
974 	uint8_t update_stats = 0;
975 	uint8_t i, slave_id;
976 
977 	internals->slave_update_idx++;
978 
979 
980 	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
981 		update_stats = 1;
982 
983 	for (i = 0; i < internals->active_slave_count; i++) {
984 		slave_id = internals->active_slaves[i];
985 		rte_eth_stats_get(slave_id, &slave_stats);
986 		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
987 		bandwidth_left(slave_id, tx_bytes,
988 				internals->slave_update_idx, &bwg_array[i]);
989 		bwg_array[i].slave = slave_id;
990 
991 		if (update_stats) {
992 			tlb_last_obytets[slave_id] = slave_stats.obytes;
993 		}
994 	}
995 
996 	if (update_stats == 1)
997 		internals->slave_update_idx = 0;
998 
999 	slave_count = i;
1000 	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
1001 	for (i = 0; i < slave_count; i++)
1002 		internals->tlb_slaves_order[i] = bwg_array[i].slave;
1003 
1004 	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
1005 			(struct bond_dev_private *)internals);
1006 }
1007 
1008 static uint16_t
1009 bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1010 {
1011 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1012 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1013 
1014 	struct rte_eth_dev *primary_port =
1015 			&rte_eth_devices[internals->primary_port];
1016 	uint16_t num_tx_total = 0;
1017 	uint16_t i, j;
1018 
1019 	uint16_t num_of_slaves = internals->active_slave_count;
1020 	uint16_t slaves[RTE_MAX_ETHPORTS];
1021 
1022 	struct ether_hdr *ether_hdr;
1023 	struct ether_addr primary_slave_addr;
1024 	struct ether_addr active_slave_addr;
1025 
1026 	if (num_of_slaves < 1)
1027 		return num_tx_total;
1028 
1029 	memcpy(slaves, internals->tlb_slaves_order,
1030 				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);
1031 
1032 
1033 	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
1034 
1035 	if (nb_pkts > 3) {
1036 		for (i = 0; i < 3; i++)
1037 			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
1038 	}
1039 
1040 	for (i = 0; i < num_of_slaves; i++) {
1041 		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
1042 		for (j = num_tx_total; j < nb_pkts; j++) {
1043 			if (j + 3 < nb_pkts)
1044 				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
1045 
1046 			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
1047 			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
1048 				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
1049 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1050 			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
1051 #endif
1052 		}
1053 
1054 		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1055 				bufs + num_tx_total, nb_pkts - num_tx_total);
1056 
1057 		if (num_tx_total == nb_pkts)
1058 			break;
1059 	}
1060 
1061 	return num_tx_total;
1062 }
1063 
1064 void
1065 bond_tlb_disable(struct bond_dev_private *internals)
1066 {
1067 	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
1068 }
1069 
1070 void
1071 bond_tlb_enable(struct bond_dev_private *internals)
1072 {
1073 	bond_ethdev_update_tlb_slave_cb(internals);
1074 }
1075 
1076 static uint16_t
1077 bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
1078 {
1079 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1080 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1081 
1082 	struct ether_hdr *eth_h;
1083 	uint16_t ether_type, offset;
1084 
1085 	struct client_data *client_info;
1086 
1087 	/*
1088 	 * We create transmit buffers for every slave and one additional buffer
1089 	 * for TLB traffic. In the worst case every packet will be sent on one port.
1090 	 */
1091 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
1092 	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };
1093 
1094 	/*
1095 	 * We create separate transmit buffers for update packets as they won't
1096 	 * be counted in num_tx_total.
1097 	 */
1098 	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
1099 	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };
1100 
1101 	struct rte_mbuf *upd_pkt;
1102 	size_t pkt_size;
1103 
1104 	uint16_t num_send, num_not_send = 0;
1105 	uint16_t num_tx_total = 0;
1106 	uint16_t slave_idx;
1107 
1108 	int i, j;
1109 
1110 	/* Search tx buffer for ARP packets and forward them to alb */
1111 	for (i = 0; i < nb_pkts; i++) {
1112 		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
1113 		ether_type = eth_h->ether_type;
1114 		offset = get_vlan_offset(eth_h, &ether_type);
1115 
1116 		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
1117 			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);
1118 
1119 			/* Change src mac in eth header */
1120 			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);
1121 
1122 			/* Add packet to slave tx buffer */
1123 			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
1124 			slave_bufs_pkts[slave_idx]++;
1125 		} else {
1126 			/* If packet is not ARP, send it with TLB policy */
1127 			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
1128 					bufs[i];
1129 			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
1130 		}
1131 	}
1132 
1133 	/* Update connected client ARP tables */
1134 	if (internals->mode6.ntt) {
1135 		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
1136 			client_info = &internals->mode6.client_table[i];
1137 
1138 			if (client_info->in_use) {
1139 				/* Allocate new packet to send ARP update on current slave */
1140 				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
1141 				if (upd_pkt == NULL) {
1142 					RTE_LOG(ERR, PMD, "Failed to allocate ARP packet from pool\n");
1143 					continue;
1144 				}
1145 				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
1146 						+ client_info->vlan_count * sizeof(struct vlan_hdr);
1147 				upd_pkt->data_len = pkt_size;
1148 				upd_pkt->pkt_len = pkt_size;
1149 
1150 				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
1151 						internals);
1152 
1153 				/* Add packet to update tx buffer */
1154 				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
1155 				update_bufs_pkts[slave_idx]++;
1156 			}
1157 		}
1158 		internals->mode6.ntt = 0;
1159 	}
1160 
1161 	/* Send ARP packets on proper slaves */
1162 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1163 		if (slave_bufs_pkts[i] > 0) {
1164 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
1165 					slave_bufs[i], slave_bufs_pkts[i]);
1166 			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
1167 				bufs[nb_pkts - 1 - num_not_send - j] =
1168 						slave_bufs[i][nb_pkts - 1 - j];
1169 			}
1170 
1171 			num_tx_total += num_send;
1172 			num_not_send += slave_bufs_pkts[i] - num_send;
1173 
1174 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1175 	/* Print TX stats including update packets */
1176 			for (j = 0; j < slave_bufs_pkts[i]; j++) {
1177 				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
1178 				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
1179 			}
1180 #endif
1181 		}
1182 	}
1183 
1184 	/* Send update packets on proper slaves */
1185 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1186 		if (update_bufs_pkts[i] > 0) {
1187 			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
1188 					update_bufs_pkts[i]);
1189 			for (j = num_send; j < update_bufs_pkts[i]; j++) {
1190 				rte_pktmbuf_free(update_bufs[i][j]);
1191 			}
1192 #if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
1193 			for (j = 0; j < update_bufs_pkts[i]; j++) {
1194 				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
1195 				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
1196 			}
1197 #endif
1198 		}
1199 	}
1200 
1201 	/* Send non-ARP packets using tlb policy */
1202 	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
1203 		num_send = bond_ethdev_tx_burst_tlb(queue,
1204 				slave_bufs[RTE_MAX_ETHPORTS],
1205 				slave_bufs_pkts[RTE_MAX_ETHPORTS]);
1206 
1207 		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
1208 			bufs[nb_pkts - 1 - num_not_send - j] =
1209 					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
1210 		}
1211 
1212 		num_tx_total += num_send;
1213 	}
1214 
1215 	return num_tx_total;
1216 }
1217 
1218 static uint16_t
1219 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
1220 		uint16_t nb_bufs)
1221 {
1222 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1223 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1224 
1225 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1226 	uint16_t slave_count;
1227 
1228 	/* Array to sort mbufs for transmission on each slave into */
1229 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1230 	/* Number of mbufs for transmission on each slave */
1231 	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1232 	/* Mapping array generated by hash function to map mbufs to slaves */
1233 	uint16_t bufs_slave_port_idxs[nb_bufs];
1234 
1235 	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1236 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1237 
1238 	uint16_t i, j;
1239 
1240 	if (unlikely(nb_bufs == 0))
1241 		return 0;
1242 
1243 	/* Copy slave list to protect against slave up/down changes during tx
1244 	 * bursting */
1245 	slave_count = internals->active_slave_count;
1246 	if (unlikely(slave_count < 1))
1247 		return 0;
1248 
1249 	memcpy(slave_port_ids, internals->active_slaves,
1250 			sizeof(slave_port_ids[0]) * slave_count);
1251 
1252 	/*
1253 	 * Populate each slave's mbuf array with the packets to be sent on it,
1254 	 * selecting the output slave using a hash based on the xmit policy
1255 	 */
1256 	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
1257 			bufs_slave_port_idxs);
1258 
1259 	for (i = 0; i < nb_bufs; i++) {
1260 		/* Populate slave mbuf arrays with mbufs for that slave. */
1261 		uint8_t slave_idx = bufs_slave_port_idxs[i];
1262 
1263 		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
1264 	}
1265 
1266 	/* Send packet burst on each slave device */
1267 	for (i = 0; i < slave_count; i++) {
1268 		if (slave_nb_bufs[i] == 0)
1269 			continue;
1270 
1271 		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1272 				bd_tx_q->queue_id, slave_bufs[i],
1273 				slave_nb_bufs[i]);
1274 
1275 		total_tx_count += slave_tx_count;
1276 
1277 		/* If tx burst fails move packets to end of bufs */
1278 		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1279 			slave_tx_fail_count[i] = slave_nb_bufs[i] -
1280 					slave_tx_count;
1281 			total_tx_fail_count += slave_tx_fail_count[i];
1282 
1283 			/*
1284 			 * Shift bufs to beginning of array to allow reordering
1285 			 * later
1286 			 */
1287 			for (j = 0; j < slave_tx_fail_count[i]; j++) {
1288 				slave_bufs[i][j] =
1289 					slave_bufs[i][slave_tx_count + j];
1290 			}
1291 		}
1292 	}
1293 
1294 	/*
1295 	 * If there are tx burst failures we move packets to end of bufs to
1296 	 * preserve expected PMD behaviour of all failed transmitted being
1297 	 * at the end of the input mbuf array
1298 	 */
1299 	if (unlikely(total_tx_fail_count > 0)) {
1300 		int bufs_idx = nb_bufs - total_tx_fail_count;
1301 
1302 		for (i = 0; i < slave_count; i++) {
1303 			if (slave_tx_fail_count[i] > 0) {
1304 				for (j = 0; j < slave_tx_fail_count[i]; j++)
1305 					bufs[bufs_idx++] = slave_bufs[i][j];
1306 			}
1307 		}
1308 	}
1309 
1310 	return total_tx_count;
1311 }
1312 
1313 static uint16_t
1314 bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
1315 		uint16_t nb_bufs)
1316 {
1317 	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
1318 	struct bond_dev_private *internals = bd_tx_q->dev_private;
1319 
1320 	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
1321 	uint16_t slave_count;
1322 
1323 	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
1324 	uint16_t dist_slave_count;
1325 
1326 	/* 2-D array to sort mbufs for transmission on each slave into */
1327 	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
1328 	/* Number of mbufs for transmission on each slave */
1329 	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
1330 	/* Mapping array generated by hash function to map mbufs to slaves */
1331 	uint16_t bufs_slave_port_idxs[nb_bufs];
1332 
1333 	uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
1334 	uint16_t total_tx_count = 0, total_tx_fail_count = 0;
1335 
1336 	uint16_t i, j;
1337 
1338 	if (unlikely(nb_bufs == 0))
1339 		return 0;
1340 
1341 	/* Copy slave list to protect against slave up/down changes during tx
1342 	 * bursting */
1343 	slave_count = internals->active_slave_count;
1344 	if (unlikely(slave_count < 1))
1345 		return 0;
1346 
1347 	memcpy(slave_port_ids, internals->active_slaves,
1348 			sizeof(slave_port_ids[0]) * slave_count);
1349 
1350 	dist_slave_count = 0;
1351 	for (i = 0; i < slave_count; i++) {
1352 		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1353 
1354 		if (ACTOR_STATE(port, DISTRIBUTING))
1355 			dist_slave_port_ids[dist_slave_count++] =
1356 					slave_port_ids[i];
1357 	}
1358 
1359 	if (likely(dist_slave_count > 1)) {
1360 
1361 		/*
1362 		 * Populate each slave's mbuf array with the packets to be sent
1363 		 * on it, selecting the output slave using a hash based on the xmit policy
1364 		 */
1365 		internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
1366 				bufs_slave_port_idxs);
1367 
1368 		for (i = 0; i < nb_bufs; i++) {
1369 			/*
1370 			 * Populate slave mbuf arrays with mbufs for that
1371 			 * slave
1372 			 */
1373 			uint8_t slave_idx = bufs_slave_port_idxs[i];
1374 
1375 			slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
1376 					bufs[i];
1377 		}
1378 
1379 
1380 		/* Send packet burst on each slave device */
1381 		for (i = 0; i < dist_slave_count; i++) {
1382 			if (slave_nb_bufs[i] == 0)
1383 				continue;
1384 
1385 			slave_tx_count = rte_eth_tx_burst(
1386 					dist_slave_port_ids[i],
1387 					bd_tx_q->queue_id, slave_bufs[i],
1388 					slave_nb_bufs[i]);
1389 
1390 			total_tx_count += slave_tx_count;
1391 
1392 			/* If tx burst fails move packets to end of bufs */
1393 			if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
1394 				slave_tx_fail_count[i] = slave_nb_bufs[i] -
1395 						slave_tx_count;
1396 				total_tx_fail_count += slave_tx_fail_count[i];
1397 
1398 				/*
1399 				 * Shift bufs to beginning of array to allow
1400 				 * reordering later
1401 				 */
1402 				for (j = 0; j < slave_tx_fail_count[i]; j++)
1403 					slave_bufs[i][j] =
1404 						slave_bufs[i]
1405 							[slave_tx_count
1406 							+ j];
1407 			}
1408 		}
1409 
1410 		/*
1411 		 * If there are tx burst failures we move packets to end of
1412 		 * bufs to preserve expected PMD behaviour of all failed
1413 		 * transmitted being at the end of the input mbuf array
1414 		 */
1415 		if (unlikely(total_tx_fail_count > 0)) {
1416 			int bufs_idx = nb_bufs - total_tx_fail_count;
1417 
1418 			for (i = 0; i < slave_count; i++) {
1419 				if (slave_tx_fail_count[i] > 0) {
1420 					for (j = 0;
1421 						j < slave_tx_fail_count[i];
1422 						j++) {
1423 						bufs[bufs_idx++] =
1424 							slave_bufs[i][j];
1425 					}
1426 				}
1427 			}
1428 		}
1429 	}
1430 
1431 	/* Check for LACP control packets and send if available */
1432 	for (i = 0; i < slave_count; i++) {
1433 		struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
1434 		struct rte_mbuf *ctrl_pkt = NULL;
1435 
1436 		if (likely(rte_ring_empty(port->tx_ring)))
1437 			continue;
1438 
1439 		if (rte_ring_dequeue(port->tx_ring,
1440 				     (void **)&ctrl_pkt) != -ENOENT) {
1441 			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
1442 					bd_tx_q->queue_id, &ctrl_pkt, 1);
1443 			/*
1444 			 * re-enqueue LAG control plane packets to buffering
1445 			 * ring if transmission fails so the packet isn't lost.
1446 			 */
1447 			if (slave_tx_count != 1)
1448 				rte_ring_enqueue(port->tx_ring,	ctrl_pkt);
1449 		}
1450 	}
1451 
1452 	return total_tx_count;
1453 }
1454 
1455 static uint16_t
1456 bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
1457 		uint16_t nb_pkts)
1458 {
1459 	struct bond_dev_private *internals;
1460 	struct bond_tx_queue *bd_tx_q;
1461 
1462 	uint8_t tx_failed_flag = 0, num_of_slaves;
1463 	uint16_t slaves[RTE_MAX_ETHPORTS];
1464 
1465 	uint16_t max_nb_of_tx_pkts = 0;
1466 
1467 	int slave_tx_total[RTE_MAX_ETHPORTS];
1468 	int i, most_successful_tx_slave = -1;
1469 
1470 	bd_tx_q = (struct bond_tx_queue *)queue;
1471 	internals = bd_tx_q->dev_private;
1472 
1473 	/* Copy slave list to protect against slave up/down changes during tx
1474 	 * bursting */
1475 	num_of_slaves = internals->active_slave_count;
1476 	memcpy(slaves, internals->active_slaves,
1477 			sizeof(internals->active_slaves[0]) * num_of_slaves);
1478 
1479 	if (num_of_slaves < 1)
1480 		return 0;
1481 
1482 	/* Increment reference count on mbufs */
1483 	for (i = 0; i < nb_pkts; i++)
1484 		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);
1485 
1486 	/* Transmit burst on each active slave */
1487 	for (i = 0; i < num_of_slaves; i++) {
1488 		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
1489 					bufs, nb_pkts);
1490 
1491 		if (unlikely(slave_tx_total[i] < nb_pkts))
1492 			tx_failed_flag = 1;
1493 
1494 		/* record the value and slave index for the slave which transmits the
1495 		 * maximum number of packets */
1496 		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
1497 			max_nb_of_tx_pkts = slave_tx_total[i];
1498 			most_successful_tx_slave = i;
1499 		}
1500 	}
1501 
1502 	/* if slaves fail to transmit packets from burst, the calling application
1503 	 * is not expected to know about multiple references to packets so we must
1504 	 * handle failures of all packets except those of the most successful slave
1505 	 */
1506 	if (unlikely(tx_failed_flag))
1507 		for (i = 0; i < num_of_slaves; i++)
1508 			if (i != most_successful_tx_slave)
1509 				while (slave_tx_total[i] < nb_pkts)
1510 					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);
1511 
1512 	return max_nb_of_tx_pkts;
1513 }
1514 
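/*
 * Reference counting note (illustrative): with N active slaves each mbuf's
 * refcnt is bumped by N - 1 above, so every slave's tx path can release its
 * own reference independently.  For example with 3 slaves a packet starts at
 * refcnt 1, is raised to 3, and each successful transmission (or the explicit
 * rte_pktmbuf_free() in the failure path) drops one reference; the caller
 * retains ownership only of the packets beyond the returned count of the
 * most successful slave.
 */
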
1515 void
1516 link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
1517 {
1518 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1519 
1520 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1521 		/**
1522 		 * If in mode 4 then save the link properties of the first
1523 		 * slave, all subsequent slaves must match these properties
1524 		 */
1525 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1526 
1527 		bond_link->link_autoneg = slave_link->link_autoneg;
1528 		bond_link->link_duplex = slave_link->link_duplex;
1529 		bond_link->link_speed = slave_link->link_speed;
1530 	} else {
1531 		/**
1532 		 * In any other mode the link properties are set to default
1533 		 * values of AUTONEG/DUPLEX
1534 		 */
1535 		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
1536 		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
1537 	}
1538 }
1539 
1540 int
1541 link_properties_valid(struct rte_eth_dev *ethdev,
1542 		struct rte_eth_link *slave_link)
1543 {
1544 	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;
1545 
1546 	if (bond_ctx->mode == BONDING_MODE_8023AD) {
1547 		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;
1548 
1549 		if (bond_link->link_duplex != slave_link->link_duplex ||
1550 			bond_link->link_autoneg != slave_link->link_autoneg ||
1551 			bond_link->link_speed != slave_link->link_speed)
1552 			return -1;
1553 	}
1554 
1555 	return 0;
1556 }
1557 
1558 int
1559 mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
1560 {
1561 	struct ether_addr *mac_addr;
1562 
1563 	if (eth_dev == NULL) {
1564 		RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__);
1565 		return -1;
1566 	}
1567 
1568 	if (dst_mac_addr == NULL) {
1569 		RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__);
1570 		return -1;
1571 	}
1572 
1573 	mac_addr = eth_dev->data->mac_addrs;
1574 
1575 	ether_addr_copy(mac_addr, dst_mac_addr);
1576 	return 0;
1577 }
1578 
1579 int
1580 mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
1581 {
1582 	struct ether_addr *mac_addr;
1583 
1584 	if (eth_dev == NULL) {
1585 		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
1586 		return -1;
1587 	}
1588 
1589 	if (new_mac_addr == NULL) {
1590 		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
1591 		return -1;
1592 	}
1593 
1594 	mac_addr = eth_dev->data->mac_addrs;
1595 
1596 	/* If new MAC is different to current MAC then update */
1597 	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
1598 		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));
1599 
1600 	return 0;
1601 }
1602 
1603 int
1604 mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
1605 {
1606 	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
1607 	int i;
1608 
1609 	/* Update slave devices MAC addresses */
1610 	if (internals->slave_count < 1)
1611 		return -1;
1612 
1613 	switch (internals->mode) {
1614 	case BONDING_MODE_ROUND_ROBIN:
1615 	case BONDING_MODE_BALANCE:
1616 	case BONDING_MODE_BROADCAST:
1617 		for (i = 0; i < internals->slave_count; i++) {
1618 			if (rte_eth_dev_default_mac_addr_set(
1619 					internals->slaves[i].port_id,
1620 					bonded_eth_dev->data->mac_addrs)) {
1621 				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1622 						internals->slaves[i].port_id);
1623 				return -1;
1624 			}
1625 		}
1626 		break;
1627 	case BONDING_MODE_8023AD:
1628 		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
1629 		break;
1630 	case BONDING_MODE_ACTIVE_BACKUP:
1631 	case BONDING_MODE_TLB:
1632 	case BONDING_MODE_ALB:
1633 	default:
1634 		for (i = 0; i < internals->slave_count; i++) {
1635 			if (internals->slaves[i].port_id ==
1636 					internals->current_primary_port) {
1637 				if (rte_eth_dev_default_mac_addr_set(
1638 						internals->primary_port,
1639 						bonded_eth_dev->data->mac_addrs)) {
1640 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1641 							internals->current_primary_port);
1642 					return -1;
1643 				}
1644 			} else {
1645 				if (rte_eth_dev_default_mac_addr_set(
1646 						internals->slaves[i].port_id,
1647 						&internals->slaves[i].persisted_mac_addr)) {
1648 					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
1649 							internals->slaves[i].port_id);
1650 					return -1;
1651 				}
1652 			}
1653 		}
1654 	}
1655 
1656 	return 0;
1657 }
1658 
1659 int
1660 bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1661 {
1662 	struct bond_dev_private *internals;
1663 
1664 	internals = eth_dev->data->dev_private;
1665 
1666 	switch (mode) {
1667 	case BONDING_MODE_ROUND_ROBIN:
1668 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1669 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1670 		break;
1671 	case BONDING_MODE_ACTIVE_BACKUP:
1672 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1673 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1674 		break;
1675 	case BONDING_MODE_BALANCE:
1676 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1677 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1678 		break;
1679 	case BONDING_MODE_BROADCAST:
1680 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1681 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1682 		break;
1683 	case BONDING_MODE_8023AD:
1684 		if (bond_mode_8023ad_enable(eth_dev) != 0)
1685 			return -1;
1686 
1687 		if (internals->mode4.dedicated_queues.enabled == 0) {
1688 			eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1689 			eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1690 			RTE_LOG(WARNING, PMD,
1691 				"Using mode 4, it is necessary to do TX burst "
1692 				"and RX burst at least every 100ms.\n");
1693 		} else {
1694 			/* Use flow director's optimization */
1695 			eth_dev->rx_pkt_burst =
1696 					bond_ethdev_rx_burst_8023ad_fast_queue;
1697 			eth_dev->tx_pkt_burst =
1698 					bond_ethdev_tx_burst_8023ad_fast_queue;
1699 		}
1700 		break;
1701 	case BONDING_MODE_TLB:
1702 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1703 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1704 		break;
1705 	case BONDING_MODE_ALB:
1706 		if (bond_mode_alb_enable(eth_dev) != 0)
1707 			return -1;
1708 
1709 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1710 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1711 		break;
1712 	default:
1713 		return -1;
1714 	}
1715 
1716 	internals->mode = mode;
1717 
1718 	return 0;
1719 }
1720 
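/*
 * Usage sketch (illustrative; slave0_port_id and slave1_port_id are
 * hypothetical): the burst handlers selected above take effect when an
 * application creates a bonded device through the public API from
 * rte_eth_bond.h, e.g.:
 *
 *	int bond_port = rte_eth_bond_create("net_bonding0",
 *			BONDING_MODE_BALANCE, rte_socket_id());
 *	rte_eth_bond_slave_add(bond_port, slave0_port_id);
 *	rte_eth_bond_slave_add(bond_port, slave1_port_id);
 *	rte_eth_bond_xmit_policy_set(bond_port, BALANCE_XMIT_POLICY_LAYER34);
 *
 * or, equivalently, from EAL devargs such as
 * --vdev 'net_bonding0,mode=2,slave=0000:02:00.0,slave=0000:02:00.1'.
 */
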
1721 
1722 static int
1723 slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1724 		struct rte_eth_dev *slave_eth_dev)
1725 {
1726 	int errval = 0;
1727 	struct bond_dev_private *internals = (struct bond_dev_private *)
1728 		bonded_eth_dev->data->dev_private;
1729 	struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
1730 
1731 	if (port->slow_pool == NULL) {
1732 		char mem_name[256];
1733 		int slave_id = slave_eth_dev->data->port_id;
1734 
1735 		snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1736 				slave_id);
1737 		port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1738 			250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1739 			slave_eth_dev->data->numa_node);
1740 
1741 		/* Any memory allocation failure in initialization is critical because
1742 		 * resources can't be freed, so reinitialization is impossible. */
1743 		if (port->slow_pool == NULL) {
1744 			rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1745 				slave_id, mem_name, rte_strerror(rte_errno));
1746 		}
1747 	}
1748 
1749 	if (internals->mode4.dedicated_queues.enabled == 1) {
1750 		/* Configure slow Rx queue */
1751 
1752 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1753 				internals->mode4.dedicated_queues.rx_qid, 128,
1754 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1755 				NULL, port->slow_pool);
1756 		if (errval != 0) {
1757 			RTE_BOND_LOG(ERR,
1758 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1759 					slave_eth_dev->data->port_id,
1760 					internals->mode4.dedicated_queues.rx_qid,
1761 					errval);
1762 			return errval;
1763 		}
1764 
1765 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1766 				internals->mode4.dedicated_queues.tx_qid, 512,
1767 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1768 				NULL);
1769 		if (errval != 0) {
1770 			RTE_BOND_LOG(ERR,
1771 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1772 				slave_eth_dev->data->port_id,
1773 				internals->mode4.dedicated_queues.tx_qid,
1774 				errval);
1775 			return errval;
1776 		}
1777 	}
1778 	return 0;
1779 }
1780 
1781 int
1782 slave_configure(struct rte_eth_dev *bonded_eth_dev,
1783 		struct rte_eth_dev *slave_eth_dev)
1784 {
1785 	struct bond_rx_queue *bd_rx_q;
1786 	struct bond_tx_queue *bd_tx_q;
1787 	uint16_t nb_rx_queues;
1788 	uint16_t nb_tx_queues;
1789 
1790 	int errval;
1791 	uint16_t q_id;
1792 	struct rte_flow_error flow_error;
1793 
1794 	struct bond_dev_private *internals = (struct bond_dev_private *)
1795 		bonded_eth_dev->data->dev_private;
1796 
1797 	/* Stop slave */
1798 	rte_eth_dev_stop(slave_eth_dev->data->port_id);
1799 
1800 	/* Enable interrupts on slave device if supported */
1801 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1802 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1803 
1804 	/* If RSS is enabled for bonding, try to enable it for slaves  */
1805 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1806 		if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
1807 				!= 0) {
1808 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1809 					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
1810 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1811 					bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
1812 		} else {
1813 			slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1814 		}
1815 
1816 		slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1817 				bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1818 		slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1819 				bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1820 	}
1821 
1822 	if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1823 			DEV_RX_OFFLOAD_VLAN_FILTER)
1824 		slave_eth_dev->data->dev_conf.rxmode.offloads |=
1825 				DEV_RX_OFFLOAD_VLAN_FILTER;
1826 	else
1827 		slave_eth_dev->data->dev_conf.rxmode.offloads &=
1828 				~DEV_RX_OFFLOAD_VLAN_FILTER;
1829 
1830 	nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1831 	nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1832 
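	/* With mode-4 dedicated queues enabled, each slave gets one extra Rx
	 * and one extra Tx queue beyond the data-path queues; these carry only
	 * LACP control traffic (see slave_configure_slow_queue()). */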
1833 	if (internals->mode == BONDING_MODE_8023AD) {
1834 		if (internals->mode4.dedicated_queues.enabled == 1) {
1835 			nb_rx_queues++;
1836 			nb_tx_queues++;
1837 		}
1838 	}
1839 
1840 	errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1841 				     bonded_eth_dev->data->mtu);
1842 	if (errval != 0 && errval != -ENOTSUP) {
1843 		RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1844 				slave_eth_dev->data->port_id, errval);
1845 		return errval;
1846 	}
1847 
1848 	/* Configure device */
1849 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1850 			nb_rx_queues, nb_tx_queues,
1851 			&(slave_eth_dev->data->dev_conf));
1852 	if (errval != 0) {
1853 		RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1854 				slave_eth_dev->data->port_id, errval);
1855 		return errval;
1856 	}
1857 
1858 	/* Setup Rx Queues */
1859 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1860 		bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1861 
1862 		errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1863 				bd_rx_q->nb_rx_desc,
1864 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1865 				&(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1866 		if (errval != 0) {
1867 			RTE_BOND_LOG(ERR,
1868 					"rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1869 					slave_eth_dev->data->port_id, q_id, errval);
1870 			return errval;
1871 		}
1872 	}
1873 
1874 	/* Setup Tx Queues */
1875 	for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1876 		bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1877 
1878 		errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1879 				bd_tx_q->nb_tx_desc,
1880 				rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1881 				&bd_tx_q->tx_conf);
1882 		if (errval != 0) {
1883 			RTE_BOND_LOG(ERR,
1884 				"rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1885 				slave_eth_dev->data->port_id, q_id, errval);
1886 			return errval;
1887 		}
1888 	}
1889 
1890 	if (internals->mode == BONDING_MODE_8023AD &&
1891 			internals->mode4.dedicated_queues.enabled == 1) {
1892 		errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1893 		if (errval != 0)
1894 			return errval;
1895 
1896 		errval = bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1897 				slave_eth_dev->data->port_id);
1898 		if (errval != 0) {
1899 			RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_verify: port=%d, err (%d)",
1900 				slave_eth_dev->data->port_id, errval);
1901 			return errval;
1902 		}
1903 
1904 		if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1905 			rte_flow_destroy(slave_eth_dev->data->port_id,
1906 					internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1907 					&flow_error);
1908 
1909 		bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1910 				slave_eth_dev->data->port_id);
1911 	}
1912 
1913 	/* Start device */
1914 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1915 	if (errval != 0) {
1916 		RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1917 				slave_eth_dev->data->port_id, errval);
1918 		return -1;
1919 	}
1920 
1921 	/* If RSS is enabled for bonding, synchronize RETA */
1922 	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1923 		int i;
1924 		struct bond_dev_private *internals;
1925 
1926 		internals = bonded_eth_dev->data->dev_private;
1927 
1928 		for (i = 0; i < internals->slave_count; i++) {
1929 			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1930 				errval = rte_eth_dev_rss_reta_update(
1931 						slave_eth_dev->data->port_id,
1932 						&internals->reta_conf[0],
1933 						internals->slaves[i].reta_size);
1934 				if (errval != 0) {
1935 					RTE_LOG(WARNING, PMD,
1936 							"rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1937 							" RSS Configuration for bonding may be inconsistent.\n",
1938 							slave_eth_dev->data->port_id, errval);
1939 				}
1940 				break;
1941 			}
1942 		}
1943 	}
1944 
1945 	/* If lsc interrupt is set, check initial slave's link status */
1946 	if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1947 		slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1948 		bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1949 			RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1950 			NULL);
1951 	}
1952 
1953 	return 0;
1954 }
1955 
1956 void
1957 slave_remove(struct bond_dev_private *internals,
1958 		struct rte_eth_dev *slave_eth_dev)
1959 {
1960 	uint8_t i;
1961 
1962 	for (i = 0; i < internals->slave_count; i++)
1963 		if (internals->slaves[i].port_id ==
1964 				slave_eth_dev->data->port_id)
1965 			break;
1966 
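	/* If the removed slave was not the last entry, compact the slave table
	 * and the per-flow slave handles over the freed slot. */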
1967 	if (i < (internals->slave_count - 1)) {
1968 		struct rte_flow *flow;
1969 
1970 		memmove(&internals->slaves[i], &internals->slaves[i + 1],
1971 				sizeof(internals->slaves[0]) *
1972 				(internals->slave_count - i - 1));
1973 		TAILQ_FOREACH(flow, &internals->flow_list, next) {
1974 			memmove(&flow->flows[i], &flow->flows[i + 1],
1975 				sizeof(flow->flows[0]) *
1976 				(internals->slave_count - i - 1));
1977 			flow->flows[internals->slave_count - 1] = NULL;
1978 		}
1979 	}
1980 
1981 	internals->slave_count--;
1982 
1983 	/* force reconfiguration of slave interfaces */
1984 	_rte_eth_dev_reset(slave_eth_dev);
1985 }
1986 
1987 static void
1988 bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1989 
1990 void
1991 slave_add(struct bond_dev_private *internals,
1992 		struct rte_eth_dev *slave_eth_dev)
1993 {
1994 	struct bond_slave_details *slave_details =
1995 			&internals->slaves[internals->slave_count];
1996 
1997 	slave_details->port_id = slave_eth_dev->data->port_id;
1998 	slave_details->last_link_status = 0;
1999 
2000 	/* Mark slave devices that don't support interrupts so we can
2001 	 * compensate when we start the bond
2002 	 */
2003 	if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2004 		slave_details->link_status_poll_enabled = 1;
2005 	}
2006 
2007 	slave_details->link_status_wait_to_complete = 0;
2008 	/* Save the slave's MAC address so it can be restored if the slave is
	 * later removed from the bonding device */
2009 	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2010 			sizeof(struct ether_addr));
2011 }
2012 
2013 void
2014 bond_ethdev_primary_set(struct bond_dev_private *internals,
2015 		uint16_t slave_port_id)
2016 {
2017 	int i;
2018 
2019 	if (internals->active_slave_count < 1)
2020 		internals->current_primary_port = slave_port_id;
2021 	else
2022 		/* Search bonded device slave ports for new proposed primary port */
2023 		for (i = 0; i < internals->active_slave_count; i++) {
2024 			if (internals->active_slaves[i] == slave_port_id)
2025 				internals->current_primary_port = slave_port_id;
2026 		}
2027 }
2028 
2029 static void
2030 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2031 
2032 static int
2033 bond_ethdev_start(struct rte_eth_dev *eth_dev)
2034 {
2035 	struct bond_dev_private *internals;
2036 	int i;
2037 
2038 	/* slave eth dev will be started by bonded device */
2039 	if (check_for_bonded_ethdev(eth_dev)) {
2040 		RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2041 				eth_dev->data->port_id);
2042 		return -1;
2043 	}
2044 
2045 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2046 	eth_dev->data->dev_started = 1;
2047 
2048 	internals = eth_dev->data->dev_private;
2049 
2050 	if (internals->slave_count == 0) {
2051 		RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2052 		goto out_err;
2053 	}
2054 
2055 	if (internals->user_defined_mac == 0) {
2056 		struct ether_addr *new_mac_addr = NULL;
2057 
2058 		for (i = 0; i < internals->slave_count; i++)
2059 			if (internals->slaves[i].port_id == internals->primary_port)
2060 				new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2061 
2062 		if (new_mac_addr == NULL)
2063 			goto out_err;
2064 
2065 		if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2066 			RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2067 					eth_dev->data->port_id);
2068 			goto out_err;
2069 		}
2070 	}
2071 
2072 	/* Update all slave devices' MACs */
2073 	if (mac_address_slaves_update(eth_dev) != 0)
2074 		goto out_err;
2075 
2076 	/* If bonded device is configured in promiscuous mode then re-apply config */
2077 	if (internals->promiscuous_en)
2078 		bond_ethdev_promiscuous_enable(eth_dev);
2079 
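
	/* With mode-4 dedicated queues, the control (slow) queues take the
	 * indices immediately after the data-path queues; slave_configure()
	 * sets up the matching extra queue pair on every slave. */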
2080 	if (internals->mode == BONDING_MODE_8023AD) {
2081 		if (internals->mode4.dedicated_queues.enabled == 1) {
2082 			internals->mode4.dedicated_queues.rx_qid =
2083 					eth_dev->data->nb_rx_queues;
2084 			internals->mode4.dedicated_queues.tx_qid =
2085 					eth_dev->data->nb_tx_queues;
2086 		}
2087 	}
2088 
2089 
2090 	/* Reconfigure each slave device if starting bonded device */
2091 	for (i = 0; i < internals->slave_count; i++) {
2092 		struct rte_eth_dev *slave_ethdev =
2093 				&(rte_eth_devices[internals->slaves[i].port_id]);
2094 		if (slave_configure(eth_dev, slave_ethdev) != 0) {
2095 			RTE_BOND_LOG(ERR,
2096 				"bonded port (%d) failed to reconfigure slave device (%d)",
2097 				eth_dev->data->port_id,
2098 				internals->slaves[i].port_id);
2099 			goto out_err;
2100 		}
2101 		/* We will need to poll for link status if any slave doesn't
2102 		 * support interrupts
2103 		 */
2104 		if (internals->slaves[i].link_status_poll_enabled)
2105 			internals->link_status_polling_enabled = 1;
2106 	}
2107 
2108 	/* start polling if needed */
2109 	if (internals->link_status_polling_enabled) {
2110 		rte_eal_alarm_set(
2111 			internals->link_status_polling_interval_ms * 1000,
2112 			bond_ethdev_slave_link_status_change_monitor,
2113 			(void *)&rte_eth_devices[internals->port_id]);
2114 	}
2115 
2116 	if (internals->user_defined_primary_port)
2117 		bond_ethdev_primary_set(internals, internals->primary_port);
2118 
2119 	if (internals->mode == BONDING_MODE_8023AD)
2120 		bond_mode_8023ad_start(eth_dev);
2121 
2122 	if (internals->mode == BONDING_MODE_TLB ||
2123 			internals->mode == BONDING_MODE_ALB)
2124 		bond_tlb_enable(internals);
2125 
2126 	return 0;
2127 
2128 out_err:
2129 	eth_dev->data->dev_started = 0;
2130 	return -1;
2131 }
2132 
2133 static void
2134 bond_ethdev_free_queues(struct rte_eth_dev *dev)
2135 {
2136 	uint8_t i;
2137 
2138 	if (dev->data->rx_queues != NULL) {
2139 		for (i = 0; i < dev->data->nb_rx_queues; i++) {
2140 			rte_free(dev->data->rx_queues[i]);
2141 			dev->data->rx_queues[i] = NULL;
2142 		}
2143 		dev->data->nb_rx_queues = 0;
2144 	}
2145 
2146 	if (dev->data->tx_queues != NULL) {
2147 		for (i = 0; i < dev->data->nb_tx_queues; i++) {
2148 			rte_free(dev->data->tx_queues[i]);
2149 			dev->data->tx_queues[i] = NULL;
2150 		}
2151 		dev->data->nb_tx_queues = 0;
2152 	}
2153 }
2154 
2155 void
2156 bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2157 {
2158 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2159 	uint8_t i;
2160 
2161 	if (internals->mode == BONDING_MODE_8023AD) {
2162 		struct port *port;
2163 		void *pkt = NULL;
2164 
2165 		bond_mode_8023ad_stop(eth_dev);
2166 
2167 		/* Discard all messages to/from mode 4 state machines */
2168 		for (i = 0; i < internals->active_slave_count; i++) {
2169 			port = &mode_8023ad_ports[internals->active_slaves[i]];
2170 
2171 			RTE_ASSERT(port->rx_ring != NULL);
2172 			while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2173 				rte_pktmbuf_free(pkt);
2174 
2175 			RTE_ASSERT(port->tx_ring != NULL);
2176 			while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2177 				rte_pktmbuf_free(pkt);
2178 		}
2179 	}
2180 
2181 	if (internals->mode == BONDING_MODE_TLB ||
2182 			internals->mode == BONDING_MODE_ALB) {
2183 		bond_tlb_disable(internals);
2184 		for (i = 0; i < internals->active_slave_count; i++)
2185 			tlb_last_obytets[internals->active_slaves[i]] = 0;
2186 	}
2187 
2188 	internals->active_slave_count = 0;
2189 	internals->link_status_polling_enabled = 0;
2190 	for (i = 0; i < internals->slave_count; i++)
2191 		internals->slaves[i].last_link_status = 0;
2192 
2193 	eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2194 	eth_dev->data->dev_started = 0;
2195 }
2196 
2197 void
2198 bond_ethdev_close(struct rte_eth_dev *dev)
2199 {
2200 	struct bond_dev_private *internals = dev->data->dev_private;
2201 	uint8_t bond_port_id = internals->port_id;
2202 	int skipped = 0;
2203 	struct rte_flow_error ferror;
2204 
2205 	RTE_LOG(INFO, EAL, "Closing bonded device %s\n", dev->device->name);
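	/* 'skipped' counts slaves that could not be removed, so advancing past
	 * them guarantees the loop terminates even on removal failures. */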
2206 	while (internals->slave_count != skipped) {
2207 		uint16_t port_id = internals->slaves[skipped].port_id;
2208 
2209 		rte_eth_dev_stop(port_id);
2210 
2211 		if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2212 			RTE_LOG(ERR, EAL,
2213 				"Failed to remove port %d from bonded device "
2214 				"%s\n", port_id, dev->device->name);
2215 			skipped++;
2216 		}
2217 	}
2218 	bond_flow_ops.flush(dev, &ferror);
2219 	bond_ethdev_free_queues(dev);
2220 	rte_bitmap_reset(internals->vlan_filter_bmp);
2221 }
2222 
2223 /* forward declaration */
2224 static int bond_ethdev_configure(struct rte_eth_dev *dev);
2225 
2226 static void
2227 bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2228 {
2229 	struct bond_dev_private *internals = dev->data->dev_private;
2230 
2231 	uint16_t max_nb_rx_queues = UINT16_MAX;
2232 	uint16_t max_nb_tx_queues = UINT16_MAX;
2233 
2234 	dev_info->max_mac_addrs = 1;
2235 
2236 	dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2237 			internals->candidate_max_rx_pktlen :
2238 			ETHER_MAX_JUMBO_FRAME_LEN;
2239 
2240 	/* Max number of tx/rx queues that the bonded device can support is the
2241 	 * minimum value across the bonded slaves, as all slaves must be capable
2242 	 * of supporting the same number of tx/rx queues.
2243 	 */
2244 	if (internals->slave_count > 0) {
2245 		struct rte_eth_dev_info slave_info;
2246 		uint8_t idx;
2247 
2248 		for (idx = 0; idx < internals->slave_count; idx++) {
2249 			rte_eth_dev_info_get(internals->slaves[idx].port_id,
2250 					&slave_info);
2251 
2252 			if (slave_info.max_rx_queues < max_nb_rx_queues)
2253 				max_nb_rx_queues = slave_info.max_rx_queues;
2254 
2255 			if (slave_info.max_tx_queues < max_nb_tx_queues)
2256 				max_nb_tx_queues = slave_info.max_tx_queues;
2257 		}
2258 	}
2259 
2260 	dev_info->max_rx_queues = max_nb_rx_queues;
2261 	dev_info->max_tx_queues = max_nb_tx_queues;
2262 
2263 	/**
2264 	 * If dedicated hw queues are enabled for the link bonding device in LACP mode
2265 	 * then we need to reduce the maximum number of data path queues by 1.
2266 	 */
2267 	if (internals->mode == BONDING_MODE_8023AD &&
2268 		internals->mode4.dedicated_queues.enabled == 1) {
2269 		dev_info->max_rx_queues--;
2270 		dev_info->max_tx_queues--;
2271 	}
2272 
2273 	dev_info->min_rx_bufsize = 0;
2274 
2275 	dev_info->rx_offload_capa = internals->rx_offload_capa;
2276 	dev_info->tx_offload_capa = internals->tx_offload_capa;
2277 	dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2278 	dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2279 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2280 
2281 	dev_info->reta_size = internals->reta_size;
2282 }
2283 
2284 static int
2285 bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2286 {
2287 	int res;
2288 	uint16_t i;
2289 	struct bond_dev_private *internals = dev->data->dev_private;
2290 
2291 	/* don't do this while a slave is being added */
2292 	rte_spinlock_lock(&internals->lock);
2293 
2294 	if (on)
2295 		rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2296 	else
2297 		rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2298 
2299 	for (i = 0; i < internals->slave_count; i++) {
2300 		uint16_t port_id = internals->slaves[i].port_id;
2301 
2302 		res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2303 		if (res == -ENOTSUP)
2304 			RTE_LOG(WARNING, PMD,
2305 				"Setting VLAN filter on slave port %u not supported.\n",
2306 				port_id);
2307 	}
2308 
2309 	rte_spinlock_unlock(&internals->lock);
2310 	return 0;
2311 }
2312 
2313 static int
2314 bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2315 		uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2316 		const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2317 {
2318 	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2319 			rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2320 					0, dev->data->numa_node);
2321 	if (bd_rx_q == NULL)
2322 		return -1;
2323 
2324 	bd_rx_q->queue_id = rx_queue_id;
2325 	bd_rx_q->dev_private = dev->data->dev_private;
2326 
2327 	bd_rx_q->nb_rx_desc = nb_rx_desc;
2328 
2329 	memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2330 	bd_rx_q->mb_pool = mb_pool;
2331 
2332 	dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2333 
2334 	return 0;
2335 }
2336 
2337 static int
2338 bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2339 		uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2340 		const struct rte_eth_txconf *tx_conf)
2341 {
2342 	struct bond_tx_queue *bd_tx_q  = (struct bond_tx_queue *)
2343 			rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2344 					0, dev->data->numa_node);
2345 
2346 	if (bd_tx_q == NULL)
2347 		return -1;
2348 
2349 	bd_tx_q->queue_id = tx_queue_id;
2350 	bd_tx_q->dev_private = dev->data->dev_private;
2351 
2352 	bd_tx_q->nb_tx_desc = nb_tx_desc;
2353 	memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2354 
2355 	dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2356 
2357 	return 0;
2358 }
2359 
2360 static void
2361 bond_ethdev_rx_queue_release(void *queue)
2362 {
2363 	if (queue == NULL)
2364 		return;
2365 
2366 	rte_free(queue);
2367 }
2368 
2369 static void
2370 bond_ethdev_tx_queue_release(void *queue)
2371 {
2372 	if (queue == NULL)
2373 		return;
2374 
2375 	rte_free(queue);
2376 }
2377 
2378 static void
2379 bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2380 {
2381 	struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2382 	struct bond_dev_private *internals;
2383 
2384 	/* Default value for polling slave found is true as we don't want to
2385 	 * disable the polling thread if we cannot get the lock */
2386 	int i, polling_slave_found = 1;
2387 
2388 	if (cb_arg == NULL)
2389 		return;
2390 
2391 	bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2392 	internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2393 
2394 	if (!bonded_ethdev->data->dev_started ||
2395 		!internals->link_status_polling_enabled)
2396 		return;
2397 
2398 	/* If device is currently being configured then don't check slaves' link
2399 	 * status, wait until next period */
2400 	if (rte_spinlock_trylock(&internals->lock)) {
2401 		if (internals->slave_count > 0)
2402 			polling_slave_found = 0;
2403 
2404 		for (i = 0; i < internals->slave_count; i++) {
2405 			if (!internals->slaves[i].link_status_poll_enabled)
2406 				continue;
2407 
2408 			slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2409 			polling_slave_found = 1;
2410 
2411 			/* Update slave link status */
2412 			(*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2413 					internals->slaves[i].link_status_wait_to_complete);
2414 
2415 			/* if link status has changed since last checked then call lsc
2416 			 * event callback */
2417 			if (slave_ethdev->data->dev_link.link_status !=
2418 					internals->slaves[i].last_link_status) {
2419 				internals->slaves[i].last_link_status =
2420 						slave_ethdev->data->dev_link.link_status;
2421 
2422 				bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2423 						RTE_ETH_EVENT_INTR_LSC,
2424 						&bonded_ethdev->data->port_id,
2425 						NULL);
2426 			}
2427 		}
2428 		rte_spinlock_unlock(&internals->lock);
2429 	}
2430 
2431 	if (polling_slave_found)
2432 		/* Set alarm to continue monitoring link status of slave ethdev's */
2433 		rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2434 				bond_ethdev_slave_link_status_change_monitor, cb_arg);
2435 }
2436 
2437 static int
2438 bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2439 {
2440 	void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2441 
2442 	struct bond_dev_private *bond_ctx;
2443 	struct rte_eth_link slave_link;
2444 
2445 	uint32_t idx;
2446 
2447 	bond_ctx = ethdev->data->dev_private;
2448 
2449 	ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2450 
2451 	if (ethdev->data->dev_started == 0 ||
2452 			bond_ctx->active_slave_count == 0) {
2453 		ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2454 		return 0;
2455 	}
2456 
2457 	ethdev->data->dev_link.link_status = ETH_LINK_UP;
2458 
2459 	if (wait_to_complete)
2460 		link_update = rte_eth_link_get;
2461 	else
2462 		link_update = rte_eth_link_get_nowait;
2463 
2464 	switch (bond_ctx->mode) {
2465 	case BONDING_MODE_BROADCAST:
2466 		/**
2467 		 * Setting link speed to UINT32_MAX to ensure we pick up the
2468 		 * value of the first active slave
2469 		 */
2470 		ethdev->data->dev_link.link_speed = UINT32_MAX;
2471 
2472 		/**
2473 		 * link speed is minimum value of all the slaves link speed as
2474 		 * packet loss will occur on this slave if transmission at rates
2475 		 * greater than this are attempted
2476 		 */
2477 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2478 			link_update(bond_ctx->active_slaves[idx], &slave_link);
2479 
2480 			if (slave_link.link_speed <
2481 					ethdev->data->dev_link.link_speed)
2482 				ethdev->data->dev_link.link_speed =
2483 						slave_link.link_speed;
2484 		}
2485 		break;
2486 	case BONDING_MODE_ACTIVE_BACKUP:
2487 		/* Current primary slave */
2488 		link_update(bond_ctx->current_primary_port, &slave_link);
2489 
2490 		ethdev->data->dev_link.link_speed = slave_link.link_speed;
2491 		break;
2492 	case BONDING_MODE_8023AD:
2493 		ethdev->data->dev_link.link_autoneg =
2494 				bond_ctx->mode4.slave_link.link_autoneg;
2495 		ethdev->data->dev_link.link_duplex =
2496 				bond_ctx->mode4.slave_link.link_duplex;
2497 		/* fall through to update link speed */
2498 	case BONDING_MODE_ROUND_ROBIN:
2499 	case BONDING_MODE_BALANCE:
2500 	case BONDING_MODE_TLB:
2501 	case BONDING_MODE_ALB:
2502 	default:
2503 		/**
2504 		 * In these modes the maximum theoretical link speed is the sum
2505 		 * of the link speeds of all the slaves
2506 		 */
2507 		ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2508 
2509 		for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2510 			link_update(bond_ctx->active_slaves[idx], &slave_link);
2511 
2512 			ethdev->data->dev_link.link_speed +=
2513 					slave_link.link_speed;
2514 		}
2515 	}
2516 
2517 
2518 	return 0;
2519 }
2520 
2521 
2522 static int
2523 bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2524 {
2525 	struct bond_dev_private *internals = dev->data->dev_private;
2526 	struct rte_eth_stats slave_stats;
2527 	int i, j;
2528 
2529 	for (i = 0; i < internals->slave_count; i++) {
2530 		rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2531 
2532 		stats->ipackets += slave_stats.ipackets;
2533 		stats->opackets += slave_stats.opackets;
2534 		stats->ibytes += slave_stats.ibytes;
2535 		stats->obytes += slave_stats.obytes;
2536 		stats->imissed += slave_stats.imissed;
2537 		stats->ierrors += slave_stats.ierrors;
2538 		stats->oerrors += slave_stats.oerrors;
2539 		stats->rx_nombuf += slave_stats.rx_nombuf;
2540 
2541 		for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2542 			stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2543 			stats->q_opackets[j] += slave_stats.q_opackets[j];
2544 			stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2545 			stats->q_obytes[j] += slave_stats.q_obytes[j];
2546 			stats->q_errors[j] += slave_stats.q_errors[j];
2547 		}
2548 
2549 	}
2550 
2551 	return 0;
2552 }
2553 
2554 static void
2555 bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2556 {
2557 	struct bond_dev_private *internals = dev->data->dev_private;
2558 	int i;
2559 
2560 	for (i = 0; i < internals->slave_count; i++)
2561 		rte_eth_stats_reset(internals->slaves[i].port_id);
2562 }
2563 
2564 static void
2565 bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2566 {
2567 	struct bond_dev_private *internals = eth_dev->data->dev_private;
2568 	int i;
2569 
2570 	internals->promiscuous_en = 1;
2571 
2572 	switch (internals->mode) {
2573 	/* Promiscuous mode is propagated to all slaves */
2574 	case BONDING_MODE_ROUND_ROBIN:
2575 	case BONDING_MODE_BALANCE:
2576 	case BONDING_MODE_BROADCAST:
2577 		for (i = 0; i < internals->slave_count; i++)
2578 			rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2579 		break;
2580 	/* In mode4 promiscuous mode is managed when slave is added/removed */
2581 	case BONDING_MODE_8023AD:
2582 		break;
2583 	/* Promiscuous mode is propagated only to primary slave */
2584 	case BONDING_MODE_ACTIVE_BACKUP:
2585 	case BONDING_MODE_TLB:
2586 	case BONDING_MODE_ALB:
2587 	default:
2588 		rte_eth_promiscuous_enable(internals->current_primary_port);
2589 	}
2590 }
2591 
2592 static void
2593 bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2594 {
2595 	struct bond_dev_private *internals = dev->data->dev_private;
2596 	int i;
2597 
2598 	internals->promiscuous_en = 0;
2599 
2600 	switch (internals->mode) {
2601 	/* Promiscuous mode is propagated to all slaves */
2602 	case BONDING_MODE_ROUND_ROBIN:
2603 	case BONDING_MODE_BALANCE:
2604 	case BONDING_MODE_BROADCAST:
2605 		for (i = 0; i < internals->slave_count; i++)
2606 			rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2607 		break;
2608 	/* In mode4 promiscuous mode is managed when slave is added/removed */
2609 	case BONDING_MODE_8023AD:
2610 		break;
2611 	/* Promiscuous mode is propagated only to primary slave */
2612 	case BONDING_MODE_ACTIVE_BACKUP:
2613 	case BONDING_MODE_TLB:
2614 	case BONDING_MODE_ALB:
2615 	default:
2616 		rte_eth_promiscuous_disable(internals->current_primary_port);
2617 	}
2618 }
2619 
2620 static void
2621 bond_ethdev_delayed_lsc_propagation(void *arg)
2622 {
2623 	if (arg == NULL)
2624 		return;
2625 
2626 	_rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2627 			RTE_ETH_EVENT_INTR_LSC, NULL);
2628 }
2629 
2630 int
2631 bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2632 		void *param, void *ret_param __rte_unused)
2633 {
2634 	struct rte_eth_dev *bonded_eth_dev;
2635 	struct bond_dev_private *internals;
2636 	struct rte_eth_link link;
2637 	int rc = -1;
2638 
2639 	int i, valid_slave = 0;
2640 	uint8_t active_pos;
2641 	uint8_t lsc_flag = 0;
2642 
2643 	if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2644 		return rc;
2645 
2646 	bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2647 
2648 	if (check_for_bonded_ethdev(bonded_eth_dev))
2649 		return rc;
2650 
2651 	internals = bonded_eth_dev->data->dev_private;
2652 
2653 	/* If the device isn't started don't handle interrupts */
2654 	if (!bonded_eth_dev->data->dev_started)
2655 		return rc;
2656 
2657 	/* verify that port_id is a valid slave of bonded port */
2658 	for (i = 0; i < internals->slave_count; i++) {
2659 		if (internals->slaves[i].port_id == port_id) {
2660 			valid_slave = 1;
2661 			break;
2662 		}
2663 	}
2664 
2665 	if (!valid_slave)
2666 		return rc;
2667 
2668 	/* Search for port in active port list */
2669 	active_pos = find_slave_by_id(internals->active_slaves,
2670 			internals->active_slave_count, port_id);
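	/* A position equal to active_slave_count means the port is not
	 * currently in the active slave list. */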
2671 
2672 	rte_eth_link_get_nowait(port_id, &link);
2673 	if (link.link_status) {
2674 		if (active_pos < internals->active_slave_count)
2675 			return rc;
2676 
2677 		/* if no active slave ports then set this port to be primary port */
2678 		if (internals->active_slave_count < 1) {
2679 			/* If first active slave, then change link status */
2680 			bonded_eth_dev->data->dev_link.link_status = ETH_LINK_UP;
2681 			internals->current_primary_port = port_id;
2682 			lsc_flag = 1;
2683 
2684 			mac_address_slaves_update(bonded_eth_dev);
2685 		}
2686 
2687 		activate_slave(bonded_eth_dev, port_id);
2688 
2689 		/* If user has defined the primary port then default to using it */
2690 		if (internals->user_defined_primary_port &&
2691 				internals->primary_port == port_id)
2692 			bond_ethdev_primary_set(internals, port_id);
2693 	} else {
2694 		if (active_pos == internals->active_slave_count)
2695 			return rc;
2696 
2697 		/* Remove from active slave list */
2698 		deactivate_slave(bonded_eth_dev, port_id);
2699 
2700 		if (internals->active_slave_count < 1)
2701 			lsc_flag = 1;
2702 
2703 		/* Update primary id, take first active slave from list or if none
2704 		 * available fall back to the configured primary port */
2705 		if (port_id == internals->current_primary_port) {
2706 			if (internals->active_slave_count > 0)
2707 				bond_ethdev_primary_set(internals,
2708 						internals->active_slaves[0]);
2709 			else
2710 				internals->current_primary_port = internals->primary_port;
2711 		}
2712 	}
2713 
2714 	/**
2715 	 * Update bonded device link properties after any change to active
2716 	 * slaves
2717 	 */
2718 	bond_ethdev_link_update(bonded_eth_dev, 0);
2719 
2720 	if (lsc_flag) {
2721 		/* Cancel any possible outstanding interrupts if delays are enabled */
2722 		if (internals->link_up_delay_ms > 0 ||
2723 			internals->link_down_delay_ms > 0)
2724 			rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2725 					bonded_eth_dev);
2726 
2727 		if (bonded_eth_dev->data->dev_link.link_status) {
2728 			if (internals->link_up_delay_ms > 0)
2729 				rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2730 						bond_ethdev_delayed_lsc_propagation,
2731 						(void *)bonded_eth_dev);
2732 			else
2733 				_rte_eth_dev_callback_process(bonded_eth_dev,
2734 						RTE_ETH_EVENT_INTR_LSC,
2735 						NULL);
2736 
2737 		} else {
2738 			if (internals->link_down_delay_ms > 0)
2739 				rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2740 						bond_ethdev_delayed_lsc_propagation,
2741 						(void *)bonded_eth_dev);
2742 			else
2743 				_rte_eth_dev_callback_process(bonded_eth_dev,
2744 						RTE_ETH_EVENT_INTR_LSC,
2745 						NULL);
2746 		}
2747 	}
2748 	return 0;
2749 }
2750 
2751 static int
2752 bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2753 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2754 {
2755 	unsigned i, j;
2756 	int result = 0;
2757 	int slave_reta_size;
2758 	unsigned reta_count;
2759 	struct bond_dev_private *internals = dev->data->dev_private;
2760 
2761 	if (reta_size != internals->reta_size)
2762 		return -EINVAL;
2763 
2764 	 /* Copy RETA table */
2765 	reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2766 
2767 	for (i = 0; i < reta_count; i++) {
2768 		internals->reta_conf[i].mask = reta_conf[i].mask;
2769 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2770 			if ((reta_conf[i].mask >> j) & 0x01)
2771 				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2772 	}
2773 
2774 	/* Fill rest of array */
2775 	for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2776 		memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2777 				sizeof(internals->reta_conf[0]) * reta_count);
2778 
2779 	/* Propagate RETA over slaves */
2780 	for (i = 0; i < internals->slave_count; i++) {
2781 		slave_reta_size = internals->slaves[i].reta_size;
2782 		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2783 				&internals->reta_conf[0], slave_reta_size);
2784 		if (result < 0)
2785 			return result;
2786 	}
2787 
2788 	return 0;
2789 }
2790 
2791 static int
2792 bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2793 		struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2794 {
2795 	int i, j;
2796 	struct bond_dev_private *internals = dev->data->dev_private;
2797 
2798 	if (reta_size != internals->reta_size)
2799 		return -EINVAL;
2800 
2801 	 /* Copy RETA table */
2802 	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2803 		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2804 			if ((reta_conf[i].mask >> j) & 0x01)
2805 				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2806 
2807 	return 0;
2808 }
2809 
2810 static int
2811 bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2812 		struct rte_eth_rss_conf *rss_conf)
2813 {
2814 	int i, result = 0;
2815 	struct bond_dev_private *internals = dev->data->dev_private;
2816 	struct rte_eth_rss_conf bond_rss_conf;
2817 
2818 	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2819 
2820 	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2821 
2822 	if (bond_rss_conf.rss_hf != 0)
2823 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2824 
2825 	if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2826 			sizeof(internals->rss_key)) {
2827 		if (bond_rss_conf.rss_key_len == 0)
2828 			bond_rss_conf.rss_key_len = 40;
2829 		internals->rss_key_len = bond_rss_conf.rss_key_len;
2830 		memcpy(internals->rss_key, bond_rss_conf.rss_key,
2831 				internals->rss_key_len);
2832 	}
2833 
2834 	for (i = 0; i < internals->slave_count; i++) {
2835 		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2836 				&bond_rss_conf);
2837 		if (result < 0)
2838 			return result;
2839 	}
2840 
2841 	return 0;
2842 }
2843 
2844 static int
2845 bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2846 		struct rte_eth_rss_conf *rss_conf)
2847 {
2848 	struct bond_dev_private *internals = dev->data->dev_private;
2849 
2850 	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2851 	rss_conf->rss_key_len = internals->rss_key_len;
2852 	if (rss_conf->rss_key)
2853 		memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2854 
2855 	return 0;
2856 }
2857 
2858 static int
2859 bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2860 {
2861 	struct rte_eth_dev *slave_eth_dev;
2862 	struct bond_dev_private *internals = dev->data->dev_private;
2863 	int ret, i;
2864 
2865 	rte_spinlock_lock(&internals->lock);
2866 
2867 	for (i = 0; i < internals->slave_count; i++) {
2868 		slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2869 		if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2870 			rte_spinlock_unlock(&internals->lock);
2871 			return -ENOTSUP;
2872 		}
2873 	}
2874 	for (i = 0; i < internals->slave_count; i++) {
2875 		ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2876 		if (ret < 0) {
2877 			rte_spinlock_unlock(&internals->lock);
2878 			return ret;
2879 		}
2880 	}
2881 
2882 	rte_spinlock_unlock(&internals->lock);
2883 	return 0;
2884 }
2885 
2886 static int
2887 bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2888 {
2889 	if (mac_address_set(dev, addr)) {
2890 		RTE_BOND_LOG(ERR, "Failed to update MAC address");
2891 		return -EINVAL;
2892 	}
2893 
2894 	return 0;
2895 }
2896 
2897 static int
2898 bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2899 		 enum rte_filter_type type, enum rte_filter_op op, void *arg)
2900 {
2901 	if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2902 		*(const void **)arg = &bond_flow_ops;
2903 		return 0;
2904 	}
2905 	return -ENOTSUP;
2906 }
2907 
2908 const struct eth_dev_ops default_dev_ops = {
2909 	.dev_start            = bond_ethdev_start,
2910 	.dev_stop             = bond_ethdev_stop,
2911 	.dev_close            = bond_ethdev_close,
2912 	.dev_configure        = bond_ethdev_configure,
2913 	.dev_infos_get        = bond_ethdev_info,
2914 	.vlan_filter_set      = bond_ethdev_vlan_filter_set,
2915 	.rx_queue_setup       = bond_ethdev_rx_queue_setup,
2916 	.tx_queue_setup       = bond_ethdev_tx_queue_setup,
2917 	.rx_queue_release     = bond_ethdev_rx_queue_release,
2918 	.tx_queue_release     = bond_ethdev_tx_queue_release,
2919 	.link_update          = bond_ethdev_link_update,
2920 	.stats_get            = bond_ethdev_stats_get,
2921 	.stats_reset          = bond_ethdev_stats_reset,
2922 	.promiscuous_enable   = bond_ethdev_promiscuous_enable,
2923 	.promiscuous_disable  = bond_ethdev_promiscuous_disable,
2924 	.reta_update          = bond_ethdev_rss_reta_update,
2925 	.reta_query           = bond_ethdev_rss_reta_query,
2926 	.rss_hash_update      = bond_ethdev_rss_hash_update,
2927 	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
2928 	.mtu_set              = bond_ethdev_mtu_set,
2929 	.mac_addr_set         = bond_ethdev_mac_address_set,
2930 	.filter_ctrl          = bond_filter_ctrl
2931 };
2932 
2933 static int
2934 bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
2935 {
2936 	const char *name = rte_vdev_device_name(dev);
2937 	uint8_t socket_id = dev->device.numa_node;
2938 	struct bond_dev_private *internals = NULL;
2939 	struct rte_eth_dev *eth_dev = NULL;
2940 	uint32_t vlan_filter_bmp_size;
2941 
2942 	/* now do all data allocation - for eth_dev structure, dummy pci driver
2943 	 * and internal (private) data
2944 	 */
2945 
2946 	/* reserve an ethdev entry */
2947 	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
2948 	if (eth_dev == NULL) {
2949 		RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
2950 		goto err;
2951 	}
2952 
2953 	internals = eth_dev->data->dev_private;
2954 	eth_dev->data->nb_rx_queues = (uint16_t)1;
2955 	eth_dev->data->nb_tx_queues = (uint16_t)1;
2956 
2957 	eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0,
2958 			socket_id);
2959 	if (eth_dev->data->mac_addrs == NULL) {
2960 		RTE_BOND_LOG(ERR, "Unable to malloc mac_addrs");
2961 		goto err;
2962 	}
2963 
2964 	eth_dev->dev_ops = &default_dev_ops;
2965 	eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
2966 
2967 	rte_spinlock_init(&internals->lock);
2968 
2969 	internals->port_id = eth_dev->data->port_id;
2970 	internals->mode = BONDING_MODE_INVALID;
2971 	internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
2972 	internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
2973 	internals->burst_xmit_hash = burst_xmit_l2_hash;
2974 	internals->user_defined_mac = 0;
2975 
2976 	internals->link_status_polling_enabled = 0;
2977 
2978 	internals->link_status_polling_interval_ms =
2979 		DEFAULT_POLLING_INTERVAL_10_MS;
2980 	internals->link_down_delay_ms = 0;
2981 	internals->link_up_delay_ms = 0;
2982 
2983 	internals->slave_count = 0;
2984 	internals->active_slave_count = 0;
2985 	internals->rx_offload_capa = 0;
2986 	internals->tx_offload_capa = 0;
2987 	internals->rx_queue_offload_capa = 0;
2988 	internals->tx_queue_offload_capa = 0;
2989 	internals->candidate_max_rx_pktlen = 0;
2990 	internals->max_rx_pktlen = 0;
2991 
2992 	/* Initially allow to choose any offload type */
2993 	internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
2994 
2995 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
2996 	memset(internals->slaves, 0, sizeof(internals->slaves));
2997 
2998 	TAILQ_INIT(&internals->flow_list);
2999 	internals->flow_isolated_valid = 0;
3000 
3001 	/* Set mode 4 default configuration */
3002 	bond_mode_8023ad_setup(eth_dev, NULL);
3003 	if (bond_ethdev_mode_set(eth_dev, mode)) {
3004 		RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d\n",
3005 				 eth_dev->data->port_id, mode);
3006 		goto err;
3007 	}
3008 
3009 	vlan_filter_bmp_size =
3010 		rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3011 	internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3012 						   RTE_CACHE_LINE_SIZE);
3013 	if (internals->vlan_filter_bmpmem == NULL) {
3014 		RTE_BOND_LOG(ERR,
3015 			     "Failed to allocate vlan bitmap for bonded device %u\n",
3016 			     eth_dev->data->port_id);
3017 		goto err;
3018 	}
3019 
3020 	internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3021 			internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3022 	if (internals->vlan_filter_bmp == NULL) {
3023 		RTE_BOND_LOG(ERR,
3024 			     "Failed to init vlan bitmap for bonded device %u\n",
3025 			     eth_dev->data->port_id);
3026 		rte_free(internals->vlan_filter_bmpmem);
3027 		goto err;
3028 	}
3029 
3030 	return eth_dev->data->port_id;
3031 
3032 err:
3033 	rte_free(internals);
3034 	if (eth_dev != NULL) {
3035 		rte_free(eth_dev->data->mac_addrs);
3036 		rte_eth_dev_release_port(eth_dev);
3037 	}
3038 	return -1;
3039 }
3040 
3041 static int
3042 bond_probe(struct rte_vdev_device *dev)
3043 {
3044 	const char *name;
3045 	struct bond_dev_private *internals;
3046 	struct rte_kvargs *kvlist;
3047 	uint8_t bonding_mode, socket_id;
3048 	int  arg_count, port_id;
3049 	uint8_t agg_mode;
3050 	struct rte_eth_dev *eth_dev;
3051 
3052 	if (!dev)
3053 		return -EINVAL;
3054 
3055 	name = rte_vdev_device_name(dev);
3056 	RTE_LOG(INFO, EAL, "Initializing pmd_bond for %s\n", name);
3057 
3058 	if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
3059 	    strlen(rte_vdev_device_args(dev)) == 0) {
3060 		eth_dev = rte_eth_dev_attach_secondary(name);
3061 		if (!eth_dev) {
3062 			RTE_LOG(ERR, PMD, "Failed to probe %s\n", name);
3063 			return -1;
3064 		}
3065 		/* TODO: request info from primary to set up Rx and Tx */
3066 		eth_dev->dev_ops = &default_dev_ops;
3067 		return 0;
3068 	}
3069 
3070 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3071 		pmd_bond_init_valid_arguments);
3072 	if (kvlist == NULL)
3073 		return -1;
3074 
3075 	/* Parse link bonding mode */
3076 	if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3077 		if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3078 				&bond_ethdev_parse_slave_mode_kvarg,
3079 				&bonding_mode) != 0) {
3080 			RTE_LOG(ERR, EAL, "Invalid mode for bonded device %s\n",
3081 					name);
3082 			goto parse_error;
3083 		}
3084 	} else {
3085 		RTE_LOG(ERR, EAL, "Mode must be specified only once for bonded "
3086 				"device %s\n", name);
3087 		goto parse_error;
3088 	}
3089 
3090 	/* Parse socket id to create bonding device on */
3091 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3092 	if (arg_count == 1) {
3093 		if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3094 				&bond_ethdev_parse_socket_id_kvarg, &socket_id)
3095 				!= 0) {
3096 			RTE_LOG(ERR, EAL, "Invalid socket Id specified for "
3097 					"bonded device %s\n", name);
3098 			goto parse_error;
3099 		}
3100 	} else if (arg_count > 1) {
3101 		RTE_LOG(ERR, EAL, "Socket Id can be specified only once for "
3102 				"bonded device %s\n", name);
3103 		goto parse_error;
3104 	} else {
3105 		socket_id = rte_socket_id();
3106 	}
3107 
3108 	dev->device.numa_node = socket_id;
3109 
3110 	/* Create link bonding eth device */
3111 	port_id = bond_alloc(dev, bonding_mode);
3112 	if (port_id < 0) {
3113 		RTE_LOG(ERR, EAL, "Failed to create bonded device %s in mode %u on "
3114 				"socket %u.\n",	name, bonding_mode, socket_id);
3115 		goto parse_error;
3116 	}
3117 	internals = rte_eth_devices[port_id].data->dev_private;
3118 	internals->kvlist = kvlist;
3119 
3120 
3121 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3122 		if (rte_kvargs_process(kvlist,
3123 				PMD_BOND_AGG_MODE_KVARG,
3124 				&bond_ethdev_parse_slave_agg_mode_kvarg,
3125 				&agg_mode) != 0) {
3126 			RTE_LOG(ERR, EAL,
3127 					"Failed to parse agg selection mode for bonded device %s\n",
3128 					name);
3129 			goto parse_error;
3130 		}
3131 
3132 		if (internals->mode == BONDING_MODE_8023AD)
3133 			rte_eth_bond_8023ad_agg_selection_set(port_id,
3134 					agg_mode);
3135 	} else {
3136 		rte_eth_bond_8023ad_agg_selection_set(port_id, AGG_STABLE);
3137 	}
3138 
3139 	RTE_LOG(INFO, EAL, "Create bonded device %s on port %d in mode %u on "
3140 			"socket %u.\n",	name, port_id, bonding_mode, socket_id);
3141 	return 0;
3142 
3143 parse_error:
3144 	rte_kvargs_free(kvlist);
3145 
3146 	return -1;
3147 }
3148 
3149 static int
3150 bond_remove(struct rte_vdev_device *dev)
3151 {
3152 	struct rte_eth_dev *eth_dev;
3153 	struct bond_dev_private *internals;
3154 	const char *name;
3155 
3156 	if (!dev)
3157 		return -EINVAL;
3158 
3159 	name = rte_vdev_device_name(dev);
3160 	RTE_LOG(INFO, EAL, "Uninitializing pmd_bond for %s\n", name);
3161 
3162 	/* now free all data allocation - for eth_dev structure,
3163 	 * dummy pci driver and internal (private) data
3164 	 */
3165 
3166 	/* find an ethdev entry */
3167 	eth_dev = rte_eth_dev_allocated(name);
3168 	if (eth_dev == NULL)
3169 		return -ENODEV;
3170 
3171 	RTE_ASSERT(eth_dev->device == &dev->device);
3172 
3173 	internals = eth_dev->data->dev_private;
3174 	if (internals->slave_count != 0)
3175 		return -EBUSY;
3176 
3177 	if (eth_dev->data->dev_started == 1) {
3178 		bond_ethdev_stop(eth_dev);
3179 		bond_ethdev_close(eth_dev);
3180 	}
3181 
3182 	eth_dev->dev_ops = NULL;
3183 	eth_dev->rx_pkt_burst = NULL;
3184 	eth_dev->tx_pkt_burst = NULL;
3185 
3186 	internals = eth_dev->data->dev_private;
3187 	/* Try to release the mempool used in mode 6. If the bond
3188 	 * device is not in mode 6, freeing NULL is not a problem.
3189 	 */
3190 	rte_mempool_free(internals->mode6.mempool);
3191 	rte_bitmap_free(internals->vlan_filter_bmp);
3192 	rte_free(internals->vlan_filter_bmpmem);
3193 	rte_free(eth_dev->data->dev_private);
3194 	rte_free(eth_dev->data->mac_addrs);
3195 
3196 	rte_eth_dev_release_port(eth_dev);
3197 
3198 	return 0;
3199 }
3200 
3201 /* this part will resolve the slave port ids after all the other pdevs and vdevs
3202  * have been allocated */
3203 static int
3204 bond_ethdev_configure(struct rte_eth_dev *dev)
3205 {
3206 	const char *name = dev->device->name;
3207 	struct bond_dev_private *internals = dev->data->dev_private;
3208 	struct rte_kvargs *kvlist = internals->kvlist;
3209 	int arg_count;
3210 	uint16_t port_id = dev - rte_eth_devices;
3211 	uint8_t agg_mode;
3212 
3213 	static const uint8_t default_rss_key[40] = {
3214 		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3215 		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3216 		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3217 		0xBE, 0xAC, 0x01, 0xFA
3218 	};
3219 
3220 	unsigned i, j;
3221 
3222 	/* If RSS is enabled, fill table and key with default values */
3223 	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3224 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
3225 		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
3226 		memcpy(internals->rss_key, default_rss_key, 40);
3227 
3228 		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
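		/* Spread the Rx queues round-robin over the RETA; e.g. with
		 * 4 Rx queues every 64-entry group becomes 0,1,2,3,0,1,2,3,... */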
3229 			internals->reta_conf[i].mask = ~0LL;
3230 			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3231 				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
3232 		}
3233 	}
3234 
3235 	/* set the max_rx_pktlen */
3236 	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3237 
3238 	/*
3239 	 * if no kvlist, it means that this bonded device has been created
3240 	 * through the bonding api.
3241 	 */
3242 	if (!kvlist)
3243 		return 0;
3244 
3245 	/* Parse MAC address for bonded device */
3246 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3247 	if (arg_count == 1) {
3248 		struct ether_addr bond_mac;
3249 
3250 		if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3251 				&bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3252 			RTE_LOG(INFO, EAL, "Invalid mac address for bonded device %s\n",
3253 					name);
3254 			return -1;
3255 		}
3256 
3257 		/* Set MAC address */
3258 		if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3259 			RTE_LOG(ERR, EAL,
3260 					"Failed to set mac address on bonded device %s\n",
3261 					name);
3262 			return -1;
3263 		}
3264 	} else if (arg_count > 1) {
3265 		RTE_LOG(ERR, EAL,
3266 				"MAC address can be specified only once for bonded device %s\n",
3267 				name);
3268 		return -1;
3269 	}
3270 
3271 	/* Parse/set balance mode transmit policy */
3272 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3273 	if (arg_count == 1) {
3274 		uint8_t xmit_policy;
3275 
3276 		if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3277 				&bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3278 						0) {
3279 			RTE_LOG(INFO, EAL,
3280 					"Invalid xmit policy specified for bonded device %s\n",
3281 					name);
3282 			return -1;
3283 		}
3284 
3285 		/* Set balance mode transmit policy*/
3286 		if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3287 			RTE_LOG(ERR, EAL,
3288 					"Failed to set balance xmit policy on bonded device %s\n",
3289 					name);
3290 			return -1;
3291 		}
3292 	} else if (arg_count > 1) {
3293 		RTE_LOG(ERR, EAL,
3294 				"Transmit policy can be specified only once for bonded device"
3295 				" %s\n", name);
3296 		return -1;
3297 	}
3298 
3299 	if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3300 		if (rte_kvargs_process(kvlist,
3301 				PMD_BOND_AGG_MODE_KVARG,
3302 				&bond_ethdev_parse_slave_agg_mode_kvarg,
3303 				&agg_mode) != 0) {
3304 			RTE_LOG(ERR, EAL,
3305 					"Failed to parse agg selection mode for bonded device %s\n",
3306 					name);
3307 		}
3308 		if (internals->mode == BONDING_MODE_8023AD)
3309 				rte_eth_bond_8023ad_agg_selection_set(port_id,
3310 						agg_mode);
3311 	}
3312 
3313 	/* Parse/add slave ports to bonded device */
3314 	if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3315 		struct bond_ethdev_slave_ports slave_ports;
3316 		unsigned i;
3317 
3318 		memset(&slave_ports, 0, sizeof(slave_ports));
3319 
3320 		if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3321 				&bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3322 			RTE_LOG(ERR, EAL,
3323 					"Failed to parse slave ports for bonded device %s\n",
3324 					name);
3325 			return -1;
3326 		}
3327 
3328 		for (i = 0; i < slave_ports.slave_count; i++) {
3329 			if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3330 				RTE_LOG(ERR, EAL,
3331 						"Failed to add port %d as slave to bonded device %s\n",
3332 						slave_ports.slaves[i], name);
3333 			}
3334 		}
3335 
3336 	} else {
3337 		RTE_LOG(INFO, EAL, "No slaves specified for bonded device %s\n", name);
3338 		return -1;
3339 	}
3340 
3341 	/* Parse/set primary slave port id*/
3342 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3343 	if (arg_count == 1) {
3344 		uint16_t primary_slave_port_id;
3345 
3346 		if (rte_kvargs_process(kvlist,
3347 				PMD_BOND_PRIMARY_SLAVE_KVARG,
3348 				&bond_ethdev_parse_primary_slave_port_id_kvarg,
3349 				&primary_slave_port_id) < 0) {
3350 			RTE_LOG(INFO, EAL,
3351 					"Invalid primary slave port id specified for bonded device"
3352 					" %s\n", name);
3353 			return -1;
3354 		}
3355 
3356 		/* Set balance mode transmit policy*/
3357 		if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3358 				!= 0) {
3359 			RTE_LOG(ERR, EAL,
3360 					"Failed to set primary slave port %d on bonded device %s\n",
3361 					primary_slave_port_id, name);
3362 			return -1;
3363 		}
3364 	} else if (arg_count > 1) {
3365 		RTE_LOG(INFO, EAL,
3366 				"Primary slave can be specified only once for bonded device"
3367 				" %s\n", name);
3368 		return -1;
3369 	}
3370 
3371 	/* Parse link status monitor polling interval */
3372 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3373 	if (arg_count == 1) {
3374 		uint32_t lsc_poll_interval_ms;
3375 
3376 		if (rte_kvargs_process(kvlist,
3377 				PMD_BOND_LSC_POLL_PERIOD_KVARG,
3378 				&bond_ethdev_parse_time_ms_kvarg,
3379 				&lsc_poll_interval_ms) < 0) {
3380 			RTE_LOG(INFO, EAL,
3381 					"Invalid lsc polling interval value specified for bonded"
3382 					" device %s\n", name);
3383 			return -1;
3384 		}
3385 
3386 		if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3387 				!= 0) {
3388 			RTE_LOG(ERR, EAL,
3389 					"Failed to set lsc monitor polling interval (%u ms) on"
3390 					" bonded device %s\n", lsc_poll_interval_ms, name);
3391 			return -1;
3392 		}
3393 	} else if (arg_count > 1) {
3394 		RTE_LOG(INFO, EAL,
3395 				"LSC polling interval can be specified only once for bonded"
3396 				" device %s\n", name);
3397 		return -1;
3398 	}
3399 
3400 	/* Parse link up interrupt propagation delay */
3401 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3402 	if (arg_count == 1) {
3403 		uint32_t link_up_delay_ms;
3404 
3405 		if (rte_kvargs_process(kvlist,
3406 				PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3407 				&bond_ethdev_parse_time_ms_kvarg,
3408 				&link_up_delay_ms) < 0) {
3409 			RTE_LOG(INFO, EAL,
3410 					"Invalid link up propagation delay value specified for"
3411 					" bonded device %s\n", name);
3412 			return -1;
3413 		}
3414 
3415 		/* Set balance mode transmit policy*/
3416 		if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3417 				!= 0) {
3418 			RTE_LOG(ERR, EAL,
3419 					"Failed to set link up propagation delay (%u ms) on bonded"
3420 					" device %s\n", link_up_delay_ms, name);
3421 			return -1;
3422 		}
3423 	} else if (arg_count > 1) {
3424 		RTE_LOG(INFO, EAL,
3425 				"Link up propagation delay can be specified only once for"
3426 				" bonded device %s\n", name);
3427 		return -1;
3428 	}
3429 
3430 	/* Parse link down interrupt propagation delay */
3431 	arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3432 	if (arg_count == 1) {
3433 		uint32_t link_down_delay_ms;
3434 
3435 		if (rte_kvargs_process(kvlist,
3436 				PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3437 				&bond_ethdev_parse_time_ms_kvarg,
3438 				&link_down_delay_ms) < 0) {
3439 			RTE_LOG(INFO, EAL,
3440 					"Invalid link down propagation delay value specified for"
3441 					" bonded device %s\n", name);
3442 			return -1;
3443 		}
3444 
3445 		/* Set balance mode transmit policy*/
3446 		if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3447 				!= 0) {
3448 			RTE_LOG(ERR, EAL,
3449 					"Failed to set link down propagation delay (%u ms) on"
3450 					" bonded device %s\n", link_down_delay_ms, name);
3451 			return -1;
3452 		}
3453 	} else if (arg_count > 1) {
3454 		RTE_LOG(INFO, EAL,
3455 				"Link down propagation delay can be specified only once for"
3456 				" bonded device %s\n", name);
3457 		return -1;
3458 	}
3459 
3460 	return 0;
3461 }
3462 
3463 struct rte_vdev_driver pmd_bond_drv = {
3464 	.probe = bond_probe,
3465 	.remove = bond_remove,
3466 };
3467 
3468 RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3469 RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3470 
3471 RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3472 	"slave=<ifc> "
3473 	"primary=<ifc> "
3474 	"mode=[0-6] "
3475 	"xmit_policy=[l2 | l23 | l34] "
3476 	"agg_mode=[count | stable | bandwidth] "
3477 	"socket_id=<int> "
3478 	"mac=<mac addr> "
3479 	"lsc_poll_period_ms=<int> "
3480 	"up_delay=<int> "
3481 	"down_delay=<int>");
3482